From 10e2e586b28b6d9d43529e1b6b2ea7206f402738 Mon Sep 17 00:00:00 2001
From: Quentin Dufour <quentin@deuxfleurs.fr>
Date: Fri, 16 Jun 2023 12:07:17 +0200
Subject: [PATCH] add compatibility with obsolete syntax

---
 src/misc_token.rs | 16 +++++++++++++---
 src/quoted.rs     | 13 +++++++++----
 src/whitespace.rs | 19 ++++++++++++++++++-
 3 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/src/misc_token.rs b/src/misc_token.rs
index a21cdbc..44fabf2 100644
--- a/src/misc_token.rs
+++ b/src/misc_token.rs
@@ -2,6 +2,7 @@ use std::borrow::Cow;
 use nom::{
     IResult,
     branch::alt,
+    bytes::complete::take_while1,
     character::complete::space0,
     combinator::{into, opt},
     multi::{many0, many1},
@@ -9,8 +10,8 @@ use nom::{
 };
 
 use crate::quoted::quoted_string;
-use crate::whitespace::fws;
-use crate::words::{atom, vchar_seq};
+use crate::whitespace::{fws, is_obs_no_ws_ctl};
+use crate::words::{atom, is_vchar};
 
 /// Word
 ///
@@ -32,13 +33,22 @@ pub fn phrase(input: &str) -> IResult<&str, String> {
     Ok((input, phrase))
 }
 
+/// Tells whether `c` may appear in unstructured text, obsolete forms included
+///
+/// ```abnf
+/// obs-utext       =   %d0 / obs-NO-WS-CTL / VCHAR
+/// ```
+fn is_unstructured(c: char) -> bool {
+    is_vchar(c) || is_obs_no_ws_ctl(c) || c == '\0'
+}
+
 /// Unstructured header field body
 ///
 /// ```abnf
 /// unstructured    =   (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct
 /// ```
 pub fn unstructured(input: &str) -> IResult<&str, String> {
-    let (input, r) = many0(tuple((opt(fws), vchar_seq)))(input)?;
+    let (input, r) = many0(tuple((opt(fws), take_while1(is_unstructured))))(input)?;
     let (input, _) = space0(input)?;
 
     // Try to optimize for the most common cases
diff --git a/src/quoted.rs b/src/quoted.rs
index 4b8af27..8dc5986 100644
--- a/src/quoted.rs
+++ b/src/quoted.rs
@@ -2,22 +2,23 @@ use nom::{
     IResult,
     branch::alt,
     bytes::complete::tag,
-    character::complete::satisfy,
+    character::complete::{anychar, satisfy},
     combinator::opt,
     multi::many0,
     sequence::{pair, preceded},
 };
 
 use crate::words::is_vchar;
-use crate::whitespace::{fws, cfws};
+use crate::whitespace::{fws, cfws, is_obs_no_ws_ctl};
 
 /// Quoted pair
 ///
 /// ```abnf
 ///    quoted-pair     =   ("\" (VCHAR / WSP)) / obs-qp
+///    obs-qp          =   "\" (%d0 / obs-NO-WS-CTL / LF / CR)
 /// ```
 pub fn quoted_pair(input: &str) -> IResult<&str, char> {
-    preceded(tag("\\"), satisfy(|c| is_vchar(c) || c == '\t' || c == ' '))(input)
+    preceded(tag("\\"), anychar)(input) // any escaped char is accepted so obs-qp forms parse too
 }
 
 /// Allowed characters in quote
@@ -28,10 +29,14 @@ pub fn quoted_pair(input: &str) -> IResult<&str, char> {
 ///                       %d93-126 /         ;  "\" or the quote character
 ///                       obs-qtext
 /// ```
-fn is_qtext(c: char) -> bool {
+fn is_restr_qtext(c: char) -> bool {
     c == '\x21' || (c >= '\x23' && c <= '\x5B') || (c >= '\x5D' && c <= '\x7E')
 }
 
+fn is_qtext(c: char) -> bool {
+    is_restr_qtext(c) || is_obs_no_ws_ctl(c) // strict qtext, or an obsolete control char (obs-qtext)
+}
+
 /// Quoted pair content
 ///
 /// ```abnf
diff --git a/src/whitespace.rs b/src/whitespace.rs
index 22589ac..f9a79d1 100644
--- a/src/whitespace.rs
+++ b/src/whitespace.rs
@@ -91,10 +91,27 @@ pub fn ctext(input: &str) -> IResult<&str, char> {
 ///                       %d93-126 /         ;  "(", ")", or "\"
 ///                       obs-ctext
 ///```
-pub fn is_ctext(c: char) -> bool {
+pub fn is_restr_ctext(c: char) -> bool {
     (c >= '\x21' && c <= '\x27') || (c >= '\x2A' && c <= '\x5B') || (c >= '\x5D' && c <= '\x7E') || !c.is_ascii()
 }
 
+pub fn is_ctext(c: char) -> bool {
+    is_restr_ctext(c) || is_obs_no_ws_ctl(c) // strict ctext, or an obsolete control char (obs-ctext)
+}
+
+/// Obsolete US-ASCII control characters that are neither CR, LF nor white space
+///
+/// ```abnf
+///   obs-NO-WS-CTL   =   %d1-8 /            ; US-ASCII control
+///                       %d11 /             ;  characters that do not
+///                       %d12 /             ;  include the carriage
+///                       %d14-31 /          ;  return, line feed, and
+///                       %d127              ;  white space characters
+/// ```
+pub fn is_obs_no_ws_ctl(c: char) -> bool {
+    matches!(c, '\x01'..='\x08' | '\x0b' | '\x0c' | '\x0e'..='\x1f' | '\x7f')
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;