diff --git a/src/misc_token.rs b/src/misc_token.rs index a21cdbc..44fabf2 100644 --- a/src/misc_token.rs +++ b/src/misc_token.rs @@ -2,6 +2,7 @@ use std::borrow::Cow; use nom::{ IResult, branch::alt, + bytes::complete::take_while1, character::complete::space0, combinator::{into, opt}, multi::{many0, many1}, @@ -9,8 +10,8 @@ use nom::{ }; use crate::quoted::quoted_string; -use crate::whitespace::fws; -use crate::words::{atom, vchar_seq}; +use crate::whitespace::{fws, is_obs_no_ws_ctl}; +use crate::words::{atom, is_vchar}; /// Word /// @@ -32,13 +33,22 @@ pub fn phrase(input: &str) -> IResult<&str, String> { Ok((input, phrase)) } +/// Compatible unstructured input +/// +/// ```abnf +/// obs-utext = %d0 / obs-NO-WS-CTL / VCHAR +/// ``` +fn is_unstructured(c: char) -> bool { + is_vchar(c) || is_obs_no_ws_ctl(c) || c == '\x00' +} + /// Unstructured header field body /// /// ```abnf /// unstructured = (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct /// ``` pub fn unstructured(input: &str) -> IResult<&str, String> { - let (input, r) = many0(tuple((opt(fws), vchar_seq)))(input)?; + let (input, r) = many0(tuple((opt(fws), take_while1(is_unstructured))))(input)?; let (input, _) = space0(input)?; // Try to optimize for the most common cases diff --git a/src/quoted.rs b/src/quoted.rs index 4b8af27..8dc5986 100644 --- a/src/quoted.rs +++ b/src/quoted.rs @@ -2,22 +2,23 @@ use nom::{ IResult, branch::alt, bytes::complete::tag, - character::complete::satisfy, + character::complete::{anychar, satisfy}, combinator::opt, multi::many0, sequence::{pair, preceded}, }; use crate::words::is_vchar; -use crate::whitespace::{fws, cfws}; +use crate::whitespace::{fws, cfws, is_obs_no_ws_ctl}; /// Quoted pair /// /// ```abnf /// quoted-pair = ("\" (VCHAR / WSP)) / obs-qp +/// obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR) /// ``` pub fn quoted_pair(input: &str) -> IResult<&str, char> { - preceded(tag("\\"), satisfy(|c| is_vchar(c) || c == '\t' || c == ' '))(input) + preceded(tag("\\"), anychar)(input) } 
/// Allowed characters in quote @@ -28,10 +29,14 @@ pub fn quoted_pair(input: &str) -> IResult<&str, char> { /// %d93-126 / ; "\" or the quote character /// obs-qtext /// ``` -fn is_qtext(c: char) -> bool { +fn is_restr_qtext(c: char) -> bool { c == '\x21' || (c >= '\x23' && c <= '\x5B') || (c >= '\x5D' && c <= '\x7E') } +fn is_qtext(c: char) -> bool { + is_restr_qtext(c) || is_obs_no_ws_ctl(c) +} + /// Quoted pair content /// /// ```abnf diff --git a/src/whitespace.rs b/src/whitespace.rs index 22589ac..f9a79d1 100644 --- a/src/whitespace.rs +++ b/src/whitespace.rs @@ -91,10 +91,27 @@ pub fn ctext(input: &str) -> IResult<&str, char> { /// %d93-126 / ; "(", ")", or "\" /// obs-ctext ///``` -pub fn is_ctext(c: char) -> bool { +pub fn is_restr_ctext(c: char) -> bool { (c >= '\x21' && c <= '\x27') || (c >= '\x2A' && c <= '\x5B') || (c >= '\x5D' && c <= '\x7E') || !c.is_ascii() } +pub fn is_ctext(c: char) -> bool { + is_restr_ctext(c) || is_obs_no_ws_ctl(c) +} + +/// US ASCII control characters without effect +/// +/// ```abnf +/// obs-NO-WS-CTL = %d1-8 / ; US-ASCII control +/// %d11 / ; characters that do not +/// %d12 / ; include the carriage +/// %d14-31 / ; return, line feed, and +/// %d127 ; white space characters +/// ``` +pub fn is_obs_no_ws_ctl(c: char) -> bool { + (c >= '\x01' && c <= '\x08') || c == '\x0b' || c == '\x0c' || (c >= '\x0e' && c<= '\x1f') || c == '\x7F' +} + #[cfg(test)] mod tests { use super::*;