quoted string

This commit is contained in:
Quentin 2023-06-12 21:19:38 +02:00
parent 1e6b18de5a
commit 7052443bb5
Signed by: quentin
GPG key ID: E9602264D639FF68
2 changed files with 96 additions and 29 deletions

View file

@ -11,7 +11,7 @@ use nom::{
sequence::tuple, sequence::tuple,
}; };
use crate::tokens::{perm_fws, vchar_seq, perm_crlf}; use crate::tokens::{fws, vchar_seq, perm_crlf};
use crate::model::{PermissiveHeaderSection, HeaderDate, MailboxRef}; use crate::model::{PermissiveHeaderSection, HeaderDate, MailboxRef};
/// HEADERS /// HEADERS
@ -130,7 +130,7 @@ fn header_field(input: &str) -> IResult<&str, HeaderField> {
/// unstructured = (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct /// unstructured = (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct
/// ``` /// ```
fn unstructured(input: &str) -> IResult<&str, String> { fn unstructured(input: &str) -> IResult<&str, String> {
let (input, r) = many0(tuple((opt(perm_fws), vchar_seq)))(input)?; let (input, r) = many0(tuple((opt(fws), vchar_seq)))(input)?;
let (input, _) = space0(input)?; let (input, _) = space0(input)?;
// Try to optimize for the most common cases // Try to optimize for the most common cases
@ -140,7 +140,7 @@ fn unstructured(input: &str) -> IResult<&str, String> {
lines => lines.iter().fold(String::with_capacity(255), |acc, item| { lines => lines.iter().fold(String::with_capacity(255), |acc, item| {
let (may_ws, content) = item; let (may_ws, content) = item;
match may_ws { match may_ws {
Some(ws) => acc + " " + content, Some(_) => acc + " " + content,
None => acc + content, None => acc + content,
} }
}), }),

View file

@ -14,19 +14,7 @@ use nom::{
/// https://www.rfc-editor.org/rfc/rfc5322#section-3.2 /// https://www.rfc-editor.org/rfc/rfc5322#section-3.2
/// Also https://datatracker.ietf.org/doc/html/rfc6532 /// Also https://datatracker.ietf.org/doc/html/rfc6532
/// Permissive CRLF // quoted characters and strings
///
/// Theoretically, all lines must end with \r\n
/// but some mail servers like Dovecot support malformed emails,
/// for example with only \n as end of line. It works because
/// \r or \n is allowed nowhere else, so we also add this support.
pub fn perm_crlf(input: &str) -> IResult<&str, &str> {
    // Alternatives are tried in order: the strict CRLF pair first, so that
    // "\r\n" is consumed as a whole, then a lone "\r", then a lone "\n".
    let strict = crlf;
    let lone_cr = tag("\r");
    let lone_lf = tag("\n");
    alt((strict, lone_cr, lone_lf))(input)
}
// Note: WSP = SP / HTAB = %x20 / %x09
// nom::*::space0 = *WSP
// nom::*::space1 = 1*WSP
/// Quoted pair /// Quoted pair
/// ///
@ -37,13 +25,84 @@ pub fn quoted_pair(input: &str) -> IResult<&str, char> {
preceded(tag("\\"), satisfy(|c| is_vchar(c) || c == '\t' || c == ' '))(input) preceded(tag("\\"), satisfy(|c| is_vchar(c) || c == '\t' || c == ' '))(input)
} }
/// Allowed characters in a quoted string
///
/// ```abnf
/// qtext = %d33 /          ; Printable US-ASCII
///         %d35-91 /       ; characters not including
///         %d93-126 /      ; "\" or the quote character
///         obs-qtext
/// ```
///
/// RFC 6532 (section 3.2) extends this rule with `qtext =/ UTF8-non-ascii`,
/// so every non-ASCII character is accepted as well, like `is_ctext` and
/// `is_vchar` do in this file.
///
/// Note: `obs-qtext` (obsolete ASCII control characters) is not accepted here.
///
/// Returns `true` when `c` may appear unescaped between the double quotes.
fn is_qtext(c: char) -> bool {
    // '\x22' (DQUOTE) and '\x5C' (backslash) are the two printable ASCII
    // characters deliberately left out: they need a quoted-pair.
    matches!(c, '\x21' | '\x23'..='\x5B' | '\x5D'..='\x7E') || !c.is_ascii()
}
/// Quoted string content
///
/// A single character of a quoted string: either a plain `qtext`
/// character, or a backslash-escaped one (`quoted-pair`).
///
/// ```abnf
/// qcontent = qtext / quoted-pair
/// ```
fn qcontent(input: &str) -> IResult<&str, char> {
    // The unescaped form is tried first; the escaped form is the fallback.
    let plain = satisfy(is_qtext);
    let escaped = quoted_pair;
    alt((plain, escaped))(input)
}
/// Quoted string
///
/// ```abnf
/// quoted-string = [CFWS]
///                 DQUOTE *([FWS] qcontent) [FWS] DQUOTE
///                 [CFWS]
/// ```
///
/// On success, returns the remaining input and the text found between the
/// double quotes. Each folding white space matched by `fws` contributes the
/// single character that `fws` yields; surrounding `[CFWS]` and the quotes
/// themselves are dropped.
pub fn quoted_string(input: &str) -> IResult<&str, String> {
    // Leading [CFWS] and the opening DQUOTE carry no content.
    let (rest, _) = opt(cfws)(input)?;
    let (rest, _) = tag("\"")(rest)?;

    // *([FWS] qcontent) followed by the optional trailing [FWS].
    let (rest, pieces) = many0(pair(opt(fws), qcontent))(rest)?;
    let (rest, trailing) = opt(fws)(rest)?;

    // Rebuild the text: an optional white space, then the character itself.
    let mut text = String::with_capacity(16);
    for (gap, ch) in pieces {
        text.extend(gap);
        text.push(ch);
    }
    text.extend(trailing);

    // Closing DQUOTE and trailing [CFWS].
    let (rest, _) = tag("\"")(rest)?;
    let (rest, _) = opt(cfws)(rest)?;

    Ok((rest, text))
}
// --- whitespaces and comments
// Note: WSP = SP / HTAB = %x20 / %x09
// nom::*::space0 = *WSP
// nom::*::space1 = 1*WSP
/// Permissive CRLF
///
/// Theoretically, every line must end with \r\n,
/// but some mail servers like Dovecot accept malformed emails,
/// for example ones using only \n as end of line. It works because
/// \r and \n are allowed nowhere else, so we support it too.
pub fn perm_crlf(input: &str) -> IResult<&str, &str> {
    // Alternatives are tried in order: the strict CRLF pair first, so that
    // "\r\n" is consumed as a whole, then a lone "\r", then a lone "\n".
    let strict = crlf;
    let lone_cr = tag("\r");
    let lone_lf = tag("\n");
    alt((strict, lone_cr, lone_lf))(input)
}
/// Permissive foldable white space /// Permissive foldable white space
/// ///
/// Folding white space is used for long headers split across multiple lines. /// Folding white space is used for long headers split across multiple lines.
/// The obsolete syntax allows multiple lines without content; implemented for compatibility /// The obsolete syntax allows multiple lines without content; implemented for compatibility
/// reasons /// reasons
pub fn perm_fws(input: &str) -> IResult<&str, &str> { pub fn fws(input: &str) -> IResult<&str, char> {
alt((recognize(many1(fold_marker)), space1))(input) let (input, _) = alt((recognize(many1(fold_marker)), space1))(input)?;
Ok((input, ' '))
} }
fn fold_marker(input: &str) -> IResult<&str, &str> { fn fold_marker(input: &str) -> IResult<&str, &str> {
let (input, _) = space0(input)?; let (input, _) = space0(input)?;
@ -68,19 +127,19 @@ fn fold_marker(input: &str) -> IResult<&str, &str> {
/// CFWS = (1*([FWS] comment) [FWS]) / FWS /// CFWS = (1*([FWS] comment) [FWS]) / FWS
/// ``` /// ```
pub fn cfws(input: &str) -> IResult<&str, &str> { pub fn cfws(input: &str) -> IResult<&str, &str> {
alt((recognize(comments), perm_fws))(input) alt((recognize(comments), recognize(fws)))(input)
} }
pub fn comments(input: &str) -> IResult<&str, ()> { pub fn comments(input: &str) -> IResult<&str, ()> {
let (input, _) = many1(tuple((opt(perm_fws), comment)))(input)?; let (input, _) = many1(tuple((opt(fws), comment)))(input)?;
let (input, _) = opt(perm_fws)(input)?; let (input, _) = opt(fws)(input)?;
Ok((input, ())) Ok((input, ()))
} }
pub fn comment(input: &str) -> IResult<&str, ()> { pub fn comment(input: &str) -> IResult<&str, ()> {
let (input, _) = tag("(")(input)?; let (input, _) = tag("(")(input)?;
let (input, _) = many0(tuple((opt(perm_fws), ccontent)))(input)?; let (input, _) = many0(tuple((opt(fws), ccontent)))(input)?;
let (input, _) = opt(perm_fws)(input)?; let (input, _) = opt(fws)(input)?;
let (input, _) = tag(")")(input)?; let (input, _) = tag(")")(input)?;
Ok((input, ())) Ok((input, ()))
} }
@ -105,6 +164,8 @@ pub fn is_ctext(c: char) -> bool {
(c >= '\x21' && c <= '\x27') || (c >= '\x2A' && c <= '\x5B') || (c >= '\x5D' && c <= '\x7E') || !c.is_ascii() (c >= '\x21' && c <= '\x27') || (c >= '\x2A' && c <= '\x5B') || (c >= '\x5D' && c <= '\x7E') || !c.is_ascii()
} }
// atoms, words, phrases, vchar
/// VCHAR definition /// VCHAR definition
pub fn is_vchar(c: char) -> bool { pub fn is_vchar(c: char) -> bool {
(c >= '\x21' && c <= '\x7E') || !c.is_ascii() (c >= '\x21' && c <= '\x7E') || !c.is_ascii()
@ -122,11 +183,12 @@ pub fn vchar_seq(input: &str) -> IResult<&str, &str> {
take_while1(is_vchar)(input) take_while1(is_vchar)(input)
} }
/// Atom allowed characters
fn is_atext(c: char) -> bool { fn is_atext(c: char) -> bool {
c.is_ascii_alphanumeric() || "!#$%&'*+-/=?^_`{|}~".contains(c) c.is_ascii_alphanumeric() || "!#$%&'*+-/=?^_`{|}~".contains(c)
} }
/// atom /// Atom
/// ///
/// `[CFWS] 1*atext [CFWS]` /// `[CFWS] 1*atext [CFWS]`
fn atom(input: &str) -> IResult<&str, &str> { fn atom(input: &str) -> IResult<&str, &str> {
@ -166,11 +228,11 @@ mod tests {
} }
#[test] #[test]
fn test_perm_fws() { fn test_fws() {
assert_eq!(perm_fws("\r\n world"), Ok(("world", "\r\n "))); assert_eq!(fws("\r\n world"), Ok(("world", ' ')));
assert_eq!(perm_fws(" \r\n \r\n world"), Ok(("world", " \r\n \r\n "))); assert_eq!(fws(" \r\n \r\n world"), Ok(("world", ' ')));
assert_eq!(perm_fws(" world"), Ok(("world", " "))); assert_eq!(fws(" world"), Ok(("world", ' ')));
assert!(perm_fws("\r\nFrom: test").is_err()); assert!(fws("\r\nFrom: test").is_err());
} }
#[test] #[test]
@ -203,4 +265,9 @@ mod tests {
fn test_dot_atom() { fn test_dot_atom() {
assert_eq!(dot_atom(" (skip) quentin.dufour.io abcdef"), Ok(("abcdef", "quentin.dufour.io"))); assert_eq!(dot_atom(" (skip) quentin.dufour.io abcdef"), Ok(("abcdef", "quentin.dufour.io")));
} }
#[test]
fn test_quoted_string() {
assert_eq!(quoted_string(" \"hello\\\"world\" "), Ok(("", "hello\"world".to_string())));
}
} }