2023-06-22 15:08:50 +02:00
|
|
|
use crate::fragments::quoted::quoted_pair;
|
2023-06-12 22:08:34 +02:00
|
|
|
use nom::{
|
|
|
|
branch::alt,
|
2023-07-16 09:55:47 +02:00
|
|
|
bytes::complete::{is_not, tag},
|
2023-06-12 22:08:34 +02:00
|
|
|
character::complete::{crlf, satisfy, space0, space1},
|
2023-06-22 15:08:50 +02:00
|
|
|
combinator::{opt, recognize},
|
2023-06-12 22:08:34 +02:00
|
|
|
multi::{many0, many1},
|
2023-07-16 09:55:47 +02:00
|
|
|
sequence::{pair, tuple},
|
2023-06-22 15:08:50 +02:00
|
|
|
IResult,
|
2023-06-12 22:08:34 +02:00
|
|
|
};
|
2023-07-14 10:43:31 +02:00
|
|
|
use crate::fragments::encoding::encoded_word;
|
2023-06-12 22:08:34 +02:00
|
|
|
|
2023-07-16 09:55:47 +02:00
|
|
|
// Line-ending bytes: carriage return, line feed, and the two-byte CRLF sequence.
const CR: u8 = b'\r';
const LF: u8 = b'\n';
pub const CRLF: &[u8] = &[CR, LF];
|
2023-07-16 09:55:47 +02:00
|
|
|
|
|
|
|
pub fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
|
2023-07-17 11:44:55 +02:00
|
|
|
// is_not(CRLF) is a hack, it means "is not CR or LF"
|
|
|
|
// and not "is not CRLF". In other words, it continues while
|
|
|
|
// it does not encounter 0x0D or 0x0A.
|
2023-07-16 09:55:47 +02:00
|
|
|
pair(is_not(CRLF), obs_crlf)(input)
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn obs_crlf(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
|
|
|
alt((tag(CRLF), tag(&[CR]), tag(&[LF])))(input)
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2023-06-12 22:08:34 +02:00
|
|
|
// --- whitespaces and comments
|
|
|
|
|
|
|
|
// Note: WSP = SP / HTAB = %x20 / %x09
|
|
|
|
// nom::*::space0 = *WSP
|
|
|
|
// nom::*::space1 = 1*WSP
|
|
|
|
|
|
|
|
/// Permissive CRLF
|
|
|
|
///
|
|
|
|
/// Theoretically, all lines must end with \r\n
|
|
|
|
/// but some mail servers like Dovecot support malformated emails,
|
|
|
|
/// for example with only \n eol. It works because
|
|
|
|
/// \r or \n is allowed nowhere else, so we also add this support.
|
|
|
|
pub fn perm_crlf(input: &str) -> IResult<&str, &str> {
|
|
|
|
alt((crlf, tag("\r"), tag("\n")))(input)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Permissive foldable white space
|
|
|
|
///
|
|
|
|
/// Folding white space are used for long headers splitted on multiple lines.
|
|
|
|
/// The obsolete syntax allowes multiple lines without content; implemented for compatibility
|
|
|
|
/// reasons
|
|
|
|
pub fn fws(input: &str) -> IResult<&str, char> {
|
|
|
|
let (input, _) = alt((recognize(many1(fold_marker)), space1))(input)?;
|
|
|
|
Ok((input, ' '))
|
|
|
|
}
|
|
|
|
fn fold_marker(input: &str) -> IResult<&str, &str> {
|
2023-06-22 15:08:50 +02:00
|
|
|
let (input, _) = space0(input)?;
|
|
|
|
let (input, _) = perm_crlf(input)?;
|
|
|
|
space1(input)
|
2023-06-12 22:08:34 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Folding White Space with Comment
|
|
|
|
///
|
|
|
|
/// Note: we drop the comments for now...
|
|
|
|
///
|
2023-06-13 09:18:36 +02:00
|
|
|
/// ```abnf
|
2023-06-12 22:08:34 +02:00
|
|
|
/// ctext = %d33-39 / ; Printable US-ASCII
|
|
|
|
/// %d42-91 / ; characters not including
|
|
|
|
/// %d93-126 / ; "(", ")", or "\"
|
|
|
|
/// obs-ctext
|
|
|
|
///
|
|
|
|
/// ccontent = ctext / quoted-pair / comment
|
|
|
|
///
|
|
|
|
/// comment = "(" *([FWS] ccontent) [FWS] ")"
|
|
|
|
///
|
|
|
|
/// CFWS = (1*([FWS] comment) [FWS]) / FWS
|
|
|
|
/// ```
|
|
|
|
pub fn cfws(input: &str) -> IResult<&str, &str> {
|
|
|
|
alt((recognize(comments), recognize(fws)))(input)
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn comments(input: &str) -> IResult<&str, ()> {
|
|
|
|
let (input, _) = many1(tuple((opt(fws), comment)))(input)?;
|
|
|
|
let (input, _) = opt(fws)(input)?;
|
|
|
|
Ok((input, ()))
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn comment(input: &str) -> IResult<&str, ()> {
|
|
|
|
let (input, _) = tag("(")(input)?;
|
|
|
|
let (input, _) = many0(tuple((opt(fws), ccontent)))(input)?;
|
|
|
|
let (input, _) = opt(fws)(input)?;
|
|
|
|
let (input, _) = tag(")")(input)?;
|
|
|
|
Ok((input, ()))
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn ccontent(input: &str) -> IResult<&str, &str> {
|
2023-07-14 10:43:31 +02:00
|
|
|
alt((recognize(ctext), recognize(quoted_pair), recognize(encoded_word), recognize(comment)))(input)
|
2023-06-12 22:08:34 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
pub fn ctext(input: &str) -> IResult<&str, char> {
|
|
|
|
satisfy(is_ctext)(input)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Check if it's a restricted comment text character
///
/// This is the `ctext` rule *without* its `obs-ctext` alternative
/// (see [`is_ctext`] for the complete rule):
///
/// ```abnf
///   ctext           =   %d33-39 /          ; Printable US-ASCII
///                       %d42-91 /          ;  characters not including
///                       %d93-126           ;  "(", ")", or "\"
/// ```
///
/// Any non-ASCII character is accepted as well, which goes beyond this
/// ABNF rule (presumably to tolerate raw UTF-8 inside comments).
pub fn is_restr_ctext(c: char) -> bool {
    matches!(c, '\x21'..='\x27' | '\x2A'..='\x5B' | '\x5D'..='\x7E') || !c.is_ascii()
}
|
|
|
|
|
2023-06-16 12:07:17 +02:00
|
|
|
pub fn is_ctext(c: char) -> bool {
|
|
|
|
is_restr_ctext(c) || is_obs_no_ws_ctl(c)
|
|
|
|
}
|
|
|
|
|
2023-06-22 15:08:50 +02:00
|
|
|
/// US ASCII control characters without effect
///
/// ```abnf
///   obs-NO-WS-CTL   =   %d1-8 /            ; US-ASCII control
///                       %d11 /             ;  characters that do not
///                       %d12 /             ;  include the carriage
///                       %d14-31 /          ;  return, line feed, and
///                       %d127              ;  white space characters
/// ```
///
/// NUL, HTAB (`%d9`), LF (`%d10`), CR (`%d13`) and space are therefore
/// rejected.
pub fn is_obs_no_ws_ctl(c: char) -> bool {
    // %d11 is vertical tab (\x0b) and %d12 is form feed (\x0c).
    matches!(
        c,
        '\x01'..='\x08' | '\x0b' | '\x0c' | '\x0e'..='\x1f' | '\x7f'
    )
}
|
|
|
|
|
2023-06-12 22:08:34 +02:00
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Every accepted end-of-line form is consumed and returned verbatim.
    #[test]
    fn test_perm_crlf() {
        let cases = [("\rworld", "\r"), ("\r\nworld", "\r\n"), ("\nworld", "\n")];
        for &(input, eol) in cases.iter() {
            assert_eq!(perm_crlf(input), Ok(("world", eol)));
        }
    }

    /// Folds and plain white space all collapse to one space; a line ending
    /// that is not followed by white space is not a fold.
    #[test]
    fn test_fws() {
        let folded = ["\r\n world", " \r\n \r\n world", " world"];
        for &input in folded.iter() {
            assert_eq!(fws(input), Ok(("world", ' ')));
        }

        let not_folded = fws("\r\nFrom: test");
        assert!(not_folded.is_err());
    }

    /// Comments (quoted pairs and nesting included) are consumed together
    /// with the white space that follows them.
    #[test]
    fn test_cfws() {
        // (input, remaining input, consumed span)
        let cases = [
            (
                "(A nice \\) chap) <pete(his account)@silly.test(his host)>",
                "<pete(his account)@silly.test(his host)>",
                "(A nice \\) chap) ",
            ),
            (
                "(Chris's host.)public.example>,",
                "public.example>,",
                "(Chris's host.)",
            ),
            (
                "(double (comment) is fun) wouch",
                "wouch",
                "(double (comment) is fun) ",
            ),
        ];
        for &(input, rest, consumed) in cases.iter() {
            assert_eq!(cfws(input), Ok((rest, consumed)));
        }
    }

    /// An encoded word inside a comment is consumed as part of the comment.
    #[test]
    fn test_cfws_encoded_word() {
        let input = "(=?US-ASCII?Q?Keith_Moore?=)";
        assert_eq!(cfws(input), Ok(("", input)));
    }
}
|