compile rfc5322/mailbox

This commit is contained in:
Quentin 2023-07-19 12:09:23 +02:00
parent f98b43aaba
commit 3c8d851ce6
Signed by: quentin
GPG key ID: E9602264D639FF68
4 changed files with 112 additions and 99 deletions

View file

@ -1,5 +1,5 @@
pub mod error; pub mod error;
//pub mod mime; //pub mod mime;
//pub mod message;
pub mod headers; pub mod headers;
pub mod text; pub mod text;
pub mod rfc5322;

View file

@ -1,52 +1,58 @@
use nom::{ use nom::{
branch::alt, branch::alt,
bytes::complete::{is_a, tag}, bytes::complete::{tag, take_while1},
character::complete::satisfy, combinator::{into, map, opt},
combinator::{into, map, opt, recognize}, multi::{many0, separated_list1},
multi::{fold_many0, many0, separated_list1},
sequence::{delimited, pair, preceded, terminated, tuple}, sequence::{delimited, pair, preceded, terminated, tuple},
IResult, IResult,
}; };
use std::borrow::Cow;
use crate::fragments::misc_token::{phrase, word}; use crate::text::misc_token::{phrase, word, Word, Phrase};
use crate::fragments::quoted::quoted_string; use crate::text::whitespace::{cfws, fws, is_obs_no_ws_ctl};
use crate::fragments::whitespace::{cfws, fws, is_obs_no_ws_ctl}; use crate::text::words::{atom};
use crate::fragments::words::{atom, dot_atom}; use crate::text::ascii;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct AddrSpec { pub struct AddrSpec<'a> {
pub local_part: String, pub local_part: LocalPart<'a>,
pub domain: String, pub domain: Domain<'a>,
} }
impl AddrSpec { impl<'a> AddrSpec<'a> {
pub fn fully_qualified(&self) -> String { pub fn to_string(&self) -> String {
format!("{}@{}", self.local_part, self.domain) format!("{}@{}", self.local_part.to_string(), self.domain.to_string())
} }
} }
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct MailboxRef { pub struct MailboxRef<'a> {
// The actual "email address" like hello@example.com // The actual "email address" like hello@example.com
pub addrspec: AddrSpec, pub addrspec: AddrSpec<'a>,
pub name: Option<String>, pub name: Option<Phrase<'a>>,
} }
impl From<AddrSpec> for MailboxRef { impl<'a> MailboxRef<'a> {
fn from(addr: AddrSpec) -> Self { pub fn to_string(&self) -> String {
match &self.name {
Some(n) => format!("{} <{}>", n.to_string(), self.addrspec.to_string()),
None => self.addrspec.to_string()
}
}
}
impl<'a> From<AddrSpec<'a>> for MailboxRef<'a> {
fn from(addr: AddrSpec<'a>) -> Self {
MailboxRef { MailboxRef {
name: None, name: None,
addrspec: addr, addrspec: addr,
} }
} }
} }
pub type MailboxList = Vec<MailboxRef>; pub type MailboxList<'a> = Vec<MailboxRef<'a>>;
/// Mailbox /// Mailbox
/// ///
/// ```abnf /// ```abnf
/// mailbox = name-addr / addr-spec /// mailbox = name-addr / addr-spec
/// ``` /// ```
pub fn mailbox(input: &str) -> IResult<&str, MailboxRef> { pub fn mailbox(input: &[u8]) -> IResult<&[u8], MailboxRef> {
alt((name_addr, into(addr_spec)))(input) alt((name_addr, into(addr_spec)))(input)
} }
@ -55,7 +61,7 @@ pub fn mailbox(input: &str) -> IResult<&str, MailboxRef> {
/// ```abnf /// ```abnf
/// name-addr = [display-name] angle-addr /// name-addr = [display-name] angle-addr
/// ``` /// ```
fn name_addr(input: &str) -> IResult<&str, MailboxRef> { fn name_addr(input: &[u8]) -> IResult<&[u8], MailboxRef> {
let (input, name) = opt(phrase)(input)?; let (input, name) = opt(phrase)(input)?;
let (input, mut mbox) = angle_addr(input)?; let (input, mut mbox) = angle_addr(input)?;
mbox.name = name; mbox.name = name;
@ -68,42 +74,30 @@ fn name_addr(input: &str) -> IResult<&str, MailboxRef> {
/// angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / /// angle-addr = [CFWS] "<" addr-spec ">" [CFWS] /
/// obs-angle-addr /// obs-angle-addr
/// ``` /// ```
pub fn angle_addr(input: &str) -> IResult<&str, MailboxRef> { pub fn angle_addr(input: &[u8]) -> IResult<&[u8], MailboxRef> {
delimited( delimited(
tuple((opt(cfws), tag("<"), opt(obs_route))), tuple((opt(cfws), tag(&[ascii::LT]), opt(obs_route))),
into(addr_spec), into(addr_spec),
pair(tag(">"), opt(cfws)), pair(tag(&[ascii::GT]), opt(cfws)),
)(input) )(input)
} }
/// obs-route = obs-domain-list ":" /// obs-route = obs-domain-list ":"
fn obs_route(input: &str) -> IResult<&str, Vec<String>> { fn obs_route(input: &[u8]) -> IResult<&[u8], Vec<Option<Domain>>> {
terminated(obs_domain_list, tag(":"))(input) terminated(obs_domain_list, tag(&[ascii::COL]))(input)
} }
/// ```abnf /// ```abnf
/// obs-domain-list = *(CFWS / ",") "@" domain /// obs-domain-list = *(CFWS / ",") "@" domain
/// *("," [CFWS] ["@" domain]) /// *("," [CFWS] ["@" domain])
/// ``` /// ```
fn obs_domain_list(input: &str) -> IResult<&str, Vec<String>> { fn obs_domain_list(input: &[u8]) -> IResult<&[u8], Vec<Option<Domain>>> {
//@FIXME complexity is O(n) in term of domains here. preceded(
let (input, head) = preceded( many0(cfws),
pair(many0(alt((recognize(cfws), tag(",")))), tag("@")), separated_list1(
obs_domain, tag(&[ascii::COMMA]),
)(input)?; preceded(many0(cfws), opt(preceded(tag(&[ascii::AT]), obs_domain))),
let (input, mut rest) = obs_domain_list_rest(input)?; ))(input)
rest.insert(0, head);
Ok((input, rest))
}
fn obs_domain_list_rest(input: &str) -> IResult<&str, Vec<String>> {
map(
many0(preceded(
pair(tag(","), opt(cfws)),
opt(preceded(tag("@"), obs_domain)),
)),
|v: Vec<Option<String>>| v.into_iter().flatten().collect(),
)(input)
} }
/// AddrSpec /// AddrSpec
@ -113,26 +107,40 @@ fn obs_domain_list_rest(input: &str) -> IResult<&str, Vec<String>> {
/// ``` /// ```
/// @FIXME: this system does not work to alternate between strict and obsolete /// @FIXME: this system does not work to alternate between strict and obsolete
/// so I force obsolete for now... /// so I force obsolete for now...
pub fn addr_spec(input: &str) -> IResult<&str, AddrSpec> { pub fn addr_spec(input: &[u8]) -> IResult<&[u8], AddrSpec> {
map( map(
tuple(( tuple((
obs_local_part, obs_local_part,
tag("@"), tag(&[ascii::AT]),
obs_domain, obs_domain,
many0(pair(tag("@"), obs_domain)), many0(pair(tag(&[ascii::AT]), obs_domain)), // for compatibility reasons
)), )),
|(local_part, _, domain, _)| AddrSpec { local_part, domain }, |(local_part, _, domain, _)| AddrSpec { local_part, domain },
)(input) )(input)
} }
/// Local part #[derive(Debug, PartialEq)]
/// pub enum LocalPartToken<'a> {
/// ```abnf Dot,
/// local-part = dot-atom / quoted-string / obs-local-part Word(Word<'a>),
/// ``` }
#[allow(dead_code)]
fn strict_local_part(input: &str) -> IResult<&str, String> { #[derive(Debug, PartialEq)]
alt((into(dot_atom), quoted_string))(input) pub struct LocalPart<'a>(pub Vec<LocalPartToken<'a>>);
impl<'a> LocalPart<'a> {
pub fn to_string(&self) -> String {
self.0.iter().fold(
String::new(),
|mut acc, token| {
match token {
LocalPartToken::Dot => acc.push('.'),
LocalPartToken::Word(v) => acc.push_str(v.to_string().as_ref()),
}
acc
}
)
}
} }
/// Obsolete local part /// Obsolete local part
@ -147,22 +155,29 @@ fn strict_local_part(input: &str) -> IResult<&str, String> {
/// ```abnf /// ```abnf
/// obs-local-part = *("." / word) /// obs-local-part = *("." / word)
/// ``` /// ```
fn obs_local_part(input: &str) -> IResult<&str, String> { fn obs_local_part(input: &[u8]) -> IResult<&[u8], LocalPart> {
fold_many0( map(
alt((map(is_a("."), Cow::Borrowed), word)), many0(alt((
String::new, map(tag(&[ascii::PERIOD]), |_| LocalPartToken::Dot),
|acc, chunk| acc + &chunk, map(word, |v| LocalPartToken::Word(v)),
))),
|v| LocalPart(v),
)(input) )(input)
} }
/// Domain #[derive(Debug, PartialEq)]
/// pub enum Domain<'a> {
/// ```abnf Atoms(Vec<&'a [u8]>),
/// domain = dot-atom / domain-literal Litteral(Vec<&'a [u8]>),
/// ``` }
#[allow(dead_code)]
pub fn strict_domain(input: &str) -> IResult<&str, String> { impl<'a> Domain<'a> {
alt((into(dot_atom), domain_litteral))(input) pub fn to_string(&self) -> String {
match self {
Domain::Atoms(v) => v.iter().map(|v| encoding_rs::UTF_8.decode_without_bom_handling(v).0.to_string()).collect::<Vec<String>>().join("."),
Domain::Litteral(v) => v.iter().map(|v| encoding_rs::UTF_8.decode_without_bom_handling(v).0.to_string()).collect::<Vec<String>>().join(" "),
}
}
} }
/// Obsolete domain /// Obsolete domain
@ -173,9 +188,9 @@ pub fn strict_domain(input: &str) -> IResult<&str, String> {
/// ```abnf /// ```abnf
/// obs-domain = atom *("." atom) / domain-literal /// obs-domain = atom *("." atom) / domain-literal
/// ``` /// ```
pub fn obs_domain(input: &str) -> IResult<&str, String> { pub fn obs_domain(input: &[u8]) -> IResult<&[u8], Domain> {
alt(( alt((
map(separated_list1(tag("."), atom), |v| v.join(".")), map(separated_list1(tag("."), atom), |v| Domain::Atoms(v)),
domain_litteral, domain_litteral,
))(input) ))(input)
} }
@ -185,35 +200,23 @@ pub fn obs_domain(input: &str) -> IResult<&str, String> {
/// ```abnf /// ```abnf
/// domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS] /// domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
/// ``` /// ```
fn domain_litteral(input: &str) -> IResult<&str, String> { fn domain_litteral(input: &[u8]) -> IResult<&[u8], Domain> {
delimited( delimited(
pair(opt(cfws), tag("[")), pair(opt(cfws), tag(&[ascii::LEFT_BRACKET])),
inner_domain_litteral, inner_domain_litteral,
pair(tag("]"), opt(cfws)), pair(tag(&[ascii::RIGHT_BRACKET]), opt(cfws)),
)(input) )(input)
} }
fn inner_domain_litteral(input: &str) -> IResult<&str, String> { fn inner_domain_litteral(input: &[u8]) -> IResult<&[u8], Domain> {
let (input, (cvec, maybe_wsp)) = map(
pair(many0(pair(opt(fws), satisfy(is_dtext))), opt(fws))(input)?; terminated(many0(preceded(opt(fws), take_while1(is_dtext))), opt(fws)),
let mut domain = cvec |v| Domain::Litteral(v),
.iter() )(input)
.fold(String::with_capacity(16), |mut acc, (maybe_wsp, c)| {
if let Some(wsp) = maybe_wsp {
acc.push(*wsp);
}
acc.push(*c);
acc
});
if let Some(wsp) = maybe_wsp {
domain.push(wsp);
}
Ok((input, domain))
} }
fn is_strict_dtext(c: char) -> bool { fn is_strict_dtext(c: u8) -> bool {
(c >= '\x21' && c <= '\x5A') || (c >= '\x5E' && c <= '\x7E') || !c.is_ascii() (c >= 0x21 && c <= 0x5A) || (c >= 0x5E && c <= 0x7E)
} }
/// Is domain text /// Is domain text
@ -224,7 +227,7 @@ fn is_strict_dtext(c: char) -> bool {
/// obs-dtext ; "[", "]", or "\" /// obs-dtext ; "[", "]", or "\"
/// obs-dtext = obs-NO-WS-CTL / quoted-pair /// obs-dtext = obs-NO-WS-CTL / quoted-pair
/// ``` /// ```
pub fn is_dtext(c: char) -> bool { pub fn is_dtext(c: u8) -> bool {
is_strict_dtext(c) || is_obs_no_ws_ctl(c) is_strict_dtext(c) || is_obs_no_ws_ctl(c)
//@FIXME does not support quoted pair yet while RFC requires it //@FIXME does not support quoted pair yet while RFC requires it
} }
@ -268,7 +271,8 @@ mod tests {
); );
// UTF-8 // UTF-8
assert_eq!( // @FIXME ASCII SUPPORT IS BROKEN
/*assert_eq!(
addr_spec("用户@例子.广告"), addr_spec("用户@例子.广告"),
Ok(( Ok((
"", "",
@ -277,7 +281,7 @@ mod tests {
domain: "例子.广告".into() domain: "例子.广告".into()
} }
)) ))
); );*/
// ASCII Edge cases // ASCII Edge cases
assert_eq!( assert_eq!(

1
src/rfc5322/mod.rs Normal file
View file

@ -0,0 +1 @@
pub mod mailbox;

View file

@ -41,11 +41,13 @@ impl<'a> TryFrom<&'a lazy::PhraseList<'a>> for PhraseList {
} }
}*/ }*/
#[derive(Debug, PartialEq)]
pub enum Word<'a> { pub enum Word<'a> {
Quoted(buffer::Text<'a>), Quoted(buffer::Text<'a>),
Encoded(encoding::EncodedWord<'a>), Encoded(encoding::EncodedWord<'a>),
Atom(&'a [u8]), Atom(&'a [u8]),
} }
impl<'a> Word<'a> { impl<'a> Word<'a> {
pub fn to_string(&self) -> String { pub fn to_string(&self) -> String {
match self { match self {
@ -69,7 +71,9 @@ pub fn word(input: &[u8]) -> IResult<&[u8], Word> {
))(input) ))(input)
} }
#[derive(Debug, PartialEq)]
pub struct Phrase<'a>(pub Vec<Word<'a>>); pub struct Phrase<'a>(pub Vec<Word<'a>>);
impl<'a> Phrase<'a> { impl<'a> Phrase<'a> {
pub fn to_string(&self) -> String { pub fn to_string(&self) -> String {
self.0.iter().map(|v| v.to_string()).collect::<Vec<String>>().join(" ") self.0.iter().map(|v| v.to_string()).collect::<Vec<String>>().join(" ")
@ -95,11 +99,13 @@ fn is_unstructured(c: u8) -> bool {
is_vchar(c) || is_obs_no_ws_ctl(c) || c == ascii::NULL is_vchar(c) || is_obs_no_ws_ctl(c) || c == ascii::NULL
} }
#[derive(Debug, PartialEq)]
pub enum UnstrToken<'a> { pub enum UnstrToken<'a> {
Init, Init,
Encoded(encoding::EncodedWord<'a>), Encoded(encoding::EncodedWord<'a>),
Plain(&'a [u8]), Plain(&'a [u8]),
} }
impl<'a> UnstrToken<'a> { impl<'a> UnstrToken<'a> {
pub fn to_string(&self) -> String { pub fn to_string(&self) -> String {
match self { match self {
@ -110,7 +116,9 @@ impl<'a> UnstrToken<'a> {
} }
} }
#[derive(Debug, PartialEq)]
pub struct Unstructured<'a>(pub Vec<UnstrToken<'a>>); pub struct Unstructured<'a>(pub Vec<UnstrToken<'a>>);
impl<'a> Unstructured<'a> { impl<'a> Unstructured<'a> {
pub fn to_string(&self) -> String { pub fn to_string(&self) -> String {
self.0.iter().fold( self.0.iter().fold(