From 0b98a175daa8c7cee2417efd2d61e66d2651e9b5 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Tue, 13 Jun 2023 09:00:40 +0200 Subject: [PATCH] first tests on mailbox --- src/headers.rs | 4 +- src/lib.rs | 5 ++ src/mailbox.rs | 152 +++++++++++++++++++++++++++++++++++++++++++++++++ src/model.rs | 43 ++++++++++---- src/words.rs | 6 +- 5 files changed, 192 insertions(+), 18 deletions(-) create mode 100644 src/mailbox.rs diff --git a/src/headers.rs b/src/headers.rs index b87c9fd..74f4b75 100644 --- a/src/headers.rs +++ b/src/headers.rs @@ -143,9 +143,7 @@ fn from(input: &str) -> IResult<&str, HeaderField> { unimplemented!(); } -fn mailbox(input: &str) -> IResult<&str, MailboxRef> { - unimplemented!(); -} + #[cfg(test)] mod tests { diff --git a/src/lib.rs b/src/lib.rs index 0996105..f53bba6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,11 @@ pub mod headers; pub mod model; + +// Generic mod whitespace; mod words; mod quoted; mod misc_token; + +// Header specific +mod mailbox; diff --git a/src/mailbox.rs b/src/mailbox.rs new file mode 100644 index 0000000..a6485f9 --- /dev/null +++ b/src/mailbox.rs @@ -0,0 +1,152 @@ +use nom::{ + IResult, + branch::alt, + bytes::complete::tag, + character::complete::satisfy, + combinator::{into,opt}, + multi::many0, + sequence::{delimited,pair,tuple}, +}; + +use crate::model::{MailboxRef, AddrSpec}; +use crate::misc_token::phrase; +use crate::whitespace::{cfws, fws}; +use crate::words::dot_atom; +use crate::quoted::quoted_string; + +/// Mailbox +/// +/// ```abnf +/// mailbox = name-addr / addr-spec +/// ``` +pub fn mailbox(input: &str) -> IResult<&str, MailboxRef> { + alt((name_addr, into(addr_spec)))(input) +} + +/// Name of the email address +/// +/// ```abnf +/// name-addr = [display-name] angle-addr +/// ``` +fn name_addr(input: &str) -> IResult<&str, MailboxRef> { + let (input, name) = opt(phrase)(input)?; + let (input, mut mbox) = angle_addr(input)?; + mbox.name = name; + Ok((input, mbox)) +} + +/// Enclosed addr-spec with 
< and > +/// +/// ```abnf +/// angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / +/// obs-angle-addr +/// ``` +fn angle_addr(input: &str) -> IResult<&str, MailboxRef> { + delimited( + pair(opt(cfws), tag("<")), + into(addr_spec), + pair(tag(">"), opt(cfws)), + )(input) +} + +/// Addr-spec +/// +/// ```abnf +/// addr-spec = local-part "@" domain +/// ``` +fn addr_spec(input: &str) -> IResult<&str, AddrSpec> { + let (input, (local, _, domain)) = tuple((local_part, tag("@"), domain_part))(input)?; + Ok((input, AddrSpec { + local_part: local, + domain: domain, + })) +} + +/// Local part +/// +/// ```abnf +/// local-part = dot-atom / quoted-string / obs-local-part +/// ``` +fn local_part(input: &str) -> IResult<&str, String> { + alt((into(dot_atom), quoted_string))(input) +} + +/// Domain +/// +/// ```abnf +/// domain = dot-atom / domain-literal / obs-domain +/// ``` +fn domain_part(input: &str) -> IResult<&str, String> { + alt((into(dot_atom), domain_litteral))(input) +} + +/// Domain literal +/// +/// ```abnf +/// domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS] +/// ``` +fn domain_litteral(input: &str) -> IResult<&str, String> { + delimited( + pair(opt(cfws), tag("[")), + inner_domain_litteral, + pair(tag("]"), opt(cfws)) + )(input) +} + +fn inner_domain_litteral(input: &str) -> IResult<&str, String> { + let (input, (cvec, maybe_wsp)) = pair(many0(pair(opt(fws), satisfy(is_dtext))), opt(fws))(input)?; + let mut domain = cvec.iter().fold( + String::with_capacity(16), + |mut acc, (maybe_wsp, c)| { + if let Some(wsp) = maybe_wsp { + acc.push(*wsp); + } + acc.push(*c); + acc + }); + if let Some(wsp) = maybe_wsp { + domain.push(wsp); + } + + Ok((input, domain)) +} + +/// Is domain text +/// +/// ```abnf +/// dtext = %d33-90 / ; Printable US-ASCII +/// %d94-126 / ; characters not including +/// obs-dtext ; "[", "]", or "\" +/// ``` +fn is_dtext(c: char) -> bool { + (c >= '\x21' && c <= '\x5A') || (c >= '\x5E' && c <= '\x7E') || !c.is_ascii() +} + +#[cfg(test)] +mod
tests { + use super::*; + + #[test] + fn test_addr_spec() { + assert_eq!(addr_spec("alice@example.com"), Ok(("", AddrSpec{local_part: "alice".into(), domain: "example.com".into() }))); + + assert_eq!(addr_spec("jsmith@[192.168.2.1]"), Ok(("", AddrSpec{local_part: "jsmith".into(), domain: "192.168.2.1".into() }))); + assert_eq!(addr_spec("jsmith@[IPv6:2001:db8::1]"), Ok(("", AddrSpec{local_part: "jsmith".into(), domain: "IPv6:2001:db8::1".into() }))); + + // UTF-8 + assert_eq!(addr_spec("用户@例子.广告"), Ok(("", AddrSpec{local_part: "用户".into(), domain: "例子.广告".into()}))); + + // ASCII Edge cases + assert_eq!( + addr_spec("user+mailbox/department=shipping@example.com"), + Ok(("", AddrSpec{local_part: "user+mailbox/department=shipping".into(), domain: "example.com".into()}))); + assert_eq!( + addr_spec("!#$%&'*+-/=?^_`.{|}~@example.com"), + Ok(("", AddrSpec{local_part: "!#$%&'*+-/=?^_`.{|}~".into(), domain: "example.com".into()}))); + assert_eq!( + addr_spec(r#""Abc@def"@example.com"#), + Ok(("", AddrSpec{local_part: "Abc@def".into(), domain: "example.com".into()}))); + assert_eq!(addr_spec(r#""Fred\ Bloggs"@example.com"#), Ok(("", AddrSpec{local_part: "Fred Bloggs".into(), domain: "example.com".into()}))); + assert_eq!(addr_spec(r#""Joe.\\Blow"@example.com"#), Ok(("", AddrSpec{local_part: r#"Joe.\Blow"#.into(), domain: "example.com".into()}))); + } +} diff --git a/src/model.rs b/src/model.rs index b1d23c5..bafe16f 100644 --- a/src/model.rs +++ b/src/model.rs @@ -9,23 +9,42 @@ pub enum HeaderDate { None, } +#[derive(Debug, PartialEq)] +pub struct AddrSpec { + pub local_part: String, + pub domain: String, +} +impl AddrSpec { + pub fn fully_qualified(&self) -> String { + format!("{}@{}", self.local_part, self.domain) + } +} + #[derive(Debug)] -pub struct MailboxRef<'a> { +pub struct MailboxRef { // The actual "email address" like hello@example.com - pub addrspec: &'a str, - pub name: Option<&'a str>, + pub addrspec: AddrSpec, + pub name: Option<String>, +} +impl From<AddrSpec> for MailboxRef
{ + fn from(addr: AddrSpec) -> Self { + MailboxRef { + name: None, + addrspec: addr, + } + } } #[derive(Debug)] -pub struct GroupRef<'a> { - pub name: &'a str, - pub mbx: Vec<MailboxRef<'a>>, +pub struct GroupRef { + pub name: String, + pub mbx: Vec<MailboxRef>, } #[derive(Debug)] -pub enum AddressRef<'a> { - Single(MailboxRef<'a>), - Many(GroupRef<'a>), +pub enum AddressRef { + Single(MailboxRef), + Many(GroupRef), } /// Permissive Header Section @@ -37,9 +56,9 @@ pub enum AddressRef<'a> { #[derive(Debug, Default)] pub struct PermissiveHeaderSection<'a> { pub subject: Option, - pub from: Vec>, - pub sender: Option>, - pub reply_to: Vec>, + pub from: Vec, + pub sender: Option, + pub reply_to: Vec, pub date: HeaderDate, pub optional: HashMap<&'a str, String>, } diff --git a/src/words.rs b/src/words.rs index 9535471..c7d9dae 100644 --- a/src/words.rs +++ b/src/words.rs @@ -27,7 +27,7 @@ pub fn vchar_seq(input: &str) -> IResult<&str, &str> { /// Atom allowed characters fn is_atext(c: char) -> bool { - c.is_ascii_alphanumeric() || "!#$%&'*+-/=?^_`{|}~".contains(c) + c.is_ascii_alphanumeric() || "!#$%&'*+-/=?^_`{|}~".contains(c) || !c.is_ascii() } /// Atom @@ -47,7 +47,7 @@ fn dot_atom_text(input: &str) -> IResult<&str, &str> { /// dot-atom /// /// `[CFWS] dot-atom-text [CFWS]` -fn dot_atom(input: &str) -> IResult<&str, &str> { +pub fn dot_atom(input: &str) -> IResult<&str, &str> { delimited(opt(cfws), dot_atom_text, opt(cfws))(input) } @@ -68,7 +68,7 @@ mod tests { assert!(is_atext('5')); assert!(is_atext('Q')); assert!(!is_atext(' ')); - assert!(!is_atext('É')); + assert!(is_atext('É')); // support utf8 } #[test]