From 486ccf2de0e509ed475dad1fbaf353eb52dae630 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Fri, 16 Jun 2023 09:58:07 +0200 Subject: [PATCH] drop my early implementation of trace --- README.md | 1 + src/common_fields.rs | 428 ----------------------------------------- src/datetime.rs | 13 ++ src/header.rs | 449 ++++++++++++++++++++++++++++++++++++++++++- src/lib.rs | 8 +- src/model.rs | 36 +--- src/parse.rs | 13 +- src/trace.rs | 281 ++------------------------- 8 files changed, 494 insertions(+), 735 deletions(-) delete mode 100644 src/common_fields.rs create mode 100644 src/datetime.rs diff --git a/README.md b/README.md index c3636b2..6f4837b 100644 --- a/README.md +++ b/README.md @@ -19,3 +19,4 @@ Targeted RFC |2048 | ↳ Multipurpose Internet Mail Extensions (MIME) Part Four: Registration Procedures | |2049 | ↳ Multipurpose Internet Mail Extensions (MIME) Part Five: Conformance Criteria and Examples | |6532 | Internationalized Email Headers | +|9228 | Delivered-To Email Header Field | diff --git a/src/common_fields.rs b/src/common_fields.rs deleted file mode 100644 index 662d0b7..0000000 --- a/src/common_fields.rs +++ /dev/null @@ -1,428 +0,0 @@ -use chrono::DateTime; -use nom::{ - IResult, - branch::alt, - bytes::complete::take_while1, - bytes::complete::tag, - character::complete::space0, - combinator::opt, - multi::{many0, many1, fold_many0, separated_list1}, - sequence::{terminated, preceded, pair, tuple}, -}; - -use crate::whitespace::{fws, perm_crlf}; -use crate::words::vchar_seq; -use crate::misc_token::{phrase, unstructured}; -use crate::model::{CommonFields, HeaderDate, MailboxRef, AddressRef}; -use crate::mailbox::mailbox; -use crate::address::{mailbox_list, address_list, address_list_cfws}; -use crate::identification::msg_id; -use crate::model; - -/// HEADERS - -/// Header section -/// -/// See: https://www.rfc-editor.org/rfc/rfc5322.html#section-2.2 -pub fn section(input: &str) -> IResult<&str, CommonFields> { - let (input, headers) = fold_many0( - alt((header_field, unknown_field)), - CommonFields::default, - |mut section, head| { - match head { - //@FIXME min and max limits are not enforced, - // it may result in missing data or silently overriden data. - - // 3.6.1. The Origination Date Field - HeaderField::Date(d) => { - // | orig-date | 1 | 1 | | - section.date = d; - } - - // 3.6.2. Originator Fields - HeaderField::From(v) => { - // | from | 1 | 1 | See sender and 3.6.2 | - section.from = v; - } - HeaderField::Sender(mbx) => { - // | sender | 0* | 1 | MUST occur with multi-address from - see 3.6.2 | - section.sender = Some(mbx); - } - HeaderField::ReplyTo(addr_list) => { - // | reply-to | 0 | 1 | | - section.reply_to = addr_list; - } - - // 3.6.3. Destination Address Fields - HeaderField::To(addr_list) => { - // | to | 0 | 1 | | - section.to = addr_list; - } - HeaderField::Cc(addr_list) => { - // | cc | 0 | 1 | | - section.cc = addr_list; - } - HeaderField::Bcc(addr_list) => { - // | bcc | 0 | 1 | | - section.bcc = addr_list; - } - - // 3.6.4. Identification Fields - HeaderField::MessageID(msg_id) => { - // | message-id | 0* | 1 | SHOULD be present - see 3.6.4 | - section.msg_id = Some(msg_id); - } - HeaderField::InReplyTo(id_list) => { - // | in-reply-to | 0* | 1 | SHOULD occur in some replies - see 3.6.4 | - section.in_reply_to = id_list; - } - HeaderField::References(id_list) => { - // | in-reply-to | 0* | 1 | SHOULD occur in some replies - see 3.6.4 | - section.references = id_list; - } - - // 3.6.5. Informational Fields - HeaderField::Subject(title) => { - // | subject | 0 | 1 | | - section.subject = Some(title); - } - HeaderField::Comments(coms) => { - // | comments | 0 | unlimited | | - section.comments.push(coms); - } - HeaderField::Keywords(mut kws) => { - // | keywords | 0 | unlimited | | - section.keywords.append(&mut kws); - } - - // 3.6.8. Optional Fields - HeaderField::Optional(name, body) => { - section.optional.insert(name, body); - } - }; - section - } - )(input)?; - - let (input, _) = perm_crlf(input)?; - Ok((input, headers)) -} - -#[derive(Debug, PartialEq)] -pub enum HeaderField<'a> { - // 3.6.1. The Origination Date Field - Date(HeaderDate), - - // 3.6.2. Originator Fields - From(Vec), - Sender(MailboxRef), - ReplyTo(Vec), - - // 3.6.3. Destination Address Fields - To(Vec), - Cc(Vec), - Bcc(Vec), - - // 3.6.4. Identification Fields - MessageID(model::MessageId<'a>), - InReplyTo(Vec>), - References(Vec>), - - // 3.6.5. Informational Fields - Subject(String), - Comments(String), - Keywords(Vec), - - // 3.6.8. Optional Fields - Optional(&'a str, String) -} - -/// Parse one known header field -/// -/// RFC5322 optional-field seems to be a generalization of the field terminology. -/// We use it to parse all header names: -pub fn header_field(input: &str) -> IResult<&str, HeaderField> { - terminated( - alt(( - // 3.6.1. The Origination Date Field - date, - // 3.6.2. Originator Fields - alt((from, sender, reply_to)), - // 3.6.3. Destination Address Fields - alt((to, cc, bcc)), - // 3.6.4. Identification Fields - alt((msg_id_field, in_reply_to, references)), - // 3.6.5. Informational Fields - alt((subject, comments, keywords)), - )), - perm_crlf, - )(input) -} - -// 3.6.1. The Origination Date Field -fn date(input: &str) -> IResult<&str, HeaderField> { - let (input, body) = preceded(pair(tag("Date:"), space0), datetime)(input)?; - Ok((input, HeaderField::Date(body))) -} - -// 3.6.2. Originator Fields -fn from(input: &str) -> IResult<&str, HeaderField> { - let (input, body) = preceded(pair(tag("From:"), space0), mailbox_list)(input)?; - Ok((input, HeaderField::From(body))) -} -fn sender(input: &str) -> IResult<&str, HeaderField> { - let (input, body) = preceded(pair(tag("Sender:"), space0), mailbox)(input)?; - Ok((input, HeaderField::Sender(body))) -} -fn reply_to(input: &str) -> IResult<&str, HeaderField> { - let (input, body) = preceded(pair(tag("Reply-To:"), space0), address_list)(input)?; - Ok((input, HeaderField::ReplyTo(body))) -} - -// 3.6.3. Destination Address Fields -fn to(input: &str) -> IResult<&str, HeaderField> { - let (input, body) = preceded(pair(tag("To:"), space0), address_list)(input)?; - Ok((input, HeaderField::To(body))) -} -fn cc(input: &str) -> IResult<&str, HeaderField> { - let (input, body) = preceded(pair(tag("Cc:"), space0), address_list)(input)?; - Ok((input, HeaderField::Cc(body))) -} -fn bcc(input: &str) -> IResult<&str, HeaderField> { - let (input, body) = preceded( - pair(tag("Bcc:"), space0), - opt(alt((address_list, address_list_cfws))), - )(input)?; - - Ok((input, HeaderField::Bcc(body.unwrap_or(vec![])))) -} - -// 3.6.4. Identification Fields -fn msg_id_field(input: &str) -> IResult<&str, HeaderField> { - let (input, body) = preceded(pair(tag("Message-ID:"), space0), msg_id)(input)?; - Ok((input, HeaderField::MessageID(body))) -} -fn in_reply_to(input: &str) -> IResult<&str, HeaderField> { - let (input, body) = preceded(pair(tag("In-Reply-To:"), space0), many1(msg_id))(input)?; - Ok((input, HeaderField::InReplyTo(body))) -} -fn references(input: &str) -> IResult<&str, HeaderField> { - let (input, body) = preceded(pair(tag("References:"), space0), many1(msg_id))(input)?; - Ok((input, HeaderField::References(body))) -} - -// 3.6.5. Informational Fields -fn subject(input: &str) -> IResult<&str, HeaderField> { - let (input, body) = preceded(pair(tag("Subject:"), space0), unstructured)(input)?; - Ok((input, HeaderField::Subject(body))) -} -fn comments(input: &str) -> IResult<&str, HeaderField> { - let (input, body) = preceded(pair(tag("Comments:"), space0), unstructured)(input)?; - Ok((input, HeaderField::Comments(body))) -} -fn keywords(input: &str) -> IResult<&str, HeaderField> { - let (input, body) = preceded( - pair(tag("Keywords:"), space0), - separated_list1(tag(","), phrase), - )(input)?; - Ok((input, HeaderField::Keywords(body))) -} - - -/// Optional field -/// -/// ```abnf -/// field = field-name ":" unstructured CRLF -/// field-name = 1*ftext -/// ftext = %d33-57 / ; Printable US-ASCII -/// %d59-126 ; characters not including -/// ; ":". -/// ``` -fn unknown_field(input: &str) -> IResult<&str, HeaderField> { - // Extract field name - let (input, field_name) = field_name(input)?; - let (input, body) = unstructured(input)?; - Ok((input, HeaderField::Optional(field_name, body))) -} -pub fn field_name(input: &str) -> IResult<&str, &str> { - terminated( - take_while1(|c| c >= '\x21' && c <= '\x7E' && c != '\x3A'), - pair(tag(":"), space0) - )(input) -} - -pub fn datetime(input: &str) -> IResult<&str, HeaderDate> { - // @FIXME want to extract datetime our way in the future - // to better handle obsolete/bad cases instead of returning raw text. - let (input, raw_date) = unstructured(input)?; - match DateTime::parse_from_rfc2822(&raw_date) { - Ok(chronodt) => Ok((input, HeaderDate::Parsed(chronodt))), - Err(e) => Ok((input, HeaderDate::Unknown(raw_date, e))), - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::model::{GroupRef, AddrSpec}; - - // 3.6.1. The Origination Date Field - #[test] - fn test_datetime() { - let datefield = "Date: Thu,\r\n 13\r\n Feb\r\n 1969\r\n 23:32\r\n -0330 (Newfoundland Time)\r\n"; - let (input, v) = header_field(datefield).unwrap(); - assert_eq!(input, ""); - match v { - HeaderField::Date(HeaderDate::Parsed(_)) => (), - _ => panic!("Date has not been parsed"), - }; - } - - // 3.6.2. Originator Fields - #[test] - fn test_from() { - assert_eq!( - header_field("From: \"Joe Q. Public\" \r\n"), - Ok(("", HeaderField::From(vec![MailboxRef { - name: Some("Joe Q. Public".into()), - addrspec: AddrSpec { - local_part: "john.q.public".into(), - domain: "example.com".into(), - } - }]))), - ); - } - #[test] - fn test_sender() { - assert_eq!( - header_field("Sender: Michael Jones \r\n"), - Ok(("", HeaderField::Sender(MailboxRef { - name: Some("Michael Jones".into()), - addrspec: AddrSpec { - local_part: "mjones".into(), - domain: "machine.example".into(), - }, - }))), - ); - } - #[test] - fn test_reply_to() { - assert_eq!( - header_field("Reply-To: \"Mary Smith: Personal Account\" \r\n"), - Ok(("", HeaderField::ReplyTo(vec![AddressRef::Single(MailboxRef { - name: Some("Mary Smith: Personal Account".into()), - addrspec: AddrSpec { - local_part: "smith".into(), - domain: "home.example".into(), - }, - })]))) - ); - } - - // 3.6.3. Destination Address Fields - #[test] - fn test_to() { - assert_eq!( - header_field("To: A Group:Ed Jones ,joe@where.test,John ;\r\n"), - Ok(("", HeaderField::To(vec![AddressRef::Many(GroupRef { - name: "A Group".into(), - participants: vec![ - MailboxRef { - name: Some("Ed Jones".into()), - addrspec: AddrSpec { local_part: "c".into(), domain: "a.test".into() }, - }, - MailboxRef { - name: None, - addrspec: AddrSpec { local_part: "joe".into(), domain: "where.test".into() }, - }, - MailboxRef { - name: Some("John".into()), - addrspec: AddrSpec { local_part: "jdoe".into(), domain: "one.test".into() }, - }, - ] - })]))) - ); - } - #[test] - fn test_cc() { - assert_eq!( - header_field("Cc: Undisclosed recipients:;\r\n"), - Ok(("", HeaderField::Cc(vec![AddressRef::Many(GroupRef { - name: "Undisclosed recipients".into(), - participants: vec![], - })]))) - ); - } - #[test] - fn test_bcc() { - assert_eq!( - header_field("Bcc: (empty)\r\n"), - Ok(("", HeaderField::Bcc(vec![]))) - ); - assert_eq!( - header_field("Bcc: \r\n"), - Ok(("", HeaderField::Bcc(vec![]))) - ); - } - - - // 3.6.4. Identification Fields - #[test] - fn test_message_id() { - assert_eq!( - header_field("Message-ID: <310@[127.0.0.1]>\r\n"), - Ok(("", HeaderField::MessageID(model::MessageId { left: "310", right: "127.0.0.1" }))) - ); - } - #[test] - fn test_in_reply_to() { - assert_eq!( - header_field("In-Reply-To: \r\n"), - Ok(("", HeaderField::InReplyTo(vec![ - model::MessageId { left: "a", right: "b" }, - model::MessageId { left: "c", right: "example.com" }, - ]))) - ); - } - #[test] - fn test_references() { - assert_eq!( - header_field("References: <1234@local.machine.example> <3456@example.net>\r\n"), - Ok(("", HeaderField::References(vec![ - model::MessageId { left: "1234", right: "local.machine.example" }, - model::MessageId { left: "3456", right: "example.net" }, - ]))) - ); - } - - // 3.6.5. Informational Fields - #[test] - fn test_subject() { - assert_eq!( - header_field("Subject: Aérogramme\r\n"), - Ok(("", HeaderField::Subject("Aérogramme".into()))) - ); - } - #[test] - fn test_comments() { - assert_eq!( - header_field("Comments: 😛 easter egg!\r\n"), - Ok(("", HeaderField::Comments("😛 easter egg!".into()))) - ); - } - #[test] - fn test_keywords() { - assert_eq!( - header_field("Keywords: fantasque, farfelu, fanfreluche\r\n"), - Ok(("", HeaderField::Keywords(vec!["fantasque".into(), "farfelu".into(), "fanfreluche".into()]))) - ); - } - - // Test invalid field name - #[test] - fn test_invalid_field_name() { - assert!(header_field("Unknown: unknown\r\n").is_err()); - } -} - - diff --git a/src/datetime.rs b/src/datetime.rs new file mode 100644 index 0000000..199f7cd --- /dev/null +++ b/src/datetime.rs @@ -0,0 +1,13 @@ +use chrono::DateTime; +use nom::IResult; +use crate::{model,misc_token}; + +pub fn section(input: &str) -> IResult<&str, model::HeaderDate> { + // @FIXME want to extract datetime our way in the future + // to better handle obsolete/bad cases instead of returning raw text. + let (input, raw_date) = misc_token::unstructured(input)?; + match DateTime::parse_from_rfc2822(&raw_date) { + Ok(chronodt) => Ok((input, model::HeaderDate::Parsed(chronodt))), + Err(e) => Ok((input, model::HeaderDate::Unknown(raw_date, e))), + } +} diff --git a/src/header.rs b/src/header.rs index 0a3be52..20a7b4e 100644 --- a/src/header.rs +++ b/src/header.rs @@ -1,16 +1,451 @@ use nom::{ IResult, - multi::many0, + branch::alt, + bytes::complete::take_while1, + bytes::complete::tag, + character::complete::space0, + combinator::{map, opt}, + multi::{many0, many1, fold_many0, separated_list1}, + sequence::{terminated, preceded, pair, tuple}, }; -use crate::{common_fields, trace, whitespace, model}; +use crate::whitespace::{fws, perm_crlf}; +use crate::words::vchar_seq; +use crate::misc_token::{phrase, unstructured}; +use crate::model::{HeaderSection, HeaderDate, MailboxRef, AddressRef}; +use crate::mailbox::mailbox; +use crate::address::{mailbox_list, address_list, address_list_cfws}; +use crate::identification::msg_id; +use crate::{datetime, trace, model}; -pub fn section(input: &str) -> IResult<&str, model::HeaderSection> { - let (input, traces) = many0(trace::section)(input)?; - let (input, common) = common_fields::section(input)?; - let (input, _) = whitespace::perm_crlf(input)?; +/// HEADERS - Ok((input, model::HeaderSection { traces, common })) +/// Header section +/// +/// See: https://www.rfc-editor.org/rfc/rfc5322.html#section-2.2 +pub fn section(input: &str) -> IResult<&str, HeaderSection> { + let (input, headers) = fold_many0( + alt((header_field, unknown_field)), + HeaderSection::default, + |mut section, head| { + match head { + //@FIXME min and max limits are not enforced, + // it may result in missing data or silently overriden data. + + // 3.6.1. The Origination Date Field + HeaderField::Date(d) => { + // | orig-date | 1 | 1 | | + section.date = d; + } + + // 3.6.2. Originator Fields + HeaderField::From(v) => { + // | from | 1 | 1 | See sender and 3.6.2 | + section.from = v; + } + HeaderField::Sender(mbx) => { + // | sender | 0* | 1 | MUST occur with multi-address from - see 3.6.2 | + section.sender = Some(mbx); + } + HeaderField::ReplyTo(addr_list) => { + // | reply-to | 0 | 1 | | + section.reply_to = addr_list; + } + + // 3.6.3. Destination Address Fields + HeaderField::To(addr_list) => { + // | to | 0 | 1 | | + section.to = addr_list; + } + HeaderField::Cc(addr_list) => { + // | cc | 0 | 1 | | + section.cc = addr_list; + } + HeaderField::Bcc(addr_list) => { + // | bcc | 0 | 1 | | + section.bcc = addr_list; + } + + // 3.6.4. Identification Fields + HeaderField::MessageID(msg_id) => { + // | message-id | 0* | 1 | SHOULD be present - see 3.6.4 | + section.msg_id = Some(msg_id); + } + HeaderField::InReplyTo(id_list) => { + // | in-reply-to | 0* | 1 | SHOULD occur in some replies - see 3.6.4 | + section.in_reply_to = id_list; + } + HeaderField::References(id_list) => { + // | in-reply-to | 0* | 1 | SHOULD occur in some replies - see 3.6.4 | + section.references = id_list; + } + + // 3.6.5. Informational Fields + HeaderField::Subject(title) => { + // | subject | 0 | 1 | | + section.subject = Some(title); + } + HeaderField::Comments(coms) => { + // | comments | 0 | unlimited | | + section.comments.push(coms); + } + HeaderField::Keywords(mut kws) => { + // | keywords | 0 | unlimited | | + section.keywords.append(&mut kws); + } + + // 3.6.6 Resent Fields are not implemented + // 3.6.7 Trace Fields + HeaderField::ReturnPath(maybe_mbx) => { + if let Some(mbx) = maybe_mbx { + section.return_path.push(mbx); + } + } + HeaderField::Received(log) => { + section.received.push(log); + } + + // 3.6.8. Optional Fields + HeaderField::Optional(name, body) => { + section.optional.insert(name, body); + } + }; + section + } + )(input)?; + + let (input, _) = perm_crlf(input)?; + Ok((input, headers)) +} + +#[derive(Debug, PartialEq)] +pub enum HeaderField<'a> { + // 3.6.1. The Origination Date Field + Date(HeaderDate), + + // 3.6.2. Originator Fields + From(Vec), + Sender(MailboxRef), + ReplyTo(Vec), + + // 3.6.3. Destination Address Fields + To(Vec), + Cc(Vec), + Bcc(Vec), + + // 3.6.4. Identification Fields + MessageID(model::MessageId<'a>), + InReplyTo(Vec>), + References(Vec>), + + // 3.6.5. Informational Fields + Subject(String), + Comments(String), + Keywords(Vec), + + // 3.6.6 Resent Fields (not implemented) + // 3.6.7 Trace Fields + Received(&'a str), + ReturnPath(Option), + + // 3.6.8. Optional Fields + Optional(&'a str, String) +} + +/// Parse one known header field +/// +/// RFC5322 optional-field seems to be a generalization of the field terminology. +/// We use it to parse all header names: +pub fn header_field(input: &str) -> IResult<&str, HeaderField> { + terminated( + alt(( + // 3.6.1. The Origination Date Field + date, + // 3.6.2. Originator Fields + alt((from, sender, reply_to)), + // 3.6.3. Destination Address Fields + alt((to, cc, bcc)), + // 3.6.4. Identification Fields + alt((msg_id_field, in_reply_to, references)), + // 3.6.5. Informational Fields + alt((subject, comments, keywords)), + // 3.6.7 Trace field + alt((return_path, received)), + )), + perm_crlf, + )(input) +} + +// 3.6.1. The Origination Date Field +fn date(input: &str) -> IResult<&str, HeaderField> { + let (input, body) = preceded(pair(tag("Date:"), space0), datetime::section)(input)?; + Ok((input, HeaderField::Date(body))) +} + +// 3.6.2. Originator Fields +fn from(input: &str) -> IResult<&str, HeaderField> { + let (input, body) = preceded(pair(tag("From:"), space0), mailbox_list)(input)?; + Ok((input, HeaderField::From(body))) +} +fn sender(input: &str) -> IResult<&str, HeaderField> { + let (input, body) = preceded(pair(tag("Sender:"), space0), mailbox)(input)?; + Ok((input, HeaderField::Sender(body))) +} +fn reply_to(input: &str) -> IResult<&str, HeaderField> { + let (input, body) = preceded(pair(tag("Reply-To:"), space0), address_list)(input)?; + Ok((input, HeaderField::ReplyTo(body))) +} + +// 3.6.3. Destination Address Fields +fn to(input: &str) -> IResult<&str, HeaderField> { + let (input, body) = preceded(pair(tag("To:"), space0), address_list)(input)?; + Ok((input, HeaderField::To(body))) +} +fn cc(input: &str) -> IResult<&str, HeaderField> { + let (input, body) = preceded(pair(tag("Cc:"), space0), address_list)(input)?; + Ok((input, HeaderField::Cc(body))) +} +fn bcc(input: &str) -> IResult<&str, HeaderField> { + let (input, body) = preceded( + pair(tag("Bcc:"), space0), + opt(alt((address_list, address_list_cfws))), + )(input)?; + + Ok((input, HeaderField::Bcc(body.unwrap_or(vec![])))) +} + +// 3.6.4. Identification Fields +fn msg_id_field(input: &str) -> IResult<&str, HeaderField> { + let (input, body) = preceded(pair(tag("Message-ID:"), space0), msg_id)(input)?; + Ok((input, HeaderField::MessageID(body))) +} +fn in_reply_to(input: &str) -> IResult<&str, HeaderField> { + let (input, body) = preceded(pair(tag("In-Reply-To:"), space0), many1(msg_id))(input)?; + Ok((input, HeaderField::InReplyTo(body))) +} +fn references(input: &str) -> IResult<&str, HeaderField> { + let (input, body) = preceded(pair(tag("References:"), space0), many1(msg_id))(input)?; + Ok((input, HeaderField::References(body))) +} + +// 3.6.5. Informational Fields +fn subject(input: &str) -> IResult<&str, HeaderField> { + let (input, body) = preceded(pair(tag("Subject:"), space0), unstructured)(input)?; + Ok((input, HeaderField::Subject(body))) +} +fn comments(input: &str) -> IResult<&str, HeaderField> { + let (input, body) = preceded(pair(tag("Comments:"), space0), unstructured)(input)?; + Ok((input, HeaderField::Comments(body))) +} +fn keywords(input: &str) -> IResult<&str, HeaderField> { + let (input, body) = preceded( + pair(tag("Keywords:"), space0), + separated_list1(tag(","), phrase), + )(input)?; + Ok((input, HeaderField::Keywords(body))) +} + +// 3.6.6 Resent fields +// Not implemented + +// 3.6.7 Trace fields +fn return_path(input: &str) -> IResult<&str, HeaderField> { + map( + preceded(pair(tag("Return-Path:"), space0), trace::return_path_body), + |body| HeaderField::ReturnPath(body), + )(input) +} +fn received(input: &str) -> IResult<&str, HeaderField> { + map( + preceded(pair(tag("Received:"), space0), trace::received_body), + |body| HeaderField::Received(body), + )(input) +} + +/// Optional field +/// +/// ```abnf +/// field = field-name ":" unstructured CRLF +/// field-name = 1*ftext +/// ftext = %d33-57 / ; Printable US-ASCII +/// %d59-126 ; characters not including +/// ; ":". +/// ``` +fn unknown_field(input: &str) -> IResult<&str, HeaderField> { + // Extract field name + let (input, field_name) = field_name(input)?; + let (input, body) = unstructured(input)?; + Ok((input, HeaderField::Optional(field_name, body))) +} +pub fn field_name(input: &str) -> IResult<&str, &str> { + terminated( + take_while1(|c| c >= '\x21' && c <= '\x7E' && c != '\x3A'), + pair(tag(":"), space0) + )(input) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::model::{GroupRef, AddrSpec}; + + // 3.6.1. The Origination Date Field + #[test] + fn test_datetime() { + let datefield = "Date: Thu,\r\n 13\r\n Feb\r\n 1969\r\n 23:32\r\n -0330 (Newfoundland Time)\r\n"; + let (input, v) = header_field(datefield).unwrap(); + assert_eq!(input, ""); + match v { + HeaderField::Date(HeaderDate::Parsed(_)) => (), + _ => panic!("Date has not been parsed"), + }; + } + + // 3.6.2. Originator Fields + #[test] + fn test_from() { + assert_eq!( + header_field("From: \"Joe Q. Public\" \r\n"), + Ok(("", HeaderField::From(vec![MailboxRef { + name: Some("Joe Q. Public".into()), + addrspec: AddrSpec { + local_part: "john.q.public".into(), + domain: "example.com".into(), + } + }]))), + ); + } + #[test] + fn test_sender() { + assert_eq!( + header_field("Sender: Michael Jones \r\n"), + Ok(("", HeaderField::Sender(MailboxRef { + name: Some("Michael Jones".into()), + addrspec: AddrSpec { + local_part: "mjones".into(), + domain: "machine.example".into(), + }, + }))), + ); + } + #[test] + fn test_reply_to() { + assert_eq!( + header_field("Reply-To: \"Mary Smith: Personal Account\" \r\n"), + Ok(("", HeaderField::ReplyTo(vec![AddressRef::Single(MailboxRef { + name: Some("Mary Smith: Personal Account".into()), + addrspec: AddrSpec { + local_part: "smith".into(), + domain: "home.example".into(), + }, + })]))) + ); + } + + // 3.6.3. Destination Address Fields + #[test] + fn test_to() { + assert_eq!( + header_field("To: A Group:Ed Jones ,joe@where.test,John ;\r\n"), + Ok(("", HeaderField::To(vec![AddressRef::Many(GroupRef { + name: "A Group".into(), + participants: vec![ + MailboxRef { + name: Some("Ed Jones".into()), + addrspec: AddrSpec { local_part: "c".into(), domain: "a.test".into() }, + }, + MailboxRef { + name: None, + addrspec: AddrSpec { local_part: "joe".into(), domain: "where.test".into() }, + }, + MailboxRef { + name: Some("John".into()), + addrspec: AddrSpec { local_part: "jdoe".into(), domain: "one.test".into() }, + }, + ] + })]))) + ); + } + #[test] + fn test_cc() { + assert_eq!( + header_field("Cc: Undisclosed recipients:;\r\n"), + Ok(("", HeaderField::Cc(vec![AddressRef::Many(GroupRef { + name: "Undisclosed recipients".into(), + participants: vec![], + })]))) + ); + } + #[test] + fn test_bcc() { + assert_eq!( + header_field("Bcc: (empty)\r\n"), + Ok(("", HeaderField::Bcc(vec![]))) + ); + assert_eq!( + header_field("Bcc: \r\n"), + Ok(("", HeaderField::Bcc(vec![]))) + ); + } + + + // 3.6.4. Identification Fields + #[test] + fn test_message_id() { + assert_eq!( + header_field("Message-ID: <310@[127.0.0.1]>\r\n"), + Ok(("", HeaderField::MessageID(model::MessageId { left: "310", right: "127.0.0.1" }))) + ); + } + #[test] + fn test_in_reply_to() { + assert_eq!( + header_field("In-Reply-To: \r\n"), + Ok(("", HeaderField::InReplyTo(vec![ + model::MessageId { left: "a", right: "b" }, + model::MessageId { left: "c", right: "example.com" }, + ]))) + ); + } + #[test] + fn test_references() { + assert_eq!( + header_field("References: <1234@local.machine.example> <3456@example.net>\r\n"), + Ok(("", HeaderField::References(vec![ + model::MessageId { left: "1234", right: "local.machine.example" }, + model::MessageId { left: "3456", right: "example.net" }, + ]))) + ); + } + + // 3.6.5. Informational Fields + #[test] + fn test_subject() { + assert_eq!( + header_field("Subject: Aérogramme\r\n"), + Ok(("", HeaderField::Subject("Aérogramme".into()))) + ); + } + #[test] + fn test_comments() { + assert_eq!( + header_field("Comments: 😛 easter egg!\r\n"), + Ok(("", HeaderField::Comments("😛 easter egg!".into()))) + ); + } + #[test] + fn test_keywords() { + assert_eq!( + header_field("Keywords: fantasque, farfelu, fanfreluche\r\n"), + Ok(("", HeaderField::Keywords(vec!["fantasque".into(), "farfelu".into(), "fanfreluche".into()]))) + ); + } + + // Test invalid field name + #[test] + fn test_invalid_field_name() { + assert!(header_field("Unknown: unknown\r\n").is_err()); + } } diff --git a/src/lib.rs b/src/lib.rs index ef83dc5..0d773c1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,12 +11,8 @@ mod misc_token; mod mailbox; mod address; mod identification; +mod trace; +mod datetime; // Header blocks -pub mod common_fields; -pub mod trace; - -// Global mail pub mod header; - - diff --git a/src/model.rs b/src/model.rs index fcfbc24..a18c434 100644 --- a/src/model.rs +++ b/src/model.rs @@ -63,27 +63,6 @@ pub struct MessageId<'a> { pub right: &'a str, } -#[derive(Debug, PartialEq, Default)] -pub struct Trace<'a> { - // 3.6.7 Traces - pub received: Vec<&'a str>, - pub return_path: Option, - - // 3.6.6. Resent Fields - pub resent_date: HeaderDate, - pub resent_from: Vec, - pub resent_sender: Option, - pub resent_to: Vec, - pub resent_cc: Vec, - pub resent_bcc: Vec, - pub resent_msg_id: Option>, - - // 3.6.8. Optional Fields - pub optional: HashMap<&'a str, String>, - //pub failed: HashMap<&'a str, String>, - //pub garbage: &'a str, -} - /// Permissive Header Section /// /// This is a structure intended for parsing/decoding, @@ -91,7 +70,7 @@ pub struct Trace<'a> { /// as invalid according to RFC5322 but for which we can /// still extract some data. #[derive(Debug, Default)] -pub struct CommonFields<'a> { +pub struct HeaderSection<'a> { // 3.6.1. The Origination Date Field pub date: HeaderDate, @@ -115,16 +94,13 @@ pub struct CommonFields<'a> { pub comments: Vec, pub keywords: Vec, + // 3.6.6 Not implemented + // 3.6.7 Trace Fields + pub return_path: Vec, + pub received: Vec<&'a str>, + // 3.6.8. Optional Fields pub optional: HashMap<&'a str, String>, //pub failed: HashMap<&'a str, String>, //pub garbage: &'a str, } - -pub struct HeaderSection<'a> { - // 3.6.7 Traces - pub traces: Vec>, - - // 3.6.x - pub common: CommonFields<'a>, -} diff --git a/src/parse.rs b/src/parse.rs index 35b2aa2..72a6278 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -1,7 +1,14 @@ -use imf_codec::common_fields; +use imf_codec::header; fn main() { - let header = r#"Date: Fri, 21 Nov 1997 10:01:10 -0600 + let hdr = r#"Return-Path: +Delivered-To: quentin@dufour.io +Received: from smtp.deuxfleurs.fr ([10.83.2.2]) + by doradille with LMTP + id IKPyOvS8iGTxBAEAvTd7DQ + (envelope-from ) + for ; Tue, 13 Jun 2023 19:01:08 +0000 +Date: Fri, 21 Nov 1997 10:01:10 -0600 From: Mary Smith Sender: imf@example.com @@ -21,5 +28,5 @@ References: <1234@local.machine.example> This is a reply to your hello. "#; - println!("{:?}", common_fields::section(header)); + println!("{:?}", header::section(hdr)); } diff --git a/src/trace.rs b/src/trace.rs index d8f137f..c781fe2 100644 --- a/src/trace.rs +++ b/src/trace.rs @@ -4,116 +4,24 @@ use nom::{ branch::alt, bytes::complete::tag, character::complete::space0, - combinator::{map, not, opt, recognize}, - multi::{fold_many0, many0, many1}, - sequence::{delimited, preceded, terminated, pair, tuple}, + combinator::{map, opt, recognize}, + multi::many0, + sequence::{delimited, pair, tuple}, }; -use crate::{address, common_fields, identification, mailbox, model, misc_token, whitespace}; +use crate::{datetime, mailbox, model, misc_token, whitespace}; -enum RestField<'a> { - // 3.6.6. Resent Fields - ResentDate(model::HeaderDate), - ResentFrom(Vec), - ResentSender(model::MailboxRef), - ResentTo(Vec), - ResentCc(Vec), - ResentBcc(Vec), - ResentMessageID(model::MessageId<'a>), - - // 3.6.8. Optional fields - OptionalField(&'a str, String), -} - -enum PreludeField { - // 3.6.7. Trace Fields - ReturnPath(String), - Received(Vec), -} - -/// Section -/// -/// Optional fields are allowed everywhere in this implementation... -/// -/// ```abnf -///*(trace -/// *(optional-field / -/// resent-date / -/// resent-from / -/// resent-sender / -/// resent-to / -/// resent-cc / -/// resent-bcc / -/// resent-msg-id)) -/// ``` -pub fn section(input: &str) -> IResult<&str, model::Trace> { - let (input, (path, recv)) = prelude(input)?; - let (input, mut full_trace) = fold_many0( - alt((resent_field, unknown_field)), - model::Trace::default, - |mut trace, field| { - match field { - RestField::ResentDate(date) => { - trace.resent_date = date; - } - RestField::ResentFrom(from) => { - trace.resent_from = from; - } - RestField::ResentSender(sender) => { - trace.resent_sender = Some(sender); - } - RestField::ResentTo(to) => { - trace.resent_to = to; - } - RestField::ResentCc(cc) => { - trace.resent_cc = cc; - } - RestField::ResentBcc(bcc) => { - trace.resent_bcc = bcc; - } - RestField::ResentMessageID(mid) => { - trace.resent_msg_id = Some(mid); - } - RestField::OptionalField(name, body) => { - trace.optional.insert(name, body); - } - }; - trace - } - )(input)?; - full_trace.received = recv; - full_trace.return_path = path; - - Ok((input, full_trace)) -} - -/// Trace prelude -/// -/// ```abnf -/// trace = [return] -/// 1*received -/// return = "Return-Path:" path CRLF -/// path = angle-addr / ([CFWS] "<" [CFWS] ">" [CFWS]) -/// received = "Received:" *received-token ";" date-time CRLF -/// received-token = word / angle-addr / addr-spec / domain -/// ``` -fn prelude(input: &str) -> IResult<&str, (Option, Vec<&str>)> { - let (input, (return_path, received)) = pair( - opt(return_path_field), - many1(received_field), - )(input)?; - - Ok((input, (return_path.flatten(), received))) -} - -fn return_path_field(input: &str) -> IResult<&str, Option> { - delimited( - pair(tag("Return-Path:"), space0), - path, - whitespace::perm_crlf, +pub fn received_body(input: &str) -> IResult<&str, &str> { + map( + tuple(( + recognize(many0(received_tokens)), + tag(";"), + datetime::section, + )), + |(tokens, _, _)| tokens, )(input) } -fn path(input: &str) -> IResult<&str, Option> { +pub fn return_path_body(input: &str) -> IResult<&str, Option> { alt(( map(mailbox::angle_addr, |a| Some(a)), empty_path @@ -131,18 +39,6 @@ fn empty_path(input: &str) -> IResult<&str, Option> { Ok((input, None)) } -fn received_field(input: &str) -> IResult<&str, &str> { - let (input, (_, tk, _, _, _)) = tuple(( - pair(tag("Received:"), space0), - recognize(many0(received_tokens)), - tag(";"), - common_fields::datetime, - whitespace::perm_crlf, - ))(input)?; - - Ok((input, tk)) -} - fn received_tokens(input: &str) -> IResult<&str, &str> { alt(( recognize(mailbox::angle_addr), @@ -152,73 +48,6 @@ fn received_tokens(input: &str) -> IResult<&str, &str> { ))(input) } -fn resent_field(input: &str) -> IResult<&str, RestField> { - terminated( - alt(( - resent_date, - resent_from, - resent_sender, - resent_to, - resent_cc, - resent_bcc, - resent_msg_id, - )), - whitespace::perm_crlf, - )(input) -} - -fn resent_date(input: &str) -> IResult<&str, RestField> { - let (input, body) = preceded(pair(tag("Resent-Date:"), space0), common_fields::datetime)(input)?; - Ok((input, RestField::ResentDate(body))) -} - -fn resent_from(input: &str) -> IResult<&str, RestField> { - let (input, body) = preceded(pair(tag("Resent-From:"), space0), address::mailbox_list)(input)?; - Ok((input, RestField::ResentFrom(body))) -} - -fn resent_sender(input: &str) -> IResult<&str, RestField> { - let (input, body) = preceded(pair(tag("Resent-Sender:"), space0), mailbox::mailbox)(input)?; - Ok((input, RestField::ResentSender(body))) -} - -fn resent_to(input: &str) -> IResult<&str, RestField> { - let (input, body) = preceded(pair(tag("Resent-To:"), space0), address::address_list)(input)?; - Ok((input, RestField::ResentTo(body))) -} - -fn resent_cc(input: &str) -> IResult<&str, RestField> { - let (input, body) = preceded(pair(tag("Resent-Cc:"), space0), address::address_list)(input)?; - Ok((input, RestField::ResentCc(body))) -} - -fn resent_bcc(input: &str) -> IResult<&str, RestField> { - let (input, body) = preceded( - pair(tag("Resent-Bcc:"), space0), - opt(alt((address::address_list, address::address_list_cfws))), - )(input)?; - - Ok((input, RestField::ResentBcc(body.unwrap_or(vec![])))) -} - -fn resent_msg_id(input: &str) -> IResult<&str, RestField> { - let (input, body) = preceded(pair(tag("Resent-Message-ID:"), space0), identification::msg_id)(input)?; - Ok((input, RestField::ResentMessageID(body))) -} - -fn unknown_field(input: &str) -> IResult<&str, RestField> { - // Check that we: - // 1. do not start a new trace - // 2. do not start the common fields - not(prelude)(input)?; - not(common_fields::header_field)(input)?; - - // Extract field name - let (input, field_name) = common_fields::field_name(input)?; - let (input, body) = misc_token::unstructured(input)?; - let (input, _) = whitespace::perm_crlf(input)?; - Ok((input, RestField::OptionalField(field_name, body))) -} #[cfg(test)] mod tests { @@ -226,90 +55,20 @@ mod tests { use chrono::{FixedOffset, TimeZone}; #[test] - fn test_section() { - let hdrs = r#"Return-Path: -Received: from smtp.example.com ([10.83.2.2]) + fn test_received() { + let hdrs = r#"from smtp.example.com ([10.83.2.2]) by server with LMTP id xxxxxxxxx (envelope-from ) - for ; Tue, 13 Jun 2023 19:01:08 +0000 -Resent-Date: Tue, 13 Jun 2023 21:01:07 +0200 -Resent-From: -Resent-Sender: you@example.com -X-Specific: XOXO -Resent-To: Annah -Resent-Cc: Empty:; -Resent-Bcc: -Resent-Message-ID: -"#; + for ; Tue, 13 Jun 2023 19:01:08 +0000"#; + assert_eq!( - section(hdrs), - Ok(("", model::Trace { - return_path: Some(model::MailboxRef { - name: None, - addrspec: model::AddrSpec { - local_part: "gitlab".into(), - domain: "example.com".into(), - } - }), - received: vec![ - r#"from smtp.example.com ([10.83.2.2]) + received_body(hdrs), + Ok(("", r#"from smtp.example.com ([10.83.2.2]) by server with LMTP id xxxxxxxxx (envelope-from ) - for "#, - ], - - resent_date: model::HeaderDate::Parsed( - FixedOffset::east_opt(2 * 3600) - .unwrap() - .with_ymd_and_hms(2023, 06, 13, 21, 1, 7) - .unwrap()), - - resent_from: vec![ - model::MailboxRef { - name: None, - addrspec: model::AddrSpec { - local_part: "you".into(), - domain: "example.com".into(), - } - } - ], - - resent_sender: Some(model::MailboxRef { - name: None, - addrspec: model::AddrSpec { - local_part: "you".into(), - domain: "example.com".into(), - } - }), - - resent_to: vec![ - model::AddressRef::Single(model::MailboxRef { - name: Some("Annah".into()), - addrspec: model::AddrSpec { - local_part: "annah".into(), - domain: "example.com".into(), - } - }) - ], - - resent_cc: vec![ - model::AddressRef::Many(model::GroupRef { - name: "Empty".into(), - participants: vec![], - }) - ], - - resent_bcc: vec![], - - resent_msg_id: Some(model::MessageId { - left: "note_1985938", - right: "example.com", - }), - - optional: HashMap::from([("X-Specific", "XOXO".into())]), - })) + for "#)) ); } }