From ba59b037ef94ce77bc1d46bcab87dee95d6e5f7a Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Wed, 30 Aug 2023 11:35:29 +0200 Subject: [PATCH 1/8] add an header kv function --- src/header.rs | 58 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 10 deletions(-) diff --git a/src/header.rs b/src/header.rs index 213dcbb..084491b 100644 --- a/src/header.rs +++ b/src/header.rs @@ -5,8 +5,8 @@ use nom::{ branch::alt, bytes::complete::{tag, tag_no_case, take_while1}, character::complete::space0, - combinator::map, - multi::{fold_many0}, + combinator::{into, map}, + multi::{fold_many0, many0}, sequence::{pair, terminated, tuple}, IResult, }; @@ -20,6 +20,40 @@ pub enum CompField<'a, T> { #[derive(Debug, PartialEq, Clone)] pub struct Kv<'a>(pub &'a [u8], pub Unstructured<'a>); +impl<'a> From<(&'a [u8], Unstructured<'a>)> for Kv<'a> { + fn from(pair: (&'a [u8], Unstructured<'a>)) -> Self { + Self(pair.0, pair.1) + } +} + +#[derive(Debug, PartialEq, Clone)] +pub enum Field<'a> { + Good(Kv<'a>), + Bad(&'a [u8]), +} +impl<'a> From> for Field<'a> { + fn from(kv: Kv<'a>) -> Self { + Self::Good(kv) + } +} +impl<'a> From<&'a [u8]> for Field<'a> { + fn from(bad: &'a [u8]) -> Self { + Self::Bad(bad) + } +} + +/// Parse headers as key/values +pub fn header_kv(input: &[u8]) -> IResult<&[u8], Vec> { + terminated( + many0( + alt(( + into(opt_field), + into(foldable_line), + )) + ), + obs_crlf + )(input) +} pub fn header<'a, T>( @@ -30,7 +64,7 @@ pub fn header<'a, T>( fold_many0( alt(( map(fx, CompField::Known), - map(opt_field, |(k, v)| CompField::Unknown(Kv(k, v))), + map(opt_field, CompField::Unknown), map(foldable_line, CompField::Bad), )), || (Vec::::new(), Vec::::new(), Vec::<&'a [u8]>::new()), @@ -52,6 +86,13 @@ pub fn field_name<'a>(name: &'static [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [ move |input| terminated(tag_no_case(name), tuple((space0, tag(b":"), space0)))(input) } +pub fn field_any(input: &[u8]) -> IResult<&[u8], &[u8]> { + terminated( + take_while1(|c| (0x21..=0x7E).contains(&c) && c != 0x3A), + tuple((space0, tag(b":"), space0)), + )(input) +} + /// Optional field /// /// ```abnf @@ -61,15 +102,12 @@ pub fn field_name<'a>(name: &'static [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [ /// %d59-126 ; characters not including /// ; ":". /// ``` -pub fn opt_field(input: &[u8]) -> IResult<&[u8], (&[u8], Unstructured)> { +pub fn opt_field(input: &[u8]) -> IResult<&[u8], Kv> { terminated( - pair( - terminated( - take_while1(|c| (0x21..=0x7E).contains(&c) && c != 0x3A), - tuple((space0, tag(b":"), space0)), - ), + into(pair( + field_any, unstructured, - ), + )), obs_crlf, )(input) } -- 2.45.2 From dfb5b9fe0f7349dbb53368b52f077f4eb3e62e26 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Wed, 30 Aug 2023 11:35:46 +0200 Subject: [PATCH 2/8] refactor imf parsing --- src/imf/field.rs | 67 +++------------------------------------------ src/imf/mod.rs | 70 +++++++++++++++++++++++++++++++++++++++++++++++- src/lib.rs | 2 +- 3 files changed, 74 insertions(+), 65 deletions(-) diff --git a/src/imf/field.rs b/src/imf/field.rs index c9ea2bd..976bf4f 100644 --- a/src/imf/field.rs +++ b/src/imf/field.rs @@ -6,14 +6,13 @@ use nom::{ IResult, }; -use crate::header::{field_name, header}; +use crate::header::{field_name}; use crate::imf::address::{address_list, mailbox_list, nullable_address_list, AddressList}; use crate::imf::datetime::section as date; use crate::imf::identification::{msg_id, msg_list, MessageID, MessageIDList}; use crate::imf::mailbox::{mailbox, AddrSpec, MailboxList, MailboxRef}; use crate::imf::mime::{version, Version}; use crate::imf::trace::{received_log, return_path, ReceivedLog}; -use crate::imf::Imf; use crate::text::misc_token::{phrase_list, unstructured, PhraseList, Unstructured}; use crate::text::whitespace::obs_crlf; @@ -49,6 +48,9 @@ pub enum Field<'a> { MIMEVersion(Version), } +/*impl<'a> From> for Field<'a> { + fn from(raw: header::Field +}*/ pub fn field(input: &[u8]) -> IResult<&[u8], Field> { terminated( @@ -79,64 +81,3 @@ pub fn field(input: &[u8]) -> IResult<&[u8], Field> { obs_crlf, )(input) } - -pub fn imf(input: &[u8]) -> IResult<&[u8], Imf> { - map(header(field), |(known, unknown, bad)| { - let mut imf = Imf::from_iter(known); - imf.header_ext = unknown; - imf.header_bad = bad; - imf - })(input) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::imf::address::*; - use crate::imf::mailbox::*; - use crate::text::misc_token::*; - use chrono::{FixedOffset, TimeZone}; - - #[test] - fn test_header() { - let fullmail = b"Date: 7 Mar 2023 08:00:00 +0200 -From: someone@example.com -To: someone_else@example.com -Subject: An RFC 822 formatted message - -This is the plain text body of the message. Note the blank line -between the header information and the body of the message."; - - assert_eq!( - imf(fullmail), - Ok(( - &b"This is the plain text body of the message. Note the blank line\nbetween the header information and the body of the message."[..], - Imf { - date: Some(FixedOffset::east_opt(2 * 3600).unwrap().with_ymd_and_hms(2023, 3, 7, 8, 0, 0).unwrap()), - from: vec![MailboxRef { - name: None, - addrspec: AddrSpec { - local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"someone"[..]))]), - domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]), - } - }], - to: vec![AddressRef::Single(MailboxRef { - name: None, - addrspec: AddrSpec { - local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"someone_else"[..]))]), - domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]), - } - })], - subject: Some(Unstructured(vec![ - UnstrToken::Plain(&b"An"[..]), - UnstrToken::Plain(&b"RFC"[..]), - UnstrToken::Plain(&b"822"[..]), - UnstrToken::Plain(&b"formatted"[..]), - UnstrToken::Plain(&b"message"[..]), - ])), - ..Imf::default() - } - )), - ) - } -} diff --git a/src/imf/mod.rs b/src/imf/mod.rs index 7817ac7..51f1268 100644 --- a/src/imf/mod.rs +++ b/src/imf/mod.rs @@ -8,8 +8,14 @@ pub mod mailbox; pub mod mime; pub mod trace; +use nom::{ + combinator::map, + IResult, +}; + +use crate::header::header; use crate::imf::address::AddressRef; -use crate::imf::field::Field; +use crate::imf::field::{field, Field}; use crate::imf::identification::MessageID; use crate::imf::mailbox::{AddrSpec, MailboxRef}; use crate::imf::mime::Version; @@ -92,3 +98,65 @@ impl<'a> FromIterator> for Imf<'a> { }) } } + +pub fn imf(input: &[u8]) -> IResult<&[u8], Imf> { + map(header(field), |(known, unknown, bad)| { + let mut imf = Imf::from_iter(known); + imf.header_ext = unknown; + imf.header_bad = bad; + imf + })(input) +} + + +#[cfg(test)] +mod tests { + use super::*; + use crate::imf::address::*; + use crate::imf::mailbox::*; + use crate::text::misc_token::*; + use chrono::{FixedOffset, TimeZone}; + + #[test] + fn test_header() { + let fullmail = b"Date: 7 Mar 2023 08:00:00 +0200 +From: someone@example.com +To: someone_else@example.com +Subject: An RFC 822 formatted message + +This is the plain text body of the message. Note the blank line +between the header information and the body of the message."; + + assert_eq!( + imf(fullmail), + Ok(( + &b"This is the plain text body of the message. Note the blank line\nbetween the header information and the body of the message."[..], + Imf { + date: Some(FixedOffset::east_opt(2 * 3600).unwrap().with_ymd_and_hms(2023, 3, 7, 8, 0, 0).unwrap()), + from: vec![MailboxRef { + name: None, + addrspec: AddrSpec { + local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"someone"[..]))]), + domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]), + } + }], + to: vec![AddressRef::Single(MailboxRef { + name: None, + addrspec: AddrSpec { + local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"someone_else"[..]))]), + domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]), + } + })], + subject: Some(Unstructured(vec![ + UnstrToken::Plain(&b"An"[..]), + UnstrToken::Plain(&b"RFC"[..]), + UnstrToken::Plain(&b"822"[..]), + UnstrToken::Plain(&b"formatted"[..]), + UnstrToken::Plain(&b"message"[..]), + ])), + ..Imf::default() + } + )), + ) + } +} diff --git a/src/lib.rs b/src/lib.rs index b133b9e..edb327a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -98,5 +98,5 @@ pub fn parse_message(input: &[u8]) -> IResult<&[u8], part::composite::Message> { /// ); /// ``` pub fn parse_imf(input: &[u8]) -> IResult<&[u8], imf::Imf> { - imf::field::imf(input) + imf::imf(input) } -- 2.45.2 From 628fbc507dafd666f966185ce22fd12d5c21151c Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Wed, 30 Aug 2023 11:46:23 +0200 Subject: [PATCH 3/8] fix raw mime test 1/2 --- src/part/composite.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/part/composite.rs b/src/part/composite.rs index dc2ac68..49201c1 100644 --- a/src/part/composite.rs +++ b/src/part/composite.rs @@ -229,7 +229,10 @@ It DOES end with a linebreak. subtype: mime::r#type::TextSubtype::Plain, charset: mime::r#type::Deductible::Inferred(mime::charset::EmailCharset::US_ASCII), }), - fields: mime::NaiveMIME::default(), + fields: mime::NaiveMIME { + raw: &b"\n"[..], + ..mime::NaiveMIME::default() + }, }, body: &b"This is implicitly typed plain US-ASCII text.\nIt does NOT end with a linebreak."[..], }), @@ -250,6 +253,7 @@ It DOES end with a linebreak. } ] }), + raw: &b"Content-type: text/plain; charset=us-ascii\n\n"[..], ..mime::NaiveMIME::default() }, }, -- 2.45.2 From d9cf6b225d6b5456cdd4f18245b828b278cdf40e Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Wed, 30 Aug 2023 13:31:24 +0200 Subject: [PATCH 4/8] fix raw mime test 2/2 --- src/part/composite.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/part/composite.rs b/src/part/composite.rs index 49201c1..373ffd8 100644 --- a/src/part/composite.rs +++ b/src/part/composite.rs @@ -487,6 +487,7 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
} ] }), + raw: hdrs, ..mime::NaiveMIME::default() }, }, @@ -511,6 +512,7 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
] }), transfer_encoding: mime::mechanism::Mechanism::QuotedPrintable, + raw: &b"Content-Type: text/plain; charset=utf-8\nContent-Transfer-Encoding: quoted-printable\n\n"[..], ..mime::NaiveMIME::default() } }, @@ -534,6 +536,7 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
} ] }), + raw: &b"Content-Type: text/html; charset=us-ascii\n\n"[..], ..mime::NaiveMIME::default() }, }, -- 2.45.2 From 18bb04340acd84da55d188001f03c354a5ed2bfe Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Wed, 30 Aug 2023 19:00:08 +0200 Subject: [PATCH 5/8] refactor headers --- src/header.rs | 66 +++++++++-------------------------------- src/imf/field.rs | 68 ++++++++++++++++++------------------------- src/imf/mod.rs | 25 +++------------- src/mime/field.rs | 55 +++++++++++++++------------------- src/mime/mod.rs | 10 ++----- src/part/composite.rs | 43 ++++++++++++--------------- src/part/field.rs | 61 ++++++++++---------------------------- 7 files changed, 107 insertions(+), 221 deletions(-) diff --git a/src/header.rs b/src/header.rs index 084491b..88bbde8 100644 --- a/src/header.rs +++ b/src/header.rs @@ -1,38 +1,31 @@ - -use crate::text::misc_token::{unstructured, Unstructured}; -use crate::text::whitespace::{foldable_line, obs_crlf}; use nom::{ branch::alt, - bytes::complete::{tag, tag_no_case, take_while1}, + bytes::complete::{tag, take_while1}, character::complete::space0, - combinator::{into, map}, - multi::{fold_many0, many0}, + combinator::{into, recognize}, + multi::many0, sequence::{pair, terminated, tuple}, IResult, }; -#[derive(Debug, PartialEq)] -pub enum CompField<'a, T> { - Known(T), - Unknown(Kv<'a>), - Bad(&'a [u8]), -} +use crate::text::whitespace::{foldable_line, obs_crlf}; +use crate::text::misc_token::unstructured; #[derive(Debug, PartialEq, Clone)] -pub struct Kv<'a>(pub &'a [u8], pub Unstructured<'a>); -impl<'a> From<(&'a [u8], Unstructured<'a>)> for Kv<'a> { - fn from(pair: (&'a [u8], Unstructured<'a>)) -> Self { +pub struct Kv2<'a>(pub &'a [u8], pub &'a [u8]); +impl<'a> From<(&'a [u8], &'a [u8])> for Kv2<'a> { + fn from(pair: (&'a [u8], &'a [u8])) -> Self { Self(pair.0, pair.1) } } #[derive(Debug, PartialEq, Clone)] pub enum Field<'a> { - Good(Kv<'a>), + Good(Kv2<'a>), Bad(&'a [u8]), } -impl<'a> From> for Field<'a> { - fn from(kv: Kv<'a>) -> Self { +impl<'a> From> for Field<'a> { + fn from(kv: Kv2<'a>) -> Self { Self::Good(kv) } } @@ -47,7 +40,7 @@ pub fn header_kv(input: &[u8]) -> IResult<&[u8], Vec> { terminated( many0( alt(( - into(opt_field), + into(correct_field), into(foldable_line), )) ), @@ -55,37 +48,6 @@ pub fn header_kv(input: &[u8]) -> IResult<&[u8], Vec> { )(input) } - -pub fn header<'a, T>( - fx: impl Fn(&'a [u8]) -> IResult<&'a [u8], T> + Copy, -) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], (Vec::, Vec::, Vec<&'a [u8]>)> { - move |input| { - terminated( - fold_many0( - alt(( - map(fx, CompField::Known), - map(opt_field, CompField::Unknown), - map(foldable_line, CompField::Bad), - )), - || (Vec::::new(), Vec::::new(), Vec::<&'a [u8]>::new()), - |(mut known, mut unknown, mut bad), item| { - match item { - CompField::Known(v) => known.push(v), - CompField::Unknown(v) => unknown.push(v), - CompField::Bad(v) => bad.push(v), - }; - (known, unknown, bad) - } - ), - obs_crlf, - )(input) - } -} - -pub fn field_name<'a>(name: &'static [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> { - move |input| terminated(tag_no_case(name), tuple((space0, tag(b":"), space0)))(input) -} - pub fn field_any(input: &[u8]) -> IResult<&[u8], &[u8]> { terminated( take_while1(|c| (0x21..=0x7E).contains(&c) && c != 0x3A), @@ -102,11 +64,11 @@ pub fn field_any(input: &[u8]) -> IResult<&[u8], &[u8]> { /// %d59-126 ; characters not including /// ; ":". /// ``` -pub fn opt_field(input: &[u8]) -> IResult<&[u8], Kv> { +pub fn correct_field(input: &[u8]) -> IResult<&[u8], Kv2> { terminated( into(pair( field_any, - unstructured, + recognize(unstructured), )), obs_crlf, )(input) diff --git a/src/imf/field.rs b/src/imf/field.rs index 976bf4f..17a48f9 100644 --- a/src/imf/field.rs +++ b/src/imf/field.rs @@ -1,12 +1,7 @@ use chrono::{DateTime, FixedOffset}; -use nom::{ - branch::alt, - combinator::map, - sequence::{preceded, terminated}, - IResult, -}; +use nom::combinator::map; -use crate::header::{field_name}; +use crate::header; use crate::imf::address::{address_list, mailbox_list, nullable_address_list, AddressList}; use crate::imf::datetime::section as date; use crate::imf::identification::{msg_id, msg_list, MessageID, MessageIDList}; @@ -14,7 +9,6 @@ use crate::imf::mailbox::{mailbox, AddrSpec, MailboxList, MailboxRef}; use crate::imf::mime::{version, Version}; use crate::imf::trace::{received_log, return_path, ReceivedLog}; use crate::text::misc_token::{phrase_list, unstructured, PhraseList, Unstructured}; -use crate::text::whitespace::obs_crlf; #[derive(Debug, PartialEq)] pub enum Field<'a> { @@ -48,36 +42,32 @@ pub enum Field<'a> { MIMEVersion(Version), } -/*impl<'a> From> for Field<'a> { - fn from(raw: header::Field -}*/ +impl<'a> TryFrom<&header::Field<'a>> for Field<'a> { + type Error = (); + fn try_from(f: &header::Field<'a>) -> Result { + let content = match f { + header::Field::Good(header::Kv2(key, value)) => match key.to_ascii_lowercase().as_slice() { + b"date" => map(date, Field::Date)(value), + b"from" => map(mailbox_list, Field::From)(value), + b"sender" => map(mailbox, Field::Sender)(value), + b"reply-to" => map(address_list, Field::ReplyTo)(value), + b"to" => map(address_list, Field::To)(value), + b"cc" => map(address_list, Field::Cc)(value), + b"bcc" => map(nullable_address_list, Field::Bcc)(value), + b"message-id" => map(msg_id, Field::MessageID)(value), + b"in-reply-to" => map(msg_list, Field::InReplyTo)(value), + b"references" => map(msg_list, Field::References)(value), + b"subject" => map(unstructured, Field::Subject)(value), + b"comments" => map(unstructured, Field::Comments)(value), + b"keywords" => map(phrase_list, Field::Keywords)(value), + b"return-path" => map(return_path, Field::ReturnPath)(value), + b"received" => map(received_log, Field::Received)(value), + b"mime-version" => map(version, Field::MIMEVersion)(value), + _ => return Err(()), + }, + _ => return Err(()), + }; -pub fn field(input: &[u8]) -> IResult<&[u8], Field> { - terminated( - alt(( - preceded(field_name(b"date"), map(date, Field::Date)), - preceded(field_name(b"from"), map(mailbox_list, Field::From)), - preceded(field_name(b"sender"), map(mailbox, Field::Sender)), - preceded(field_name(b"reply-to"), map(address_list, Field::ReplyTo)), - preceded(field_name(b"to"), map(address_list, Field::To)), - preceded(field_name(b"cc"), map(address_list, Field::Cc)), - preceded(field_name(b"bcc"), map(nullable_address_list, Field::Bcc)), - preceded(field_name(b"message-id"), map(msg_id, Field::MessageID)), - preceded(field_name(b"in-reply-to"), map(msg_list, Field::InReplyTo)), - preceded(field_name(b"references"), map(msg_list, Field::References)), - preceded(field_name(b"subject"), map(unstructured, Field::Subject)), - preceded(field_name(b"comments"), map(unstructured, Field::Comments)), - preceded(field_name(b"keywords"), map(phrase_list, Field::Keywords)), - preceded( - field_name(b"return-path"), - map(return_path, Field::ReturnPath), - ), - preceded(field_name(b"received"), map(received_log, Field::Received)), - preceded( - field_name(b"mime-version"), - map(version, Field::MIMEVersion), - ), - )), - obs_crlf, - )(input) + content.map(|(_, content)| content).or(Err(())) + } } diff --git a/src/imf/mod.rs b/src/imf/mod.rs index 51f1268..bd7e24a 100644 --- a/src/imf/mod.rs +++ b/src/imf/mod.rs @@ -13,14 +13,13 @@ use nom::{ IResult, }; -use crate::header::header; +use crate::header; use crate::imf::address::AddressRef; -use crate::imf::field::{field, Field}; +use crate::imf::field::Field; use crate::imf::identification::MessageID; use crate::imf::mailbox::{AddrSpec, MailboxRef}; use crate::imf::mime::Version; use crate::imf::trace::ReceivedLog; -use crate::header; use crate::text::misc_token::{PhraseList, Unstructured}; use chrono::{DateTime, FixedOffset}; @@ -56,19 +55,6 @@ pub struct Imf<'a> { // MIME pub mime_version: Option, - - // Junk - pub header_ext: Vec>, - pub header_bad: Vec<&'a [u8]>, -} - -impl<'a> Imf<'a> { - pub fn with_opt(mut self, opt: Vec>) -> Self { - self.header_ext = opt; self - } - pub fn with_bad(mut self, bad: Vec<&'a [u8]>) -> Self { - self.header_bad = bad; self - } } //@FIXME min and max limits are not enforced, @@ -100,11 +86,8 @@ impl<'a> FromIterator> for Imf<'a> { } pub fn imf(input: &[u8]) -> IResult<&[u8], Imf> { - map(header(field), |(known, unknown, bad)| { - let mut imf = Imf::from_iter(known); - imf.header_ext = unknown; - imf.header_bad = bad; - imf + map(header::header_kv, |fields| { + fields.iter().flat_map(Field::try_from).into_iter().collect::() })(input) } diff --git a/src/mime/field.rs b/src/mime/field.rs index b11210d..2489516 100644 --- a/src/mime/field.rs +++ b/src/mime/field.rs @@ -1,16 +1,10 @@ -use nom::{ - branch::alt, - combinator::map, - sequence::{preceded, terminated}, - IResult, -}; +use nom::combinator::map; -use crate::header::{field_name}; +use crate::header; use crate::imf::identification::{msg_id, MessageID}; use crate::mime::mechanism::{mechanism, Mechanism}; use crate::mime::r#type::{naive_type, NaiveType}; use crate::text::misc_token::{unstructured, Unstructured}; -use crate::text::whitespace::obs_crlf; #[derive(Debug, PartialEq)] pub enum Content<'a> { @@ -47,38 +41,35 @@ impl<'a> Content<'a> { } } -/* -pub fn to_mime<'a, T: WithDefaultType>(list: Vec>) -> AnyMIMEWithDefault<'a, T> { - list.into_iter().collect::>() -}*/ +impl<'a> TryFrom<&header::Field<'a>> for Content<'a> { + type Error = (); + fn try_from(f: &header::Field<'a>) -> Result { + let content = match f { + header::Field::Good(header::Kv2(key, value)) => match key.to_ascii_lowercase().as_slice() { + b"content-type" => map(naive_type, Content::Type)(value), + b"content-transfer-encoding" => map(mechanism, Content::TransferEncoding)(value), + b"content-id" => map(msg_id, Content::ID)(value), + b"content-description" => map(unstructured, Content::Description)(value), + _ => return Err(()), + }, + _ => return Err(()), + }; -pub fn content(input: &[u8]) -> IResult<&[u8], Content> { - terminated( - alt(( - preceded(field_name(b"content-type"), map(naive_type, Content::Type)), - preceded( - field_name(b"content-transfer-encoding"), - map(mechanism, Content::TransferEncoding), - ), - preceded(field_name(b"content-id"), map(msg_id, Content::ID)), - preceded( - field_name(b"content-description"), - map(unstructured, Content::Description), - ), - )), - obs_crlf, - )(input) + //@TODO check that the full value is parsed, otherwise maybe log an error ?! + content.map(|(_, content)| content).or(Err(())) + } } #[cfg(test)] mod tests { use super::*; - use crate::header::{header}; - use crate::mime::charset::EmailCharset; + use crate::header; + //use crate::mime::charset::EmailCharset; use crate::mime::r#type::*; use crate::text::misc_token::MIMEWord; use crate::text::quoted::QuotedString; + /* #[test] fn test_content_type() { let (rest, content) = @@ -96,7 +87,7 @@ mod tests { } else { panic!("Expected Content::Type, got {:?}", content); } - } + }*/ #[test] fn test_header() { @@ -116,7 +107,7 @@ This is a multipart message. .as_bytes(); assert_eq!( - map(header(content), |(k, _, _)| k)(fullmail), + map(header::header_kv, |k| k.iter().flat_map(Content::try_from).collect())(fullmail), Ok(( &b"This is a multipart message.\n\n"[..], vec![ diff --git a/src/mime/mod.rs b/src/mime/mod.rs index 2dd0c21..b7de69d 100644 --- a/src/mime/mod.rs +++ b/src/mime/mod.rs @@ -61,8 +61,7 @@ pub struct NaiveMIME<'a> { pub transfer_encoding: Mechanism<'a>, pub id: Option>, pub description: Option>, - pub header_ext: Vec>, - pub header_bad: Vec<&'a [u8]>, + pub fields: Vec>, pub raw: &'a [u8], } @@ -84,11 +83,8 @@ impl<'a> FromIterator> for NaiveMIME<'a> { } impl<'a> NaiveMIME<'a> { - pub fn with_opt(mut self, opt: Vec>) -> Self { - self.header_ext = opt; self - } - pub fn with_bad(mut self, bad: Vec<&'a [u8]>) -> Self { - self.header_bad = bad; self + pub fn with_fields(mut self, fields: Vec>) -> Self { + self.fields = fields; self } pub fn with_raw(mut self, raw: &'a [u8]) -> Self { self.raw = raw; self diff --git a/src/part/composite.rs b/src/part/composite.rs index 373ffd8..bd7231b 100644 --- a/src/part/composite.rs +++ b/src/part/composite.rs @@ -1,9 +1,9 @@ use nom::IResult; -use crate::header::{header, self}; +use crate::header; use crate::imf; use crate::mime; -use crate::part::{self, AnyPart, field::MixedField}; +use crate::part::{self, AnyPart}; use crate::text::boundary::{boundary, Delimiter}; use crate::pointers; @@ -73,15 +73,19 @@ pub fn multipart<'a>( }; // parse mime headers, otherwise pick default mime - let (input, naive_mime) = match header(mime::field::content)(input) { - Ok((input_eom, (known, unknown, bad))) => { + let (input, naive_mime) = match header::header_kv(input) { + Ok((input_eom, fields)) => { let raw_hdrs = pointers::parsed(input, input_eom); - let mime = known + let mime = fields + .iter() + .flat_map(mime::field::Content::try_from) .into_iter() - .collect::() - .with_opt(unknown) - .with_bad(bad) + .collect::(); + + let mime = mime + .with_fields(fields) .with_raw(raw_hdrs); + (input_eom, mime) }, Err(_) => (input, mime::NaiveMIME::default()), @@ -127,23 +131,21 @@ pub fn message<'a>( let orig = input; // parse header fields - let (input, (known, unknown, bad)): (_, (Vec::, Vec, Vec<&[u8]>)) = - header(part::field::mixed_field)(input)?; + let (input, headers) = header::header_kv(input)?; // extract raw parts 1/2 let raw_headers = pointers::parsed(orig, input); let body_orig = input; + //--------------- // aggregate header fields - let (naive_mime, imf) = part::field::sections(known); - - // attach bad headers to imf - let imf = imf.with_opt(unknown).with_bad(bad); + let (naive_mime, imf) = part::field::split_and_build(&headers); // interpret headers to choose a mime type - let in_mime = naive_mime.with_raw(raw_headers).to_interpreted::().into(); + let in_mime = naive_mime.with_fields(headers).with_raw(raw_headers).to_interpreted::().into(); + //--------------- - // parse this mimetype + // parse a part following this mime specification let (input, part) = part::anypart(in_mime)(input)?; // extract raw parts 2/2 @@ -459,15 +461,6 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
right: &b"www.grrrndzero.org"[..], }), mime_version: Some(imf::mime::Version { major: 1, minor: 0}), - header_ext: vec![ - header::Kv(&b"X-Unknown"[..], Unstructured(vec![ - UnstrToken::Plain(&b"something"[..]), - UnstrToken::Plain(&b"something"[..]), - ])) - ], - header_bad: vec![ - &b"Bad entry\n on multiple lines\n"[..], - ], ..imf::Imf::default() }, child: Box::new(AnyPart::Mult(Multipart { diff --git a/src/part/field.rs b/src/part/field.rs index 3f4a6dc..2717efd 100644 --- a/src/part/field.rs +++ b/src/part/field.rs @@ -1,50 +1,21 @@ -use nom::{branch::alt, combinator::map, IResult}; - +use crate::header; use crate::imf; use crate::mime; -pub enum MixedField<'a> { - MIME(mime::field::Content<'a>), - IMF(imf::field::Field<'a>), -} -#[allow(dead_code)] -impl<'a> MixedField<'a> { - pub fn mime(&self) -> Option<&mime::field::Content<'a>> { - match self { - Self::MIME(v) => Some(v), - _ => None, +pub fn split_and_build<'a>(v: &Vec>) -> (mime::NaiveMIME<'a>, imf::Imf<'a>) { + let (mimev, imfv) = v.iter().fold( + (Vec::::new(), Vec::::new()), + |(mut mime, mut imf), f| { + if let Ok(m) = mime::field::Content::try_from(f) { + mime.push(m); + } else if let Ok(i) = imf::field::Field::try_from(f) { + imf.push(i); + } + (mime, imf) } - } - pub fn to_mime(self) -> Option> { - match self { - Self::MIME(v) => Some(v), - _ => None, - } - } - pub fn imf(&self) -> Option<&imf::field::Field<'a>> { - match self { - Self::IMF(v) => Some(v), - _ => None, - } - } - pub fn to_imf(self) -> Option> { - match self { - Self::IMF(v) => Some(v), - _ => None, - } - } -} - -pub fn sections<'a>(list: Vec>) -> (mime::NaiveMIME<'a>, imf::Imf<'a>) { - let (v1, v2): (Vec, Vec<_>) = list.into_iter().partition(|v| v.mime().is_some()); - let mime = v1.into_iter().flat_map(MixedField::to_mime).collect::(); - let imf = v2.into_iter().flat_map(MixedField::to_imf).collect::(); - (mime, imf) -} - -pub fn mixed_field(input: &[u8]) -> IResult<&[u8], MixedField> { - alt(( - map(mime::field::content, MixedField::MIME), - map(imf::field::field, MixedField::IMF), - ))(input) + ); + + let fmime = mimev.into_iter().collect::(); + let fimf = imfv.into_iter().collect::(); + (fmime, fimf) } -- 2.45.2 From 9b828ad6ad2ee1cf12d78fea5732fed1fb0e34b4 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Wed, 30 Aug 2023 19:30:10 +0200 Subject: [PATCH 6/8] better debug --- src/header.rs | 11 ++++++++++- src/mime/mod.rs | 21 +++++++++++++++++---- src/mime/type.rs | 23 +++++++++++++++++++++-- src/part/composite.rs | 34 ++++++++++++++++++++++++++++++---- src/part/discrete.rs | 4 ++-- 5 files changed, 80 insertions(+), 13 deletions(-) diff --git a/src/header.rs b/src/header.rs index 88bbde8..dac6d2d 100644 --- a/src/header.rs +++ b/src/header.rs @@ -1,3 +1,4 @@ +use std::fmt; use nom::{ branch::alt, bytes::complete::{tag, take_while1}, @@ -11,13 +12,21 @@ use nom::{ use crate::text::whitespace::{foldable_line, obs_crlf}; use crate::text::misc_token::unstructured; -#[derive(Debug, PartialEq, Clone)] +#[derive(PartialEq, Clone)] pub struct Kv2<'a>(pub &'a [u8], pub &'a [u8]); impl<'a> From<(&'a [u8], &'a [u8])> for Kv2<'a> { fn from(pair: (&'a [u8], &'a [u8])) -> Self { Self(pair.0, pair.1) } } +impl<'a> fmt::Debug for Kv2<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_tuple("header::Kv2") + .field(&String::from_utf8_lossy(self.0)) + .field(&String::from_utf8_lossy(self.1)) + .finish() + } +} #[derive(Debug, PartialEq, Clone)] pub enum Field<'a> { diff --git a/src/mime/mod.rs b/src/mime/mod.rs index b7de69d..5b9a46a 100644 --- a/src/mime/mod.rs +++ b/src/mime/mod.rs @@ -10,6 +10,7 @@ pub mod mechanism; /// Content-Type representation pub mod r#type; +use std::fmt; use std::marker::PhantomData; use crate::imf::identification::MessageID; @@ -55,15 +56,27 @@ impl<'a, T: WithDefaultType> From> for AnyMIME<'a> { } } -#[derive(Debug, PartialEq, Default, Clone)] +#[derive(PartialEq, Default, Clone)] pub struct NaiveMIME<'a> { pub ctype: Option>, pub transfer_encoding: Mechanism<'a>, pub id: Option>, pub description: Option>, - pub fields: Vec>, + pub kv: Vec>, pub raw: &'a [u8], } +impl<'a> fmt::Debug for NaiveMIME<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("NaiveMime") + .field("ctype", &self.ctype) + .field("transfer_encoding", &self.transfer_encoding) + .field("id", &self.id) + .field("description", &self.description) + .field("kv", &self.kv) + .field("raw", &String::from_utf8_lossy(self.raw)) + .finish() + } +} impl<'a> FromIterator> for NaiveMIME<'a> { fn from_iter>>(it: I) -> Self { @@ -83,8 +96,8 @@ impl<'a> FromIterator> for NaiveMIME<'a> { } impl<'a> NaiveMIME<'a> { - pub fn with_fields(mut self, fields: Vec>) -> Self { - self.fields = fields; self + pub fn with_kv(mut self, fields: Vec>) -> Self { + self.kv = fields; self } pub fn with_raw(mut self, raw: &'a [u8]) -> Self { self.raw = raw; self diff --git a/src/mime/type.rs b/src/mime/type.rs index b83e643..a1ed74a 100644 --- a/src/mime/type.rs +++ b/src/mime/type.rs @@ -1,3 +1,4 @@ +use std::fmt; use nom::{ bytes::complete::tag, combinator::{map, opt}, @@ -12,12 +13,21 @@ use crate::text::words::mime_atom; use crate::mime::{AnyMIME, MIME, NaiveMIME}; // --------- NAIVE TYPE -#[derive(Debug, PartialEq, Clone)] +#[derive(PartialEq, Clone)] pub struct NaiveType<'a> { pub main: &'a [u8], pub sub: &'a [u8], pub params: Vec>, } +impl<'a> fmt::Debug for NaiveType<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("mime::NaiveType") + .field("main", &String::from_utf8_lossy(self.main)) + .field("sub", &String::from_utf8_lossy(self.sub)) + .field("params", &self.params) + .finish() + } +} impl<'a> NaiveType<'a> { pub fn to_type(&self) -> AnyType { self.into() @@ -30,11 +40,20 @@ pub fn naive_type(input: &[u8]) -> IResult<&[u8], NaiveType> { )(input) } -#[derive(Debug, PartialEq, Clone)] +#[derive(PartialEq, Clone)] pub struct Parameter<'a> { pub name: &'a [u8], pub value: MIMEWord<'a>, } +impl<'a> fmt::Debug for Parameter<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("mime::Parameter") + .field("name", &String::from_utf8_lossy(self.name)) + .field("value", &self.value) + .finish() + } +} + pub fn parameter(input: &[u8]) -> IResult<&[u8], Parameter> { map( tuple((mime_atom, tag(b"="), mime_word)), diff --git a/src/part/composite.rs b/src/part/composite.rs index bd7231b..b2bb754 100644 --- a/src/part/composite.rs +++ b/src/part/composite.rs @@ -1,3 +1,4 @@ +use std::fmt; use nom::IResult; use crate::header; @@ -8,13 +9,23 @@ use crate::text::boundary::{boundary, Delimiter}; use crate::pointers; //--- Multipart -#[derive(Debug, PartialEq)] +#[derive(PartialEq)] pub struct Multipart<'a> { pub mime: mime::MIME<'a, mime::r#type::Multipart>, pub children: Vec>, pub raw_part_inner: &'a [u8], pub raw_part_outer: &'a [u8], } +impl<'a> fmt::Debug for Multipart<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("part::Multipart") + .field("mime", &self.mime) + .field("children", &self.children) + .field("raw_part_inner", &String::from_utf8_lossy(self.raw_part_inner)) + .field("raw_part_outer", &String::from_utf8_lossy(self.raw_part_outer)) + .finish() + } +} impl<'a> Multipart<'a> { pub fn preamble(&self) -> &'a [u8] { pointers::parsed(self.raw_part_outer, self.raw_part_inner) @@ -83,7 +94,7 @@ pub fn multipart<'a>( .collect::(); let mime = mime - .with_fields(fields) + .with_kv(fields) .with_raw(raw_hdrs); (input_eom, mime) @@ -113,7 +124,7 @@ pub fn multipart<'a>( //--- Message -#[derive(Debug, PartialEq)] +#[derive(PartialEq)] pub struct Message<'a> { pub mime: mime::MIME<'a, mime::r#type::DeductibleMessage>, pub imf: imf::Imf<'a>, @@ -123,6 +134,18 @@ pub struct Message<'a> { pub raw_headers: &'a [u8], pub raw_body: &'a [u8], } +impl<'a> fmt::Debug for Message<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("part::Message") + .field("mime", &self.mime) + .field("imf", &self.imf) + .field("child", &self.child) + .field("raw_part", &String::from_utf8_lossy(self.raw_part)) + .field("raw_headers", &String::from_utf8_lossy(self.raw_headers)) + .field("raw_body", &String::from_utf8_lossy(self.raw_body)) + .finish() + } +} pub fn message<'a>( m: mime::MIME<'a, mime::r#type::DeductibleMessage>, @@ -142,7 +165,7 @@ pub fn message<'a>( let (naive_mime, imf) = part::field::split_and_build(&headers); // interpret headers to choose a mime type - let in_mime = naive_mime.with_fields(headers).with_raw(raw_headers).to_interpreted::().into(); + let in_mime = naive_mime.with_kv(headers).with_raw(raw_headers).to_interpreted::().into(); //--------------- // parse a part following this mime specification @@ -256,6 +279,9 @@ It DOES end with a linebreak. ] }), raw: &b"Content-type: text/plain; charset=us-ascii\n\n"[..], + kv: vec![ + header::Field::Good(header::Kv2(&b"Content-type"[..], &b"text/plain; charset=us-ascii"[..])) + ], ..mime::NaiveMIME::default() }, }, diff --git a/src/part/discrete.rs b/src/part/discrete.rs index f92e032..2372c77 100644 --- a/src/part/discrete.rs +++ b/src/part/discrete.rs @@ -14,7 +14,7 @@ impl<'a> fmt::Debug for Text<'a> { .field("mime", &self.mime) .field( "body", - &format_args!("\"{}\"", String::from_utf8_lossy(self.body)), + &String::from_utf8_lossy(self.body), ) .finish() } @@ -32,7 +32,7 @@ impl<'a> fmt::Debug for Binary<'a> { .field("mime", &self.mime) .field( "body", - &format_args!("\"{}\"", String::from_utf8_lossy(self.body)), + &String::from_utf8_lossy(self.body), ) .finish() } -- 2.45.2 From 2529b0145e4c1d7f6cd6090abcfd2521689d9848 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Wed, 30 Aug 2023 19:48:23 +0200 Subject: [PATCH 7/8] fixed tests! --- src/imf/mod.rs | 8 ++++++++ src/part/composite.rs | 27 +++++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/imf/mod.rs b/src/imf/mod.rs index bd7e24a..32eb925 100644 --- a/src/imf/mod.rs +++ b/src/imf/mod.rs @@ -55,6 +55,14 @@ pub struct Imf<'a> { // MIME pub mime_version: Option, + + // Raw fields + pub kv: Vec>, +} +impl<'a> Imf<'a> { + pub fn with_kv(mut self, v: Vec>) -> Self { + self.kv = v; self + } } //@FIXME min and max limits are not enforced, diff --git a/src/part/composite.rs b/src/part/composite.rs index b2bb754..f1bae97 100644 --- a/src/part/composite.rs +++ b/src/part/composite.rs @@ -164,8 +164,11 @@ pub fn message<'a>( // aggregate header fields let (naive_mime, imf) = part::field::split_and_build(&headers); + // Bind headers to the IMF object + let imf = imf.with_kv(headers); + // interpret headers to choose a mime type - let in_mime = naive_mime.with_kv(headers).with_raw(raw_headers).to_interpreted::().into(); + let in_mime = naive_mime.with_raw(raw_headers).to_interpreted::().into(); //--------------- // parse a part following this mime specification @@ -487,6 +490,19 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
right: &b"www.grrrndzero.org"[..], }), mime_version: Some(imf::mime::Version { major: 1, minor: 0}), + kv: vec![ + header::Field::Good(header::Kv2(&b"Date"[..], &b"Sat, 8 Jul 2023 07:14:29 +0200"[..])), + header::Field::Good(header::Kv2(&b"From"[..], &b"Grrrnd Zero "[..])), + header::Field::Good(header::Kv2(&b"To"[..], &b"John Doe "[..])), + header::Field::Good(header::Kv2(&b"CC"[..], &b"=?ISO-8859-1?Q?Andr=E9?= Pirard "[..])), + header::Field::Good(header::Kv2(&b"Subject"[..], &b"=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\n =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?="[..])), + header::Field::Good(header::Kv2(&b"X-Unknown"[..], &b"something something"[..])), + header::Field::Bad(&b"Bad entry\n on multiple lines\n"[..]), + header::Field::Good(header::Kv2(&b"Message-ID"[..], &b""[..])), + header::Field::Good(header::Kv2(&b"MIME-Version"[..], &b"1.0"[..])), + header::Field::Good(header::Kv2(&b"Content-Type"[..], &b"multipart/alternative;\n boundary=\"b1_e376dc71bafc953c0b0fdeb9983a9956\""[..])), + header::Field::Good(header::Kv2(&b"Content-Transfer-Encoding"[..], &b"7bit"[..])), + ], ..imf::Imf::default() }, child: Box::new(AnyPart::Mult(Multipart { @@ -531,6 +547,10 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
] }), transfer_encoding: mime::mechanism::Mechanism::QuotedPrintable, + kv: vec![ + header::Field::Good(header::Kv2(&b"Content-Type"[..], &b"text/plain; charset=utf-8"[..])), + header::Field::Good(header::Kv2(&b"Content-Transfer-Encoding"[..], &b"quoted-printable"[..])), + ], raw: &b"Content-Type: text/plain; charset=utf-8\nContent-Transfer-Encoding: quoted-printable\n\n"[..], ..mime::NaiveMIME::default() } @@ -554,7 +574,10 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
value: MIMEWord::Atom(&b"us-ascii"[..]), } ] - }), + }), + kv: vec![ + header::Field::Good(header::Kv2(&b"Content-Type"[..], &b"text/html; charset=us-ascii"[..])), + ], raw: &b"Content-Type: text/html; charset=us-ascii\n\n"[..], ..mime::NaiveMIME::default() }, -- 2.45.2 From d9285c9ddffcd23a0d72a4d286f6614fa03e495c Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Wed, 30 Aug 2023 19:49:04 +0200 Subject: [PATCH 8/8] format code --- src/header.rs | 21 +++---------- src/imf/field.rs | 40 ++++++++++++------------ src/imf/mod.rs | 18 +++++------ src/lib.rs | 8 +++-- src/mime/field.rs | 10 ++++-- src/mime/mod.rs | 31 ++++++++++--------- src/mime/type.rs | 69 ++++++++++++++++++++++++++++-------------- src/part/composite.rs | 46 +++++++++++++++++++--------- src/part/discrete.rs | 10 ++---- src/part/field.rs | 9 ++++-- src/part/mod.rs | 19 ++++++------ src/text/whitespace.rs | 7 ++++- 12 files changed, 167 insertions(+), 121 deletions(-) diff --git a/src/header.rs b/src/header.rs index dac6d2d..5c889a3 100644 --- a/src/header.rs +++ b/src/header.rs @@ -1,4 +1,3 @@ -use std::fmt; use nom::{ branch::alt, bytes::complete::{tag, take_while1}, @@ -8,9 +7,10 @@ use nom::{ sequence::{pair, terminated, tuple}, IResult, }; +use std::fmt; -use crate::text::whitespace::{foldable_line, obs_crlf}; use crate::text::misc_token::unstructured; +use crate::text::whitespace::{foldable_line, obs_crlf}; #[derive(PartialEq, Clone)] pub struct Kv2<'a>(pub &'a [u8], pub &'a [u8]); @@ -47,13 +47,8 @@ impl<'a> From<&'a [u8]> for Field<'a> { /// Parse headers as key/values pub fn header_kv(input: &[u8]) -> IResult<&[u8], Vec> { terminated( - many0( - alt(( - into(correct_field), - into(foldable_line), - )) - ), - obs_crlf + many0(alt((into(correct_field), into(foldable_line)))), + obs_crlf, )(input) } @@ -74,11 +69,5 @@ pub fn field_any(input: &[u8]) -> IResult<&[u8], &[u8]> { /// ; ":". /// ``` pub fn correct_field(input: &[u8]) -> IResult<&[u8], Kv2> { - terminated( - into(pair( - field_any, - recognize(unstructured), - )), - obs_crlf, - )(input) + terminated(into(pair(field_any, recognize(unstructured))), obs_crlf)(input) } diff --git a/src/imf/field.rs b/src/imf/field.rs index 17a48f9..0a7766d 100644 --- a/src/imf/field.rs +++ b/src/imf/field.rs @@ -46,25 +46,27 @@ impl<'a> TryFrom<&header::Field<'a>> for Field<'a> { type Error = (); fn try_from(f: &header::Field<'a>) -> Result { let content = match f { - header::Field::Good(header::Kv2(key, value)) => match key.to_ascii_lowercase().as_slice() { - b"date" => map(date, Field::Date)(value), - b"from" => map(mailbox_list, Field::From)(value), - b"sender" => map(mailbox, Field::Sender)(value), - b"reply-to" => map(address_list, Field::ReplyTo)(value), - b"to" => map(address_list, Field::To)(value), - b"cc" => map(address_list, Field::Cc)(value), - b"bcc" => map(nullable_address_list, Field::Bcc)(value), - b"message-id" => map(msg_id, Field::MessageID)(value), - b"in-reply-to" => map(msg_list, Field::InReplyTo)(value), - b"references" => map(msg_list, Field::References)(value), - b"subject" => map(unstructured, Field::Subject)(value), - b"comments" => map(unstructured, Field::Comments)(value), - b"keywords" => map(phrase_list, Field::Keywords)(value), - b"return-path" => map(return_path, Field::ReturnPath)(value), - b"received" => map(received_log, Field::Received)(value), - b"mime-version" => map(version, Field::MIMEVersion)(value), - _ => return Err(()), - }, + header::Field::Good(header::Kv2(key, value)) => { + match key.to_ascii_lowercase().as_slice() { + b"date" => map(date, Field::Date)(value), + b"from" => map(mailbox_list, Field::From)(value), + b"sender" => map(mailbox, Field::Sender)(value), + b"reply-to" => map(address_list, Field::ReplyTo)(value), + b"to" => map(address_list, Field::To)(value), + b"cc" => map(address_list, Field::Cc)(value), + b"bcc" => map(nullable_address_list, Field::Bcc)(value), + b"message-id" => map(msg_id, Field::MessageID)(value), + b"in-reply-to" => map(msg_list, Field::InReplyTo)(value), + b"references" => map(msg_list, Field::References)(value), + b"subject" => map(unstructured, Field::Subject)(value), + b"comments" => map(unstructured, Field::Comments)(value), + b"keywords" => map(phrase_list, Field::Keywords)(value), + b"return-path" => map(return_path, Field::ReturnPath)(value), + b"received" => map(received_log, Field::Received)(value), + b"mime-version" => map(version, Field::MIMEVersion)(value), + _ => return Err(()), + } + } _ => return Err(()), }; diff --git a/src/imf/mod.rs b/src/imf/mod.rs index 32eb925..b920c49 100644 --- a/src/imf/mod.rs +++ b/src/imf/mod.rs @@ -1,5 +1,4 @@ /// Parse and represent IMF (Internet Message Format) headers (RFC822, RFC5322) - pub mod address; pub mod datetime; pub mod field; @@ -8,10 +7,7 @@ pub mod mailbox; pub mod mime; pub mod trace; -use nom::{ - combinator::map, - IResult, -}; +use nom::{combinator::map, IResult}; use crate::header; use crate::imf::address::AddressRef; @@ -61,7 +57,8 @@ pub struct Imf<'a> { } impl<'a> Imf<'a> { pub fn with_kv(mut self, v: Vec>) -> Self { - self.kv = v; self + self.kv = v; + self } } @@ -94,12 +91,15 @@ impl<'a> FromIterator> for Imf<'a> { } pub fn imf(input: &[u8]) -> IResult<&[u8], Imf> { - map(header::header_kv, |fields| { - fields.iter().flat_map(Field::try_from).into_iter().collect::() + map(header::header_kv, |fields| { + fields + .iter() + .flat_map(Field::try_from) + .into_iter() + .collect::() })(input) } - #[cfg(test)] mod tests { use super::*; diff --git a/src/lib.rs b/src/lib.rs index edb327a..bd79650 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,7 +18,7 @@ pub mod text; /// Manipulate buffer of bytes mod pointers; -use nom::{IResult, combinator::into}; +use nom::{combinator::into, IResult}; /// Parse a whole email including its (MIME) body /// @@ -37,7 +37,7 @@ use nom::{IResult, combinator::into}; /// * `msg` - The parsed message /// /// # Examples -/// +/// /// ``` /// let input = br#"Date: 7 Mar 2023 08:00:00 +0200 /// From: deuxfleurs@example.com @@ -57,7 +57,9 @@ use nom::{IResult, combinator::into}; /// ); /// ``` pub fn parse_message(input: &[u8]) -> IResult<&[u8], part::composite::Message> { - into(part::composite::message(mime::MIME::::default()))(input) + into(part::composite::message(mime::MIME::< + mime::r#type::DeductibleMessage, + >::default()))(input) } /// Only extract the headers of the email that are part of the Internet Message Format spec diff --git a/src/mime/field.rs b/src/mime/field.rs index 2489516..8736b47 100644 --- a/src/mime/field.rs +++ b/src/mime/field.rs @@ -45,7 +45,10 @@ impl<'a> TryFrom<&header::Field<'a>> for Content<'a> { type Error = (); fn try_from(f: &header::Field<'a>) -> Result { let content = match f { - header::Field::Good(header::Kv2(key, value)) => match key.to_ascii_lowercase().as_slice() { + header::Field::Good(header::Kv2(key, value)) => match key + .to_ascii_lowercase() + .as_slice() + { b"content-type" => map(naive_type, Content::Type)(value), b"content-transfer-encoding" => map(mechanism, Content::TransferEncoding)(value), b"content-id" => map(msg_id, Content::ID)(value), @@ -107,7 +110,10 @@ This is a multipart message. .as_bytes(); assert_eq!( - map(header::header_kv, |k| k.iter().flat_map(Content::try_from).collect())(fullmail), + map(header::header_kv, |k| k + .iter() + .flat_map(Content::try_from) + .collect())(fullmail), Ok(( &b"This is a multipart message.\n\n"[..], vec![ diff --git a/src/mime/mod.rs b/src/mime/mod.rs index 5b9a46a..542d083 100644 --- a/src/mime/mod.rs +++ b/src/mime/mod.rs @@ -1,4 +1,4 @@ -/// Parsed and represent an email character set +/// Parsed and represent an email character set pub mod charset; /// MIME specific headers @@ -13,17 +13,17 @@ pub mod r#type; use std::fmt; use std::marker::PhantomData; +use crate::header; use crate::imf::identification::MessageID; use crate::mime::field::Content; use crate::mime::mechanism::Mechanism; use crate::mime::r#type::{AnyType, NaiveType}; -use crate::header; use crate::text::misc_token::Unstructured; //Multipart, Message, Text, Binary}; #[derive(Debug, PartialEq, Clone)] pub struct MIME<'a, T> { - pub interpreted_type: T, - pub fields: NaiveMIME<'a> + pub interpreted_type: T, + pub fields: NaiveMIME<'a>, } impl<'a> Default for MIME<'a, r#type::DeductibleText> { fn default() -> Self { @@ -80,9 +80,8 @@ impl<'a> fmt::Debug for NaiveMIME<'a> { impl<'a> FromIterator> for NaiveMIME<'a> { fn from_iter>>(it: I) -> Self { - it.into_iter().fold( - NaiveMIME::default(), - |mut section, field| { + it.into_iter() + .fold(NaiveMIME::default(), |mut section, field| { match field { Content::Type(v) => section.ctype = Some(v), Content::TransferEncoding(v) => section.transfer_encoding = v, @@ -90,25 +89,29 @@ impl<'a> FromIterator> for NaiveMIME<'a> { Content::Description(v) => section.description = Some(v), }; section - }, - ) + }) } } impl<'a> NaiveMIME<'a> { pub fn with_kv(mut self, fields: Vec>) -> Self { - self.kv = fields; self + self.kv = fields; + self } pub fn with_raw(mut self, raw: &'a [u8]) -> Self { - self.raw = raw; self + self.raw = raw; + self } pub fn to_interpreted(self) -> AnyMIME<'a> { - self.ctype.as_ref().map(|c| c.to_type()).unwrap_or(T::default_type()).to_mime(self).into() + self.ctype + .as_ref() + .map(|c| c.to_type()) + .unwrap_or(T::default_type()) + .to_mime(self) + .into() } } - - pub trait WithDefaultType { fn default_type() -> AnyType; } diff --git a/src/mime/type.rs b/src/mime/type.rs index a1ed74a..7dafb29 100644 --- a/src/mime/type.rs +++ b/src/mime/type.rs @@ -1,4 +1,3 @@ -use std::fmt; use nom::{ bytes::complete::tag, combinator::{map, opt}, @@ -6,11 +5,12 @@ use nom::{ sequence::{preceded, terminated, tuple}, IResult, }; +use std::fmt; use crate::mime::charset::EmailCharset; +use crate::mime::{AnyMIME, NaiveMIME, MIME}; use crate::text::misc_token::{mime_word, MIMEWord}; use crate::text::words::mime_atom; -use crate::mime::{AnyMIME, MIME, NaiveMIME}; // --------- NAIVE TYPE #[derive(PartialEq, Clone)] @@ -92,16 +92,27 @@ impl<'a> From<&'a NaiveType<'a>> for AnyType { impl<'a> AnyType { pub fn to_mime(self, fields: NaiveMIME<'a>) -> AnyMIME<'a> { - match self { - Self::Multipart(interpreted_type) => AnyMIME::Mult(MIME:: { interpreted_type, fields }), - Self::Message(interpreted_type) => AnyMIME::Msg(MIME:: { interpreted_type, fields }), - Self::Text(interpreted_type) => AnyMIME::Txt(MIME:: { interpreted_type, fields }), - Self::Binary(interpreted_type) => AnyMIME::Bin(MIME:: { interpreted_type, fields }), - } + match self { + Self::Multipart(interpreted_type) => AnyMIME::Mult(MIME:: { + interpreted_type, + fields, + }), + Self::Message(interpreted_type) => AnyMIME::Msg(MIME:: { + interpreted_type, + fields, + }), + Self::Text(interpreted_type) => AnyMIME::Txt(MIME:: { + interpreted_type, + fields, + }), + Self::Binary(interpreted_type) => AnyMIME::Bin(MIME:: { + interpreted_type, + fields, + }), + } } } - #[derive(Debug, PartialEq, Clone)] pub enum Deductible { Inferred(T), @@ -158,7 +169,8 @@ impl ToString for MultipartSubtype { Self::Parallel => "parallel", Self::Report => "report", Self::Unknown => "mixed", - }.into() + } + .into() } } impl<'a> From<&NaiveType<'a>> for MultipartSubtype { @@ -174,8 +186,6 @@ impl<'a> From<&NaiveType<'a>> for MultipartSubtype { } } - - #[derive(Debug, PartialEq, Default, Clone)] pub enum MessageSubtype { #[default] @@ -191,7 +201,8 @@ impl ToString for MessageSubtype { Self::Partial => "partial", Self::External => "external", Self::Unknown => "rfc822", - }.into() + } + .into() } } @@ -203,17 +214,25 @@ pub struct Message { impl<'a> From<&NaiveType<'a>> for Message { fn from(nt: &NaiveType<'a>) -> Self { match nt.sub.to_ascii_lowercase().as_slice() { - b"rfc822" => Self { subtype: MessageSubtype::RFC822 }, - b"partial" => Self { subtype: MessageSubtype::Partial }, - b"external" => Self { subtype: MessageSubtype::External }, - _ => Self { subtype: MessageSubtype::Unknown }, + b"rfc822" => Self { + subtype: MessageSubtype::RFC822, + }, + b"partial" => Self { + subtype: MessageSubtype::Partial, + }, + b"external" => Self { + subtype: MessageSubtype::External, + }, + _ => Self { + subtype: MessageSubtype::Unknown, + }, } } } impl From> for Message { fn from(d: Deductible) -> Self { match d { - Deductible::Inferred(t) | Deductible::Explicit(t) => t + Deductible::Inferred(t) | Deductible::Explicit(t) => t, } } } @@ -240,7 +259,7 @@ impl<'a> From<&NaiveType<'a>> for Text { impl From> for Text { fn from(d: Deductible) -> Self { match d { - Deductible::Inferred(t) | Deductible::Explicit(t) => t + Deductible::Inferred(t) | Deductible::Explicit(t) => t, } } } @@ -257,7 +276,8 @@ impl ToString for TextSubtype { match self { Self::Plain | Self::Unknown => "plain", Self::Html => "html", - }.into() + } + .into() } } impl<'a> From<&NaiveType<'a>> for TextSubtype { @@ -277,8 +297,8 @@ pub struct Binary {} mod tests { use super::*; use crate::mime::charset::EmailCharset; - use crate::text::quoted::QuotedString; use crate::mime::r#type::Deductible; + use crate::text::quoted::QuotedString; #[test] fn test_parameter() { @@ -336,7 +356,12 @@ mod tests { let (rest, nt) = naive_type(b"message/rfc822").unwrap(); assert_eq!(rest, &[]); - assert_eq!(nt.to_type(), AnyType::Message(Deductible::Explicit(Message { subtype: MessageSubtype::RFC822 }))); + assert_eq!( + nt.to_type(), + AnyType::Message(Deductible::Explicit(Message { + subtype: MessageSubtype::RFC822 + })) + ); } #[test] diff --git a/src/part/composite.rs b/src/part/composite.rs index f1bae97..fabe5f5 100644 --- a/src/part/composite.rs +++ b/src/part/composite.rs @@ -1,12 +1,12 @@ -use std::fmt; use nom::IResult; +use std::fmt; use crate::header; use crate::imf; use crate::mime; use crate::part::{self, AnyPart}; -use crate::text::boundary::{boundary, Delimiter}; use crate::pointers; +use crate::text::boundary::{boundary, Delimiter}; //--- Multipart #[derive(PartialEq)] @@ -21,8 +21,14 @@ impl<'a> fmt::Debug for Multipart<'a> { fmt.debug_struct("part::Multipart") .field("mime", &self.mime) .field("children", &self.children) - .field("raw_part_inner", &String::from_utf8_lossy(self.raw_part_inner)) - .field("raw_part_outer", &String::from_utf8_lossy(self.raw_part_outer)) + .field( + "raw_part_inner", + &String::from_utf8_lossy(self.raw_part_inner), + ) + .field( + "raw_part_outer", + &String::from_utf8_lossy(self.raw_part_outer), + ) .finish() } } @@ -76,7 +82,10 @@ pub fn multipart<'a>( mime: m.clone(), children: mparts, raw_part_inner: pointers::parsed(inner_orig, inp), - raw_part_outer: pointers::parsed(outer_orig, &outer_orig[outer_orig.len()..]), + raw_part_outer: pointers::parsed( + outer_orig, + &outer_orig[outer_orig.len()..], + ), }, )) } @@ -93,19 +102,21 @@ pub fn multipart<'a>( .into_iter() .collect::(); - let mime = mime - .with_kv(fields) - .with_raw(raw_hdrs); + let mime = mime.with_kv(fields).with_raw(raw_hdrs); (input_eom, mime) - }, + } Err(_) => (input, mime::NaiveMIME::default()), }; // interpret mime according to context let mime = match m.interpreted_type.subtype { - mime::r#type::MultipartSubtype::Digest => naive_mime.to_interpreted::().into(), - _ => naive_mime.to_interpreted::().into(), + mime::r#type::MultipartSubtype::Digest => naive_mime + .to_interpreted::() + .into(), + _ => naive_mime + .to_interpreted::() + .into(), }; // parse raw part @@ -168,7 +179,10 @@ pub fn message<'a>( let imf = imf.with_kv(headers); // interpret headers to choose a mime type - let in_mime = naive_mime.with_raw(raw_headers).to_interpreted::().into(); + let in_mime = naive_mime + .with_raw(raw_headers) + .to_interpreted::() + .into(); //--------------- // parse a part following this mime specification @@ -183,7 +197,9 @@ pub fn message<'a>( Message { mime: m.clone(), imf, - raw_part, raw_headers, raw_body, + raw_part, + raw_headers, + raw_body, child: Box::new(part), }, )) @@ -196,7 +212,7 @@ mod tests { use crate::part::discrete::Text; use crate::part::AnyPart; use crate::text::encoding::{Base64Word, EncodedWord, QuotedChunk, QuotedWord}; - use crate::text::misc_token::{Phrase, UnstrToken, Unstructured, Word, MIMEWord}; + use crate::text::misc_token::{MIMEWord, Phrase, UnstrToken, Unstructured, Word}; use crate::text::quoted::QuotedString; use chrono::{FixedOffset, TimeZone}; @@ -265,7 +281,7 @@ It DOES end with a linebreak. body: &b"This is implicitly typed plain US-ASCII text.\nIt does NOT end with a linebreak."[..], }), AnyPart::Txt(Text { - mime: mime::MIME { + mime: mime::MIME { interpreted_type: mime::r#type::Deductible::Explicit(mime::r#type::Text { subtype: mime::r#type::TextSubtype::Plain, charset: mime::r#type::Deductible::Explicit(mime::charset::EmailCharset::US_ASCII), diff --git a/src/part/discrete.rs b/src/part/discrete.rs index 2372c77..8cfe5c3 100644 --- a/src/part/discrete.rs +++ b/src/part/discrete.rs @@ -12,10 +12,7 @@ impl<'a> fmt::Debug for Text<'a> { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fmt.debug_struct("part::Text") .field("mime", &self.mime) - .field( - "body", - &String::from_utf8_lossy(self.body), - ) + .field("body", &String::from_utf8_lossy(self.body)) .finish() } } @@ -30,10 +27,7 @@ impl<'a> fmt::Debug for Binary<'a> { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fmt.debug_struct("part::Binary") .field("mime", &self.mime) - .field( - "body", - &String::from_utf8_lossy(self.body), - ) + .field("body", &String::from_utf8_lossy(self.body)) .finish() } } diff --git a/src/part/field.rs b/src/part/field.rs index 2717efd..e7fd32a 100644 --- a/src/part/field.rs +++ b/src/part/field.rs @@ -4,7 +4,10 @@ use crate::mime; pub fn split_and_build<'a>(v: &Vec>) -> (mime::NaiveMIME<'a>, imf::Imf<'a>) { let (mimev, imfv) = v.iter().fold( - (Vec::::new(), Vec::::new()), + ( + Vec::::new(), + Vec::::new(), + ), |(mut mime, mut imf), f| { if let Ok(m) = mime::field::Content::try_from(f) { mime.push(m); @@ -12,9 +15,9 @@ pub fn split_and_build<'a>(v: &Vec>) -> (mime::NaiveMIME<'a>, imf.push(i); } (mime, imf) - } + }, ); - + let fmime = mimev.into_iter().collect::(); let fimf = imfv.into_iter().collect::(); (fmime, fimf) diff --git a/src/part/mod.rs b/src/part/mod.rs index 1d91f91..cc5c4b5 100644 --- a/src/part/mod.rs +++ b/src/part/mod.rs @@ -80,18 +80,19 @@ pub fn anypart<'a>(m: AnyMIME<'a>) -> impl FnOnce(&'a [u8]) -> IResult<&'a [u8], move |input| { let part = match m { AnyMIME::Mult(a) => multipart(a)(input) - .map(|(_, multi)| - multi.into()) - .unwrap_or(AnyPart::Txt(Text { - mime: mime::MIME::::default(), - body: input, - })), - AnyMIME::Msg(a) => message(a)(input) - .map(|(_, msg)| msg.into()) + .map(|(_, multi)| multi.into()) .unwrap_or(AnyPart::Txt(Text { mime: mime::MIME::::default(), body: input, })), + AnyMIME::Msg(a) => { + message(a)(input) + .map(|(_, msg)| msg.into()) + .unwrap_or(AnyPart::Txt(Text { + mime: mime::MIME::::default(), + body: input, + })) + } AnyMIME::Txt(a) => AnyPart::Txt(Text { mime: a, body: input, @@ -101,7 +102,7 @@ pub fn anypart<'a>(m: AnyMIME<'a>) -> impl FnOnce(&'a [u8]) -> IResult<&'a [u8], body: input, }), }; - + // This function always consumes the whole input Ok((&input[input.len()..], part)) } diff --git a/src/text/whitespace.rs b/src/text/whitespace.rs index c48e347..b7777c0 100644 --- a/src/text/whitespace.rs +++ b/src/text/whitespace.rs @@ -22,7 +22,12 @@ use nom::{ /// \r or \n is allowed nowhere else, so we also add this support. pub fn obs_crlf(input: &[u8]) -> IResult<&[u8], &[u8]> { - alt((tag(ascii::CRLF), tag(ascii::CRCRLF), tag(&[ascii::CR]), tag(&[ascii::LF])))(input) + alt(( + tag(ascii::CRLF), + tag(ascii::CRCRLF), + tag(&[ascii::CR]), + tag(&[ascii::LF]), + ))(input) } /// ```abnf -- 2.45.2