diff --git a/src/fragments/address.rs b/src/fragments/address.rs index 6aefbb0..7bf2a43 100644 --- a/src/fragments/address.rs +++ b/src/fragments/address.rs @@ -1,18 +1,18 @@ use nom::{ - IResult, branch::alt, bytes::complete::tag, combinator::{into, opt}, multi::separated_list1, sequence::tuple, + IResult, }; +use crate::error::IMFError; use crate::fragments::lazy; -use crate::fragments::model::{GroupRef, AddressRef, MailboxRef, MailboxList, AddressList}; use crate::fragments::mailbox::mailbox; use crate::fragments::misc_token::phrase; -use crate::fragments::whitespace::{cfws}; -use crate::error::IMFError; +use crate::fragments::model::{AddressList, AddressRef, GroupRef, MailboxList, MailboxRef}; +use crate::fragments::whitespace::cfws; impl<'a> TryFrom<&'a lazy::Mailbox<'a>> for MailboxRef { type Error = IMFError<'a>; @@ -70,13 +70,16 @@ pub fn address(input: &str) -> IResult<&str, AddressRef> { /// display-name = phrase /// ``` pub fn group(input: &str) -> IResult<&str, GroupRef> { - let (input, (grp_name, _, grp_list, _, _)) = + let (input, (grp_name, _, grp_list, _, _)) = tuple((phrase, tag(":"), opt(group_list), tag(";"), opt(cfws)))(input)?; - Ok((input, GroupRef { - name: grp_name, - participants: grp_list.unwrap_or(vec![]), - })) + Ok(( + input, + GroupRef { + name: grp_name, + participants: grp_list.unwrap_or(vec![]), + }, + )) } /// Group list @@ -128,7 +131,9 @@ mod tests { _ => panic!(), }; - match mailbox_list(r#"Mary Smith , jdoe@example.org, Who? , , "Giant; \"Big\" Box" "#) { + match mailbox_list( + r#"Mary Smith , jdoe@example.org, Who? 
, , "Giant; \"Big\" Box" "#, + ) { Ok(("", _)) => (), _ => panic!(), }; @@ -137,30 +142,47 @@ mod tests { #[test] fn test_address_list() { assert_eq!( - address_list(r#"A Group:Ed Jones ,joe@where.test,John ;, Mary Smith "#), - Ok(("", vec![ - AddressRef::Many(GroupRef { - name: "A Group".to_string(), - participants: vec![ - MailboxRef { - name: Some("Ed Jones".into()), - addrspec: AddrSpec { local_part: "c".into(), domain: "a.test".into() }, + address_list( + r#"A Group:Ed Jones ,joe@where.test,John ;, Mary Smith "# + ), + Ok(( + "", + vec![ + AddressRef::Many(GroupRef { + name: "A Group".to_string(), + participants: vec![ + MailboxRef { + name: Some("Ed Jones".into()), + addrspec: AddrSpec { + local_part: "c".into(), + domain: "a.test".into() + }, + }, + MailboxRef { + name: None, + addrspec: AddrSpec { + local_part: "joe".into(), + domain: "where.test".into() + }, + }, + MailboxRef { + name: Some("John".into()), + addrspec: AddrSpec { + local_part: "jdoe".into(), + domain: "one.test".into() + }, + }, + ], + }), + AddressRef::Single(MailboxRef { + name: Some("Mary Smith".into()), + addrspec: AddrSpec { + local_part: "mary".into(), + domain: "x.test".into() }, - MailboxRef { - name: None, - addrspec: AddrSpec { local_part: "joe".into(), domain: "where.test".into() }, - }, - MailboxRef { - name: Some("John".into()), - addrspec: AddrSpec { local_part: "jdoe".into(), domain: "one.test".into() }, - }, - ], - }), - AddressRef::Single(MailboxRef { - name: Some("Mary Smith".into()), - addrspec: AddrSpec { local_part: "mary".into(), domain: "x.test".into() }, - }), - ])) + }), + ] + )) ); } } diff --git a/src/fragments/datetime.rs b/src/fragments/datetime.rs index feb4020..1a3dc93 100644 --- a/src/fragments/datetime.rs +++ b/src/fragments/datetime.rs @@ -1,16 +1,16 @@ +use crate::error::IMFError; +use crate::fragments::lazy; +use crate::fragments::whitespace::{cfws, fws}; use chrono::{DateTime, FixedOffset, NaiveDate, NaiveTime}; use nom::{ - IResult, branch::alt, - 
bytes::complete::{tag, tag_no_case, take_while_m_n, is_a}, + bytes::complete::{is_a, tag, tag_no_case, take_while_m_n}, character, - character::complete::{one_of, alphanumeric1, digit0}, + character::complete::{alphanumeric1, digit0, one_of}, combinator::{map, opt, value}, - sequence::{preceded, terminated, tuple, delimited }, + sequence::{delimited, preceded, terminated, tuple}, + IResult, }; -use crate::fragments::lazy; -use crate::fragments::whitespace::{fws, cfws}; -use crate::error::IMFError; const MIN: i32 = 60; const HOUR: i32 = 60 * MIN; @@ -43,20 +43,31 @@ impl<'a> TryFrom<&'a lazy::DateTime<'a>> for DateTime { /// due to an error in RFC0822 but are interpreted as their respective /// timezone according to the RFC5322 definition pub fn section(input: &str) -> IResult<&str, Option>> { - map(terminated( + map( + terminated( alt(( - tuple((opt(terminated(strict_day_of_week, tag(","))), strict_date, strict_time_of_day, strict_zone )), - tuple((opt(terminated(obs_day_of_week, tag(","))), obs_date, obs_time_of_day, alt((strict_zone, obs_zone)) )), + tuple(( + opt(terminated(strict_day_of_week, tag(","))), + strict_date, + strict_time_of_day, + strict_zone, + )), + tuple(( + opt(terminated(obs_day_of_week, tag(","))), + obs_date, + obs_time_of_day, + alt((strict_zone, obs_zone)), + )), )), - opt(cfws) - ), |res| { - match res { - (_, Some(date), Some(time), Some(tz)) => { - date.and_time(time).and_local_timezone(tz).earliest() - }, - _ => None, + opt(cfws), + ), + |res| match res { + (_, Some(date), Some(time), Some(tz)) => { + date.and_time(time).and_local_timezone(tz).earliest() } - })(input) + _ => None, + }, + )(input) } /// day-of-week = ([FWS] day-name) / obs-day-of-week @@ -85,18 +96,16 @@ fn day_name(input: &str) -> IResult<&str, &str> { /// date = day month year fn strict_date(input: &str) -> IResult<&str, Option> { - map( - tuple((strict_day, month, strict_year)), - |(d, m, y)| NaiveDate::from_ymd_opt(y, m, d) - )(input) + map(tuple((strict_day, month, 
strict_year)), |(d, m, y)| { + NaiveDate::from_ymd_opt(y, m, d) + })(input) } /// date = day month year fn obs_date(input: &str) -> IResult<&str, Option> { - map( - tuple((obs_day, month, obs_year)), - |(d, m, y)| NaiveDate::from_ymd_opt(y, m, d) - )(input) + map(tuple((obs_day, month, obs_year)), |(d, m, y)| { + NaiveDate::from_ymd_opt(y, m, d) + })(input) } /// day = ([FWS] 1*2DIGIT FWS) / obs-day @@ -132,45 +141,63 @@ fn month(input: &str) -> IResult<&str, u32> { /// year = (FWS 4*DIGIT FWS) / obs-year fn strict_year(input: &str) -> IResult<&str, i32> { delimited( - fws, + fws, map( - terminated(take_while_m_n(4,9,|c| c >= '\x30' && c <= '\x39'), digit0), - |d: &str| d.parse::().unwrap()), + terminated(take_while_m_n(4, 9, |c| c >= '\x30' && c <= '\x39'), digit0), + |d: &str| d.parse::().unwrap(), + ), fws, )(input) } /// obs-year = [CFWS] 2*DIGIT [CFWS] fn obs_year(input: &str) -> IResult<&str, i32> { - map(delimited( - opt(cfws), - terminated(take_while_m_n(2,7,|c| c >= '\x30' && c <= '\x39'), digit0), - opt(cfws) - ), |cap: &str| { - let d = cap.parse::().unwrap(); - if d >= 0 && d <= 49 { - 2000 + d - } else if d >= 50 && d <= 999 { - 1900 + d - } else { - d - } - })(input) + map( + delimited( + opt(cfws), + terminated(take_while_m_n(2, 7, |c| c >= '\x30' && c <= '\x39'), digit0), + opt(cfws), + ), + |cap: &str| { + let d = cap.parse::().unwrap(); + if d >= 0 && d <= 49 { + 2000 + d + } else if d >= 50 && d <= 999 { + 1900 + d + } else { + d + } + }, + )(input) } /// time-of-day = hour ":" minute [ ":" second ] fn strict_time_of_day(input: &str) -> IResult<&str, Option> { map( - tuple((strict_time_digit, tag(":"), strict_time_digit, opt(preceded(tag(":"), strict_time_digit)))), - |(hour, _, minute, maybe_sec)| NaiveTime::from_hms_opt(hour, minute, maybe_sec.unwrap_or(0)), + tuple(( + strict_time_digit, + tag(":"), + strict_time_digit, + opt(preceded(tag(":"), strict_time_digit)), + )), + |(hour, _, minute, maybe_sec)| { + NaiveTime::from_hms_opt(hour, 
minute, maybe_sec.unwrap_or(0)) + }, )(input) } /// time-of-day = hour ":" minute [ ":" second ] fn obs_time_of_day(input: &str) -> IResult<&str, Option> { map( - tuple((obs_time_digit, tag(":"), obs_time_digit, opt(preceded(tag(":"), obs_time_digit)))), - |(hour, _, minute, maybe_sec)| NaiveTime::from_hms_opt(hour, minute, maybe_sec.unwrap_or(0)), + tuple(( + obs_time_digit, + tag(":"), + obs_time_digit, + opt(preceded(tag(":"), obs_time_digit)), + )), + |(hour, _, minute, maybe_sec)| { + NaiveTime::from_hms_opt(hour, minute, maybe_sec.unwrap_or(0)) + }, )(input) } @@ -189,15 +216,21 @@ fn obs_time_digit(input: &str) -> IResult<&str, u32> { /// ``` fn strict_zone(input: &str) -> IResult<&str, Option> { map( - tuple((opt(fws), is_a("+-"), take_while_m_n(2,2,|c| c >= '\x30' && c <= '\x39'), take_while_m_n(2,2,|c| c >= '\x30' && c <= '\x39'))), + tuple(( + opt(fws), + is_a("+-"), + take_while_m_n(2, 2, |c| c >= '\x30' && c <= '\x39'), + take_while_m_n(2, 2, |c| c >= '\x30' && c <= '\x39'), + )), |(_, op, dig_zone_hour, dig_zone_min)| { let zone_hour = dig_zone_hour.parse::().unwrap() * HOUR; let zone_min = dig_zone_min.parse::().unwrap() * MIN; match op { "+" => FixedOffset::east_opt(zone_hour + zone_min), "-" => FixedOffset::west_opt(zone_hour + zone_min), - _ => unreachable!(), } - } + _ => unreachable!(), + } + }, )(input) } @@ -216,7 +249,7 @@ fn strict_zone(input: &str) -> IResult<&str, Option> { /// %d97-105 / ; through "Z", both /// %d107-122 / ; upper and lower case /// ; -/// 1*(ALPHA / DIGIT) ; Unknown legacy timezones +/// 1*(ALPHA / DIGIT) ; Unknown legacy timezones fn obs_zone(input: &str) -> IResult<&str, Option> { // The writing of this function is volontarily verbose // to keep it straightforward to understand. 
@@ -227,18 +260,27 @@ fn obs_zone(input: &str) -> IResult<&str, Option> { opt(fws), alt(( // Legacy UTC/GMT - value(FixedOffset::west_opt(0 * HOUR), alt((tag("UTC"), tag("UT"), tag("GMT")))), - + value( + FixedOffset::west_opt(0 * HOUR), + alt((tag("UTC"), tag("UT"), tag("GMT"))), + ), // USA Timezones value(FixedOffset::west_opt(4 * HOUR), tag("EDT")), - value(FixedOffset::west_opt(5 * HOUR), alt((tag("EST"), tag("CDT")))), - value(FixedOffset::west_opt(6 * HOUR), alt((tag("CST"), tag("MDT")))), - value(FixedOffset::west_opt(7 * HOUR), alt((tag("MST"), tag("PDT")))), + value( + FixedOffset::west_opt(5 * HOUR), + alt((tag("EST"), tag("CDT"))), + ), + value( + FixedOffset::west_opt(6 * HOUR), + alt((tag("CST"), tag("MDT"))), + ), + value( + FixedOffset::west_opt(7 * HOUR), + alt((tag("MST"), tag("PDT"))), + ), value(FixedOffset::west_opt(8 * HOUR), tag("PST")), - // Military Timezone UTC value(FixedOffset::west_opt(0 * HOUR), tag("Z")), - // Military Timezones East map(one_of("ABCDEFGHIKLMabcdefghiklm"), |c| match c { 'A' | 'a' => FixedOffset::east_opt(1 * HOUR), @@ -255,7 +297,6 @@ fn obs_zone(input: &str) -> IResult<&str, Option> { 'M' | 'm' => FixedOffset::east_opt(12 * HOUR), _ => unreachable!(), }), - // Military Timezones West map(one_of("nopqrstuvwxyNOPQRSTUVWXY"), |c| match c { 'N' | 'n' => FixedOffset::west_opt(1 * HOUR), @@ -272,7 +313,6 @@ fn obs_zone(input: &str) -> IResult<&str, Option> { 'Y' | 'y' => FixedOffset::west_opt(12 * HOUR), _ => unreachable!(), }), - // Unknown timezone value(FixedOffset::west_opt(0 * HOUR), alphanumeric1), )), @@ -284,12 +324,19 @@ mod tests { use super::*; use chrono::TimeZone; - #[test] fn test_section_rfc_strict() { assert_eq!( - section("Fri, 21 Nov 1997 09:55:06 -0600"), - Ok(("", Some(FixedOffset::west_opt(6 * HOUR).unwrap().with_ymd_and_hms(1997, 11, 21, 9, 55, 6).unwrap()))), + section("Fri, 21 Nov 1997 09:55:06 -0600"), + Ok(( + "", + Some( + FixedOffset::west_opt(6 * HOUR) + .unwrap() + .with_ymd_and_hms(1997, 11, 
21, 9, 55, 6) + .unwrap() + ) + )), ); } @@ -297,7 +344,15 @@ mod tests { fn test_section_received() { assert_eq!( section("Sun, 18 Jun 2023 15:39:08 +0200 (CEST)"), - Ok(("", Some(FixedOffset::east_opt(2 * HOUR).unwrap().with_ymd_and_hms(2023, 6, 18, 15, 39, 8).unwrap()))), + Ok(( + "", + Some( + FixedOffset::east_opt(2 * HOUR) + .unwrap() + .with_ymd_and_hms(2023, 6, 18, 15, 39, 8) + .unwrap() + ) + )), ); } @@ -310,8 +365,17 @@ mod tests { Feb 1969 23:32 - -0330 (Newfoundland Time)"#), - Ok(("", Some(FixedOffset::west_opt(3 * HOUR + 30 * MIN).unwrap().with_ymd_and_hms(1969, 2, 13, 23, 32, 00).unwrap()))), + -0330 (Newfoundland Time)"# + ), + Ok(( + "", + Some( + FixedOffset::west_opt(3 * HOUR + 30 * MIN) + .unwrap() + .with_ymd_and_hms(1969, 2, 13, 23, 32, 00) + .unwrap() + ) + )), ); } @@ -319,7 +383,15 @@ mod tests { fn test_section_rfc_obs() { assert_eq!( section("21 Nov 97 09:55:06 GMT"), - Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(1997, 11, 21, 9, 55, 6).unwrap()))), + Ok(( + "", + Some( + FixedOffset::east_opt(0) + .unwrap() + .with_ymd_and_hms(1997, 11, 21, 9, 55, 6) + .unwrap() + ) + )), ); } @@ -327,7 +399,15 @@ mod tests { fn test_section_3digit_year() { assert_eq!( section("21 Nov 103 09:55:06 UT"), - Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2003, 11, 21, 9, 55, 6).unwrap()))), + Ok(( + "", + Some( + FixedOffset::east_opt(0) + .unwrap() + .with_ymd_and_hms(2003, 11, 21, 9, 55, 6) + .unwrap() + ) + )), ); } @@ -335,7 +415,15 @@ mod tests { fn test_section_rfc_obs_ws() { assert_eq!( section("Fri, 21 Nov 1997 09(comment): 55 : 06 -0600"), - Ok(("", Some(FixedOffset::west_opt(6 * HOUR).unwrap().with_ymd_and_hms(1997, 11, 21, 9, 55, 6).unwrap()))), + Ok(( + "", + Some( + FixedOffset::west_opt(6 * HOUR) + .unwrap() + .with_ymd_and_hms(1997, 11, 21, 9, 55, 6) + .unwrap() + ) + )), ); } @@ -343,55 +431,133 @@ mod tests { fn test_section_2digit_year() { assert_eq!( section("21 Nov 23 09:55:06Z"), - Ok(("", 
Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 11, 21, 9, 55, 6).unwrap()))), + Ok(( + "", + Some( + FixedOffset::east_opt(0) + .unwrap() + .with_ymd_and_hms(2023, 11, 21, 9, 55, 6) + .unwrap() + ) + )), ); } #[test] fn test_section_military_zone_east() { - ["a", "B", "c", "D", "e", "F", "g", "H", "i", "K", "l", "M"].iter().enumerate().for_each(|(i, x)| { - assert_eq!( - section(format!("1 Jan 22 08:00:00 {}", x).as_str()), - Ok(("", Some(FixedOffset::east_opt((i as i32 + 1) * HOUR).unwrap().with_ymd_and_hms(2022, 01, 01, 8, 0, 0).unwrap()))) - ); - }); + ["a", "B", "c", "D", "e", "F", "g", "H", "i", "K", "l", "M"] + .iter() + .enumerate() + .for_each(|(i, x)| { + assert_eq!( + section(format!("1 Jan 22 08:00:00 {}", x).as_str()), + Ok(( + "", + Some( + FixedOffset::east_opt((i as i32 + 1) * HOUR) + .unwrap() + .with_ymd_and_hms(2022, 01, 01, 8, 0, 0) + .unwrap() + ) + )) + ); + }); } #[test] fn test_section_military_zone_west() { - ["N", "O", "P", "q", "r", "s", "T", "U", "V", "w", "x", "y"].iter().enumerate().for_each(|(i, x)| { - assert_eq!( - section(format!("1 Jan 22 08:00:00 {}", x).as_str()), - Ok(("", Some(FixedOffset::west_opt((i as i32 + 1) * HOUR).unwrap().with_ymd_and_hms(2022, 01, 01, 8, 0, 0).unwrap()))) - ); - }); + ["N", "O", "P", "q", "r", "s", "T", "U", "V", "w", "x", "y"] + .iter() + .enumerate() + .for_each(|(i, x)| { + assert_eq!( + section(format!("1 Jan 22 08:00:00 {}", x).as_str()), + Ok(( + "", + Some( + FixedOffset::west_opt((i as i32 + 1) * HOUR) + .unwrap() + .with_ymd_and_hms(2022, 01, 01, 8, 0, 0) + .unwrap() + ) + )) + ); + }); } #[test] fn test_section_gmt() { assert_eq!( section("21 Nov 2023 07:07:07 +0000"), - Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 11, 21, 7, 7, 7).unwrap()))), + Ok(( + "", + Some( + FixedOffset::east_opt(0) + .unwrap() + .with_ymd_and_hms(2023, 11, 21, 7, 7, 7) + .unwrap() + ) + )), ); assert_eq!( section("21 Nov 2023 07:07:07 -0000"), - Ok(("", 
Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 11, 21, 7, 7, 7).unwrap()))), + Ok(( + "", + Some( + FixedOffset::east_opt(0) + .unwrap() + .with_ymd_and_hms(2023, 11, 21, 7, 7, 7) + .unwrap() + ) + )), ); assert_eq!( section("21 Nov 2023 07:07:07 Z"), - Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 11, 21, 7, 7, 7).unwrap()))), + Ok(( + "", + Some( + FixedOffset::east_opt(0) + .unwrap() + .with_ymd_and_hms(2023, 11, 21, 7, 7, 7) + .unwrap() + ) + )), ); assert_eq!( section("21 Nov 2023 07:07:07 GMT"), - Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 11, 21, 7, 7, 7).unwrap()))), + Ok(( + "", + Some( + FixedOffset::east_opt(0) + .unwrap() + .with_ymd_and_hms(2023, 11, 21, 7, 7, 7) + .unwrap() + ) + )), ); assert_eq!( section("21 Nov 2023 07:07:07 UT"), - Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 11, 21, 7, 7, 7).unwrap()))), + Ok(( + "", + Some( + FixedOffset::east_opt(0) + .unwrap() + .with_ymd_and_hms(2023, 11, 21, 7, 7, 7) + .unwrap() + ) + )), ); assert_eq!( section("21 Nov 2023 07:07:07 UTC"), - Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 11, 21, 7, 7, 7).unwrap()))), + Ok(( + "", + Some( + FixedOffset::east_opt(0) + .unwrap() + .with_ymd_and_hms(2023, 11, 21, 7, 7, 7) + .unwrap() + ) + )), ); } @@ -399,7 +565,15 @@ mod tests { fn test_section_usa() { assert_eq!( section("21 Nov 2023 4:4:4 CST"), - Ok(("", Some(FixedOffset::west_opt(6 * HOUR).unwrap().with_ymd_and_hms(2023, 11, 21, 4, 4, 4).unwrap()))), + Ok(( + "", + Some( + FixedOffset::west_opt(6 * HOUR) + .unwrap() + .with_ymd_and_hms(2023, 11, 21, 4, 4, 4) + .unwrap() + ) + )), ); } } diff --git a/src/fragments/eager.rs b/src/fragments/eager.rs index ce602ea..cadeaa6 100644 --- a/src/fragments/eager.rs +++ b/src/fragments/eager.rs @@ -1,11 +1,9 @@ -use chrono::{DateTime, FixedOffset}; -use crate::fragments::model::{ - MailboxList, MailboxRef, AddressList, - MessageId, MessageIdList}; -use 
crate::fragments::misc_token::{Unstructured, PhraseList}; -use crate::fragments::trace::ReceivedLog; -use crate::fragments::lazy::Field as Lazy; use crate::error::IMFError; +use crate::fragments::lazy::Field as Lazy; +use crate::fragments::misc_token::{PhraseList, Unstructured}; +use crate::fragments::model::{AddressList, MailboxList, MailboxRef, MessageId, MessageIdList}; +use crate::fragments::trace::ReceivedLog; +use chrono::{DateTime, FixedOffset}; #[derive(Debug, PartialEq)] pub enum Field<'a> { diff --git a/src/fragments/identification.rs b/src/fragments/identification.rs index 912e717..8ba4b89 100644 --- a/src/fragments/identification.rs +++ b/src/fragments/identification.rs @@ -1,18 +1,18 @@ use nom::{ - IResult, branch::alt, - bytes::complete::{take_while, tag}, + bytes::complete::{tag, take_while}, combinator::opt, multi::many1, sequence::{delimited, pair, tuple}, + IResult, }; +use crate::error::IMFError; use crate::fragments::lazy; -use crate::fragments::whitespace::cfws; -use crate::fragments::words::dot_atom_text; use crate::fragments::mailbox::is_dtext; use crate::fragments::model::{MessageId, MessageIdList}; -use crate::error::IMFError; +use crate::fragments::whitespace::cfws; +use crate::fragments::words::dot_atom_text; impl<'a> TryFrom<&'a lazy::Identifier<'a>> for MessageId<'a> { type Error = IMFError<'a>; @@ -45,12 +45,12 @@ pub fn msg_id(input: &str) -> IResult<&str, MessageId> { tuple((id_left, tag("@"), id_right)), pair(tag(">"), opt(cfws)), )(input)?; - Ok((input, MessageId{ left, right })) + Ok((input, MessageId { left, right })) } // Missing obsolete fn id_left(input: &str) -> IResult<&str, &str> { - dot_atom_text(input) + dot_atom_text(input) } // Missing obsolete @@ -70,7 +70,13 @@ mod tests { fn test_msg_id() { assert_eq!( msg_id("<5678.21-Nov-1997@example.com>"), - Ok(("", MessageId{left: "5678.21-Nov-1997", right: "example.com"})), + Ok(( + "", + MessageId { + left: "5678.21-Nov-1997", + right: "example.com" + } + )), ); } } diff 
--git a/src/fragments/lazy.rs b/src/fragments/lazy.rs index a5a055c..e23dfe4 100644 --- a/src/fragments/lazy.rs +++ b/src/fragments/lazy.rs @@ -1,10 +1,10 @@ use std::convert::From; use nom::{ - IResult, - bytes::complete::{take_while1, tag}, + bytes::complete::{tag, take_while1}, character::complete::space0, sequence::{terminated, tuple}, + IResult, }; #[derive(Debug, PartialEq)] @@ -98,34 +98,38 @@ impl<'a> From<&'a str> for Field<'a> { fn field_name(input: &str) -> IResult<&str, &str> { terminated( take_while1(|c| c >= '\x21' && c <= '\x7E' && c != '\x3A'), - tuple((space0, tag(":"), space0)) + tuple((space0, tag(":"), space0)), )(input) } fn correct_field(input: &str) -> IResult<&str, Field> { - field_name(input) - .map(|(rest, name)| ("", match name.to_lowercase().as_ref() { - "date" => Date(DateTime(rest)), + field_name(input).map(|(rest, name)| { + ( + "", + match name.to_lowercase().as_ref() { + "date" => Date(DateTime(rest)), - "from" => From(MailboxList(rest)), - "sender" => Sender(Mailbox(rest)), - "reply-to" => ReplyTo(AddressList(rest)), + "from" => From(MailboxList(rest)), + "sender" => Sender(Mailbox(rest)), + "reply-to" => ReplyTo(AddressList(rest)), - "to" => To(AddressList(rest)), - "cc" => Cc(AddressList(rest)), - "bcc" => Bcc(NullableAddressList(rest)), + "to" => To(AddressList(rest)), + "cc" => Cc(AddressList(rest)), + "bcc" => Bcc(NullableAddressList(rest)), - "message-id" => MessageID(Identifier(rest)), - "in-reply-to" => InReplyTo(IdentifierList(rest)), - "references" => References(IdentifierList(rest)), + "message-id" => MessageID(Identifier(rest)), + "in-reply-to" => InReplyTo(IdentifierList(rest)), + "references" => References(IdentifierList(rest)), - "subject" => Subject(Unstructured(rest)), - "comments" => Comments(Unstructured(rest)), - "keywords" => Keywords(PhraseList(rest)), + "subject" => Subject(Unstructured(rest)), + "comments" => Comments(Unstructured(rest)), + "keywords" => Keywords(PhraseList(rest)), - "return-path" => 
ReturnPath(Mailbox(rest)), - "received" => Received(ReceivedLog(rest)), + "return-path" => ReturnPath(Mailbox(rest)), + "received" => Received(ReceivedLog(rest)), - _ => Optional(name, Unstructured(rest)), - })) + _ => Optional(name, Unstructured(rest)), + }, + ) + }) } diff --git a/src/fragments/mailbox.rs b/src/fragments/mailbox.rs index 337bacd..6860c7c 100644 --- a/src/fragments/mailbox.rs +++ b/src/fragments/mailbox.rs @@ -1,19 +1,19 @@ -use std::borrow::Cow; use nom::{ - IResult, branch::alt, - bytes::complete::{tag, is_a}, + bytes::complete::{is_a, tag}, character::complete::satisfy, - combinator::{into,map,opt,recognize}, - multi::{separated_list1, fold_many0, many0}, - sequence::{delimited,pair,preceded,terminated,tuple}, + combinator::{into, map, opt, recognize}, + multi::{fold_many0, many0, separated_list1}, + sequence::{delimited, pair, preceded, terminated, tuple}, + IResult, }; +use std::borrow::Cow; -use crate::fragments::model::{MailboxRef, AddrSpec}; use crate::fragments::misc_token::{phrase, word}; +use crate::fragments::model::{AddrSpec, MailboxRef}; +use crate::fragments::quoted::quoted_string; use crate::fragments::whitespace::{cfws, fws, is_obs_no_ws_ctl}; use crate::fragments::words::{atom, dot_atom}; -use crate::fragments::quoted::quoted_string; /// Mailbox /// @@ -43,10 +43,10 @@ fn name_addr(input: &str) -> IResult<&str, MailboxRef> { /// obs-angle-addr /// ``` pub fn angle_addr(input: &str) -> IResult<&str, MailboxRef> { - delimited( - tuple((opt(cfws), tag("<"), opt(obs_route))), - into(addr_spec), - pair(tag(">"), opt(cfws)), + delimited( + tuple((opt(cfws), tag("<"), opt(obs_route))), + into(addr_spec), + pair(tag(">"), opt(cfws)), )(input) } @@ -61,7 +61,10 @@ fn obs_route(input: &str) -> IResult<&str, Vec> { /// ``` fn obs_domain_list(input: &str) -> IResult<&str, Vec> { //@FIXME complexity is O(n) in term of domains here. 
- let (input, head) = preceded(pair(many0(alt((recognize(cfws), tag(",")))), tag("@")), obs_domain)(input)?; + let (input, head) = preceded( + pair(many0(alt((recognize(cfws), tag(",")))), tag("@")), + obs_domain, + )(input)?; let (input, mut rest) = obs_domain_list_rest(input)?; rest.insert(0, head); Ok((input, rest)) @@ -73,7 +76,7 @@ fn obs_domain_list_rest(input: &str) -> IResult<&str, Vec> { pair(tag(","), opt(cfws)), opt(preceded(tag("@"), obs_domain)), )), - |v: Vec>| v.into_iter().flatten().collect() + |v: Vec>| v.into_iter().flatten().collect(), )(input) } @@ -86,9 +89,13 @@ fn obs_domain_list_rest(input: &str) -> IResult<&str, Vec> { /// so I force obsolete for now... pub fn addr_spec(input: &str) -> IResult<&str, AddrSpec> { map( - tuple((obs_local_part, tag("@"), obs_domain, many0(pair(tag("@"), obs_domain)))), - |(local_part, _, domain, _)| - AddrSpec { local_part, domain }, + tuple(( + obs_local_part, + tag("@"), + obs_domain, + many0(pair(tag("@"), obs_domain)), + )), + |(local_part, _, domain, _)| AddrSpec { local_part, domain }, )(input) } @@ -108,7 +115,7 @@ fn strict_local_part(input: &str) -> IResult<&str, String> { /// This is found in Enron emails and supported by Gmail. /// /// Obsolete local part is a superset of strict_local_part: -/// anything that is parsed by strict_local_part will be parsed by +/// anything that is parsed by strict_local_part will be parsed by /// obs_local_part. /// /// ```abnf @@ -118,7 +125,8 @@ fn obs_local_part(input: &str) -> IResult<&str, String> { fold_many0( alt((map(is_a("."), Cow::Borrowed), word)), String::new, - |acc, chunk| acc + &chunk)(input) + |acc, chunk| acc + &chunk, + )(input) } /// Domain @@ -140,7 +148,10 @@ pub fn strict_domain(input: &str) -> IResult<&str, String> { /// obs-domain = atom *("." 
atom) / domain-literal /// ``` pub fn obs_domain(input: &str) -> IResult<&str, String> { - alt((map(separated_list1(tag("."), atom), |v| v.join(".")), domain_litteral))(input) + alt(( + map(separated_list1(tag("."), atom), |v| v.join(".")), + domain_litteral, + ))(input) } /// Domain litteral @@ -152,15 +163,16 @@ fn domain_litteral(input: &str) -> IResult<&str, String> { delimited( pair(opt(cfws), tag("[")), inner_domain_litteral, - pair(tag("]"), opt(cfws)) + pair(tag("]"), opt(cfws)), )(input) } fn inner_domain_litteral(input: &str) -> IResult<&str, String> { - let (input, (cvec, maybe_wsp)) = pair(many0(pair(opt(fws), satisfy(is_dtext))), opt(fws))(input)?; - let mut domain = cvec.iter().fold( - String::with_capacity(16), - |mut acc, (maybe_wsp, c)| { + let (input, (cvec, maybe_wsp)) = + pair(many0(pair(opt(fws), satisfy(is_dtext))), opt(fws))(input)?; + let mut domain = cvec + .iter() + .fold(String::with_capacity(16), |mut acc, (maybe_wsp, c)| { if let Some(wsp) = maybe_wsp { acc.push(*wsp); } @@ -174,7 +186,6 @@ fn inner_domain_litteral(input: &str) -> IResult<&str, String> { Ok((input, domain)) } - fn is_strict_dtext(c: char) -> bool { (c >= '\x21' && c <= '\x5A') || (c >= '\x5E' && c <= '\x7E') || !c.is_ascii() } @@ -188,7 +199,7 @@ fn is_strict_dtext(c: char) -> bool { /// obs-dtext = obs-NO-WS-CTL / quoted-pair /// ``` pub fn is_dtext(c: char) -> bool { - is_strict_dtext(c) || is_obs_no_ws_ctl(c) + is_strict_dtext(c) || is_obs_no_ws_ctl(c) //@FIXME does not support quoted pair yet while RFC requires it } @@ -198,89 +209,213 @@ mod tests { #[test] fn test_addr_spec() { - assert_eq!(addr_spec("alice@example.com"), Ok(("", AddrSpec{local_part: "alice".into(), domain: "example.com".into() }))); + assert_eq!( + addr_spec("alice@example.com"), + Ok(( + "", + AddrSpec { + local_part: "alice".into(), + domain: "example.com".into() + } + )) + ); - assert_eq!(addr_spec("jsmith@[192.168.2.1]"), Ok(("", AddrSpec{local_part: "jsmith".into(), domain: 
"192.168.2.1".into() }))); - assert_eq!(addr_spec("jsmith@[IPv6:2001:db8::1]"), Ok(("", AddrSpec{local_part: "jsmith".into(), domain: "IPv6:2001:db8::1".into() }))); + assert_eq!( + addr_spec("jsmith@[192.168.2.1]"), + Ok(( + "", + AddrSpec { + local_part: "jsmith".into(), + domain: "192.168.2.1".into() + } + )) + ); + assert_eq!( + addr_spec("jsmith@[IPv6:2001:db8::1]"), + Ok(( + "", + AddrSpec { + local_part: "jsmith".into(), + domain: "IPv6:2001:db8::1".into() + } + )) + ); // UTF-8 - assert_eq!(addr_spec("用户@例子.广告"), Ok(("", AddrSpec{local_part: "用户".into(), domain: "例子.广告".into()}))); + assert_eq!( + addr_spec("用户@例子.广告"), + Ok(( + "", + AddrSpec { + local_part: "用户".into(), + domain: "例子.广告".into() + } + )) + ); // ASCII Edge cases assert_eq!( addr_spec("user+mailbox/department=shipping@example.com"), - Ok(("", AddrSpec{local_part: "user+mailbox/department=shipping".into(), domain: "example.com".into()}))); + Ok(( + "", + AddrSpec { + local_part: "user+mailbox/department=shipping".into(), + domain: "example.com".into() + } + )) + ); assert_eq!( addr_spec("!#$%&'*+-/=?^_`.{|}~@example.com"), - Ok(("", AddrSpec{local_part: "!#$%&'*+-/=?^_`.{|}~".into(), domain: "example.com".into()}))); + Ok(( + "", + AddrSpec { + local_part: "!#$%&'*+-/=?^_`.{|}~".into(), + domain: "example.com".into() + } + )) + ); assert_eq!( addr_spec(r#""Abc@def"@example.com"#), - Ok(("", AddrSpec{local_part: "Abc@def".into(), domain: "example.com".into()}))); - assert_eq!(addr_spec(r#""Fred\ Bloggs"@example.com"#), Ok(("", AddrSpec{local_part: "Fred Bloggs".into(), domain: "example.com".into()}))); - assert_eq!(addr_spec(r#""Joe.\\Blow"@example.com"#), Ok(("", AddrSpec{local_part: r#"Joe.\Blow"#.into(), domain: "example.com".into()}))); + Ok(( + "", + AddrSpec { + local_part: "Abc@def".into(), + domain: "example.com".into() + } + )) + ); + assert_eq!( + addr_spec(r#""Fred\ Bloggs"@example.com"#), + Ok(( + "", + AddrSpec { + local_part: "Fred Bloggs".into(), + domain: "example.com".into() 
+ } + )) + ); + assert_eq!( + addr_spec(r#""Joe.\\Blow"@example.com"#), + Ok(( + "", + AddrSpec { + local_part: r#"Joe.\Blow"#.into(), + domain: "example.com".into() + } + )) + ); } #[test] fn test_mailbox() { - assert_eq!(mailbox(r#""Joe Q. Public" "#), Ok(("", MailboxRef { - name: Some("Joe Q. Public".into()), - addrspec: AddrSpec { - local_part: "john.q.public".into(), - domain: "example.com".into(), - } - }))); + assert_eq!( + mailbox(r#""Joe Q. Public" "#), + Ok(( + "", + MailboxRef { + name: Some("Joe Q. Public".into()), + addrspec: AddrSpec { + local_part: "john.q.public".into(), + domain: "example.com".into(), + } + } + )) + ); - assert_eq!(mailbox(r#"Mary Smith "#), Ok(("", MailboxRef { - name: Some("Mary Smith".into()), - addrspec: AddrSpec { - local_part: "mary".into(), - domain: "x.test".into(), - } - }))); + assert_eq!( + mailbox(r#"Mary Smith "#), + Ok(( + "", + MailboxRef { + name: Some("Mary Smith".into()), + addrspec: AddrSpec { + local_part: "mary".into(), + domain: "x.test".into(), + } + } + )) + ); - assert_eq!(mailbox(r#"jdoe@example.org"#), Ok(("", MailboxRef { - name: None, - addrspec: AddrSpec { - local_part: "jdoe".into(), - domain: "example.org".into(), - } - }))); + assert_eq!( + mailbox(r#"jdoe@example.org"#), + Ok(( + "", + MailboxRef { + name: None, + addrspec: AddrSpec { + local_part: "jdoe".into(), + domain: "example.org".into(), + } + } + )) + ); - assert_eq!(mailbox(r#"Who? "#), Ok(("", MailboxRef { - name: Some("Who?".into()), - addrspec: AddrSpec { - local_part: "one".into(), - domain: "y.test".into(), - } - }))); + assert_eq!( + mailbox(r#"Who? 
"#), + Ok(( + "", + MailboxRef { + name: Some("Who?".into()), + addrspec: AddrSpec { + local_part: "one".into(), + domain: "y.test".into(), + } + } + )) + ); - assert_eq!(mailbox(r#""#), Ok(("", MailboxRef { - name: None, - addrspec: AddrSpec { - local_part: "boss".into(), - domain: "nil.test".into(), - } - }))); + assert_eq!( + mailbox(r#""#), + Ok(( + "", + MailboxRef { + name: None, + addrspec: AddrSpec { + local_part: "boss".into(), + domain: "nil.test".into(), + } + } + )) + ); - assert_eq!(mailbox(r#""Giant; \"Big\" Box" "#), Ok(("", MailboxRef { - name: Some(r#"Giant; "Big" Box"#.into()), - addrspec: AddrSpec { - local_part: "sysservices".into(), - domain: "example.net".into(), - } - }))); + assert_eq!( + mailbox(r#""Giant; \"Big\" Box" "#), + Ok(( + "", + MailboxRef { + name: Some(r#"Giant; "Big" Box"#.into()), + addrspec: AddrSpec { + local_part: "sysservices".into(), + domain: "example.net".into(), + } + } + )) + ); } #[test] fn test_obs_domain_list() { - assert_eq!(obs_domain_list(r#"(shhh it's coming) + assert_eq!( + obs_domain_list( + r#"(shhh it's coming) , (not yet) @33+4.com,,,, ,,,, (again) - @example.com,@yep.com,@a,@b,,,@c"#), - Ok(("", vec!["33+4.com".into(), "example.com".into(), "yep.com".into(), "a".into(), "b".into(), "c".into()])) + @example.com,@yep.com,@a,@b,,,@c"# + ), + Ok(( + "", + vec![ + "33+4.com".into(), + "example.com".into(), + "yep.com".into(), + "a".into(), + "b".into(), + "c".into() + ] + )) ); } @@ -288,10 +423,13 @@ mod tests { fn test_enron1() { assert_eq!( addr_spec("a..howard@enron.com"), - Ok(("", AddrSpec { - local_part: "a..howard".into(), - domain: "enron.com".into(), - })) + Ok(( + "", + AddrSpec { + local_part: "a..howard".into(), + domain: "enron.com".into(), + } + )) ); } @@ -299,10 +437,13 @@ mod tests { fn test_enron2() { assert_eq!( addr_spec(".nelson@enron.com"), - Ok(("", AddrSpec { - local_part: ".nelson".into(), - domain: "enron.com".into(), - })) + Ok(( + "", + AddrSpec { + local_part: ".nelson".into(), + 
domain: "enron.com".into(), + } + )) ); } @@ -310,25 +451,30 @@ mod tests { fn test_enron3() { assert_eq!( addr_spec("ecn2760.conf.@enron.com"), - Ok(("", AddrSpec { - local_part: "ecn2760.conf.".into(), - domain: "enron.com".into(), - })) + Ok(( + "", + AddrSpec { + local_part: "ecn2760.conf.".into(), + domain: "enron.com".into(), + } + )) ); } - #[test] fn test_enron4() { assert_eq!( mailbox(r#"<"mark_kopinski/intl/acim/americancentury"@americancentury.com@enron.com>"#), - Ok(("", MailboxRef { - name: None, - addrspec: AddrSpec { - local_part: "mark_kopinski/intl/acim/americancentury".into(), - domain: "americancentury.com".into(), + Ok(( + "", + MailboxRef { + name: None, + addrspec: AddrSpec { + local_part: "mark_kopinski/intl/acim/americancentury".into(), + domain: "americancentury.com".into(), + } } - })) + )) ); } } diff --git a/src/fragments/misc_token.rs b/src/fragments/misc_token.rs index 19d7cdb..3f18213 100644 --- a/src/fragments/misc_token.rs +++ b/src/fragments/misc_token.rs @@ -1,19 +1,19 @@ -use std::borrow::Cow; use nom::{ - IResult, branch::alt, - bytes::complete::{take_while1, tag}, + bytes::complete::{tag, take_while1}, character::complete::space0, combinator::{into, opt}, multi::{many0, many1, separated_list1}, sequence::tuple, + IResult, }; +use std::borrow::Cow; +use crate::error::IMFError; use crate::fragments::lazy; use crate::fragments::quoted::quoted_string; use crate::fragments::whitespace::{fws, is_obs_no_ws_ctl}; use crate::fragments::words::{atom, is_vchar}; -use crate::error::IMFError; #[derive(Debug, PartialEq, Default)] pub struct Unstructured(pub String); @@ -28,7 +28,7 @@ impl<'a> TryFrom<&'a lazy::Unstructured<'a>> for Unstructured { unstructured(input.0) .map(|(_, v)| Unstructured(v)) .map_err(|e| IMFError::Unstructured(e)) - } + } } impl<'a> TryFrom<&'a lazy::PhraseList<'a>> for PhraseList { @@ -101,7 +101,13 @@ mod tests { #[test] fn test_phrase() { assert_eq!(phrase("hello world"), Ok(("", "hello world".into()))); - 
assert_eq!(phrase("salut \"le\" monde"), Ok(("", "salut le monde".into()))); - assert_eq!(phrase("fin\r\n du\r\nmonde"), Ok(("\r\nmonde", "fin du".into()))); + assert_eq!( + phrase("salut \"le\" monde"), + Ok(("", "salut le monde".into())) + ); + assert_eq!( + phrase("fin\r\n du\r\nmonde"), + Ok(("\r\nmonde", "fin du".into())) + ); } } diff --git a/src/fragments/mod.rs b/src/fragments/mod.rs index c9e472e..4034c3e 100644 --- a/src/fragments/mod.rs +++ b/src/fragments/mod.rs @@ -2,17 +2,17 @@ pub mod model; // Generic +pub mod misc_token; +mod quoted; pub mod whitespace; mod words; -mod quoted; -pub mod misc_token; // Header specific -mod mailbox; mod address; -mod identification; -pub mod trace; mod datetime; -pub mod lazy; pub mod eager; +mod identification; +pub mod lazy; +mod mailbox; pub mod section; +pub mod trace; diff --git a/src/fragments/model.rs b/src/fragments/model.rs index e28eb2d..fb0fa30 100644 --- a/src/fragments/model.rs +++ b/src/fragments/model.rs @@ -1,5 +1,5 @@ +use chrono::{DateTime, FixedOffset}; use std::collections::HashMap; -use chrono::{DateTime,FixedOffset}; #[derive(Debug, PartialEq)] pub struct AddrSpec { @@ -126,7 +126,7 @@ pub struct HeaderSection<'a> { pub msg_id: Option>, pub in_reply_to: Vec>, pub references: Vec>, - + // 3.6.5. 
Informational Fields pub subject: Option, pub comments: Vec, diff --git a/src/fragments/quoted.rs b/src/fragments/quoted.rs index 6a593a0..261f499 100644 --- a/src/fragments/quoted.rs +++ b/src/fragments/quoted.rs @@ -1,14 +1,14 @@ use nom::{ - IResult, branch::alt, bytes::complete::tag, character::complete::{anychar, satisfy}, combinator::opt, multi::many0, sequence::{pair, preceded}, + IResult, }; -use crate::fragments::whitespace::{fws, cfws, is_obs_no_ws_ctl}; +use crate::fragments::whitespace::{cfws, fws, is_obs_no_ws_ctl}; /// Quoted pair /// @@ -53,29 +53,29 @@ fn qcontent(input: &str) -> IResult<&str, char> { /// [CFWS] /// ``` pub fn quoted_string(input: &str) -> IResult<&str, String> { - let (input, _) = opt(cfws)(input)?; - let (input, _) = tag("\"")(input)?; - let (input, content) = many0(pair(opt(fws), qcontent))(input)?; + let (input, _) = opt(cfws)(input)?; + let (input, _) = tag("\"")(input)?; + let (input, content) = many0(pair(opt(fws), qcontent))(input)?; - // Rebuild string - let mut qstring = content.iter().fold( - String::with_capacity(16), - |mut acc, (maybe_wsp, c)| { - if let Some(wsp) = maybe_wsp { - acc.push(*wsp); - } - acc.push(*c); - acc - }); + // Rebuild string + let mut qstring = content + .iter() + .fold(String::with_capacity(16), |mut acc, (maybe_wsp, c)| { + if let Some(wsp) = maybe_wsp { + acc.push(*wsp); + } + acc.push(*c); + acc + }); - let (input, maybe_wsp) = opt(fws)(input)?; - if let Some(wsp) = maybe_wsp { - qstring.push(wsp); - } + let (input, maybe_wsp) = opt(fws)(input)?; + if let Some(wsp) = maybe_wsp { + qstring.push(wsp); + } - let (input, _) = tag("\"")(input)?; - let (input, _) = opt(cfws)(input)?; - Ok((input, qstring)) + let (input, _) = tag("\"")(input)?; + let (input, _) = opt(cfws)(input)?; + Ok((input, qstring)) } #[cfg(test)] @@ -84,7 +84,13 @@ mod tests { #[test] fn test_quoted_string() { - assert_eq!(quoted_string(" \"hello\\\"world\" "), Ok(("", "hello\"world".to_string()))); - 
assert_eq!(quoted_string("\"hello\r\n world\""), Ok(("", "hello world".to_string()))); + assert_eq!( + quoted_string(" \"hello\\\"world\" "), + Ok(("", "hello\"world".to_string())) + ); + assert_eq!( + quoted_string("\"hello\r\n world\""), + Ok(("", "hello world".to_string())) + ); } } diff --git a/src/fragments/section.rs b/src/fragments/section.rs index 6ebb5a9..8a435b9 100644 --- a/src/fragments/section.rs +++ b/src/fragments/section.rs @@ -1,11 +1,11 @@ use std::collections::HashMap; -use chrono::{DateTime, FixedOffset}; -use crate::fragments::model::{MailboxRef,MessageId, AddressRef}; -use crate::fragments::misc_token::{Unstructured, PhraseList}; -use crate::fragments::trace::ReceivedLog; use crate::fragments::eager::Field; use crate::fragments::lazy; +use crate::fragments::misc_token::{PhraseList, Unstructured}; +use crate::fragments::model::{AddressRef, MailboxRef, MessageId}; +use crate::fragments::trace::ReceivedLog; +use chrono::{DateTime, FixedOffset}; #[derive(Debug, PartialEq, Default)] pub struct Section<'a> { @@ -26,7 +26,7 @@ pub struct Section<'a> { pub msg_id: Option<&'a MessageId<'a>>, pub in_reply_to: Vec<&'a MessageId<'a>>, pub references: Vec<&'a MessageId<'a>>, - + // 3.6.5. Informational Fields pub subject: Option<&'a Unstructured>, pub comments: Vec<&'a Unstructured>, @@ -48,7 +48,7 @@ pub struct Section<'a> { //@FIXME min and max limits are not enforced, // it may result in missing data or silently overriden data. 
impl<'a> FromIterator<&'a Field<'a>> for Section<'a> { - fn from_iter>>(iter: I) -> Self { + fn from_iter>>(iter: I) -> Self { let mut section = Section::default(); for field in iter { match field { @@ -67,11 +67,12 @@ impl<'a> FromIterator<&'a Field<'a>> for Section<'a> { Field::Keywords(v) => section.keywords.push(v), Field::ReturnPath(v) => section.return_path.push(v), Field::Received(v) => section.received.push(v), - Field::Optional(k, v) => { section.optional.insert(k, v); }, + Field::Optional(k, v) => { + section.optional.insert(k, v); + } Field::Rescue(v) => section.unparsed.push(v), } } section } } - diff --git a/src/fragments/trace.rs b/src/fragments/trace.rs index a25170d..5dc99d0 100644 --- a/src/fragments/trace.rs +++ b/src/fragments/trace.rs @@ -1,13 +1,13 @@ +use crate::error::IMFError; +use crate::fragments::{datetime, lazy, mailbox, misc_token, model, whitespace}; use nom::{ - IResult, branch::alt, bytes::complete::tag, combinator::{map, opt, recognize}, multi::many0, sequence::tuple, + IResult, }; -use crate::fragments::{datetime, mailbox, model, misc_token, whitespace, lazy}; -use crate::error::IMFError; #[derive(Debug, PartialEq)] pub struct ReceivedLog<'a>(pub &'a str); @@ -29,15 +29,12 @@ pub fn received_body(input: &str) -> IResult<&str, &str> { tag(";"), datetime::section, )), - |(tokens, _, _)| tokens, + |(tokens, _, _)| tokens, )(input) } pub fn return_path_body(input: &str) -> IResult<&str, Option> { - alt(( - map(mailbox::angle_addr, |a| Some(a)), - empty_path - ))(input) + alt((map(mailbox::angle_addr, |a| Some(a)), empty_path))(input) } fn empty_path(input: &str) -> IResult<&str, Option> { @@ -57,11 +54,10 @@ fn received_tokens(input: &str) -> IResult<&str, &str> { recognize(mailbox::angle_addr), recognize(mailbox::addr_spec), recognize(mailbox::obs_domain), - recognize(misc_token::word), + recognize(misc_token::word), ))(input) } - #[cfg(test)] mod tests { use super::*; @@ -76,11 +72,14 @@ mod tests { assert_eq!( received_body(hdrs), - 
Ok(("", r#"from smtp.example.com ([10.83.2.2]) + Ok(( + "", + r#"from smtp.example.com ([10.83.2.2]) by server with LMTP id xxxxxxxxx (envelope-from ) - for "#)) + for "# + )) ); } } diff --git a/src/fragments/whitespace.rs b/src/fragments/whitespace.rs index 19ec292..4acb8e8 100644 --- a/src/fragments/whitespace.rs +++ b/src/fragments/whitespace.rs @@ -1,13 +1,13 @@ +use crate::fragments::quoted::quoted_pair; use nom::{ - IResult, branch::alt, bytes::complete::tag, character::complete::{crlf, satisfy, space0, space1}, - combinator::{recognize, opt}, + combinator::{opt, recognize}, multi::{many0, many1}, sequence::tuple, + IResult, }; -use crate::fragments::quoted::quoted_pair; // --- whitespaces and comments @@ -35,12 +35,11 @@ pub fn fws(input: &str) -> IResult<&str, char> { Ok((input, ' ')) } fn fold_marker(input: &str) -> IResult<&str, &str> { - let (input, _) = space0(input)?; - let (input, _) = perm_crlf(input)?; - space1(input) + let (input, _) = space0(input)?; + let (input, _) = perm_crlf(input)?; + space1(input) } - /// Folding White Space with Comment /// /// Note: we drop the comments for now... 
@@ -76,7 +75,7 @@ pub fn comment(input: &str) -> IResult<&str, ()> { } pub fn ccontent(input: &str) -> IResult<&str, &str> { - alt((recognize(ctext), recognize(quoted_pair), recognize(comment)))(input) + alt((recognize(ctext), recognize(quoted_pair), recognize(comment)))(input) } pub fn ctext(input: &str) -> IResult<&str, char> { @@ -92,14 +91,17 @@ pub fn ctext(input: &str) -> IResult<&str, char> { /// obs-ctext ///``` pub fn is_restr_ctext(c: char) -> bool { - (c >= '\x21' && c <= '\x27') || (c >= '\x2A' && c <= '\x5B') || (c >= '\x5D' && c <= '\x7E') || !c.is_ascii() + (c >= '\x21' && c <= '\x27') + || (c >= '\x2A' && c <= '\x5B') + || (c >= '\x5D' && c <= '\x7E') + || !c.is_ascii() } pub fn is_ctext(c: char) -> bool { is_restr_ctext(c) || is_obs_no_ws_ctl(c) } -/// US ASCII control characters without effect +/// US ASCII control characters without effect /// /// ```abnf /// obs-NO-WS-CTL = %d1-8 / ; US-ASCII control @@ -109,7 +111,11 @@ pub fn is_ctext(c: char) -> bool { /// %d127 ; white space characters /// ``` pub fn is_obs_no_ws_ctl(c: char) -> bool { - (c >= '\x01' && c <= '\x08') || c == '\x0b' || c == '\x0b' || (c >= '\x0e' && c<= '\x1f') || c == '\x7F' + (c >= '\x01' && c <= '\x08') + || c == '\x0b' + || c == '\x0b' + || (c >= '\x0e' && c <= '\x1f') + || c == '\x7F' } #[cfg(test)] @@ -133,8 +139,20 @@ mod tests { #[test] fn test_cfws() { - assert_eq!(cfws("(A nice \\) chap) "), Ok(("", "(A nice \\) chap) "))); - assert_eq!(cfws("(Chris's host.)public.example>,"), Ok(("public.example>,", "(Chris's host.)"))); - assert_eq!(cfws("(double (comment) is fun) wouch"), Ok(("wouch", "(double (comment) is fun) "))); + assert_eq!( + cfws("(A nice \\) chap) "), + Ok(( + "", + "(A nice \\) chap) " + )) + ); + assert_eq!( + cfws("(Chris's host.)public.example>,"), + Ok(("public.example>,", "(Chris's host.)")) + ); + assert_eq!( + cfws("(double (comment) is fun) wouch"), + Ok(("wouch", "(double (comment) is fun) ")) + ); } } diff --git a/src/fragments/words.rs 
b/src/fragments/words.rs index 1cc8d1c..acc5584 100644 --- a/src/fragments/words.rs +++ b/src/fragments/words.rs @@ -1,16 +1,15 @@ +use crate::fragments::whitespace::cfws; use nom::{ - IResult, bytes::complete::{tag, take_while1}, - combinator::{recognize, opt}, + combinator::{opt, recognize}, multi::many0, sequence::{delimited, pair}, + IResult, }; -use crate::fragments::whitespace::cfws; - /// VCHAR definition pub fn is_vchar(c: char) -> bool { - (c >= '\x21' && c <= '\x7E') || !c.is_ascii() + (c >= '\x21' && c <= '\x7E') || !c.is_ascii() } /// Sequence of visible chars with the UTF-8 extension @@ -23,7 +22,7 @@ pub fn is_vchar(c: char) -> bool { ///``` #[allow(dead_code)] pub fn vchar_seq(input: &str) -> IResult<&str, &str> { - take_while1(is_vchar)(input) + take_while1(is_vchar)(input) } /// Atom allowed characters @@ -31,7 +30,7 @@ fn is_atext(c: char) -> bool { c.is_ascii_alphanumeric() || "!#$%&'*+-/=?^_`{|}~".contains(c) || !c.is_ascii() } -/// Atom +/// Atom /// /// `[CFWS] 1*atext [CFWS]` pub fn atom(input: &str) -> IResult<&str, &str> { @@ -42,7 +41,10 @@ pub fn atom(input: &str) -> IResult<&str, &str> { /// /// `1*atext *("." 1*atext)` pub fn dot_atom_text(input: &str) -> IResult<&str, &str> { - recognize(pair(take_while1(is_atext), many0(pair(tag("."), take_while1(is_atext)))))(input) + recognize(pair( + take_while1(is_atext), + many0(pair(tag("."), take_while1(is_atext))), + ))(input) } /// dot-atom @@ -54,13 +56,19 @@ pub fn dot_atom(input: &str) -> IResult<&str, &str> { #[allow(dead_code)] pub fn is_special(c: char) -> bool { - c == '(' || c == ')' || - c == '<' || c == '>' || - c == '[' || c == ']' || - c == ':' || c == ';' || - c == '@' || c == '\\' || - c == ',' || c == '.' || - c == '"' + c == '(' + || c == ')' + || c == '<' + || c == '>' + || c == '[' + || c == ']' + || c == ':' + || c == ';' + || c == '@' + || c == '\\' + || c == ',' + || c == '.' 
+ || c == '"' } #[cfg(test)] @@ -84,16 +92,25 @@ mod tests { #[test] fn test_atom() { - assert_eq!(atom("(skip) imf_codec (hidden) aerogramme"), Ok(("aerogramme", "imf_codec"))); + assert_eq!( + atom("(skip) imf_codec (hidden) aerogramme"), + Ok(("aerogramme", "imf_codec")) + ); } #[test] fn test_dot_atom_text() { - assert_eq!(dot_atom_text("quentin.dufour.io abcdef"), Ok((" abcdef", "quentin.dufour.io"))); + assert_eq!( + dot_atom_text("quentin.dufour.io abcdef"), + Ok((" abcdef", "quentin.dufour.io")) + ); } #[test] fn test_dot_atom() { - assert_eq!(dot_atom(" (skip) quentin.dufour.io abcdef"), Ok(("abcdef", "quentin.dufour.io"))); + assert_eq!( + dot_atom(" (skip) quentin.dufour.io abcdef"), + Ok(("abcdef", "quentin.dufour.io")) + ); } } diff --git a/src/multipass/extract_fields.rs b/src/multipass/extract_fields.rs index fc401af..1f4f6a2 100644 --- a/src/multipass/extract_fields.rs +++ b/src/multipass/extract_fields.rs @@ -1,16 +1,16 @@ use nom::{ - IResult, - character::complete::space1, bytes::complete::is_not, + character::complete::space1, combinator::{all_consuming, recognize}, multi::{many0, many1}, sequence::{pair, tuple}, + IResult, }; use crate::error::IMFError; use crate::fragments::whitespace; -use crate::multipass::guess_charset; use crate::multipass::field_lazy; +use crate::multipass::guess_charset; #[derive(Debug, PartialEq)] pub struct Parsed<'a> { @@ -21,7 +21,10 @@ pub struct Parsed<'a> { pub fn new<'a>(gcha: &'a guess_charset::Parsed<'a>) -> Result, IMFError<'a>> { all_consuming(many0(foldable_line))(&gcha.header) .map_err(|e| IMFError::ExtractFields(e)) - .map(|(_, fields)| Parsed { fields, body: gcha.body }) + .map(|(_, fields)| Parsed { + fields, + body: gcha.body, + }) } impl<'a> Parsed<'a> { @@ -35,11 +38,12 @@ impl<'a> Parsed<'a> { /// ``` fn foldable_line(input: &str) -> IResult<&str, &str> { recognize(tuple(( - is_not("\r\n"), + is_not("\r\n"), many0(pair( - many1(pair(whitespace::perm_crlf, space1)), - is_not("\r\n"))), - 
whitespace::perm_crlf + many1(pair(whitespace::perm_crlf, space1)), + is_not("\r\n"), + )), + whitespace::perm_crlf, )))(input) } diff --git a/src/multipass/field_eager.rs b/src/multipass/field_eager.rs index e11a5ba..aec4b6e 100644 --- a/src/multipass/field_eager.rs +++ b/src/multipass/field_eager.rs @@ -10,7 +10,8 @@ pub struct Parsed<'a> { pub fn new<'a>(p: &'a field_lazy::Parsed<'a>) -> Parsed<'a> { Parsed { - fields: p.fields + fields: p + .fields .iter() .filter_map(|entry| entry.try_into().ok()) .collect(), @@ -33,47 +34,56 @@ mod tests { #[test] fn test_field_body() { - assert_eq!(new(&field_lazy::Parsed { - fields: vec![ - lazy::Field::From(lazy::MailboxList("hello@world.com,\r\n\talice@wonderlands.com\r\n")), - lazy::Field::Date(lazy::DateTime("12 Mar 1997 07:33:25 Z\r\n")), - ], - body: b"Hello world!", - }), - Parsed { - fields: vec![ - eager::Field::From(vec![ - model::MailboxRef { - name: None, - addrspec: model::AddrSpec { - local_part: "hello".into(), - domain: "world.com".into() - } - }, - model::MailboxRef { - name: None, - addrspec: model::AddrSpec { - local_part: "alice".into(), - domain: "wonderlands.com".into() - } - }, - ]), - eager::Field::Date( - FixedOffset::east_opt(0) - .unwrap() - .with_ymd_and_hms(1997, 03, 12, 7, 33, 25) - .unwrap() - ), - ], - body: b"Hello world!", - }); + assert_eq!( + new(&field_lazy::Parsed { + fields: vec![ + lazy::Field::From(lazy::MailboxList( + "hello@world.com,\r\n\talice@wonderlands.com\r\n" + )), + lazy::Field::Date(lazy::DateTime("12 Mar 1997 07:33:25 Z\r\n")), + ], + body: b"Hello world!", + }), + Parsed { + fields: vec![ + eager::Field::From(vec![ + model::MailboxRef { + name: None, + addrspec: model::AddrSpec { + local_part: "hello".into(), + domain: "world.com".into() + } + }, + model::MailboxRef { + name: None, + addrspec: model::AddrSpec { + local_part: "alice".into(), + domain: "wonderlands.com".into() + } + }, + ]), + eager::Field::Date( + FixedOffset::east_opt(0) + .unwrap() + 
.with_ymd_and_hms(1997, 03, 12, 7, 33, 25) + .unwrap() + ), + ], + body: b"Hello world!", + } + ); } - use crate::multipass::extract_fields; use crate::fragments::misc_token; + use crate::multipass::extract_fields; fn lazy_eager(input: &str, func: F) - where F: Fn(&eager::Field) { - let field = extract_fields::Parsed { fields: vec![input], body: b""}; + where + F: Fn(&eager::Field), + { + let field = extract_fields::Parsed { + fields: vec![input], + body: b"", + }; let lazy = field_lazy::new(&field); let eager = new(&lazy); func(eager.fields.first().unwrap()) @@ -83,16 +93,18 @@ mod tests { fn test_from() { lazy_eager( "From: \"Joe Q. Public\" \r\n", - |from| assert_eq!( - from, - &eager::Field::From(vec![model::MailboxRef { - name: Some("Joe Q. Public".into()), - addrspec: model::AddrSpec { - local_part: "john.q.public".into(), - domain: "example.com".into(), - } - }]) - ) + |from| { + assert_eq!( + from, + &eager::Field::From(vec![model::MailboxRef { + name: Some("Joe Q. Public".into()), + addrspec: model::AddrSpec { + local_part: "john.q.public".into(), + domain: "example.com".into(), + } + }]) + ) + }, ); } @@ -100,16 +112,18 @@ mod tests { fn test_sender() { lazy_eager( "Sender: Michael Jones \r\n", - |sender| assert_eq!( - sender, - &eager::Field::Sender(model::MailboxRef { - name: Some("Michael Jones".into()), - addrspec: model::AddrSpec { - local_part: "mjones".into(), - domain: "machine.example".into(), - }, - }) - ) + |sender| { + assert_eq!( + sender, + &eager::Field::Sender(model::MailboxRef { + name: Some("Michael Jones".into()), + addrspec: model::AddrSpec { + local_part: "mjones".into(), + domain: "machine.example".into(), + }, + }) + ) + }, ); } @@ -117,18 +131,18 @@ mod tests { fn test_reply_to() { lazy_eager( "Reply-To: \"Mary Smith: Personal Account\" \r\n", - |reply_to| assert_eq!( - reply_to, - &eager::Field::ReplyTo( - vec![model::AddressRef::Single(model::MailboxRef { + |reply_to| { + assert_eq!( + reply_to, + 
&eager::Field::ReplyTo(vec![model::AddressRef::Single(model::MailboxRef { name: Some("Mary Smith: Personal Account".into()), addrspec: model::AddrSpec { local_part: "smith".into(), domain: "home.example".into(), }, - })] + })]) ) - ) + }, ) } @@ -136,177 +150,187 @@ mod tests { fn test_to() { lazy_eager( "To: A Group:Ed Jones ,joe@where.test,John ;\r\n", - |to| assert_eq!( - to, - &eager::Field::To(vec![model::AddressRef::Many(model::GroupRef { - name: "A Group".into(), - participants: vec![ - model::MailboxRef { - name: Some("Ed Jones".into()), - addrspec: model::AddrSpec { local_part: "c".into(), domain: "a.test".into() }, - }, - model::MailboxRef { - name: None, - addrspec: model::AddrSpec { local_part: "joe".into(), domain: "where.test".into() }, - }, - model::MailboxRef { - name: Some("John".into()), - addrspec: model::AddrSpec { local_part: "jdoe".into(), domain: "one.test".into() }, - }, - ] - })]) - ) + |to| { + assert_eq!( + to, + &eager::Field::To(vec![model::AddressRef::Many(model::GroupRef { + name: "A Group".into(), + participants: vec![ + model::MailboxRef { + name: Some("Ed Jones".into()), + addrspec: model::AddrSpec { + local_part: "c".into(), + domain: "a.test".into() + }, + }, + model::MailboxRef { + name: None, + addrspec: model::AddrSpec { + local_part: "joe".into(), + domain: "where.test".into() + }, + }, + model::MailboxRef { + name: Some("John".into()), + addrspec: model::AddrSpec { + local_part: "jdoe".into(), + domain: "one.test".into() + }, + }, + ] + })]) + ) + }, ) } #[test] fn test_cc() { - lazy_eager( - "Cc: Undisclosed recipients:;\r\n", - |cc| assert_eq!( - cc, - &eager::Field::Cc(vec![model::AddressRef::Many(model::GroupRef { - name: "Undisclosed recipients".into(), - participants: vec![], - })]), + lazy_eager("Cc: Undisclosed recipients:;\r\n", |cc| { + assert_eq!( + cc, + &eager::Field::Cc(vec![model::AddressRef::Many(model::GroupRef { + name: "Undisclosed recipients".into(), + participants: vec![], + })]), ) - ) + }) } #[test] fn 
test_bcc() { - lazy_eager( - "Bcc: (empty)\r\n", - |bcc| assert_eq!( - bcc, - &eager::Field::Bcc(vec![]), - ) - ); + lazy_eager("Bcc: (empty)\r\n", |bcc| { + assert_eq!(bcc, &eager::Field::Bcc(vec![]),) + }); - lazy_eager( - "Bcc: \r\n", - |bcc| assert_eq!( - bcc, - &eager::Field::Bcc(vec![]), - ) - ); + lazy_eager("Bcc: \r\n", |bcc| { + assert_eq!(bcc, &eager::Field::Bcc(vec![]),) + }); } #[test] fn test_message_id() { - lazy_eager( - "Message-ID: <310@[127.0.0.1]>\r\n", - |msg_id| assert_eq!( - msg_id, - &eager::Field::MessageID( - model::MessageId { left: "310", right: "127.0.0.1" }, - ) + lazy_eager("Message-ID: <310@[127.0.0.1]>\r\n", |msg_id| { + assert_eq!( + msg_id, + &eager::Field::MessageID(model::MessageId { + left: "310", + right: "127.0.0.1" + },) ) - ) + }) } #[test] fn test_in_reply_to() { - lazy_eager( - "In-Reply-To: \r\n", - |irt| assert_eq!( + lazy_eager("In-Reply-To: \r\n", |irt| { + assert_eq!( irt, - &eager::Field::InReplyTo( - vec![ - model::MessageId { left: "a", right: "b" }, - model::MessageId { left: "c", right: "example.com" }, - ] - ) + &eager::Field::InReplyTo(vec![ + model::MessageId { + left: "a", + right: "b" + }, + model::MessageId { + left: "c", + right: "example.com" + }, + ]) ) - ) + }) } #[test] fn test_references() { - lazy_eager( - "References: <1234@local.machine.example> <3456@example.net>\r\n", - |refer| assert_eq!( - refer, - &eager::Field::References( - vec![ - model::MessageId { left: "1234", right: "local.machine.example" }, - model::MessageId { left: "3456", right: "example.net" }, - ] + lazy_eager( + "References: <1234@local.machine.example> <3456@example.net>\r\n", + |refer| { + assert_eq!( + refer, + &eager::Field::References(vec![ + model::MessageId { + left: "1234", + right: "local.machine.example" + }, + model::MessageId { + left: "3456", + right: "example.net" + }, + ]) ) - ) + }, ) } #[test] fn test_subject() { - lazy_eager( - "Subject: Aérogramme\r\n", - |subject| assert_eq!( - subject, - 
&eager::Field::Subject(misc_token::Unstructured("Aérogramme".into())) + lazy_eager("Subject: Aérogramme\r\n", |subject| { + assert_eq!( + subject, + &eager::Field::Subject(misc_token::Unstructured("Aérogramme".into())) ) - ) + }) } #[test] fn test_comments() { - lazy_eager( - "Comments: 😛 easter egg!\r\n", - |comments| assert_eq!( - comments, - &eager::Field::Comments(misc_token::Unstructured("😛 easter egg!".into())), + lazy_eager("Comments: 😛 easter egg!\r\n", |comments| { + assert_eq!( + comments, + &eager::Field::Comments(misc_token::Unstructured("😛 easter egg!".into())), ) - ) + }) } #[test] fn test_keywords() { - lazy_eager( - "Keywords: fantasque, farfelu, fanfreluche\r\n", - |keywords| assert_eq!( - keywords, - &eager::Field::Keywords(misc_token::PhraseList(vec![ - "fantasque".into(), - "farfelu".into(), - "fanfreluche".into() - ])) - ) + lazy_eager( + "Keywords: fantasque, farfelu, fanfreluche\r\n", + |keywords| { + assert_eq!( + keywords, + &eager::Field::Keywords(misc_token::PhraseList(vec![ + "fantasque".into(), + "farfelu".into(), + "fanfreluche".into() + ])) + ) + }, ) } //@FIXME non ported tests: - + /* - #[test] - fn test_invalid_field_name() { - assert!(known_field("Unknown: unknown\r\n").is_err()); - } + #[test] + fn test_invalid_field_name() { + assert!(known_field("Unknown: unknown\r\n").is_err()); + } - #[test] - fn test_rescue_field() { - assert_eq!( - rescue_field("Héron: élan\r\n\tnoël: test\r\nFrom: ..."), - Ok(("From: ...", Field::Rescue("Héron: élan\r\n\tnoël: test"))), - ); - } + #[test] + fn test_rescue_field() { + assert_eq!( + rescue_field("Héron: élan\r\n\tnoël: test\r\nFrom: ..."), + Ok(("From: ...", Field::Rescue("Héron: élan\r\n\tnoël: test"))), + ); + } - #[test] - fn test_wrong_fields() { - let fullmail = r#"Return-Path: xoxo -From: !!!! + #[test] + fn test_wrong_fields() { + let fullmail = r#"Return-Path: xoxo + From: !!!! 
-Hello world"#; - assert_eq!( - section(fullmail), - Ok(("Hello world", HeaderSection { - bad_fields: vec![ - Field::ReturnPath(FieldBody::Failed("xoxo")), - Field::From(FieldBody::Failed("!!!!")), - ], - ..Default::default() - })) - ); - } - */ - + Hello world"#; + assert_eq!( + section(fullmail), + Ok(("Hello world", HeaderSection { + bad_fields: vec![ + Field::ReturnPath(FieldBody::Failed("xoxo")), + Field::From(FieldBody::Failed("!!!!")), + ], + ..Default::default() + })) + ); + } + */ } diff --git a/src/multipass/field_lazy.rs b/src/multipass/field_lazy.rs index 811243e..c5b00c7 100644 --- a/src/multipass/field_lazy.rs +++ b/src/multipass/field_lazy.rs @@ -27,19 +27,23 @@ mod tests { #[test] fn test_field_name() { - assert_eq!(new(&extract_fields::Parsed { - fields: vec![ - "From: hello@world.com,\r\n\talice@wonderlands.com\r\n", - "Date: 12 Mar 1997 07:33:25 Z\r\n", - ], - body: b"Hello world!", - }), - Parsed { - fields: vec![ - lazy::Field::From(lazy::MailboxList("hello@world.com,\r\n\talice@wonderlands.com\r\n")), - lazy::Field::Date(lazy::DateTime("12 Mar 1997 07:33:25 Z\r\n")), - ], - body: b"Hello world!", - }); + assert_eq!( + new(&extract_fields::Parsed { + fields: vec![ + "From: hello@world.com,\r\n\talice@wonderlands.com\r\n", + "Date: 12 Mar 1997 07:33:25 Z\r\n", + ], + body: b"Hello world!", + }), + Parsed { + fields: vec![ + lazy::Field::From(lazy::MailboxList( + "hello@world.com,\r\n\talice@wonderlands.com\r\n" + )), + lazy::Field::Date(lazy::DateTime("12 Mar 1997 07:33:25 Z\r\n")), + ], + body: b"Hello world!", + } + ); } } diff --git a/src/multipass/guess_charset.rs b/src/multipass/guess_charset.rs index 465e682..be1f728 100644 --- a/src/multipass/guess_charset.rs +++ b/src/multipass/guess_charset.rs @@ -1,10 +1,10 @@ -use std::borrow::Cow; use chardetng::EncodingDetector; use encoding_rs::Encoding; +use std::borrow::Cow; use crate::error::IMFError; -use crate::multipass::segment; use crate::multipass::extract_fields; +use 
crate::multipass::segment; #[derive(Debug, PartialEq)] pub struct Parsed<'a> { @@ -26,11 +26,11 @@ pub fn new<'a>(seg: &'a segment::Parsed<'a>) -> Parsed<'a> { // Get encoding let enc: &Encoding = detector.guess(NO_TLD, ALLOW_UTF8); let (header, encoding, malformed) = enc.decode(&seg.header); - Parsed { - header, - encoding, - malformed, - body: seg.body + Parsed { + header, + encoding, + malformed, + body: seg.body, } } @@ -48,15 +48,15 @@ mod tests { fn test_charset() { assert_eq!( new(&segment::Parsed { - body: b"Hello world!", - header: b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n", - } - ), + body: b"Hello world!", + header: b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n", + }), Parsed { header: "From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n".into(), encoding: encoding_rs::UTF_8, malformed: false, body: b"Hello world!", - }); + } + ); } } diff --git a/src/multipass/header_section.rs b/src/multipass/header_section.rs index 017482b..3d34cec 100644 --- a/src/multipass/header_section.rs +++ b/src/multipass/header_section.rs @@ -23,60 +23,64 @@ mod tests { #[test] fn test_section() { - assert_eq!(new(&field_eager::Parsed { - fields: vec![ - eager::Field::From(vec![ - model::MailboxRef { - name: None, - addrspec: model::AddrSpec { - local_part: "hello".into(), - domain: "world.com".into() - } - }, - model::MailboxRef { - name: None, - addrspec: model::AddrSpec { - local_part: "alice".into(), - domain: "wonderlands.com".into() - } - }, - ]), - eager::Field::Date( - FixedOffset::east_opt(0) - .unwrap() - .with_ymd_and_hms(1997, 03, 12, 7, 33, 25) - .unwrap() - ), - ], - body: b"Hello world!", - }), - Parsed { - fields: Section { - from: vec![ - &model::MailboxRef { - name: None, - addrspec: model::AddrSpec { - local_part: "hello".into(), - domain: "world.com".into() - } - }, - &model::MailboxRef { - name: None, - addrspec: model::AddrSpec { - local_part: "alice".into(), - domain: "wonderlands.com".into() - } - }, + assert_eq!( + 
new(&field_eager::Parsed { + fields: vec![ + eager::Field::From(vec![ + model::MailboxRef { + name: None, + addrspec: model::AddrSpec { + local_part: "hello".into(), + domain: "world.com".into() + } + }, + model::MailboxRef { + name: None, + addrspec: model::AddrSpec { + local_part: "alice".into(), + domain: "wonderlands.com".into() + } + }, + ]), + eager::Field::Date( + FixedOffset::east_opt(0) + .unwrap() + .with_ymd_and_hms(1997, 03, 12, 7, 33, 25) + .unwrap() + ), ], + body: b"Hello world!", + }), + Parsed { + fields: Section { + from: vec![ + &model::MailboxRef { + name: None, + addrspec: model::AddrSpec { + local_part: "hello".into(), + domain: "world.com".into() + } + }, + &model::MailboxRef { + name: None, + addrspec: model::AddrSpec { + local_part: "alice".into(), + domain: "wonderlands.com".into() + } + }, + ], - date: Some(&FixedOffset::east_opt(0) - .unwrap() - .with_ymd_and_hms(1997, 03, 12, 7, 33, 25) - .unwrap()), + date: Some( + &FixedOffset::east_opt(0) + .unwrap() + .with_ymd_and_hms(1997, 03, 12, 7, 33, 25) + .unwrap() + ), - ..Default::default() - }, - body: b"Hello world!", - }); + ..Default::default() + }, + body: b"Hello world!", + } + ); } } diff --git a/src/multipass/mod.rs b/src/multipass/mod.rs index 1f8cce3..35441d0 100644 --- a/src/multipass/mod.rs +++ b/src/multipass/mod.rs @@ -1,6 +1,6 @@ -pub mod segment; -pub mod guess_charset; pub mod extract_fields; -pub mod field_lazy; pub mod field_eager; +pub mod field_lazy; +pub mod guess_charset; pub mod header_section; +pub mod segment; diff --git a/src/multipass/segment.rs b/src/multipass/segment.rs index db9a8d3..1062d32 100644 --- a/src/multipass/segment.rs +++ b/src/multipass/segment.rs @@ -1,14 +1,14 @@ use nom::{ - IResult, branch::alt, bytes::complete::{is_not, tag}, combinator::recognize, - sequence::{pair, terminated}, multi::many0, + sequence::{pair, terminated}, + IResult, }; -use crate::multipass::guess_charset; use crate::error::IMFError; +use crate::multipass::guess_charset; 
#[derive(Debug, PartialEq)] pub struct Parsed<'a> { @@ -21,10 +21,7 @@ const LF: u8 = 0x0A; const CRLF: &[u8] = &[CR, LF]; pub fn new<'a>(buffer: &'a [u8]) -> Result, IMFError<'a>> { - terminated( - recognize(many0(line)), - obs_crlf - )(buffer) + terminated(recognize(many0(line)), obs_crlf)(buffer) .map_err(|e| IMFError::Segment(e)) .map(|(body, header)| Parsed { header, body }) } @@ -36,10 +33,7 @@ impl<'a> Parsed<'a> { } fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> { - pair( - is_not(CRLF), - obs_crlf, - )(input) + pair(is_not(CRLF), obs_crlf)(input) } fn obs_crlf(input: &[u8]) -> IResult<&[u8], &[u8]> { @@ -56,7 +50,7 @@ mod tests { new(&b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n\r\nHello world!"[..]), Ok(Parsed { header: b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n", - body: b"Hello world!", + body: b"Hello world!", }) ); } diff --git a/src/parse.rs b/src/parse.rs index b250f1a..60306c1 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -1,10 +1,12 @@ -use imf_codec::multipass::segment; use imf_codec::fragments::section::Section; +use imf_codec::multipass::segment; use std::io; use std::io::Read; fn parser<'a, F>(input: &'a [u8], func: F) -> () -where F: FnOnce(&Section) -> () { +where + F: FnOnce(&Section) -> (), +{ let seg = segment::new(input).unwrap(); let charset = seg.charset(); let fields = charset.fields().unwrap(); diff --git a/tests/enron.rs b/tests/enron.rs index a988dab..8020bd9 100644 --- a/tests/enron.rs +++ b/tests/enron.rs @@ -1,13 +1,15 @@ +use imf_codec::fragments::section; +use imf_codec::multipass; use std::collections::HashSet; -use std::path::PathBuf; use std::fs::File; use std::io::Read; -use imf_codec::multipass; -use imf_codec::fragments::section; +use std::path::PathBuf; use walkdir::WalkDir; fn parser<'a, F>(input: &'a [u8], func: F) -> () -where F: FnOnce(&section::Section) -> () { +where + F: FnOnce(&section::Section) -> (), +{ let seg = multipass::segment::new(input).unwrap(); let charset =
seg.charset(); let fields = charset.fields().unwrap(); @@ -27,51 +29,44 @@ fn test_enron500k() { //d.push("williams-w3/"); let known_bad_fields = HashSet::from([ - "white-s/calendar/113.", // To: east <7..> - - "skilling-j/inbox/223.", // From: pep - + "white-s/calendar/113.", // To: east <7..> + "skilling-j/inbox/223.", // From: pep "jones-t/all_documents/9806.", // To: <"tibor.vizkelety":@enron.com> - "jones-t/notes_inbox/3303.", // To: <"tibor.vizkelety":@enron.com> - - "lokey-t/calendar/33.", // A second Date entry for the calendar containing - // Date: Monday, March 12 - - "zipper-a/inbox/199.", // To: e-mail - - "dasovich-j/deleted_items/128.", // To: f62489 - "dasovich-j/all_documents/677.", // To: w/assts - "dasovich-j/all_documents/8984.", // To: <"ft.com.users":@enron.com> - "dasovich-j/all_documents/3514.", // To: <"ft.com.users":@enron.com> - "dasovich-j/all_documents/4467.", // To: <"ft.com.users":@enron.com> - "dasovich-j/all_documents/578.", // To: w/assts - "dasovich-j/all_documents/3148.", // To: <"economist.com.readers":@enron.com> - "dasovich-j/all_documents/9953.", // To: <"economist.com.reader":@enron.com> - "dasovich-j/risk_analytics/3.", // To: w/assts - "dasovich-j/notes_inbox/5391.", // To: <"ft.com.users":@enron.com> - "dasovich-j/notes_inbox/4952.", // To: <"economist.com.reader":@enron.com> - "dasovich-j/notes_inbox/2386.", // To: <"ft.com.users":@enron.com> - "dasovich-j/notes_inbox/1706.", // To: <"ft.com.users":@enron.com> - "dasovich-j/notes_inbox/1489.", // To: <"economist.com.readers":@enron.com> - "dasovich-j/notes_inbox/5.", // To: w/assts - - "kaminski-v/sites/19.", // To: <"the.desk":@enron.com> - "kaminski-v/sites/1.", // To: <"the.desk":@enron.com> - "kaminski-v/discussion_threads/5082.", // To: <"ft.com.users":@enron.com> - "kaminski-v/discussion_threads/4046.", // To: <"the.desk":@enron.com> - "kaminski-v/discussion_threads/4187.", // To: <"the.desk":@enron.com> + "jones-t/notes_inbox/3303.", // To: 
<"tibor.vizkelety":@enron.com> + "lokey-t/calendar/33.", // A second Date entry for the calendar containing + // Date: Monday, March 12 + "zipper-a/inbox/199.", // To: e-mail + "dasovich-j/deleted_items/128.", // To: f62489 + "dasovich-j/all_documents/677.", // To: w/assts + "dasovich-j/all_documents/8984.", // To: <"ft.com.users":@enron.com> + "dasovich-j/all_documents/3514.", // To: <"ft.com.users":@enron.com> + "dasovich-j/all_documents/4467.", // To: <"ft.com.users":@enron.com> + "dasovich-j/all_documents/578.", // To: w/assts + "dasovich-j/all_documents/3148.", // To: <"economist.com.readers":@enron.com> + "dasovich-j/all_documents/9953.", // To: <"economist.com.reader":@enron.com> + "dasovich-j/risk_analytics/3.", // To: w/assts + "dasovich-j/notes_inbox/5391.", // To: <"ft.com.users":@enron.com> + "dasovich-j/notes_inbox/4952.", // To: <"economist.com.reader":@enron.com> + "dasovich-j/notes_inbox/2386.", // To: <"ft.com.users":@enron.com> + "dasovich-j/notes_inbox/1706.", // To: <"ft.com.users":@enron.com> + "dasovich-j/notes_inbox/1489.", // To: <"economist.com.readers":@enron.com> + "dasovich-j/notes_inbox/5.", // To: w/assts + "kaminski-v/sites/19.", // To: <"the.desk":@enron.com> + "kaminski-v/sites/1.", // To: <"the.desk":@enron.com> + "kaminski-v/discussion_threads/5082.", // To: <"ft.com.users":@enron.com> + "kaminski-v/discussion_threads/4046.", // To: <"the.desk":@enron.com> + "kaminski-v/discussion_threads/4187.", // To: <"the.desk":@enron.com> "kaminski-v/discussion_threads/8068.", // To: cats , risk , leaders "kaminski-v/discussion_threads/7980.", // To: dogs , cats , risk ,\r\n\tleaders "kaminski-v/all_documents/5970.", //To: dogs , cats , risk ,\r\n\tleaders "kaminski-v/all_documents/5838.", // To + Cc: dogs , breakthrough.adm@enron.com, breakthrough.adm@enron.com,\r\n\tbreakthrough.adm@enron.com "kaminski-v/all_documents/10070.", // To: <"ft.com.users":@enron.com> - "kaminski-v/all_documents/92.", // To: <"the.desk":@enron.com> - 
"kaminski-v/all_documents/276.", // To: <"the.desk":@enron.com> - "kaminski-v/technical/1.", // To: <"the.desk":@enron.com> - "kaminski-v/technical/7.", // To: <"the.desk":@enron.com> + "kaminski-v/all_documents/92.", // To: <"the.desk":@enron.com> + "kaminski-v/all_documents/276.", // To: <"the.desk":@enron.com> + "kaminski-v/technical/1.", // To: <"the.desk":@enron.com> + "kaminski-v/technical/7.", // To: <"the.desk":@enron.com> "kaminski-v/notes_inbox/140.", // To: dogs , cats , risk ,\r\n\tleaders "kaminski-v/notes_inbox/95.", // To + CC failed: cats , risk , leaders - "kean-s/archiving/untitled/1232.", // To: w/assts , mark.palmer@enron.com, karen.denne@enron.com "kean-s/archiving/untitled/1688.", // To: w/assts "kean-s/sent/198.", // To: w/assts , mark.palmer@enron.com, karen.denne@enron.com @@ -79,11 +74,10 @@ fn test_enron500k() { "kean-s/discussion_threads/950.", // To: w/assts , mark.palmer@enron.com, karen.denne@enron.com "kean-s/discussion_threads/577.", // To: w/assts "kean-s/calendar/untitled/1096.", // To: w/assts , mark.palmer@enron.com, karen.denne@enron.com - "kean-s/calendar/untitled/640.", // To: w/assts - "kean-s/all_documents/640.", // To: w/assts - "kean-s/all_documents/1095.", // To: w/assts - "kean-s/attachments/2030.", // To: w/assts - + "kean-s/calendar/untitled/640.", // To: w/assts + "kean-s/all_documents/640.", // To: w/assts + "kean-s/all_documents/1095.", // To: w/assts + "kean-s/attachments/2030.", // To: w/assts "williams-w3/operations_committee_isas/10.", // To: z34655 ]); @@ -92,7 +86,10 @@ fn test_enron500k() { ]); let mut i = 0; - for entry in WalkDir::new(d.as_path()).into_iter().filter_map(|file| file.ok()) { + for entry in WalkDir::new(d.as_path()) + .into_iter() + .filter_map(|file| file.ok()) + { if entry.metadata().unwrap().is_file() { let mail_path = entry.path(); let suffix = &mail_path.to_str().unwrap()[prefix_sz..]; @@ -127,6 +124,6 @@ fn test_enron500k() { println!("Analyzed emails: {}", i); } }) - } + } } } diff 
--git a/tests/known.rs b/tests/known.rs index b2865af..9eac7c8 100644 --- a/tests/known.rs +++ b/tests/known.rs @@ -1,10 +1,12 @@ use chrono::{FixedOffset, TimeZone}; -use std::collections::HashMap; +use imf_codec::fragments::{misc_token, model, section, trace}; use imf_codec::multipass; -use imf_codec::fragments::{model, misc_token, trace, section}; +use std::collections::HashMap; fn parser<'a, F>(input: &'a [u8], func: F) -> () -where F: FnOnce(§ion::Section) -> () { +where + F: FnOnce(§ion::Section) -> (), +{ let seg = multipass::segment::new(input).unwrap(); let charset = seg.charset(); let fields = charset.fields().unwrap(); @@ -48,29 +50,35 @@ References: <1234@local.machine.example> Unknown: unknown This is a reply to your hello. -"#.as_bytes(); - parser(fullmail, |parsed_section| +"# + .as_bytes(); + parser(fullmail, |parsed_section| { assert_eq!( parsed_section, §ion::Section { - date: Some(&FixedOffset::east_opt(2 * 3600) - .unwrap() - .with_ymd_and_hms(2023, 06, 13, 10, 01, 10) - .unwrap()), + date: Some( + &FixedOffset::east_opt(2 * 3600) + .unwrap() + .with_ymd_and_hms(2023, 06, 13, 10, 01, 10) + .unwrap() + ), - from: vec![&model::MailboxRef { - name: Some("Mary Smith".into()), - addrspec: model::AddrSpec { - local_part: "mary".into(), - domain: "example.net".into(), + from: vec![ + &model::MailboxRef { + name: Some("Mary Smith".into()), + addrspec: model::AddrSpec { + local_part: "mary".into(), + domain: "example.net".into(), + } + }, + &model::MailboxRef { + name: Some("Alan".into()), + addrspec: model::AddrSpec { + local_part: "alan".into(), + domain: "example".into(), + } } - }, &model::MailboxRef { - name: Some("Alan".into()), - addrspec: model::AddrSpec { - local_part: "alan".into(), - domain: "example".into(), - } - }], + ], sender: Some(&model::MailboxRef { name: None, @@ -106,33 +114,41 @@ This is a reply to your hello. 
bcc: vec![], - msg_id: Some(&model::MessageId { left: "3456", right: "example.net" }), - in_reply_to: vec![&model::MessageId { left: "1234", right: "local.machine.example" }], - references: vec![&model::MessageId { left: "1234", right: "local.machine.example" }], + msg_id: Some(&model::MessageId { + left: "3456", + right: "example.net" + }), + in_reply_to: vec![&model::MessageId { + left: "1234", + right: "local.machine.example" + }], + references: vec![&model::MessageId { + left: "1234", + right: "local.machine.example" + }], subject: Some(&misc_token::Unstructured("Re: Saying Hello".into())), comments: vec![ &misc_token::Unstructured("A simple message".into()), &misc_token::Unstructured("Not that complicated".into()), - &misc_token::Unstructured("not valid header name but should be accepted by the parser.".into()), + &misc_token::Unstructured( + "not valid header name but should be accepted by the parser.".into() + ), ], keywords: vec![ - &misc_token::PhraseList(vec![ - "hello".into(), - "world".into(), - ]), - &misc_token::PhraseList(vec![ - "salut".into(), - "le".into(), - "monde".into(), - ]), + &misc_token::PhraseList(vec!["hello".into(), "world".into(),]), + &misc_token::PhraseList(vec!["salut".into(), "le".into(), "monde".into(),]), ], - received: vec![ - &trace::ReceivedLog("from smtp.example.com ([10.83.2.2])\n\tby doradille with LMTP\n\tid xyzabcd\n\t(envelope-from )\n\tfor ") - ], + received: vec![&trace::ReceivedLog( + r#"from smtp.example.com ([10.83.2.2]) + by doradille with LMTP + id xyzabcd + (envelope-from ) + for "# + )], return_path: vec![&model::MailboxRef { name: None, @@ -143,8 +159,11 @@ This is a reply to your hello. 
}], optional: HashMap::from([ - ("Delivered-To", &misc_token::Unstructured("quentin@example.com".into())), - ("Unknown", &misc_token::Unstructured("unknown".into())), + ( + "Delivered-To", + &misc_token::Unstructured("quentin@example.com".into()) + ), + ("Unknown", &misc_token::Unstructured("unknown".into())), ]), bad_fields: vec![], @@ -155,5 +174,5 @@ This is a reply to your hello. ], } ) - ) + }) }