cargo fmt

This commit is contained in:
Quentin 2023-06-22 15:08:50 +02:00
parent 99c6490eb2
commit 5fda64477c
Signed by: quentin
GPG key ID: E9602264D639FF68
24 changed files with 1173 additions and 728 deletions

View file

@ -1,18 +1,18 @@
use nom::{ use nom::{
IResult,
branch::alt, branch::alt,
bytes::complete::tag, bytes::complete::tag,
combinator::{into, opt}, combinator::{into, opt},
multi::separated_list1, multi::separated_list1,
sequence::tuple, sequence::tuple,
IResult,
}; };
use crate::error::IMFError;
use crate::fragments::lazy; use crate::fragments::lazy;
use crate::fragments::model::{GroupRef, AddressRef, MailboxRef, MailboxList, AddressList};
use crate::fragments::mailbox::mailbox; use crate::fragments::mailbox::mailbox;
use crate::fragments::misc_token::phrase; use crate::fragments::misc_token::phrase;
use crate::fragments::whitespace::{cfws}; use crate::fragments::model::{AddressList, AddressRef, GroupRef, MailboxList, MailboxRef};
use crate::error::IMFError; use crate::fragments::whitespace::cfws;
impl<'a> TryFrom<&'a lazy::Mailbox<'a>> for MailboxRef { impl<'a> TryFrom<&'a lazy::Mailbox<'a>> for MailboxRef {
type Error = IMFError<'a>; type Error = IMFError<'a>;
@ -70,13 +70,16 @@ pub fn address(input: &str) -> IResult<&str, AddressRef> {
/// display-name = phrase /// display-name = phrase
/// ``` /// ```
pub fn group(input: &str) -> IResult<&str, GroupRef> { pub fn group(input: &str) -> IResult<&str, GroupRef> {
let (input, (grp_name, _, grp_list, _, _)) = let (input, (grp_name, _, grp_list, _, _)) =
tuple((phrase, tag(":"), opt(group_list), tag(";"), opt(cfws)))(input)?; tuple((phrase, tag(":"), opt(group_list), tag(";"), opt(cfws)))(input)?;
Ok((input, GroupRef { Ok((
name: grp_name, input,
participants: grp_list.unwrap_or(vec![]), GroupRef {
})) name: grp_name,
participants: grp_list.unwrap_or(vec![]),
},
))
} }
/// Group list /// Group list
@ -128,7 +131,9 @@ mod tests {
_ => panic!(), _ => panic!(),
}; };
match mailbox_list(r#"Mary Smith <mary@x.test>, jdoe@example.org, Who? <one@y.test>, <boss@nil.test>, "Giant; \"Big\" Box" <sysservices@example.net>"#) { match mailbox_list(
r#"Mary Smith <mary@x.test>, jdoe@example.org, Who? <one@y.test>, <boss@nil.test>, "Giant; \"Big\" Box" <sysservices@example.net>"#,
) {
Ok(("", _)) => (), Ok(("", _)) => (),
_ => panic!(), _ => panic!(),
}; };
@ -137,30 +142,47 @@ mod tests {
#[test] #[test]
fn test_address_list() { fn test_address_list() {
assert_eq!( assert_eq!(
address_list(r#"A Group:Ed Jones <c@a.test>,joe@where.test,John <jdoe@one.test>;, Mary Smith <mary@x.test>"#), address_list(
Ok(("", vec![ r#"A Group:Ed Jones <c@a.test>,joe@where.test,John <jdoe@one.test>;, Mary Smith <mary@x.test>"#
AddressRef::Many(GroupRef { ),
name: "A Group".to_string(), Ok((
participants: vec![ "",
MailboxRef { vec![
name: Some("Ed Jones".into()), AddressRef::Many(GroupRef {
addrspec: AddrSpec { local_part: "c".into(), domain: "a.test".into() }, name: "A Group".to_string(),
participants: vec![
MailboxRef {
name: Some("Ed Jones".into()),
addrspec: AddrSpec {
local_part: "c".into(),
domain: "a.test".into()
},
},
MailboxRef {
name: None,
addrspec: AddrSpec {
local_part: "joe".into(),
domain: "where.test".into()
},
},
MailboxRef {
name: Some("John".into()),
addrspec: AddrSpec {
local_part: "jdoe".into(),
domain: "one.test".into()
},
},
],
}),
AddressRef::Single(MailboxRef {
name: Some("Mary Smith".into()),
addrspec: AddrSpec {
local_part: "mary".into(),
domain: "x.test".into()
}, },
MailboxRef { }),
name: None, ]
addrspec: AddrSpec { local_part: "joe".into(), domain: "where.test".into() }, ))
},
MailboxRef {
name: Some("John".into()),
addrspec: AddrSpec { local_part: "jdoe".into(), domain: "one.test".into() },
},
],
}),
AddressRef::Single(MailboxRef {
name: Some("Mary Smith".into()),
addrspec: AddrSpec { local_part: "mary".into(), domain: "x.test".into() },
}),
]))
); );
} }
} }

View file

@ -1,16 +1,16 @@
use crate::error::IMFError;
use crate::fragments::lazy;
use crate::fragments::whitespace::{cfws, fws};
use chrono::{DateTime, FixedOffset, NaiveDate, NaiveTime}; use chrono::{DateTime, FixedOffset, NaiveDate, NaiveTime};
use nom::{ use nom::{
IResult,
branch::alt, branch::alt,
bytes::complete::{tag, tag_no_case, take_while_m_n, is_a}, bytes::complete::{is_a, tag, tag_no_case, take_while_m_n},
character, character,
character::complete::{one_of, alphanumeric1, digit0}, character::complete::{alphanumeric1, digit0, one_of},
combinator::{map, opt, value}, combinator::{map, opt, value},
sequence::{preceded, terminated, tuple, delimited }, sequence::{delimited, preceded, terminated, tuple},
IResult,
}; };
use crate::fragments::lazy;
use crate::fragments::whitespace::{fws, cfws};
use crate::error::IMFError;
const MIN: i32 = 60; const MIN: i32 = 60;
const HOUR: i32 = 60 * MIN; const HOUR: i32 = 60 * MIN;
@ -43,20 +43,31 @@ impl<'a> TryFrom<&'a lazy::DateTime<'a>> for DateTime<FixedOffset> {
/// due to an error in RFC0822 but are interpreted as their respective /// due to an error in RFC0822 but are interpreted as their respective
/// timezone according to the RFC5322 definition /// timezone according to the RFC5322 definition
pub fn section(input: &str) -> IResult<&str, Option<DateTime<FixedOffset>>> { pub fn section(input: &str) -> IResult<&str, Option<DateTime<FixedOffset>>> {
map(terminated( map(
terminated(
alt(( alt((
tuple((opt(terminated(strict_day_of_week, tag(","))), strict_date, strict_time_of_day, strict_zone )), tuple((
tuple((opt(terminated(obs_day_of_week, tag(","))), obs_date, obs_time_of_day, alt((strict_zone, obs_zone)) )), opt(terminated(strict_day_of_week, tag(","))),
strict_date,
strict_time_of_day,
strict_zone,
)),
tuple((
opt(terminated(obs_day_of_week, tag(","))),
obs_date,
obs_time_of_day,
alt((strict_zone, obs_zone)),
)),
)), )),
opt(cfws) opt(cfws),
), |res| { ),
match res { |res| match res {
(_, Some(date), Some(time), Some(tz)) => { (_, Some(date), Some(time), Some(tz)) => {
date.and_time(time).and_local_timezone(tz).earliest() date.and_time(time).and_local_timezone(tz).earliest()
},
_ => None,
} }
})(input) _ => None,
},
)(input)
} }
/// day-of-week = ([FWS] day-name) / obs-day-of-week /// day-of-week = ([FWS] day-name) / obs-day-of-week
@ -85,18 +96,16 @@ fn day_name(input: &str) -> IResult<&str, &str> {
/// date = day month year /// date = day month year
fn strict_date(input: &str) -> IResult<&str, Option<NaiveDate>> { fn strict_date(input: &str) -> IResult<&str, Option<NaiveDate>> {
map( map(tuple((strict_day, month, strict_year)), |(d, m, y)| {
tuple((strict_day, month, strict_year)), NaiveDate::from_ymd_opt(y, m, d)
|(d, m, y)| NaiveDate::from_ymd_opt(y, m, d) })(input)
)(input)
} }
/// date = day month year /// date = day month year
fn obs_date(input: &str) -> IResult<&str, Option<NaiveDate>> { fn obs_date(input: &str) -> IResult<&str, Option<NaiveDate>> {
map( map(tuple((obs_day, month, obs_year)), |(d, m, y)| {
tuple((obs_day, month, obs_year)), NaiveDate::from_ymd_opt(y, m, d)
|(d, m, y)| NaiveDate::from_ymd_opt(y, m, d) })(input)
)(input)
} }
/// day = ([FWS] 1*2DIGIT FWS) / obs-day /// day = ([FWS] 1*2DIGIT FWS) / obs-day
@ -132,45 +141,63 @@ fn month(input: &str) -> IResult<&str, u32> {
/// year = (FWS 4*DIGIT FWS) / obs-year /// year = (FWS 4*DIGIT FWS) / obs-year
fn strict_year(input: &str) -> IResult<&str, i32> { fn strict_year(input: &str) -> IResult<&str, i32> {
delimited( delimited(
fws, fws,
map( map(
terminated(take_while_m_n(4,9,|c| c >= '\x30' && c <= '\x39'), digit0), terminated(take_while_m_n(4, 9, |c| c >= '\x30' && c <= '\x39'), digit0),
|d: &str| d.parse::<i32>().unwrap()), |d: &str| d.parse::<i32>().unwrap(),
),
fws, fws,
)(input) )(input)
} }
/// obs-year = [CFWS] 2*DIGIT [CFWS] /// obs-year = [CFWS] 2*DIGIT [CFWS]
fn obs_year(input: &str) -> IResult<&str, i32> { fn obs_year(input: &str) -> IResult<&str, i32> {
map(delimited( map(
opt(cfws), delimited(
terminated(take_while_m_n(2,7,|c| c >= '\x30' && c <= '\x39'), digit0), opt(cfws),
opt(cfws) terminated(take_while_m_n(2, 7, |c| c >= '\x30' && c <= '\x39'), digit0),
), |cap: &str| { opt(cfws),
let d = cap.parse::<i32>().unwrap(); ),
if d >= 0 && d <= 49 { |cap: &str| {
2000 + d let d = cap.parse::<i32>().unwrap();
} else if d >= 50 && d <= 999 { if d >= 0 && d <= 49 {
1900 + d 2000 + d
} else { } else if d >= 50 && d <= 999 {
d 1900 + d
} } else {
})(input) d
}
},
)(input)
} }
/// time-of-day = hour ":" minute [ ":" second ] /// time-of-day = hour ":" minute [ ":" second ]
fn strict_time_of_day(input: &str) -> IResult<&str, Option<NaiveTime>> { fn strict_time_of_day(input: &str) -> IResult<&str, Option<NaiveTime>> {
map( map(
tuple((strict_time_digit, tag(":"), strict_time_digit, opt(preceded(tag(":"), strict_time_digit)))), tuple((
|(hour, _, minute, maybe_sec)| NaiveTime::from_hms_opt(hour, minute, maybe_sec.unwrap_or(0)), strict_time_digit,
tag(":"),
strict_time_digit,
opt(preceded(tag(":"), strict_time_digit)),
)),
|(hour, _, minute, maybe_sec)| {
NaiveTime::from_hms_opt(hour, minute, maybe_sec.unwrap_or(0))
},
)(input) )(input)
} }
/// time-of-day = hour ":" minute [ ":" second ] /// time-of-day = hour ":" minute [ ":" second ]
fn obs_time_of_day(input: &str) -> IResult<&str, Option<NaiveTime>> { fn obs_time_of_day(input: &str) -> IResult<&str, Option<NaiveTime>> {
map( map(
tuple((obs_time_digit, tag(":"), obs_time_digit, opt(preceded(tag(":"), obs_time_digit)))), tuple((
|(hour, _, minute, maybe_sec)| NaiveTime::from_hms_opt(hour, minute, maybe_sec.unwrap_or(0)), obs_time_digit,
tag(":"),
obs_time_digit,
opt(preceded(tag(":"), obs_time_digit)),
)),
|(hour, _, minute, maybe_sec)| {
NaiveTime::from_hms_opt(hour, minute, maybe_sec.unwrap_or(0))
},
)(input) )(input)
} }
@ -189,15 +216,21 @@ fn obs_time_digit(input: &str) -> IResult<&str, u32> {
/// ``` /// ```
fn strict_zone(input: &str) -> IResult<&str, Option<FixedOffset>> { fn strict_zone(input: &str) -> IResult<&str, Option<FixedOffset>> {
map( map(
tuple((opt(fws), is_a("+-"), take_while_m_n(2,2,|c| c >= '\x30' && c <= '\x39'), take_while_m_n(2,2,|c| c >= '\x30' && c <= '\x39'))), tuple((
opt(fws),
is_a("+-"),
take_while_m_n(2, 2, |c| c >= '\x30' && c <= '\x39'),
take_while_m_n(2, 2, |c| c >= '\x30' && c <= '\x39'),
)),
|(_, op, dig_zone_hour, dig_zone_min)| { |(_, op, dig_zone_hour, dig_zone_min)| {
let zone_hour = dig_zone_hour.parse::<i32>().unwrap() * HOUR; let zone_hour = dig_zone_hour.parse::<i32>().unwrap() * HOUR;
let zone_min = dig_zone_min.parse::<i32>().unwrap() * MIN; let zone_min = dig_zone_min.parse::<i32>().unwrap() * MIN;
match op { match op {
"+" => FixedOffset::east_opt(zone_hour + zone_min), "+" => FixedOffset::east_opt(zone_hour + zone_min),
"-" => FixedOffset::west_opt(zone_hour + zone_min), "-" => FixedOffset::west_opt(zone_hour + zone_min),
_ => unreachable!(), } _ => unreachable!(),
} }
},
)(input) )(input)
} }
@ -216,7 +249,7 @@ fn strict_zone(input: &str) -> IResult<&str, Option<FixedOffset>> {
/// %d97-105 / ; through "Z", both /// %d97-105 / ; through "Z", both
/// %d107-122 / ; upper and lower case /// %d107-122 / ; upper and lower case
/// ; /// ;
/// 1*(ALPHA / DIGIT) ; Unknown legacy timezones /// 1*(ALPHA / DIGIT) ; Unknown legacy timezones
fn obs_zone(input: &str) -> IResult<&str, Option<FixedOffset>> { fn obs_zone(input: &str) -> IResult<&str, Option<FixedOffset>> {
// The writing of this function is volontarily verbose // The writing of this function is volontarily verbose
// to keep it straightforward to understand. // to keep it straightforward to understand.
@ -227,18 +260,27 @@ fn obs_zone(input: &str) -> IResult<&str, Option<FixedOffset>> {
opt(fws), opt(fws),
alt(( alt((
// Legacy UTC/GMT // Legacy UTC/GMT
value(FixedOffset::west_opt(0 * HOUR), alt((tag("UTC"), tag("UT"), tag("GMT")))), value(
FixedOffset::west_opt(0 * HOUR),
alt((tag("UTC"), tag("UT"), tag("GMT"))),
),
// USA Timezones // USA Timezones
value(FixedOffset::west_opt(4 * HOUR), tag("EDT")), value(FixedOffset::west_opt(4 * HOUR), tag("EDT")),
value(FixedOffset::west_opt(5 * HOUR), alt((tag("EST"), tag("CDT")))), value(
value(FixedOffset::west_opt(6 * HOUR), alt((tag("CST"), tag("MDT")))), FixedOffset::west_opt(5 * HOUR),
value(FixedOffset::west_opt(7 * HOUR), alt((tag("MST"), tag("PDT")))), alt((tag("EST"), tag("CDT"))),
),
value(
FixedOffset::west_opt(6 * HOUR),
alt((tag("CST"), tag("MDT"))),
),
value(
FixedOffset::west_opt(7 * HOUR),
alt((tag("MST"), tag("PDT"))),
),
value(FixedOffset::west_opt(8 * HOUR), tag("PST")), value(FixedOffset::west_opt(8 * HOUR), tag("PST")),
// Military Timezone UTC // Military Timezone UTC
value(FixedOffset::west_opt(0 * HOUR), tag("Z")), value(FixedOffset::west_opt(0 * HOUR), tag("Z")),
// Military Timezones East // Military Timezones East
map(one_of("ABCDEFGHIKLMabcdefghiklm"), |c| match c { map(one_of("ABCDEFGHIKLMabcdefghiklm"), |c| match c {
'A' | 'a' => FixedOffset::east_opt(1 * HOUR), 'A' | 'a' => FixedOffset::east_opt(1 * HOUR),
@ -255,7 +297,6 @@ fn obs_zone(input: &str) -> IResult<&str, Option<FixedOffset>> {
'M' | 'm' => FixedOffset::east_opt(12 * HOUR), 'M' | 'm' => FixedOffset::east_opt(12 * HOUR),
_ => unreachable!(), _ => unreachable!(),
}), }),
// Military Timezones West // Military Timezones West
map(one_of("nopqrstuvwxyNOPQRSTUVWXY"), |c| match c { map(one_of("nopqrstuvwxyNOPQRSTUVWXY"), |c| match c {
'N' | 'n' => FixedOffset::west_opt(1 * HOUR), 'N' | 'n' => FixedOffset::west_opt(1 * HOUR),
@ -272,7 +313,6 @@ fn obs_zone(input: &str) -> IResult<&str, Option<FixedOffset>> {
'Y' | 'y' => FixedOffset::west_opt(12 * HOUR), 'Y' | 'y' => FixedOffset::west_opt(12 * HOUR),
_ => unreachable!(), _ => unreachable!(),
}), }),
// Unknown timezone // Unknown timezone
value(FixedOffset::west_opt(0 * HOUR), alphanumeric1), value(FixedOffset::west_opt(0 * HOUR), alphanumeric1),
)), )),
@ -284,12 +324,19 @@ mod tests {
use super::*; use super::*;
use chrono::TimeZone; use chrono::TimeZone;
#[test] #[test]
fn test_section_rfc_strict() { fn test_section_rfc_strict() {
assert_eq!( assert_eq!(
section("Fri, 21 Nov 1997 09:55:06 -0600"), section("Fri, 21 Nov 1997 09:55:06 -0600"),
Ok(("", Some(FixedOffset::west_opt(6 * HOUR).unwrap().with_ymd_and_hms(1997, 11, 21, 9, 55, 6).unwrap()))), Ok((
"",
Some(
FixedOffset::west_opt(6 * HOUR)
.unwrap()
.with_ymd_and_hms(1997, 11, 21, 9, 55, 6)
.unwrap()
)
)),
); );
} }
@ -297,7 +344,15 @@ mod tests {
fn test_section_received() { fn test_section_received() {
assert_eq!( assert_eq!(
section("Sun, 18 Jun 2023 15:39:08 +0200 (CEST)"), section("Sun, 18 Jun 2023 15:39:08 +0200 (CEST)"),
Ok(("", Some(FixedOffset::east_opt(2 * HOUR).unwrap().with_ymd_and_hms(2023, 6, 18, 15, 39, 8).unwrap()))), Ok((
"",
Some(
FixedOffset::east_opt(2 * HOUR)
.unwrap()
.with_ymd_and_hms(2023, 6, 18, 15, 39, 8)
.unwrap()
)
)),
); );
} }
@ -310,8 +365,17 @@ mod tests {
Feb Feb
1969 1969
23:32 23:32
-0330 (Newfoundland Time)"#), -0330 (Newfoundland Time)"#
Ok(("", Some(FixedOffset::west_opt(3 * HOUR + 30 * MIN).unwrap().with_ymd_and_hms(1969, 2, 13, 23, 32, 00).unwrap()))), ),
Ok((
"",
Some(
FixedOffset::west_opt(3 * HOUR + 30 * MIN)
.unwrap()
.with_ymd_and_hms(1969, 2, 13, 23, 32, 00)
.unwrap()
)
)),
); );
} }
@ -319,7 +383,15 @@ mod tests {
fn test_section_rfc_obs() { fn test_section_rfc_obs() {
assert_eq!( assert_eq!(
section("21 Nov 97 09:55:06 GMT"), section("21 Nov 97 09:55:06 GMT"),
Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(1997, 11, 21, 9, 55, 6).unwrap()))), Ok((
"",
Some(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(1997, 11, 21, 9, 55, 6)
.unwrap()
)
)),
); );
} }
@ -327,7 +399,15 @@ mod tests {
fn test_section_3digit_year() { fn test_section_3digit_year() {
assert_eq!( assert_eq!(
section("21 Nov 103 09:55:06 UT"), section("21 Nov 103 09:55:06 UT"),
Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2003, 11, 21, 9, 55, 6).unwrap()))), Ok((
"",
Some(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(2003, 11, 21, 9, 55, 6)
.unwrap()
)
)),
); );
} }
@ -335,7 +415,15 @@ mod tests {
fn test_section_rfc_obs_ws() { fn test_section_rfc_obs_ws() {
assert_eq!( assert_eq!(
section("Fri, 21 Nov 1997 09(comment): 55 : 06 -0600"), section("Fri, 21 Nov 1997 09(comment): 55 : 06 -0600"),
Ok(("", Some(FixedOffset::west_opt(6 * HOUR).unwrap().with_ymd_and_hms(1997, 11, 21, 9, 55, 6).unwrap()))), Ok((
"",
Some(
FixedOffset::west_opt(6 * HOUR)
.unwrap()
.with_ymd_and_hms(1997, 11, 21, 9, 55, 6)
.unwrap()
)
)),
); );
} }
@ -343,55 +431,133 @@ mod tests {
fn test_section_2digit_year() { fn test_section_2digit_year() {
assert_eq!( assert_eq!(
section("21 Nov 23 09:55:06Z"), section("21 Nov 23 09:55:06Z"),
Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 11, 21, 9, 55, 6).unwrap()))), Ok((
"",
Some(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(2023, 11, 21, 9, 55, 6)
.unwrap()
)
)),
); );
} }
#[test] #[test]
fn test_section_military_zone_east() { fn test_section_military_zone_east() {
["a", "B", "c", "D", "e", "F", "g", "H", "i", "K", "l", "M"].iter().enumerate().for_each(|(i, x)| { ["a", "B", "c", "D", "e", "F", "g", "H", "i", "K", "l", "M"]
assert_eq!( .iter()
section(format!("1 Jan 22 08:00:00 {}", x).as_str()), .enumerate()
Ok(("", Some(FixedOffset::east_opt((i as i32 + 1) * HOUR).unwrap().with_ymd_and_hms(2022, 01, 01, 8, 0, 0).unwrap()))) .for_each(|(i, x)| {
); assert_eq!(
}); section(format!("1 Jan 22 08:00:00 {}", x).as_str()),
Ok((
"",
Some(
FixedOffset::east_opt((i as i32 + 1) * HOUR)
.unwrap()
.with_ymd_and_hms(2022, 01, 01, 8, 0, 0)
.unwrap()
)
))
);
});
} }
#[test] #[test]
fn test_section_military_zone_west() { fn test_section_military_zone_west() {
["N", "O", "P", "q", "r", "s", "T", "U", "V", "w", "x", "y"].iter().enumerate().for_each(|(i, x)| { ["N", "O", "P", "q", "r", "s", "T", "U", "V", "w", "x", "y"]
assert_eq!( .iter()
section(format!("1 Jan 22 08:00:00 {}", x).as_str()), .enumerate()
Ok(("", Some(FixedOffset::west_opt((i as i32 + 1) * HOUR).unwrap().with_ymd_and_hms(2022, 01, 01, 8, 0, 0).unwrap()))) .for_each(|(i, x)| {
); assert_eq!(
}); section(format!("1 Jan 22 08:00:00 {}", x).as_str()),
Ok((
"",
Some(
FixedOffset::west_opt((i as i32 + 1) * HOUR)
.unwrap()
.with_ymd_and_hms(2022, 01, 01, 8, 0, 0)
.unwrap()
)
))
);
});
} }
#[test] #[test]
fn test_section_gmt() { fn test_section_gmt() {
assert_eq!( assert_eq!(
section("21 Nov 2023 07:07:07 +0000"), section("21 Nov 2023 07:07:07 +0000"),
Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 11, 21, 7, 7, 7).unwrap()))), Ok((
"",
Some(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(2023, 11, 21, 7, 7, 7)
.unwrap()
)
)),
); );
assert_eq!( assert_eq!(
section("21 Nov 2023 07:07:07 -0000"), section("21 Nov 2023 07:07:07 -0000"),
Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 11, 21, 7, 7, 7).unwrap()))), Ok((
"",
Some(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(2023, 11, 21, 7, 7, 7)
.unwrap()
)
)),
); );
assert_eq!( assert_eq!(
section("21 Nov 2023 07:07:07 Z"), section("21 Nov 2023 07:07:07 Z"),
Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 11, 21, 7, 7, 7).unwrap()))), Ok((
"",
Some(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(2023, 11, 21, 7, 7, 7)
.unwrap()
)
)),
); );
assert_eq!( assert_eq!(
section("21 Nov 2023 07:07:07 GMT"), section("21 Nov 2023 07:07:07 GMT"),
Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 11, 21, 7, 7, 7).unwrap()))), Ok((
"",
Some(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(2023, 11, 21, 7, 7, 7)
.unwrap()
)
)),
); );
assert_eq!( assert_eq!(
section("21 Nov 2023 07:07:07 UT"), section("21 Nov 2023 07:07:07 UT"),
Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 11, 21, 7, 7, 7).unwrap()))), Ok((
"",
Some(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(2023, 11, 21, 7, 7, 7)
.unwrap()
)
)),
); );
assert_eq!( assert_eq!(
section("21 Nov 2023 07:07:07 UTC"), section("21 Nov 2023 07:07:07 UTC"),
Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 11, 21, 7, 7, 7).unwrap()))), Ok((
"",
Some(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(2023, 11, 21, 7, 7, 7)
.unwrap()
)
)),
); );
} }
@ -399,7 +565,15 @@ mod tests {
fn test_section_usa() { fn test_section_usa() {
assert_eq!( assert_eq!(
section("21 Nov 2023 4:4:4 CST"), section("21 Nov 2023 4:4:4 CST"),
Ok(("", Some(FixedOffset::west_opt(6 * HOUR).unwrap().with_ymd_and_hms(2023, 11, 21, 4, 4, 4).unwrap()))), Ok((
"",
Some(
FixedOffset::west_opt(6 * HOUR)
.unwrap()
.with_ymd_and_hms(2023, 11, 21, 4, 4, 4)
.unwrap()
)
)),
); );
} }
} }

View file

@ -1,11 +1,9 @@
use chrono::{DateTime, FixedOffset};
use crate::fragments::model::{
MailboxList, MailboxRef, AddressList,
MessageId, MessageIdList};
use crate::fragments::misc_token::{Unstructured, PhraseList};
use crate::fragments::trace::ReceivedLog;
use crate::fragments::lazy::Field as Lazy;
use crate::error::IMFError; use crate::error::IMFError;
use crate::fragments::lazy::Field as Lazy;
use crate::fragments::misc_token::{PhraseList, Unstructured};
use crate::fragments::model::{AddressList, MailboxList, MailboxRef, MessageId, MessageIdList};
use crate::fragments::trace::ReceivedLog;
use chrono::{DateTime, FixedOffset};
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum Field<'a> { pub enum Field<'a> {

View file

@ -1,18 +1,18 @@
use nom::{ use nom::{
IResult,
branch::alt, branch::alt,
bytes::complete::{take_while, tag}, bytes::complete::{tag, take_while},
combinator::opt, combinator::opt,
multi::many1, multi::many1,
sequence::{delimited, pair, tuple}, sequence::{delimited, pair, tuple},
IResult,
}; };
use crate::error::IMFError;
use crate::fragments::lazy; use crate::fragments::lazy;
use crate::fragments::whitespace::cfws;
use crate::fragments::words::dot_atom_text;
use crate::fragments::mailbox::is_dtext; use crate::fragments::mailbox::is_dtext;
use crate::fragments::model::{MessageId, MessageIdList}; use crate::fragments::model::{MessageId, MessageIdList};
use crate::error::IMFError; use crate::fragments::whitespace::cfws;
use crate::fragments::words::dot_atom_text;
impl<'a> TryFrom<&'a lazy::Identifier<'a>> for MessageId<'a> { impl<'a> TryFrom<&'a lazy::Identifier<'a>> for MessageId<'a> {
type Error = IMFError<'a>; type Error = IMFError<'a>;
@ -45,12 +45,12 @@ pub fn msg_id(input: &str) -> IResult<&str, MessageId> {
tuple((id_left, tag("@"), id_right)), tuple((id_left, tag("@"), id_right)),
pair(tag(">"), opt(cfws)), pair(tag(">"), opt(cfws)),
)(input)?; )(input)?;
Ok((input, MessageId{ left, right })) Ok((input, MessageId { left, right }))
} }
// Missing obsolete // Missing obsolete
fn id_left(input: &str) -> IResult<&str, &str> { fn id_left(input: &str) -> IResult<&str, &str> {
dot_atom_text(input) dot_atom_text(input)
} }
// Missing obsolete // Missing obsolete
@ -70,7 +70,13 @@ mod tests {
fn test_msg_id() { fn test_msg_id() {
assert_eq!( assert_eq!(
msg_id("<5678.21-Nov-1997@example.com>"), msg_id("<5678.21-Nov-1997@example.com>"),
Ok(("", MessageId{left: "5678.21-Nov-1997", right: "example.com"})), Ok((
"",
MessageId {
left: "5678.21-Nov-1997",
right: "example.com"
}
)),
); );
} }
} }

View file

@ -1,10 +1,10 @@
use std::convert::From; use std::convert::From;
use nom::{ use nom::{
IResult, bytes::complete::{tag, take_while1},
bytes::complete::{take_while1, tag},
character::complete::space0, character::complete::space0,
sequence::{terminated, tuple}, sequence::{terminated, tuple},
IResult,
}; };
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
@ -98,34 +98,38 @@ impl<'a> From<&'a str> for Field<'a> {
fn field_name(input: &str) -> IResult<&str, &str> { fn field_name(input: &str) -> IResult<&str, &str> {
terminated( terminated(
take_while1(|c| c >= '\x21' && c <= '\x7E' && c != '\x3A'), take_while1(|c| c >= '\x21' && c <= '\x7E' && c != '\x3A'),
tuple((space0, tag(":"), space0)) tuple((space0, tag(":"), space0)),
)(input) )(input)
} }
fn correct_field(input: &str) -> IResult<&str, Field> { fn correct_field(input: &str) -> IResult<&str, Field> {
field_name(input) field_name(input).map(|(rest, name)| {
.map(|(rest, name)| ("", match name.to_lowercase().as_ref() { (
"date" => Date(DateTime(rest)), "",
match name.to_lowercase().as_ref() {
"date" => Date(DateTime(rest)),
"from" => From(MailboxList(rest)), "from" => From(MailboxList(rest)),
"sender" => Sender(Mailbox(rest)), "sender" => Sender(Mailbox(rest)),
"reply-to" => ReplyTo(AddressList(rest)), "reply-to" => ReplyTo(AddressList(rest)),
"to" => To(AddressList(rest)), "to" => To(AddressList(rest)),
"cc" => Cc(AddressList(rest)), "cc" => Cc(AddressList(rest)),
"bcc" => Bcc(NullableAddressList(rest)), "bcc" => Bcc(NullableAddressList(rest)),
"message-id" => MessageID(Identifier(rest)), "message-id" => MessageID(Identifier(rest)),
"in-reply-to" => InReplyTo(IdentifierList(rest)), "in-reply-to" => InReplyTo(IdentifierList(rest)),
"references" => References(IdentifierList(rest)), "references" => References(IdentifierList(rest)),
"subject" => Subject(Unstructured(rest)), "subject" => Subject(Unstructured(rest)),
"comments" => Comments(Unstructured(rest)), "comments" => Comments(Unstructured(rest)),
"keywords" => Keywords(PhraseList(rest)), "keywords" => Keywords(PhraseList(rest)),
"return-path" => ReturnPath(Mailbox(rest)), "return-path" => ReturnPath(Mailbox(rest)),
"received" => Received(ReceivedLog(rest)), "received" => Received(ReceivedLog(rest)),
_ => Optional(name, Unstructured(rest)), _ => Optional(name, Unstructured(rest)),
})) },
)
})
} }

View file

@ -1,19 +1,19 @@
use std::borrow::Cow;
use nom::{ use nom::{
IResult,
branch::alt, branch::alt,
bytes::complete::{tag, is_a}, bytes::complete::{is_a, tag},
character::complete::satisfy, character::complete::satisfy,
combinator::{into,map,opt,recognize}, combinator::{into, map, opt, recognize},
multi::{separated_list1, fold_many0, many0}, multi::{fold_many0, many0, separated_list1},
sequence::{delimited,pair,preceded,terminated,tuple}, sequence::{delimited, pair, preceded, terminated, tuple},
IResult,
}; };
use std::borrow::Cow;
use crate::fragments::model::{MailboxRef, AddrSpec};
use crate::fragments::misc_token::{phrase, word}; use crate::fragments::misc_token::{phrase, word};
use crate::fragments::model::{AddrSpec, MailboxRef};
use crate::fragments::quoted::quoted_string;
use crate::fragments::whitespace::{cfws, fws, is_obs_no_ws_ctl}; use crate::fragments::whitespace::{cfws, fws, is_obs_no_ws_ctl};
use crate::fragments::words::{atom, dot_atom}; use crate::fragments::words::{atom, dot_atom};
use crate::fragments::quoted::quoted_string;
/// Mailbox /// Mailbox
/// ///
@ -43,10 +43,10 @@ fn name_addr(input: &str) -> IResult<&str, MailboxRef> {
/// obs-angle-addr /// obs-angle-addr
/// ``` /// ```
pub fn angle_addr(input: &str) -> IResult<&str, MailboxRef> { pub fn angle_addr(input: &str) -> IResult<&str, MailboxRef> {
delimited( delimited(
tuple((opt(cfws), tag("<"), opt(obs_route))), tuple((opt(cfws), tag("<"), opt(obs_route))),
into(addr_spec), into(addr_spec),
pair(tag(">"), opt(cfws)), pair(tag(">"), opt(cfws)),
)(input) )(input)
} }
@ -61,7 +61,10 @@ fn obs_route(input: &str) -> IResult<&str, Vec<String>> {
/// ``` /// ```
fn obs_domain_list(input: &str) -> IResult<&str, Vec<String>> { fn obs_domain_list(input: &str) -> IResult<&str, Vec<String>> {
//@FIXME complexity is O(n) in term of domains here. //@FIXME complexity is O(n) in term of domains here.
let (input, head) = preceded(pair(many0(alt((recognize(cfws), tag(",")))), tag("@")), obs_domain)(input)?; let (input, head) = preceded(
pair(many0(alt((recognize(cfws), tag(",")))), tag("@")),
obs_domain,
)(input)?;
let (input, mut rest) = obs_domain_list_rest(input)?; let (input, mut rest) = obs_domain_list_rest(input)?;
rest.insert(0, head); rest.insert(0, head);
Ok((input, rest)) Ok((input, rest))
@ -73,7 +76,7 @@ fn obs_domain_list_rest(input: &str) -> IResult<&str, Vec<String>> {
pair(tag(","), opt(cfws)), pair(tag(","), opt(cfws)),
opt(preceded(tag("@"), obs_domain)), opt(preceded(tag("@"), obs_domain)),
)), )),
|v: Vec<Option<String>>| v.into_iter().flatten().collect() |v: Vec<Option<String>>| v.into_iter().flatten().collect(),
)(input) )(input)
} }
@ -86,9 +89,13 @@ fn obs_domain_list_rest(input: &str) -> IResult<&str, Vec<String>> {
/// so I force obsolete for now... /// so I force obsolete for now...
pub fn addr_spec(input: &str) -> IResult<&str, AddrSpec> { pub fn addr_spec(input: &str) -> IResult<&str, AddrSpec> {
map( map(
tuple((obs_local_part, tag("@"), obs_domain, many0(pair(tag("@"), obs_domain)))), tuple((
|(local_part, _, domain, _)| obs_local_part,
AddrSpec { local_part, domain }, tag("@"),
obs_domain,
many0(pair(tag("@"), obs_domain)),
)),
|(local_part, _, domain, _)| AddrSpec { local_part, domain },
)(input) )(input)
} }
@ -108,7 +115,7 @@ fn strict_local_part(input: &str) -> IResult<&str, String> {
/// This is found in Enron emails and supported by Gmail. /// This is found in Enron emails and supported by Gmail.
/// ///
/// Obsolete local part is a superset of strict_local_part: /// Obsolete local part is a superset of strict_local_part:
/// anything that is parsed by strict_local_part will be parsed by /// anything that is parsed by strict_local_part will be parsed by
/// obs_local_part. /// obs_local_part.
/// ///
/// ```abnf /// ```abnf
@ -118,7 +125,8 @@ fn obs_local_part(input: &str) -> IResult<&str, String> {
fold_many0( fold_many0(
alt((map(is_a("."), Cow::Borrowed), word)), alt((map(is_a("."), Cow::Borrowed), word)),
String::new, String::new,
|acc, chunk| acc + &chunk)(input) |acc, chunk| acc + &chunk,
)(input)
} }
/// Domain /// Domain
@ -140,7 +148,10 @@ pub fn strict_domain(input: &str) -> IResult<&str, String> {
/// obs-domain = atom *("." atom) / domain-literal /// obs-domain = atom *("." atom) / domain-literal
/// ``` /// ```
pub fn obs_domain(input: &str) -> IResult<&str, String> { pub fn obs_domain(input: &str) -> IResult<&str, String> {
alt((map(separated_list1(tag("."), atom), |v| v.join(".")), domain_litteral))(input) alt((
map(separated_list1(tag("."), atom), |v| v.join(".")),
domain_litteral,
))(input)
} }
/// Domain litteral /// Domain litteral
@ -152,15 +163,16 @@ fn domain_litteral(input: &str) -> IResult<&str, String> {
delimited( delimited(
pair(opt(cfws), tag("[")), pair(opt(cfws), tag("[")),
inner_domain_litteral, inner_domain_litteral,
pair(tag("]"), opt(cfws)) pair(tag("]"), opt(cfws)),
)(input) )(input)
} }
fn inner_domain_litteral(input: &str) -> IResult<&str, String> { fn inner_domain_litteral(input: &str) -> IResult<&str, String> {
let (input, (cvec, maybe_wsp)) = pair(many0(pair(opt(fws), satisfy(is_dtext))), opt(fws))(input)?; let (input, (cvec, maybe_wsp)) =
let mut domain = cvec.iter().fold( pair(many0(pair(opt(fws), satisfy(is_dtext))), opt(fws))(input)?;
String::with_capacity(16), let mut domain = cvec
|mut acc, (maybe_wsp, c)| { .iter()
.fold(String::with_capacity(16), |mut acc, (maybe_wsp, c)| {
if let Some(wsp) = maybe_wsp { if let Some(wsp) = maybe_wsp {
acc.push(*wsp); acc.push(*wsp);
} }
@ -174,7 +186,6 @@ fn inner_domain_litteral(input: &str) -> IResult<&str, String> {
Ok((input, domain)) Ok((input, domain))
} }
fn is_strict_dtext(c: char) -> bool { fn is_strict_dtext(c: char) -> bool {
(c >= '\x21' && c <= '\x5A') || (c >= '\x5E' && c <= '\x7E') || !c.is_ascii() (c >= '\x21' && c <= '\x5A') || (c >= '\x5E' && c <= '\x7E') || !c.is_ascii()
} }
@ -188,7 +199,7 @@ fn is_strict_dtext(c: char) -> bool {
/// obs-dtext = obs-NO-WS-CTL / quoted-pair /// obs-dtext = obs-NO-WS-CTL / quoted-pair
/// ``` /// ```
pub fn is_dtext(c: char) -> bool { pub fn is_dtext(c: char) -> bool {
is_strict_dtext(c) || is_obs_no_ws_ctl(c) is_strict_dtext(c) || is_obs_no_ws_ctl(c)
//@FIXME does not support quoted pair yet while RFC requires it //@FIXME does not support quoted pair yet while RFC requires it
} }
@ -198,89 +209,213 @@ mod tests {
#[test]
fn test_addr_spec() {
    // Each row is (raw input, expected local part, expected domain).
    // Every input must be consumed entirely (empty remainder).
    let cases = [
        ("alice@example.com", "alice", "example.com"),
        ("jsmith@[192.168.2.1]", "jsmith", "192.168.2.1"),
        ("jsmith@[IPv6:2001:db8::1]", "jsmith", "IPv6:2001:db8::1"),
        // UTF-8
        ("用户@例子.广告", "用户", "例子.广告"),
        // ASCII Edge cases
        (
            "user+mailbox/department=shipping@example.com",
            "user+mailbox/department=shipping",
            "example.com",
        ),
        (
            "!#$%&'*+-/=?^_`.{|}~@example.com",
            "!#$%&'*+-/=?^_`.{|}~",
            "example.com",
        ),
        (r#""Abc@def"@example.com"#, "Abc@def", "example.com"),
        (r#""Fred\ Bloggs"@example.com"#, "Fred Bloggs", "example.com"),
        (r#""Joe.\\Blow"@example.com"#, r#"Joe.\Blow"#, "example.com"),
    ];

    for (raw, local_part, domain) in cases {
        assert_eq!(
            addr_spec(raw),
            Ok((
                "",
                AddrSpec {
                    local_part: local_part.into(),
                    domain: domain.into()
                }
            ))
        );
    }
}
#[test]
fn test_mailbox() {
    // Each row is (raw input, expected display name, local part, domain).
    let cases = [
        (
            r#""Joe Q. Public" <john.q.public@example.com>"#,
            Some("Joe Q. Public"),
            "john.q.public",
            "example.com",
        ),
        (
            r#"Mary Smith <mary@x.test>"#,
            Some("Mary Smith"),
            "mary",
            "x.test",
        ),
        (r#"jdoe@example.org"#, None, "jdoe", "example.org"),
        (r#"Who? <one@y.test>"#, Some("Who?"), "one", "y.test"),
        (r#"<boss@nil.test>"#, None, "boss", "nil.test"),
        (
            r#""Giant; \"Big\" Box" <sysservices@example.net>"#,
            Some(r#"Giant; "Big" Box"#),
            "sysservices",
            "example.net",
        ),
    ];

    for (raw, name, local_part, domain) in cases {
        assert_eq!(
            mailbox(raw),
            Ok((
                "",
                MailboxRef {
                    name: name.map(Into::into),
                    addrspec: AddrSpec {
                        local_part: local_part.into(),
                        domain: domain.into(),
                    }
                }
            ))
        );
    }
}
#[test] #[test]
fn test_obs_domain_list() { fn test_obs_domain_list() {
assert_eq!(obs_domain_list(r#"(shhh it's coming) assert_eq!(
obs_domain_list(
r#"(shhh it's coming)
, ,
(not yet) (not yet)
@33+4.com,,,, @33+4.com,,,,
,,,, ,,,,
(again) (again)
@example.com,@yep.com,@a,@b,,,@c"#), @example.com,@yep.com,@a,@b,,,@c"#
Ok(("", vec!["33+4.com".into(), "example.com".into(), "yep.com".into(), "a".into(), "b".into(), "c".into()])) ),
Ok((
"",
vec![
"33+4.com".into(),
"example.com".into(),
"yep.com".into(),
"a".into(),
"b".into(),
"c".into()
]
))
); );
} }
@ -288,10 +423,13 @@ mod tests {
fn test_enron1() { fn test_enron1() {
assert_eq!( assert_eq!(
addr_spec("a..howard@enron.com"), addr_spec("a..howard@enron.com"),
Ok(("", AddrSpec { Ok((
local_part: "a..howard".into(), "",
domain: "enron.com".into(), AddrSpec {
})) local_part: "a..howard".into(),
domain: "enron.com".into(),
}
))
); );
} }
@ -299,10 +437,13 @@ mod tests {
fn test_enron2() { fn test_enron2() {
assert_eq!( assert_eq!(
addr_spec(".nelson@enron.com"), addr_spec(".nelson@enron.com"),
Ok(("", AddrSpec { Ok((
local_part: ".nelson".into(), "",
domain: "enron.com".into(), AddrSpec {
})) local_part: ".nelson".into(),
domain: "enron.com".into(),
}
))
); );
} }
@ -310,25 +451,30 @@ mod tests {
fn test_enron3() { fn test_enron3() {
assert_eq!( assert_eq!(
addr_spec("ecn2760.conf.@enron.com"), addr_spec("ecn2760.conf.@enron.com"),
Ok(("", AddrSpec { Ok((
local_part: "ecn2760.conf.".into(), "",
domain: "enron.com".into(), AddrSpec {
})) local_part: "ecn2760.conf.".into(),
domain: "enron.com".into(),
}
))
); );
} }
#[test] #[test]
fn test_enron4() { fn test_enron4() {
assert_eq!( assert_eq!(
mailbox(r#"<"mark_kopinski/intl/acim/americancentury"@americancentury.com@enron.com>"#), mailbox(r#"<"mark_kopinski/intl/acim/americancentury"@americancentury.com@enron.com>"#),
Ok(("", MailboxRef { Ok((
name: None, "",
addrspec: AddrSpec { MailboxRef {
local_part: "mark_kopinski/intl/acim/americancentury".into(), name: None,
domain: "americancentury.com".into(), addrspec: AddrSpec {
local_part: "mark_kopinski/intl/acim/americancentury".into(),
domain: "americancentury.com".into(),
}
} }
})) ))
); );
} }
} }

View file

@ -1,19 +1,19 @@
use std::borrow::Cow;
use nom::{ use nom::{
IResult,
branch::alt, branch::alt,
bytes::complete::{take_while1, tag}, bytes::complete::{tag, take_while1},
character::complete::space0, character::complete::space0,
combinator::{into, opt}, combinator::{into, opt},
multi::{many0, many1, separated_list1}, multi::{many0, many1, separated_list1},
sequence::tuple, sequence::tuple,
IResult,
}; };
use std::borrow::Cow;
use crate::error::IMFError;
use crate::fragments::lazy; use crate::fragments::lazy;
use crate::fragments::quoted::quoted_string; use crate::fragments::quoted::quoted_string;
use crate::fragments::whitespace::{fws, is_obs_no_ws_ctl}; use crate::fragments::whitespace::{fws, is_obs_no_ws_ctl};
use crate::fragments::words::{atom, is_vchar}; use crate::fragments::words::{atom, is_vchar};
use crate::error::IMFError;
#[derive(Debug, PartialEq, Default)] #[derive(Debug, PartialEq, Default)]
pub struct Unstructured(pub String); pub struct Unstructured(pub String);
@ -28,7 +28,7 @@ impl<'a> TryFrom<&'a lazy::Unstructured<'a>> for Unstructured {
unstructured(input.0) unstructured(input.0)
.map(|(_, v)| Unstructured(v)) .map(|(_, v)| Unstructured(v))
.map_err(|e| IMFError::Unstructured(e)) .map_err(|e| IMFError::Unstructured(e))
} }
} }
impl<'a> TryFrom<&'a lazy::PhraseList<'a>> for PhraseList { impl<'a> TryFrom<&'a lazy::PhraseList<'a>> for PhraseList {
@ -101,7 +101,13 @@ mod tests {
#[test]
fn test_phrase() {
    // Each row is (raw input, expected unparsed remainder, expected phrase).
    let cases = [
        ("hello world", "", "hello world"),
        ("salut \"le\" monde", "", "salut le monde"),
        ("fin\r\n du\r\nmonde", "\r\nmonde", "fin du"),
    ];

    for (raw, remainder, parsed) in cases {
        assert_eq!(phrase(raw), Ok((remainder, parsed.into())));
    }
}
} }

View file

@ -2,17 +2,17 @@
pub mod model; pub mod model;
// Generic // Generic
pub mod misc_token;
mod quoted;
pub mod whitespace; pub mod whitespace;
mod words; mod words;
mod quoted;
pub mod misc_token;
// Header specific // Header specific
mod mailbox;
mod address; mod address;
mod identification;
pub mod trace;
mod datetime; mod datetime;
pub mod lazy;
pub mod eager; pub mod eager;
mod identification;
pub mod lazy;
mod mailbox;
pub mod section; pub mod section;
pub mod trace;

View file

@ -1,5 +1,5 @@
use chrono::{DateTime, FixedOffset};
use std::collections::HashMap; use std::collections::HashMap;
use chrono::{DateTime,FixedOffset};
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct AddrSpec { pub struct AddrSpec {
@ -126,7 +126,7 @@ pub struct HeaderSection<'a> {
pub msg_id: Option<MessageId<'a>>, pub msg_id: Option<MessageId<'a>>,
pub in_reply_to: Vec<MessageId<'a>>, pub in_reply_to: Vec<MessageId<'a>>,
pub references: Vec<MessageId<'a>>, pub references: Vec<MessageId<'a>>,
// 3.6.5. Informational Fields // 3.6.5. Informational Fields
pub subject: Option<String>, pub subject: Option<String>,
pub comments: Vec<String>, pub comments: Vec<String>,

View file

@ -1,14 +1,14 @@
use nom::{ use nom::{
IResult,
branch::alt, branch::alt,
bytes::complete::tag, bytes::complete::tag,
character::complete::{anychar, satisfy}, character::complete::{anychar, satisfy},
combinator::opt, combinator::opt,
multi::many0, multi::many0,
sequence::{pair, preceded}, sequence::{pair, preceded},
IResult,
}; };
use crate::fragments::whitespace::{fws, cfws, is_obs_no_ws_ctl}; use crate::fragments::whitespace::{cfws, fws, is_obs_no_ws_ctl};
/// Quoted pair /// Quoted pair
/// ///
@ -53,29 +53,29 @@ fn qcontent(input: &str) -> IResult<&str, char> {
/// [CFWS] /// [CFWS]
/// ``` /// ```
pub fn quoted_string(input: &str) -> IResult<&str, String> {
    // Opening: optional CFWS, then the opening double quote.
    let (rest, _) = opt(cfws)(input)?;
    let (rest, _) = tag("\"")(rest)?;

    // Content: zero or more characters, each optionally preceded by
    // folding white space.
    let (rest, pieces) = many0(pair(opt(fws), qcontent))(rest)?;

    // Rebuild the string: every folding white space that was matched is
    // kept as the single char returned by `fws`, followed by the content char.
    let mut unfolded = String::with_capacity(16);
    for (folding, ch) in pieces {
        unfolded.extend(folding);
        unfolded.push(ch);
    }

    // Folding white space right before the closing quote is kept as well.
    let (rest, trailing) = opt(fws)(rest)?;
    unfolded.extend(trailing);

    // Closing: the closing double quote, then optional CFWS.
    let (rest, _) = tag("\"")(rest)?;
    let (rest, _) = opt(cfws)(rest)?;

    Ok((rest, unfolded))
}
#[cfg(test)] #[cfg(test)]
@ -84,7 +84,13 @@ mod tests {
#[test]
fn test_quoted_string() {
    // Each row is (raw input, expected content); the whole input is consumed.
    let cases = [
        (" \"hello\\\"world\" ", "hello\"world"),
        ("\"hello\r\n world\"", "hello world"),
    ];

    for (raw, content) in cases {
        assert_eq!(quoted_string(raw), Ok(("", content.to_string())));
    }
}
} }

View file

@ -1,11 +1,11 @@
use std::collections::HashMap; use std::collections::HashMap;
use chrono::{DateTime, FixedOffset};
use crate::fragments::model::{MailboxRef,MessageId, AddressRef};
use crate::fragments::misc_token::{Unstructured, PhraseList};
use crate::fragments::trace::ReceivedLog;
use crate::fragments::eager::Field; use crate::fragments::eager::Field;
use crate::fragments::lazy; use crate::fragments::lazy;
use crate::fragments::misc_token::{PhraseList, Unstructured};
use crate::fragments::model::{AddressRef, MailboxRef, MessageId};
use crate::fragments::trace::ReceivedLog;
use chrono::{DateTime, FixedOffset};
#[derive(Debug, PartialEq, Default)] #[derive(Debug, PartialEq, Default)]
pub struct Section<'a> { pub struct Section<'a> {
@ -26,7 +26,7 @@ pub struct Section<'a> {
pub msg_id: Option<&'a MessageId<'a>>, pub msg_id: Option<&'a MessageId<'a>>,
pub in_reply_to: Vec<&'a MessageId<'a>>, pub in_reply_to: Vec<&'a MessageId<'a>>,
pub references: Vec<&'a MessageId<'a>>, pub references: Vec<&'a MessageId<'a>>,
// 3.6.5. Informational Fields // 3.6.5. Informational Fields
pub subject: Option<&'a Unstructured>, pub subject: Option<&'a Unstructured>,
pub comments: Vec<&'a Unstructured>, pub comments: Vec<&'a Unstructured>,
@ -48,7 +48,7 @@ pub struct Section<'a> {
//@FIXME min and max limits are not enforced,
// it may result in missing data or silently overridden data.
impl<'a> FromIterator<&'a Field<'a>> for Section<'a> { impl<'a> FromIterator<&'a Field<'a>> for Section<'a> {
fn from_iter<I: IntoIterator<Item=&'a Field<'a>>>(iter: I) -> Self { fn from_iter<I: IntoIterator<Item = &'a Field<'a>>>(iter: I) -> Self {
let mut section = Section::default(); let mut section = Section::default();
for field in iter { for field in iter {
match field { match field {
@ -67,11 +67,12 @@ impl<'a> FromIterator<&'a Field<'a>> for Section<'a> {
Field::Keywords(v) => section.keywords.push(v), Field::Keywords(v) => section.keywords.push(v),
Field::ReturnPath(v) => section.return_path.push(v), Field::ReturnPath(v) => section.return_path.push(v),
Field::Received(v) => section.received.push(v), Field::Received(v) => section.received.push(v),
Field::Optional(k, v) => { section.optional.insert(k, v); }, Field::Optional(k, v) => {
section.optional.insert(k, v);
}
Field::Rescue(v) => section.unparsed.push(v), Field::Rescue(v) => section.unparsed.push(v),
} }
} }
section section
} }
} }

View file

@ -1,13 +1,13 @@
use crate::error::IMFError;
use crate::fragments::{datetime, lazy, mailbox, misc_token, model, whitespace};
use nom::{ use nom::{
IResult,
branch::alt, branch::alt,
bytes::complete::tag, bytes::complete::tag,
combinator::{map, opt, recognize}, combinator::{map, opt, recognize},
multi::many0, multi::many0,
sequence::tuple, sequence::tuple,
IResult,
}; };
use crate::fragments::{datetime, mailbox, model, misc_token, whitespace, lazy};
use crate::error::IMFError;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct ReceivedLog<'a>(pub &'a str); pub struct ReceivedLog<'a>(pub &'a str);
@ -29,15 +29,12 @@ pub fn received_body(input: &str) -> IResult<&str, &str> {
tag(";"), tag(";"),
datetime::section, datetime::section,
)), )),
|(tokens, _, _)| tokens, |(tokens, _, _)| tokens,
)(input) )(input)
} }
pub fn return_path_body(input: &str) -> IResult<&str, Option<model::MailboxRef>> {
    // Try an angle-bracketed address first and wrap it in `Some`;
    // otherwise fall back to `empty_path`.
    let some_address = map(mailbox::angle_addr, Some);
    alt((some_address, empty_path))(input)
}
fn empty_path(input: &str) -> IResult<&str, Option<model::MailboxRef>> { fn empty_path(input: &str) -> IResult<&str, Option<model::MailboxRef>> {
@ -57,11 +54,10 @@ fn received_tokens(input: &str) -> IResult<&str, &str> {
recognize(mailbox::angle_addr), recognize(mailbox::angle_addr),
recognize(mailbox::addr_spec), recognize(mailbox::addr_spec),
recognize(mailbox::obs_domain), recognize(mailbox::obs_domain),
recognize(misc_token::word), recognize(misc_token::word),
))(input) ))(input)
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@ -76,11 +72,14 @@ mod tests {
assert_eq!( assert_eq!(
received_body(hdrs), received_body(hdrs),
Ok(("", r#"from smtp.example.com ([10.83.2.2]) Ok((
"",
r#"from smtp.example.com ([10.83.2.2])
by server with LMTP by server with LMTP
id xxxxxxxxx id xxxxxxxxx
(envelope-from <gitlab@example.com>) (envelope-from <gitlab@example.com>)
for <me@example.com>"#)) for <me@example.com>"#
))
); );
} }
} }

View file

@ -1,13 +1,13 @@
use crate::fragments::quoted::quoted_pair;
use nom::{ use nom::{
IResult,
branch::alt, branch::alt,
bytes::complete::tag, bytes::complete::tag,
character::complete::{crlf, satisfy, space0, space1}, character::complete::{crlf, satisfy, space0, space1},
combinator::{recognize, opt}, combinator::{opt, recognize},
multi::{many0, many1}, multi::{many0, many1},
sequence::tuple, sequence::tuple,
IResult,
}; };
use crate::fragments::quoted::quoted_pair;
// --- whitespaces and comments // --- whitespaces and comments
@ -35,12 +35,11 @@ pub fn fws(input: &str) -> IResult<&str, char> {
Ok((input, ' ')) Ok((input, ' '))
} }
fn fold_marker(input: &str) -> IResult<&str, &str> { fn fold_marker(input: &str) -> IResult<&str, &str> {
let (input, _) = space0(input)?; let (input, _) = space0(input)?;
let (input, _) = perm_crlf(input)?; let (input, _) = perm_crlf(input)?;
space1(input) space1(input)
} }
/// Folding White Space with Comment /// Folding White Space with Comment
/// ///
/// Note: we drop the comments for now... /// Note: we drop the comments for now...
@ -76,7 +75,7 @@ pub fn comment(input: &str) -> IResult<&str, ()> {
} }
pub fn ccontent(input: &str) -> IResult<&str, &str> { pub fn ccontent(input: &str) -> IResult<&str, &str> {
alt((recognize(ctext), recognize(quoted_pair), recognize(comment)))(input) alt((recognize(ctext), recognize(quoted_pair), recognize(comment)))(input)
} }
pub fn ctext(input: &str) -> IResult<&str, char> { pub fn ctext(input: &str) -> IResult<&str, char> {
@ -92,14 +91,17 @@ pub fn ctext(input: &str) -> IResult<&str, char> {
/// obs-ctext /// obs-ctext
///``` ///```
pub fn is_restr_ctext(c: char) -> bool {
    // Printable US-ASCII except '(' (0x28), ')' (0x29) and '\' (0x5C);
    // any non-ASCII character is accepted too.
    matches!(c, '\x21'..='\x27' | '\x2A'..='\x5B' | '\x5D'..='\x7E') || !c.is_ascii()
}
pub fn is_ctext(c: char) -> bool { pub fn is_ctext(c: char) -> bool {
is_restr_ctext(c) || is_obs_no_ws_ctl(c) is_restr_ctext(c) || is_obs_no_ws_ctl(c)
} }
/// US ASCII control characters without effect /// US ASCII control characters without effect
/// ///
/// ```abnf /// ```abnf
/// obs-NO-WS-CTL = %d1-8 / ; US-ASCII control /// obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
@ -109,7 +111,11 @@ pub fn is_ctext(c: char) -> bool {
/// %d127 ; white space characters /// %d127 ; white space characters
/// ``` /// ```
pub fn is_obs_no_ws_ctl(c: char) -> bool {
    // obs-NO-WS-CTL = %d1-8 / %d11 / %d12 / %d14-31 / %d127
    //
    // The previous implementation compared against '\x0b' (%d11) twice and
    // never against '\x0c' (%d12), so form feed was wrongly rejected.
    // NUL, TAB (%d9), LF (%d10) and CR (%d13) are deliberately excluded.
    matches!(
        c,
        '\x01'..='\x08' | '\x0b' | '\x0c' | '\x0e'..='\x1f' | '\x7f'
    )
}
#[cfg(test)] #[cfg(test)]
@ -133,8 +139,20 @@ mod tests {
#[test]
fn test_cfws() {
    // Each row is (raw input, expected remainder, expected recognized CFWS).
    let cases = [
        (
            "(A nice \\) chap) <pete(his account)@silly.test(his host)>",
            "<pete(his account)@silly.test(his host)>",
            "(A nice \\) chap) ",
        ),
        (
            "(Chris's host.)public.example>,",
            "public.example>,",
            "(Chris's host.)",
        ),
        (
            "(double (comment) is fun) wouch",
            "wouch",
            "(double (comment) is fun) ",
        ),
    ];

    for (raw, remainder, recognized) in cases {
        assert_eq!(cfws(raw), Ok((remainder, recognized)));
    }
}
} }

View file

@ -1,16 +1,15 @@
use crate::fragments::whitespace::cfws;
use nom::{ use nom::{
IResult,
bytes::complete::{tag, take_while1}, bytes::complete::{tag, take_while1},
combinator::{recognize, opt}, combinator::{opt, recognize},
multi::many0, multi::many0,
sequence::{delimited, pair}, sequence::{delimited, pair},
IResult,
}; };
use crate::fragments::whitespace::cfws;
/// VCHAR definition
///
/// Accepts visible US-ASCII characters (`'\x21'..='\x7E'`) and, as an
/// extension, every non-ASCII character.
pub fn is_vchar(c: char) -> bool {
    // `is_ascii_graphic` is exactly the U+0021..=U+007E range.
    c.is_ascii_graphic() || !c.is_ascii()
}
/// Sequence of visible chars with the UTF-8 extension /// Sequence of visible chars with the UTF-8 extension
@ -23,7 +22,7 @@ pub fn is_vchar(c: char) -> bool {
///``` ///```
#[allow(dead_code)] #[allow(dead_code)]
pub fn vchar_seq(input: &str) -> IResult<&str, &str> { pub fn vchar_seq(input: &str) -> IResult<&str, &str> {
take_while1(is_vchar)(input) take_while1(is_vchar)(input)
} }
/// Atom allowed characters /// Atom allowed characters
@ -31,7 +30,7 @@ fn is_atext(c: char) -> bool {
c.is_ascii_alphanumeric() || "!#$%&'*+-/=?^_`{|}~".contains(c) || !c.is_ascii() c.is_ascii_alphanumeric() || "!#$%&'*+-/=?^_`{|}~".contains(c) || !c.is_ascii()
} }
/// Atom /// Atom
/// ///
/// `[CFWS] 1*atext [CFWS]` /// `[CFWS] 1*atext [CFWS]`
pub fn atom(input: &str) -> IResult<&str, &str> { pub fn atom(input: &str) -> IResult<&str, &str> {
@ -42,7 +41,10 @@ pub fn atom(input: &str) -> IResult<&str, &str> {
/// ///
/// `1*atext *("." 1*atext)` /// `1*atext *("." 1*atext)`
pub fn dot_atom_text(input: &str) -> IResult<&str, &str> { pub fn dot_atom_text(input: &str) -> IResult<&str, &str> {
recognize(pair(take_while1(is_atext), many0(pair(tag("."), take_while1(is_atext)))))(input) recognize(pair(
take_while1(is_atext),
many0(pair(tag("."), take_while1(is_atext))),
))(input)
} }
/// dot-atom /// dot-atom
@ -54,13 +56,19 @@ pub fn dot_atom(input: &str) -> IResult<&str, &str> {
#[allow(dead_code)] #[allow(dead_code)]
/// Returns `true` when `c` is one of the thirteen "special" characters:
/// `( ) < > [ ] : ; @ \ , . "`.
pub fn is_special(c: char) -> bool {
    matches!(
        c,
        '(' | ')' | '<' | '>' | '[' | ']' | ':' | ';' | '@' | '\\' | ',' | '.' | '"'
    )
}
#[cfg(test)] #[cfg(test)]
@ -84,16 +92,25 @@ mod tests {
#[test] #[test]
fn test_atom() { fn test_atom() {
assert_eq!(atom("(skip) imf_codec (hidden) aerogramme"), Ok(("aerogramme", "imf_codec"))); assert_eq!(
atom("(skip) imf_codec (hidden) aerogramme"),
Ok(("aerogramme", "imf_codec"))
);
} }
#[test] #[test]
fn test_dot_atom_text() { fn test_dot_atom_text() {
assert_eq!(dot_atom_text("quentin.dufour.io abcdef"), Ok((" abcdef", "quentin.dufour.io"))); assert_eq!(
dot_atom_text("quentin.dufour.io abcdef"),
Ok((" abcdef", "quentin.dufour.io"))
);
} }
#[test] #[test]
fn test_dot_atom() { fn test_dot_atom() {
assert_eq!(dot_atom(" (skip) quentin.dufour.io abcdef"), Ok(("abcdef", "quentin.dufour.io"))); assert_eq!(
dot_atom(" (skip) quentin.dufour.io abcdef"),
Ok(("abcdef", "quentin.dufour.io"))
);
} }
} }

View file

@ -1,16 +1,16 @@
use nom::{ use nom::{
IResult,
character::complete::space1,
bytes::complete::is_not, bytes::complete::is_not,
character::complete::space1,
combinator::{all_consuming, recognize}, combinator::{all_consuming, recognize},
multi::{many0, many1}, multi::{many0, many1},
sequence::{pair, tuple}, sequence::{pair, tuple},
IResult,
}; };
use crate::error::IMFError; use crate::error::IMFError;
use crate::fragments::whitespace; use crate::fragments::whitespace;
use crate::multipass::guess_charset;
use crate::multipass::field_lazy; use crate::multipass::field_lazy;
use crate::multipass::guess_charset;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct Parsed<'a> { pub struct Parsed<'a> {
@ -21,7 +21,10 @@ pub struct Parsed<'a> {
pub fn new<'a>(gcha: &'a guess_charset::Parsed<'a>) -> Result<Parsed<'a>, IMFError<'a>> { pub fn new<'a>(gcha: &'a guess_charset::Parsed<'a>) -> Result<Parsed<'a>, IMFError<'a>> {
all_consuming(many0(foldable_line))(&gcha.header) all_consuming(many0(foldable_line))(&gcha.header)
.map_err(|e| IMFError::ExtractFields(e)) .map_err(|e| IMFError::ExtractFields(e))
.map(|(_, fields)| Parsed { fields, body: gcha.body }) .map(|(_, fields)| Parsed {
fields,
body: gcha.body,
})
} }
impl<'a> Parsed<'a> { impl<'a> Parsed<'a> {
@ -35,11 +38,12 @@ impl<'a> Parsed<'a> {
/// ``` /// ```
fn foldable_line(input: &str) -> IResult<&str, &str> { fn foldable_line(input: &str) -> IResult<&str, &str> {
recognize(tuple(( recognize(tuple((
is_not("\r\n"), is_not("\r\n"),
many0(pair( many0(pair(
many1(pair(whitespace::perm_crlf, space1)), many1(pair(whitespace::perm_crlf, space1)),
is_not("\r\n"))), is_not("\r\n"),
whitespace::perm_crlf )),
whitespace::perm_crlf,
)))(input) )))(input)
} }

View file

@ -10,7 +10,8 @@ pub struct Parsed<'a> {
pub fn new<'a>(p: &'a field_lazy::Parsed<'a>) -> Parsed<'a> { pub fn new<'a>(p: &'a field_lazy::Parsed<'a>) -> Parsed<'a> {
Parsed { Parsed {
fields: p.fields fields: p
.fields
.iter() .iter()
.filter_map(|entry| entry.try_into().ok()) .filter_map(|entry| entry.try_into().ok())
.collect(), .collect(),
@ -33,47 +34,56 @@ mod tests {
#[test] #[test]
fn test_field_body() { fn test_field_body() {
assert_eq!(new(&field_lazy::Parsed { assert_eq!(
fields: vec![ new(&field_lazy::Parsed {
lazy::Field::From(lazy::MailboxList("hello@world.com,\r\n\talice@wonderlands.com\r\n")), fields: vec![
lazy::Field::Date(lazy::DateTime("12 Mar 1997 07:33:25 Z\r\n")), lazy::Field::From(lazy::MailboxList(
], "hello@world.com,\r\n\talice@wonderlands.com\r\n"
body: b"Hello world!", )),
}), lazy::Field::Date(lazy::DateTime("12 Mar 1997 07:33:25 Z\r\n")),
Parsed { ],
fields: vec![ body: b"Hello world!",
eager::Field::From(vec![ }),
model::MailboxRef { Parsed {
name: None, fields: vec![
addrspec: model::AddrSpec { eager::Field::From(vec![
local_part: "hello".into(), model::MailboxRef {
domain: "world.com".into() name: None,
} addrspec: model::AddrSpec {
}, local_part: "hello".into(),
model::MailboxRef { domain: "world.com".into()
name: None, }
addrspec: model::AddrSpec { },
local_part: "alice".into(), model::MailboxRef {
domain: "wonderlands.com".into() name: None,
} addrspec: model::AddrSpec {
}, local_part: "alice".into(),
]), domain: "wonderlands.com".into()
eager::Field::Date( }
FixedOffset::east_opt(0) },
.unwrap() ]),
.with_ymd_and_hms(1997, 03, 12, 7, 33, 25) eager::Field::Date(
.unwrap() FixedOffset::east_opt(0)
), .unwrap()
], .with_ymd_and_hms(1997, 03, 12, 7, 33, 25)
body: b"Hello world!", .unwrap()
}); ),
],
body: b"Hello world!",
}
);
} }
use crate::multipass::extract_fields;
use crate::fragments::misc_token; use crate::fragments::misc_token;
use crate::multipass::extract_fields;
fn lazy_eager<F>(input: &str, func: F) fn lazy_eager<F>(input: &str, func: F)
where F: Fn(&eager::Field) { where
let field = extract_fields::Parsed { fields: vec![input], body: b""}; F: Fn(&eager::Field),
{
let field = extract_fields::Parsed {
fields: vec![input],
body: b"",
};
let lazy = field_lazy::new(&field); let lazy = field_lazy::new(&field);
let eager = new(&lazy); let eager = new(&lazy);
func(eager.fields.first().unwrap()) func(eager.fields.first().unwrap())
@ -83,16 +93,18 @@ mod tests {
fn test_from() { fn test_from() {
lazy_eager( lazy_eager(
"From: \"Joe Q. Public\" <john.q.public@example.com>\r\n", "From: \"Joe Q. Public\" <john.q.public@example.com>\r\n",
|from| assert_eq!( |from| {
from, assert_eq!(
&eager::Field::From(vec![model::MailboxRef { from,
name: Some("Joe Q. Public".into()), &eager::Field::From(vec![model::MailboxRef {
addrspec: model::AddrSpec { name: Some("Joe Q. Public".into()),
local_part: "john.q.public".into(), addrspec: model::AddrSpec {
domain: "example.com".into(), local_part: "john.q.public".into(),
} domain: "example.com".into(),
}]) }
) }])
)
},
); );
} }
@ -100,16 +112,18 @@ mod tests {
fn test_sender() { fn test_sender() {
lazy_eager( lazy_eager(
"Sender: Michael Jones <mjones@machine.example>\r\n", "Sender: Michael Jones <mjones@machine.example>\r\n",
|sender| assert_eq!( |sender| {
sender, assert_eq!(
&eager::Field::Sender(model::MailboxRef { sender,
name: Some("Michael Jones".into()), &eager::Field::Sender(model::MailboxRef {
addrspec: model::AddrSpec { name: Some("Michael Jones".into()),
local_part: "mjones".into(), addrspec: model::AddrSpec {
domain: "machine.example".into(), local_part: "mjones".into(),
}, domain: "machine.example".into(),
}) },
) })
)
},
); );
} }
@ -117,18 +131,18 @@ mod tests {
fn test_reply_to() { fn test_reply_to() {
lazy_eager( lazy_eager(
"Reply-To: \"Mary Smith: Personal Account\" <smith@home.example>\r\n", "Reply-To: \"Mary Smith: Personal Account\" <smith@home.example>\r\n",
|reply_to| assert_eq!( |reply_to| {
reply_to, assert_eq!(
&eager::Field::ReplyTo( reply_to,
vec![model::AddressRef::Single(model::MailboxRef { &eager::Field::ReplyTo(vec![model::AddressRef::Single(model::MailboxRef {
name: Some("Mary Smith: Personal Account".into()), name: Some("Mary Smith: Personal Account".into()),
addrspec: model::AddrSpec { addrspec: model::AddrSpec {
local_part: "smith".into(), local_part: "smith".into(),
domain: "home.example".into(), domain: "home.example".into(),
}, },
})] })])
) )
) },
) )
} }
@ -136,177 +150,187 @@ mod tests {
fn test_to() { fn test_to() {
lazy_eager( lazy_eager(
"To: A Group:Ed Jones <c@a.test>,joe@where.test,John <jdoe@one.test>;\r\n", "To: A Group:Ed Jones <c@a.test>,joe@where.test,John <jdoe@one.test>;\r\n",
|to| assert_eq!( |to| {
to, assert_eq!(
&eager::Field::To(vec![model::AddressRef::Many(model::GroupRef { to,
name: "A Group".into(), &eager::Field::To(vec![model::AddressRef::Many(model::GroupRef {
participants: vec![ name: "A Group".into(),
model::MailboxRef { participants: vec![
name: Some("Ed Jones".into()), model::MailboxRef {
addrspec: model::AddrSpec { local_part: "c".into(), domain: "a.test".into() }, name: Some("Ed Jones".into()),
}, addrspec: model::AddrSpec {
model::MailboxRef { local_part: "c".into(),
name: None, domain: "a.test".into()
addrspec: model::AddrSpec { local_part: "joe".into(), domain: "where.test".into() }, },
}, },
model::MailboxRef { model::MailboxRef {
name: Some("John".into()), name: None,
addrspec: model::AddrSpec { local_part: "jdoe".into(), domain: "one.test".into() }, addrspec: model::AddrSpec {
}, local_part: "joe".into(),
] domain: "where.test".into()
})]) },
) },
model::MailboxRef {
name: Some("John".into()),
addrspec: model::AddrSpec {
local_part: "jdoe".into(),
domain: "one.test".into()
},
},
]
})])
)
},
) )
} }
#[test] #[test]
fn test_cc() { fn test_cc() {
lazy_eager( lazy_eager("Cc: Undisclosed recipients:;\r\n", |cc| {
"Cc: Undisclosed recipients:;\r\n", assert_eq!(
|cc| assert_eq!( cc,
cc, &eager::Field::Cc(vec![model::AddressRef::Many(model::GroupRef {
&eager::Field::Cc(vec![model::AddressRef::Many(model::GroupRef { name: "Undisclosed recipients".into(),
name: "Undisclosed recipients".into(), participants: vec![],
participants: vec![], })]),
})]),
) )
) })
} }
#[test] #[test]
fn test_bcc() { fn test_bcc() {
lazy_eager( lazy_eager("Bcc: (empty)\r\n", |bcc| {
"Bcc: (empty)\r\n", assert_eq!(bcc, &eager::Field::Bcc(vec![]),)
|bcc| assert_eq!( });
bcc,
&eager::Field::Bcc(vec![]),
)
);
lazy_eager( lazy_eager("Bcc: \r\n", |bcc| {
"Bcc: \r\n", assert_eq!(bcc, &eager::Field::Bcc(vec![]),)
|bcc| assert_eq!( });
bcc,
&eager::Field::Bcc(vec![]),
)
);
} }
#[test] #[test]
fn test_message_id() { fn test_message_id() {
lazy_eager( lazy_eager("Message-ID: <310@[127.0.0.1]>\r\n", |msg_id| {
"Message-ID: <310@[127.0.0.1]>\r\n", assert_eq!(
|msg_id| assert_eq!( msg_id,
msg_id, &eager::Field::MessageID(model::MessageId {
&eager::Field::MessageID( left: "310",
model::MessageId { left: "310", right: "127.0.0.1" }, right: "127.0.0.1"
) },)
) )
) })
} }
#[test] #[test]
fn test_in_reply_to() { fn test_in_reply_to() {
lazy_eager( lazy_eager("In-Reply-To: <a@b> <c@example.com>\r\n", |irt| {
"In-Reply-To: <a@b> <c@example.com>\r\n", assert_eq!(
|irt| assert_eq!(
irt, irt,
&eager::Field::InReplyTo( &eager::Field::InReplyTo(vec![
vec![ model::MessageId {
model::MessageId { left: "a", right: "b" }, left: "a",
model::MessageId { left: "c", right: "example.com" }, right: "b"
] },
) model::MessageId {
left: "c",
right: "example.com"
},
])
) )
) })
} }
#[test] #[test]
fn test_references() { fn test_references() {
lazy_eager( lazy_eager(
"References: <1234@local.machine.example> <3456@example.net>\r\n", "References: <1234@local.machine.example> <3456@example.net>\r\n",
|refer| assert_eq!( |refer| {
refer, assert_eq!(
&eager::Field::References( refer,
vec![ &eager::Field::References(vec![
model::MessageId { left: "1234", right: "local.machine.example" }, model::MessageId {
model::MessageId { left: "3456", right: "example.net" }, left: "1234",
] right: "local.machine.example"
},
model::MessageId {
left: "3456",
right: "example.net"
},
])
) )
) },
) )
} }
#[test] #[test]
fn test_subject() { fn test_subject() {
lazy_eager( lazy_eager("Subject: Aérogramme\r\n", |subject| {
"Subject: Aérogramme\r\n", assert_eq!(
|subject| assert_eq!( subject,
subject, &eager::Field::Subject(misc_token::Unstructured("Aérogramme".into()))
&eager::Field::Subject(misc_token::Unstructured("Aérogramme".into()))
) )
) })
} }
#[test] #[test]
fn test_comments() { fn test_comments() {
lazy_eager( lazy_eager("Comments: 😛 easter egg!\r\n", |comments| {
"Comments: 😛 easter egg!\r\n", assert_eq!(
|comments| assert_eq!( comments,
comments, &eager::Field::Comments(misc_token::Unstructured("😛 easter egg!".into())),
&eager::Field::Comments(misc_token::Unstructured("😛 easter egg!".into())),
) )
) })
} }
#[test] #[test]
fn test_keywords() { fn test_keywords() {
lazy_eager( lazy_eager(
"Keywords: fantasque, farfelu, fanfreluche\r\n", "Keywords: fantasque, farfelu, fanfreluche\r\n",
|keywords| assert_eq!( |keywords| {
keywords, assert_eq!(
&eager::Field::Keywords(misc_token::PhraseList(vec![ keywords,
"fantasque".into(), &eager::Field::Keywords(misc_token::PhraseList(vec![
"farfelu".into(), "fantasque".into(),
"fanfreluche".into() "farfelu".into(),
])) "fanfreluche".into()
) ]))
)
},
) )
} }
//@FIXME non ported tests: //@FIXME non ported tests:
/* /*
#[test] #[test]
fn test_invalid_field_name() { fn test_invalid_field_name() {
assert!(known_field("Unknown: unknown\r\n").is_err()); assert!(known_field("Unknown: unknown\r\n").is_err());
} }
#[test] #[test]
fn test_rescue_field() { fn test_rescue_field() {
assert_eq!( assert_eq!(
rescue_field("Héron: élan\r\n\tnoël: test\r\nFrom: ..."), rescue_field("Héron: élan\r\n\tnoël: test\r\nFrom: ..."),
Ok(("From: ...", Field::Rescue("Héron: élan\r\n\tnoël: test"))), Ok(("From: ...", Field::Rescue("Héron: élan\r\n\tnoël: test"))),
); );
} }
#[test] #[test]
fn test_wrong_fields() { fn test_wrong_fields() {
let fullmail = r#"Return-Path: xoxo let fullmail = r#"Return-Path: xoxo
From: !!!! From: !!!!
Hello world"#; Hello world"#;
assert_eq!( assert_eq!(
section(fullmail), section(fullmail),
Ok(("Hello world", HeaderSection { Ok(("Hello world", HeaderSection {
bad_fields: vec![ bad_fields: vec![
Field::ReturnPath(FieldBody::Failed("xoxo")), Field::ReturnPath(FieldBody::Failed("xoxo")),
Field::From(FieldBody::Failed("!!!!")), Field::From(FieldBody::Failed("!!!!")),
], ],
..Default::default() ..Default::default()
})) }))
); );
} }
*/ */
} }

View file

@ -27,19 +27,23 @@ mod tests {
#[test] #[test]
fn test_field_name() { fn test_field_name() {
assert_eq!(new(&extract_fields::Parsed { assert_eq!(
fields: vec![ new(&extract_fields::Parsed {
"From: hello@world.com,\r\n\talice@wonderlands.com\r\n", fields: vec![
"Date: 12 Mar 1997 07:33:25 Z\r\n", "From: hello@world.com,\r\n\talice@wonderlands.com\r\n",
], "Date: 12 Mar 1997 07:33:25 Z\r\n",
body: b"Hello world!", ],
}), body: b"Hello world!",
Parsed { }),
fields: vec![ Parsed {
lazy::Field::From(lazy::MailboxList("hello@world.com,\r\n\talice@wonderlands.com\r\n")), fields: vec![
lazy::Field::Date(lazy::DateTime("12 Mar 1997 07:33:25 Z\r\n")), lazy::Field::From(lazy::MailboxList(
], "hello@world.com,\r\n\talice@wonderlands.com\r\n"
body: b"Hello world!", )),
}); lazy::Field::Date(lazy::DateTime("12 Mar 1997 07:33:25 Z\r\n")),
],
body: b"Hello world!",
}
);
} }
} }

View file

@ -1,10 +1,10 @@
use std::borrow::Cow;
use chardetng::EncodingDetector; use chardetng::EncodingDetector;
use encoding_rs::Encoding; use encoding_rs::Encoding;
use std::borrow::Cow;
use crate::error::IMFError; use crate::error::IMFError;
use crate::multipass::segment;
use crate::multipass::extract_fields; use crate::multipass::extract_fields;
use crate::multipass::segment;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct Parsed<'a> { pub struct Parsed<'a> {
@ -26,11 +26,11 @@ pub fn new<'a>(seg: &'a segment::Parsed<'a>) -> Parsed<'a> {
// Get encoding // Get encoding
let enc: &Encoding = detector.guess(NO_TLD, ALLOW_UTF8); let enc: &Encoding = detector.guess(NO_TLD, ALLOW_UTF8);
let (header, encoding, malformed) = enc.decode(&seg.header); let (header, encoding, malformed) = enc.decode(&seg.header);
Parsed { Parsed {
header, header,
encoding, encoding,
malformed, malformed,
body: seg.body body: seg.body,
} }
} }
@ -48,15 +48,15 @@ mod tests {
fn test_charset() { fn test_charset() {
assert_eq!( assert_eq!(
new(&segment::Parsed { new(&segment::Parsed {
body: b"Hello world!", body: b"Hello world!",
header: b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n", header: b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n",
} }),
),
Parsed { Parsed {
header: "From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n".into(), header: "From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n".into(),
encoding: encoding_rs::UTF_8, encoding: encoding_rs::UTF_8,
malformed: false, malformed: false,
body: b"Hello world!", body: b"Hello world!",
}); }
);
} }
} }

View file

@ -23,60 +23,64 @@ mod tests {
#[test] #[test]
fn test_section() { fn test_section() {
assert_eq!(new(&field_eager::Parsed { assert_eq!(
fields: vec![ new(&field_eager::Parsed {
eager::Field::From(vec![ fields: vec![
model::MailboxRef { eager::Field::From(vec![
name: None, model::MailboxRef {
addrspec: model::AddrSpec { name: None,
local_part: "hello".into(), addrspec: model::AddrSpec {
domain: "world.com".into() local_part: "hello".into(),
} domain: "world.com".into()
}, }
model::MailboxRef { },
name: None, model::MailboxRef {
addrspec: model::AddrSpec { name: None,
local_part: "alice".into(), addrspec: model::AddrSpec {
domain: "wonderlands.com".into() local_part: "alice".into(),
} domain: "wonderlands.com".into()
}, }
]), },
eager::Field::Date( ]),
FixedOffset::east_opt(0) eager::Field::Date(
.unwrap() FixedOffset::east_opt(0)
.with_ymd_and_hms(1997, 03, 12, 7, 33, 25) .unwrap()
.unwrap() .with_ymd_and_hms(1997, 03, 12, 7, 33, 25)
), .unwrap()
], ),
body: b"Hello world!",
}),
Parsed {
fields: Section {
from: vec![
&model::MailboxRef {
name: None,
addrspec: model::AddrSpec {
local_part: "hello".into(),
domain: "world.com".into()
}
},
&model::MailboxRef {
name: None,
addrspec: model::AddrSpec {
local_part: "alice".into(),
domain: "wonderlands.com".into()
}
},
], ],
body: b"Hello world!",
}),
Parsed {
fields: Section {
from: vec![
&model::MailboxRef {
name: None,
addrspec: model::AddrSpec {
local_part: "hello".into(),
domain: "world.com".into()
}
},
&model::MailboxRef {
name: None,
addrspec: model::AddrSpec {
local_part: "alice".into(),
domain: "wonderlands.com".into()
}
},
],
date: Some(&FixedOffset::east_opt(0) date: Some(
.unwrap() &FixedOffset::east_opt(0)
.with_ymd_and_hms(1997, 03, 12, 7, 33, 25) .unwrap()
.unwrap()), .with_ymd_and_hms(1997, 03, 12, 7, 33, 25)
.unwrap()
),
..Default::default() ..Default::default()
}, },
body: b"Hello world!", body: b"Hello world!",
}); }
);
} }
} }

View file

@ -1,6 +1,6 @@
pub mod segment;
pub mod guess_charset;
pub mod extract_fields; pub mod extract_fields;
pub mod field_lazy;
pub mod field_eager; pub mod field_eager;
pub mod field_lazy;
pub mod guess_charset;
pub mod header_section; pub mod header_section;
pub mod segment;

View file

@ -1,14 +1,14 @@
use nom::{ use nom::{
IResult,
branch::alt, branch::alt,
bytes::complete::{is_not, tag}, bytes::complete::{is_not, tag},
combinator::recognize, combinator::recognize,
sequence::{pair, terminated},
multi::many0, multi::many0,
sequence::{pair, terminated},
IResult,
}; };
use crate::multipass::guess_charset;
use crate::error::IMFError; use crate::error::IMFError;
use crate::multipass::guess_charset;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct Parsed<'a> { pub struct Parsed<'a> {
@ -21,10 +21,7 @@ const LF: u8 = 0x0A;
const CRLF: &[u8] = &[CR, LF]; const CRLF: &[u8] = &[CR, LF];
pub fn new<'a>(buffer: &'a [u8]) -> Result<Parsed<'a>, IMFError<'a>> { pub fn new<'a>(buffer: &'a [u8]) -> Result<Parsed<'a>, IMFError<'a>> {
terminated( terminated(recognize(many0(line)), obs_crlf)(buffer)
recognize(many0(line)),
obs_crlf
)(buffer)
.map_err(|e| IMFError::Segment(e)) .map_err(|e| IMFError::Segment(e))
.map(|(body, header)| Parsed { header, body }) .map(|(body, header)| Parsed { header, body })
} }
@ -36,10 +33,7 @@ impl<'a> Parsed<'a> {
} }
fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> { fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
pair( pair(is_not(CRLF), obs_crlf)(input)
is_not(CRLF),
obs_crlf,
)(input)
} }
fn obs_crlf(input: &[u8]) -> IResult<&[u8], &[u8]> { fn obs_crlf(input: &[u8]) -> IResult<&[u8], &[u8]> {
@ -56,7 +50,7 @@ mod tests {
new(&b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n\r\nHello world!"[..]), new(&b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n\r\nHello world!"[..]),
Ok(Parsed { Ok(Parsed {
header: b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n", header: b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n",
body: b"Hello world!", body: b"Hello world!",
}) })
); );
} }

View file

@ -1,10 +1,12 @@
use imf_codec::multipass::segment;
use imf_codec::fragments::section::Section; use imf_codec::fragments::section::Section;
use imf_codec::multipass::segment;
use std::io; use std::io;
use std::io::Read; use std::io::Read;
fn parser<'a, F>(input: &'a [u8], func: F) -> () fn parser<'a, F>(input: &'a [u8], func: F) -> ()
where F: FnOnce(&Section) -> () { where
F: FnOnce(&Section) -> (),
{
let seg = segment::new(input).unwrap(); let seg = segment::new(input).unwrap();
let charset = seg.charset(); let charset = seg.charset();
let fields = charset.fields().unwrap(); let fields = charset.fields().unwrap();

View file

@ -1,13 +1,15 @@
use imf_codec::fragments::section;
use imf_codec::multipass;
use std::collections::HashSet; use std::collections::HashSet;
use std::path::PathBuf;
use std::fs::File; use std::fs::File;
use std::io::Read; use std::io::Read;
use imf_codec::multipass; use std::path::PathBuf;
use imf_codec::fragments::section;
use walkdir::WalkDir; use walkdir::WalkDir;
fn parser<'a, F>(input: &'a [u8], func: F) -> () fn parser<'a, F>(input: &'a [u8], func: F) -> ()
where F: FnOnce(&section::Section) -> () { where
F: FnOnce(&section::Section) -> (),
{
let seg = multipass::segment::new(input).unwrap(); let seg = multipass::segment::new(input).unwrap();
let charset = seg.charset(); let charset = seg.charset();
let fields = charset.fields().unwrap(); let fields = charset.fields().unwrap();
@ -27,51 +29,44 @@ fn test_enron500k() {
//d.push("williams-w3/"); //d.push("williams-w3/");
let known_bad_fields = HashSet::from([ let known_bad_fields = HashSet::from([
"white-s/calendar/113.", // To: east <7..> "white-s/calendar/113.", // To: east <7..>
"skilling-j/inbox/223.", // From: pep <performance.>
"skilling-j/inbox/223.", // From: pep <performance.>
"jones-t/all_documents/9806.", // To: <"tibor.vizkelety":@enron.com> "jones-t/all_documents/9806.", // To: <"tibor.vizkelety":@enron.com>
"jones-t/notes_inbox/3303.", // To: <"tibor.vizkelety":@enron.com> "jones-t/notes_inbox/3303.", // To: <"tibor.vizkelety":@enron.com>
"lokey-t/calendar/33.", // A second Date entry for the calendar containing
"lokey-t/calendar/33.", // A second Date entry for the calendar containing // Date: Monday, March 12
// Date: Monday, March 12 "zipper-a/inbox/199.", // To: e-mail <mari.>
"dasovich-j/deleted_items/128.", // To: f62489 <g>
"zipper-a/inbox/199.", // To: e-mail <mari.> "dasovich-j/all_documents/677.", // To: w/assts <govt.>
"dasovich-j/all_documents/8984.", // To: <"ft.com.users":@enron.com>
"dasovich-j/deleted_items/128.", // To: f62489 <g> "dasovich-j/all_documents/3514.", // To: <"ft.com.users":@enron.com>
"dasovich-j/all_documents/677.", // To: w/assts <govt.> "dasovich-j/all_documents/4467.", // To: <"ft.com.users":@enron.com>
"dasovich-j/all_documents/8984.", // To: <"ft.com.users":@enron.com> "dasovich-j/all_documents/578.", // To: w/assts <govt.>
"dasovich-j/all_documents/3514.", // To: <"ft.com.users":@enron.com> "dasovich-j/all_documents/3148.", // To: <"economist.com.readers":@enron.com>
"dasovich-j/all_documents/4467.", // To: <"ft.com.users":@enron.com> "dasovich-j/all_documents/9953.", // To: <"economist.com.reader":@enron.com>
"dasovich-j/all_documents/578.", // To: w/assts <govt.> "dasovich-j/risk_analytics/3.", // To: w/assts <govt.>
"dasovich-j/all_documents/3148.", // To: <"economist.com.readers":@enron.com> "dasovich-j/notes_inbox/5391.", // To: <"ft.com.users":@enron.com>
"dasovich-j/all_documents/9953.", // To: <"economist.com.reader":@enron.com> "dasovich-j/notes_inbox/4952.", // To: <"economist.com.reader":@enron.com>
"dasovich-j/risk_analytics/3.", // To: w/assts <govt.> "dasovich-j/notes_inbox/2386.", // To: <"ft.com.users":@enron.com>
"dasovich-j/notes_inbox/5391.", // To: <"ft.com.users":@enron.com> "dasovich-j/notes_inbox/1706.", // To: <"ft.com.users":@enron.com>
"dasovich-j/notes_inbox/4952.", // To: <"economist.com.reader":@enron.com> "dasovich-j/notes_inbox/1489.", // To: <"economist.com.readers":@enron.com>
"dasovich-j/notes_inbox/2386.", // To: <"ft.com.users":@enron.com> "dasovich-j/notes_inbox/5.", // To: w/assts <govt.>
"dasovich-j/notes_inbox/1706.", // To: <"ft.com.users":@enron.com> "kaminski-v/sites/19.", // To: <"the.desk":@enron.com>
"dasovich-j/notes_inbox/1489.", // To: <"economist.com.readers":@enron.com> "kaminski-v/sites/1.", // To: <"the.desk":@enron.com>
"dasovich-j/notes_inbox/5.", // To: w/assts <govt.> "kaminski-v/discussion_threads/5082.", // To: <"ft.com.users":@enron.com>
"kaminski-v/discussion_threads/4046.", // To: <"the.desk":@enron.com>
"kaminski-v/sites/19.", // To: <"the.desk":@enron.com> "kaminski-v/discussion_threads/4187.", // To: <"the.desk":@enron.com>
"kaminski-v/sites/1.", // To: <"the.desk":@enron.com>
"kaminski-v/discussion_threads/5082.", // To: <"ft.com.users":@enron.com>
"kaminski-v/discussion_threads/4046.", // To: <"the.desk":@enron.com>
"kaminski-v/discussion_threads/4187.", // To: <"the.desk":@enron.com>
"kaminski-v/discussion_threads/8068.", // To: cats <breaktkhrough.>, risk <breakthrough.>, leaders <breaktkhrough.> "kaminski-v/discussion_threads/8068.", // To: cats <breaktkhrough.>, risk <breakthrough.>, leaders <breaktkhrough.>
"kaminski-v/discussion_threads/7980.", // To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.> "kaminski-v/discussion_threads/7980.", // To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
"kaminski-v/all_documents/5970.", //To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.> "kaminski-v/all_documents/5970.", //To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
"kaminski-v/all_documents/5838.", // To + Cc: dogs <breakthrough.>, breakthrough.adm@enron.com, breakthrough.adm@enron.com,\r\n\tbreakthrough.adm@enron.com "kaminski-v/all_documents/5838.", // To + Cc: dogs <breakthrough.>, breakthrough.adm@enron.com, breakthrough.adm@enron.com,\r\n\tbreakthrough.adm@enron.com
"kaminski-v/all_documents/10070.", // To: <"ft.com.users":@enron.com> "kaminski-v/all_documents/10070.", // To: <"ft.com.users":@enron.com>
"kaminski-v/all_documents/92.", // To: <"the.desk":@enron.com> "kaminski-v/all_documents/92.", // To: <"the.desk":@enron.com>
"kaminski-v/all_documents/276.", // To: <"the.desk":@enron.com> "kaminski-v/all_documents/276.", // To: <"the.desk":@enron.com>
"kaminski-v/technical/1.", // To: <"the.desk":@enron.com> "kaminski-v/technical/1.", // To: <"the.desk":@enron.com>
"kaminski-v/technical/7.", // To: <"the.desk":@enron.com> "kaminski-v/technical/7.", // To: <"the.desk":@enron.com>
"kaminski-v/notes_inbox/140.", // To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.> "kaminski-v/notes_inbox/140.", // To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
"kaminski-v/notes_inbox/95.", // To + CC failed: cats <breaktkhrough.>, risk <breakthrough.>, leaders <breaktkhrough.> "kaminski-v/notes_inbox/95.", // To + CC failed: cats <breaktkhrough.>, risk <breakthrough.>, leaders <breaktkhrough.>
"kean-s/archiving/untitled/1232.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com "kean-s/archiving/untitled/1232.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
"kean-s/archiving/untitled/1688.", // To: w/assts <govt.> "kean-s/archiving/untitled/1688.", // To: w/assts <govt.>
"kean-s/sent/198.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com "kean-s/sent/198.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
@ -79,11 +74,10 @@ fn test_enron500k() {
"kean-s/discussion_threads/950.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com "kean-s/discussion_threads/950.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
"kean-s/discussion_threads/577.", // To: w/assts <govt.> "kean-s/discussion_threads/577.", // To: w/assts <govt.>
"kean-s/calendar/untitled/1096.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com "kean-s/calendar/untitled/1096.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
"kean-s/calendar/untitled/640.", // To: w/assts <govt.> "kean-s/calendar/untitled/640.", // To: w/assts <govt.>
"kean-s/all_documents/640.", // To: w/assts <govt.> "kean-s/all_documents/640.", // To: w/assts <govt.>
"kean-s/all_documents/1095.", // To: w/assts <govt.> "kean-s/all_documents/1095.", // To: w/assts <govt.>
"kean-s/attachments/2030.", // To: w/assts <govt.> "kean-s/attachments/2030.", // To: w/assts <govt.>
"williams-w3/operations_committee_isas/10.", // To: z34655 <m> "williams-w3/operations_committee_isas/10.", // To: z34655 <m>
]); ]);
@ -92,7 +86,10 @@ fn test_enron500k() {
]); ]);
let mut i = 0; let mut i = 0;
for entry in WalkDir::new(d.as_path()).into_iter().filter_map(|file| file.ok()) { for entry in WalkDir::new(d.as_path())
.into_iter()
.filter_map(|file| file.ok())
{
if entry.metadata().unwrap().is_file() { if entry.metadata().unwrap().is_file() {
let mail_path = entry.path(); let mail_path = entry.path();
let suffix = &mail_path.to_str().unwrap()[prefix_sz..]; let suffix = &mail_path.to_str().unwrap()[prefix_sz..];
@ -127,6 +124,6 @@ fn test_enron500k() {
println!("Analyzed emails: {}", i); println!("Analyzed emails: {}", i);
} }
}) })
} }
} }
} }

View file

@ -1,10 +1,12 @@
use chrono::{FixedOffset, TimeZone}; use chrono::{FixedOffset, TimeZone};
use std::collections::HashMap; use imf_codec::fragments::{misc_token, model, section, trace};
use imf_codec::multipass; use imf_codec::multipass;
use imf_codec::fragments::{model, misc_token, trace, section}; use std::collections::HashMap;
fn parser<'a, F>(input: &'a [u8], func: F) -> () fn parser<'a, F>(input: &'a [u8], func: F) -> ()
where F: FnOnce(&section::Section) -> () { where
F: FnOnce(&section::Section) -> (),
{
let seg = multipass::segment::new(input).unwrap(); let seg = multipass::segment::new(input).unwrap();
let charset = seg.charset(); let charset = seg.charset();
let fields = charset.fields().unwrap(); let fields = charset.fields().unwrap();
@ -48,29 +50,35 @@ References: <1234@local.machine.example>
Unknown: unknown Unknown: unknown
This is a reply to your hello. This is a reply to your hello.
"#.as_bytes(); "#
parser(fullmail, |parsed_section| .as_bytes();
parser(fullmail, |parsed_section| {
assert_eq!( assert_eq!(
parsed_section, parsed_section,
&section::Section { &section::Section {
date: Some(&FixedOffset::east_opt(2 * 3600) date: Some(
.unwrap() &FixedOffset::east_opt(2 * 3600)
.with_ymd_and_hms(2023, 06, 13, 10, 01, 10) .unwrap()
.unwrap()), .with_ymd_and_hms(2023, 06, 13, 10, 01, 10)
.unwrap()
),
from: vec![&model::MailboxRef { from: vec![
name: Some("Mary Smith".into()), &model::MailboxRef {
addrspec: model::AddrSpec { name: Some("Mary Smith".into()),
local_part: "mary".into(), addrspec: model::AddrSpec {
domain: "example.net".into(), local_part: "mary".into(),
domain: "example.net".into(),
}
},
&model::MailboxRef {
name: Some("Alan".into()),
addrspec: model::AddrSpec {
local_part: "alan".into(),
domain: "example".into(),
}
} }
}, &model::MailboxRef { ],
name: Some("Alan".into()),
addrspec: model::AddrSpec {
local_part: "alan".into(),
domain: "example".into(),
}
}],
sender: Some(&model::MailboxRef { sender: Some(&model::MailboxRef {
name: None, name: None,
@ -106,33 +114,41 @@ This is a reply to your hello.
bcc: vec![], bcc: vec![],
msg_id: Some(&model::MessageId { left: "3456", right: "example.net" }), msg_id: Some(&model::MessageId {
in_reply_to: vec![&model::MessageId { left: "1234", right: "local.machine.example" }], left: "3456",
references: vec![&model::MessageId { left: "1234", right: "local.machine.example" }], right: "example.net"
}),
in_reply_to: vec![&model::MessageId {
left: "1234",
right: "local.machine.example"
}],
references: vec![&model::MessageId {
left: "1234",
right: "local.machine.example"
}],
subject: Some(&misc_token::Unstructured("Re: Saying Hello".into())), subject: Some(&misc_token::Unstructured("Re: Saying Hello".into())),
comments: vec![ comments: vec![
&misc_token::Unstructured("A simple message".into()), &misc_token::Unstructured("A simple message".into()),
&misc_token::Unstructured("Not that complicated".into()), &misc_token::Unstructured("Not that complicated".into()),
&misc_token::Unstructured("not valid header name but should be accepted by the parser.".into()), &misc_token::Unstructured(
"not valid header name but should be accepted by the parser.".into()
),
], ],
keywords: vec![ keywords: vec![
&misc_token::PhraseList(vec![ &misc_token::PhraseList(vec!["hello".into(), "world".into(),]),
"hello".into(), &misc_token::PhraseList(vec!["salut".into(), "le".into(), "monde".into(),]),
"world".into(),
]),
&misc_token::PhraseList(vec![
"salut".into(),
"le".into(),
"monde".into(),
]),
], ],
received: vec![ received: vec![&trace::ReceivedLog(
&trace::ReceivedLog("from smtp.example.com ([10.83.2.2])\n\tby doradille with LMTP\n\tid xyzabcd\n\t(envelope-from <gitlab@example.com>)\n\tfor <quentin@example.com>") r#"from smtp.example.com ([10.83.2.2])
], by doradille with LMTP
id xyzabcd
(envelope-from <gitlab@example.com>)
for <quentin@example.com>"#
)],
return_path: vec![&model::MailboxRef { return_path: vec![&model::MailboxRef {
name: None, name: None,
@ -143,8 +159,11 @@ This is a reply to your hello.
}], }],
optional: HashMap::from([ optional: HashMap::from([
("Delivered-To", &misc_token::Unstructured("quentin@example.com".into())), (
("Unknown", &misc_token::Unstructured("unknown".into())), "Delivered-To",
&misc_token::Unstructured("quentin@example.com".into())
),
("Unknown", &misc_token::Unstructured("unknown".into())),
]), ]),
bad_fields: vec![], bad_fields: vec![],
@ -155,5 +174,5 @@ This is a reply to your hello.
], ],
} }
) )
) })
} }