cargo fmt

This commit is contained in:
Quentin 2023-06-22 15:08:50 +02:00
parent 99c6490eb2
commit 5fda64477c
Signed by: quentin
GPG key ID: E9602264D639FF68
24 changed files with 1173 additions and 728 deletions

View file

@ -1,18 +1,18 @@
use nom::{
IResult,
branch::alt,
bytes::complete::tag,
combinator::{into, opt},
multi::separated_list1,
sequence::tuple,
IResult,
};
use crate::error::IMFError;
use crate::fragments::lazy;
use crate::fragments::model::{GroupRef, AddressRef, MailboxRef, MailboxList, AddressList};
use crate::fragments::mailbox::mailbox;
use crate::fragments::misc_token::phrase;
use crate::fragments::whitespace::{cfws};
use crate::error::IMFError;
use crate::fragments::model::{AddressList, AddressRef, GroupRef, MailboxList, MailboxRef};
use crate::fragments::whitespace::cfws;
impl<'a> TryFrom<&'a lazy::Mailbox<'a>> for MailboxRef {
type Error = IMFError<'a>;
@ -70,13 +70,16 @@ pub fn address(input: &str) -> IResult<&str, AddressRef> {
/// display-name = phrase
/// ```
pub fn group(input: &str) -> IResult<&str, GroupRef> {
let (input, (grp_name, _, grp_list, _, _)) =
let (input, (grp_name, _, grp_list, _, _)) =
tuple((phrase, tag(":"), opt(group_list), tag(";"), opt(cfws)))(input)?;
Ok((input, GroupRef {
name: grp_name,
participants: grp_list.unwrap_or(vec![]),
}))
Ok((
input,
GroupRef {
name: grp_name,
participants: grp_list.unwrap_or(vec![]),
},
))
}
/// Group list
@ -128,7 +131,9 @@ mod tests {
_ => panic!(),
};
match mailbox_list(r#"Mary Smith <mary@x.test>, jdoe@example.org, Who? <one@y.test>, <boss@nil.test>, "Giant; \"Big\" Box" <sysservices@example.net>"#) {
match mailbox_list(
r#"Mary Smith <mary@x.test>, jdoe@example.org, Who? <one@y.test>, <boss@nil.test>, "Giant; \"Big\" Box" <sysservices@example.net>"#,
) {
Ok(("", _)) => (),
_ => panic!(),
};
@ -137,30 +142,47 @@ mod tests {
#[test]
fn test_address_list() {
assert_eq!(
address_list(r#"A Group:Ed Jones <c@a.test>,joe@where.test,John <jdoe@one.test>;, Mary Smith <mary@x.test>"#),
Ok(("", vec![
AddressRef::Many(GroupRef {
name: "A Group".to_string(),
participants: vec![
MailboxRef {
name: Some("Ed Jones".into()),
addrspec: AddrSpec { local_part: "c".into(), domain: "a.test".into() },
address_list(
r#"A Group:Ed Jones <c@a.test>,joe@where.test,John <jdoe@one.test>;, Mary Smith <mary@x.test>"#
),
Ok((
"",
vec![
AddressRef::Many(GroupRef {
name: "A Group".to_string(),
participants: vec![
MailboxRef {
name: Some("Ed Jones".into()),
addrspec: AddrSpec {
local_part: "c".into(),
domain: "a.test".into()
},
},
MailboxRef {
name: None,
addrspec: AddrSpec {
local_part: "joe".into(),
domain: "where.test".into()
},
},
MailboxRef {
name: Some("John".into()),
addrspec: AddrSpec {
local_part: "jdoe".into(),
domain: "one.test".into()
},
},
],
}),
AddressRef::Single(MailboxRef {
name: Some("Mary Smith".into()),
addrspec: AddrSpec {
local_part: "mary".into(),
domain: "x.test".into()
},
MailboxRef {
name: None,
addrspec: AddrSpec { local_part: "joe".into(), domain: "where.test".into() },
},
MailboxRef {
name: Some("John".into()),
addrspec: AddrSpec { local_part: "jdoe".into(), domain: "one.test".into() },
},
],
}),
AddressRef::Single(MailboxRef {
name: Some("Mary Smith".into()),
addrspec: AddrSpec { local_part: "mary".into(), domain: "x.test".into() },
}),
]))
}),
]
))
);
}
}

View file

@ -1,16 +1,16 @@
use crate::error::IMFError;
use crate::fragments::lazy;
use crate::fragments::whitespace::{cfws, fws};
use chrono::{DateTime, FixedOffset, NaiveDate, NaiveTime};
use nom::{
IResult,
branch::alt,
bytes::complete::{tag, tag_no_case, take_while_m_n, is_a},
bytes::complete::{is_a, tag, tag_no_case, take_while_m_n},
character,
character::complete::{one_of, alphanumeric1, digit0},
character::complete::{alphanumeric1, digit0, one_of},
combinator::{map, opt, value},
sequence::{preceded, terminated, tuple, delimited },
sequence::{delimited, preceded, terminated, tuple},
IResult,
};
use crate::fragments::lazy;
use crate::fragments::whitespace::{fws, cfws};
use crate::error::IMFError;
const MIN: i32 = 60;
const HOUR: i32 = 60 * MIN;
@ -43,20 +43,31 @@ impl<'a> TryFrom<&'a lazy::DateTime<'a>> for DateTime<FixedOffset> {
/// due to an error in RFC0822 but are interpreted as their respective
/// timezone according to the RFC5322 definition
pub fn section(input: &str) -> IResult<&str, Option<DateTime<FixedOffset>>> {
map(terminated(
map(
terminated(
alt((
tuple((opt(terminated(strict_day_of_week, tag(","))), strict_date, strict_time_of_day, strict_zone )),
tuple((opt(terminated(obs_day_of_week, tag(","))), obs_date, obs_time_of_day, alt((strict_zone, obs_zone)) )),
tuple((
opt(terminated(strict_day_of_week, tag(","))),
strict_date,
strict_time_of_day,
strict_zone,
)),
tuple((
opt(terminated(obs_day_of_week, tag(","))),
obs_date,
obs_time_of_day,
alt((strict_zone, obs_zone)),
)),
)),
opt(cfws)
), |res| {
match res {
(_, Some(date), Some(time), Some(tz)) => {
date.and_time(time).and_local_timezone(tz).earliest()
},
_ => None,
opt(cfws),
),
|res| match res {
(_, Some(date), Some(time), Some(tz)) => {
date.and_time(time).and_local_timezone(tz).earliest()
}
})(input)
_ => None,
},
)(input)
}
/// day-of-week = ([FWS] day-name) / obs-day-of-week
@ -85,18 +96,16 @@ fn day_name(input: &str) -> IResult<&str, &str> {
/// date = day month year
fn strict_date(input: &str) -> IResult<&str, Option<NaiveDate>> {
map(
tuple((strict_day, month, strict_year)),
|(d, m, y)| NaiveDate::from_ymd_opt(y, m, d)
)(input)
map(tuple((strict_day, month, strict_year)), |(d, m, y)| {
NaiveDate::from_ymd_opt(y, m, d)
})(input)
}
/// date = day month year
fn obs_date(input: &str) -> IResult<&str, Option<NaiveDate>> {
map(
tuple((obs_day, month, obs_year)),
|(d, m, y)| NaiveDate::from_ymd_opt(y, m, d)
)(input)
map(tuple((obs_day, month, obs_year)), |(d, m, y)| {
NaiveDate::from_ymd_opt(y, m, d)
})(input)
}
/// day = ([FWS] 1*2DIGIT FWS) / obs-day
@ -132,45 +141,63 @@ fn month(input: &str) -> IResult<&str, u32> {
/// year = (FWS 4*DIGIT FWS) / obs-year
fn strict_year(input: &str) -> IResult<&str, i32> {
delimited(
fws,
fws,
map(
terminated(take_while_m_n(4,9,|c| c >= '\x30' && c <= '\x39'), digit0),
|d: &str| d.parse::<i32>().unwrap()),
terminated(take_while_m_n(4, 9, |c| c >= '\x30' && c <= '\x39'), digit0),
|d: &str| d.parse::<i32>().unwrap(),
),
fws,
)(input)
}
/// obs-year = [CFWS] 2*DIGIT [CFWS]
fn obs_year(input: &str) -> IResult<&str, i32> {
map(delimited(
opt(cfws),
terminated(take_while_m_n(2,7,|c| c >= '\x30' && c <= '\x39'), digit0),
opt(cfws)
), |cap: &str| {
let d = cap.parse::<i32>().unwrap();
if d >= 0 && d <= 49 {
2000 + d
} else if d >= 50 && d <= 999 {
1900 + d
} else {
d
}
})(input)
map(
delimited(
opt(cfws),
terminated(take_while_m_n(2, 7, |c| c >= '\x30' && c <= '\x39'), digit0),
opt(cfws),
),
|cap: &str| {
let d = cap.parse::<i32>().unwrap();
if d >= 0 && d <= 49 {
2000 + d
} else if d >= 50 && d <= 999 {
1900 + d
} else {
d
}
},
)(input)
}
/// time-of-day = hour ":" minute [ ":" second ]
fn strict_time_of_day(input: &str) -> IResult<&str, Option<NaiveTime>> {
map(
tuple((strict_time_digit, tag(":"), strict_time_digit, opt(preceded(tag(":"), strict_time_digit)))),
|(hour, _, minute, maybe_sec)| NaiveTime::from_hms_opt(hour, minute, maybe_sec.unwrap_or(0)),
tuple((
strict_time_digit,
tag(":"),
strict_time_digit,
opt(preceded(tag(":"), strict_time_digit)),
)),
|(hour, _, minute, maybe_sec)| {
NaiveTime::from_hms_opt(hour, minute, maybe_sec.unwrap_or(0))
},
)(input)
}
/// time-of-day = hour ":" minute [ ":" second ]
fn obs_time_of_day(input: &str) -> IResult<&str, Option<NaiveTime>> {
map(
tuple((obs_time_digit, tag(":"), obs_time_digit, opt(preceded(tag(":"), obs_time_digit)))),
|(hour, _, minute, maybe_sec)| NaiveTime::from_hms_opt(hour, minute, maybe_sec.unwrap_or(0)),
tuple((
obs_time_digit,
tag(":"),
obs_time_digit,
opt(preceded(tag(":"), obs_time_digit)),
)),
|(hour, _, minute, maybe_sec)| {
NaiveTime::from_hms_opt(hour, minute, maybe_sec.unwrap_or(0))
},
)(input)
}
@ -189,15 +216,21 @@ fn obs_time_digit(input: &str) -> IResult<&str, u32> {
/// ```
fn strict_zone(input: &str) -> IResult<&str, Option<FixedOffset>> {
map(
tuple((opt(fws), is_a("+-"), take_while_m_n(2,2,|c| c >= '\x30' && c <= '\x39'), take_while_m_n(2,2,|c| c >= '\x30' && c <= '\x39'))),
tuple((
opt(fws),
is_a("+-"),
take_while_m_n(2, 2, |c| c >= '\x30' && c <= '\x39'),
take_while_m_n(2, 2, |c| c >= '\x30' && c <= '\x39'),
)),
|(_, op, dig_zone_hour, dig_zone_min)| {
let zone_hour = dig_zone_hour.parse::<i32>().unwrap() * HOUR;
let zone_min = dig_zone_min.parse::<i32>().unwrap() * MIN;
match op {
"+" => FixedOffset::east_opt(zone_hour + zone_min),
"-" => FixedOffset::west_opt(zone_hour + zone_min),
_ => unreachable!(), }
}
_ => unreachable!(),
}
},
)(input)
}
@ -216,7 +249,7 @@ fn strict_zone(input: &str) -> IResult<&str, Option<FixedOffset>> {
/// %d97-105 / ; through "Z", both
/// %d107-122 / ; upper and lower case
/// ;
/// 1*(ALPHA / DIGIT) ; Unknown legacy timezones
/// 1*(ALPHA / DIGIT) ; Unknown legacy timezones
fn obs_zone(input: &str) -> IResult<&str, Option<FixedOffset>> {
// This function is deliberately written in a verbose style
// to keep it straightforward to understand.
@ -227,18 +260,27 @@ fn obs_zone(input: &str) -> IResult<&str, Option<FixedOffset>> {
opt(fws),
alt((
// Legacy UTC/GMT
value(FixedOffset::west_opt(0 * HOUR), alt((tag("UTC"), tag("UT"), tag("GMT")))),
value(
FixedOffset::west_opt(0 * HOUR),
alt((tag("UTC"), tag("UT"), tag("GMT"))),
),
// USA Timezones
value(FixedOffset::west_opt(4 * HOUR), tag("EDT")),
value(FixedOffset::west_opt(5 * HOUR), alt((tag("EST"), tag("CDT")))),
value(FixedOffset::west_opt(6 * HOUR), alt((tag("CST"), tag("MDT")))),
value(FixedOffset::west_opt(7 * HOUR), alt((tag("MST"), tag("PDT")))),
value(
FixedOffset::west_opt(5 * HOUR),
alt((tag("EST"), tag("CDT"))),
),
value(
FixedOffset::west_opt(6 * HOUR),
alt((tag("CST"), tag("MDT"))),
),
value(
FixedOffset::west_opt(7 * HOUR),
alt((tag("MST"), tag("PDT"))),
),
value(FixedOffset::west_opt(8 * HOUR), tag("PST")),
// Military Timezone UTC
value(FixedOffset::west_opt(0 * HOUR), tag("Z")),
// Military Timezones East
map(one_of("ABCDEFGHIKLMabcdefghiklm"), |c| match c {
'A' | 'a' => FixedOffset::east_opt(1 * HOUR),
@ -255,7 +297,6 @@ fn obs_zone(input: &str) -> IResult<&str, Option<FixedOffset>> {
'M' | 'm' => FixedOffset::east_opt(12 * HOUR),
_ => unreachable!(),
}),
// Military Timezones West
map(one_of("nopqrstuvwxyNOPQRSTUVWXY"), |c| match c {
'N' | 'n' => FixedOffset::west_opt(1 * HOUR),
@ -272,7 +313,6 @@ fn obs_zone(input: &str) -> IResult<&str, Option<FixedOffset>> {
'Y' | 'y' => FixedOffset::west_opt(12 * HOUR),
_ => unreachable!(),
}),
// Unknown timezone
value(FixedOffset::west_opt(0 * HOUR), alphanumeric1),
)),
@ -284,12 +324,19 @@ mod tests {
use super::*;
use chrono::TimeZone;
#[test]
fn test_section_rfc_strict() {
assert_eq!(
section("Fri, 21 Nov 1997 09:55:06 -0600"),
Ok(("", Some(FixedOffset::west_opt(6 * HOUR).unwrap().with_ymd_and_hms(1997, 11, 21, 9, 55, 6).unwrap()))),
section("Fri, 21 Nov 1997 09:55:06 -0600"),
Ok((
"",
Some(
FixedOffset::west_opt(6 * HOUR)
.unwrap()
.with_ymd_and_hms(1997, 11, 21, 9, 55, 6)
.unwrap()
)
)),
);
}
@ -297,7 +344,15 @@ mod tests {
fn test_section_received() {
assert_eq!(
section("Sun, 18 Jun 2023 15:39:08 +0200 (CEST)"),
Ok(("", Some(FixedOffset::east_opt(2 * HOUR).unwrap().with_ymd_and_hms(2023, 6, 18, 15, 39, 8).unwrap()))),
Ok((
"",
Some(
FixedOffset::east_opt(2 * HOUR)
.unwrap()
.with_ymd_and_hms(2023, 6, 18, 15, 39, 8)
.unwrap()
)
)),
);
}
@ -310,8 +365,17 @@ mod tests {
Feb
1969
23:32
-0330 (Newfoundland Time)"#),
Ok(("", Some(FixedOffset::west_opt(3 * HOUR + 30 * MIN).unwrap().with_ymd_and_hms(1969, 2, 13, 23, 32, 00).unwrap()))),
-0330 (Newfoundland Time)"#
),
Ok((
"",
Some(
FixedOffset::west_opt(3 * HOUR + 30 * MIN)
.unwrap()
.with_ymd_and_hms(1969, 2, 13, 23, 32, 00)
.unwrap()
)
)),
);
}
@ -319,7 +383,15 @@ mod tests {
fn test_section_rfc_obs() {
assert_eq!(
section("21 Nov 97 09:55:06 GMT"),
Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(1997, 11, 21, 9, 55, 6).unwrap()))),
Ok((
"",
Some(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(1997, 11, 21, 9, 55, 6)
.unwrap()
)
)),
);
}
@ -327,7 +399,15 @@ mod tests {
fn test_section_3digit_year() {
assert_eq!(
section("21 Nov 103 09:55:06 UT"),
Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2003, 11, 21, 9, 55, 6).unwrap()))),
Ok((
"",
Some(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(2003, 11, 21, 9, 55, 6)
.unwrap()
)
)),
);
}
@ -335,7 +415,15 @@ mod tests {
fn test_section_rfc_obs_ws() {
assert_eq!(
section("Fri, 21 Nov 1997 09(comment): 55 : 06 -0600"),
Ok(("", Some(FixedOffset::west_opt(6 * HOUR).unwrap().with_ymd_and_hms(1997, 11, 21, 9, 55, 6).unwrap()))),
Ok((
"",
Some(
FixedOffset::west_opt(6 * HOUR)
.unwrap()
.with_ymd_and_hms(1997, 11, 21, 9, 55, 6)
.unwrap()
)
)),
);
}
@ -343,55 +431,133 @@ mod tests {
fn test_section_2digit_year() {
assert_eq!(
section("21 Nov 23 09:55:06Z"),
Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 11, 21, 9, 55, 6).unwrap()))),
Ok((
"",
Some(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(2023, 11, 21, 9, 55, 6)
.unwrap()
)
)),
);
}
#[test]
fn test_section_military_zone_east() {
["a", "B", "c", "D", "e", "F", "g", "H", "i", "K", "l", "M"].iter().enumerate().for_each(|(i, x)| {
assert_eq!(
section(format!("1 Jan 22 08:00:00 {}", x).as_str()),
Ok(("", Some(FixedOffset::east_opt((i as i32 + 1) * HOUR).unwrap().with_ymd_and_hms(2022, 01, 01, 8, 0, 0).unwrap())))
);
});
["a", "B", "c", "D", "e", "F", "g", "H", "i", "K", "l", "M"]
.iter()
.enumerate()
.for_each(|(i, x)| {
assert_eq!(
section(format!("1 Jan 22 08:00:00 {}", x).as_str()),
Ok((
"",
Some(
FixedOffset::east_opt((i as i32 + 1) * HOUR)
.unwrap()
.with_ymd_and_hms(2022, 01, 01, 8, 0, 0)
.unwrap()
)
))
);
});
}
#[test]
fn test_section_military_zone_west() {
["N", "O", "P", "q", "r", "s", "T", "U", "V", "w", "x", "y"].iter().enumerate().for_each(|(i, x)| {
assert_eq!(
section(format!("1 Jan 22 08:00:00 {}", x).as_str()),
Ok(("", Some(FixedOffset::west_opt((i as i32 + 1) * HOUR).unwrap().with_ymd_and_hms(2022, 01, 01, 8, 0, 0).unwrap())))
);
});
["N", "O", "P", "q", "r", "s", "T", "U", "V", "w", "x", "y"]
.iter()
.enumerate()
.for_each(|(i, x)| {
assert_eq!(
section(format!("1 Jan 22 08:00:00 {}", x).as_str()),
Ok((
"",
Some(
FixedOffset::west_opt((i as i32 + 1) * HOUR)
.unwrap()
.with_ymd_and_hms(2022, 01, 01, 8, 0, 0)
.unwrap()
)
))
);
});
}
#[test]
fn test_section_gmt() {
assert_eq!(
section("21 Nov 2023 07:07:07 +0000"),
Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 11, 21, 7, 7, 7).unwrap()))),
Ok((
"",
Some(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(2023, 11, 21, 7, 7, 7)
.unwrap()
)
)),
);
assert_eq!(
section("21 Nov 2023 07:07:07 -0000"),
Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 11, 21, 7, 7, 7).unwrap()))),
Ok((
"",
Some(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(2023, 11, 21, 7, 7, 7)
.unwrap()
)
)),
);
assert_eq!(
section("21 Nov 2023 07:07:07 Z"),
Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 11, 21, 7, 7, 7).unwrap()))),
Ok((
"",
Some(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(2023, 11, 21, 7, 7, 7)
.unwrap()
)
)),
);
assert_eq!(
section("21 Nov 2023 07:07:07 GMT"),
Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 11, 21, 7, 7, 7).unwrap()))),
Ok((
"",
Some(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(2023, 11, 21, 7, 7, 7)
.unwrap()
)
)),
);
assert_eq!(
section("21 Nov 2023 07:07:07 UT"),
Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 11, 21, 7, 7, 7).unwrap()))),
Ok((
"",
Some(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(2023, 11, 21, 7, 7, 7)
.unwrap()
)
)),
);
assert_eq!(
section("21 Nov 2023 07:07:07 UTC"),
Ok(("", Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 11, 21, 7, 7, 7).unwrap()))),
Ok((
"",
Some(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(2023, 11, 21, 7, 7, 7)
.unwrap()
)
)),
);
}
@ -399,7 +565,15 @@ mod tests {
fn test_section_usa() {
assert_eq!(
section("21 Nov 2023 4:4:4 CST"),
Ok(("", Some(FixedOffset::west_opt(6 * HOUR).unwrap().with_ymd_and_hms(2023, 11, 21, 4, 4, 4).unwrap()))),
Ok((
"",
Some(
FixedOffset::west_opt(6 * HOUR)
.unwrap()
.with_ymd_and_hms(2023, 11, 21, 4, 4, 4)
.unwrap()
)
)),
);
}
}

View file

@ -1,11 +1,9 @@
use chrono::{DateTime, FixedOffset};
use crate::fragments::model::{
MailboxList, MailboxRef, AddressList,
MessageId, MessageIdList};
use crate::fragments::misc_token::{Unstructured, PhraseList};
use crate::fragments::trace::ReceivedLog;
use crate::fragments::lazy::Field as Lazy;
use crate::error::IMFError;
use crate::fragments::lazy::Field as Lazy;
use crate::fragments::misc_token::{PhraseList, Unstructured};
use crate::fragments::model::{AddressList, MailboxList, MailboxRef, MessageId, MessageIdList};
use crate::fragments::trace::ReceivedLog;
use chrono::{DateTime, FixedOffset};
#[derive(Debug, PartialEq)]
pub enum Field<'a> {

View file

@ -1,18 +1,18 @@
use nom::{
IResult,
branch::alt,
bytes::complete::{take_while, tag},
bytes::complete::{tag, take_while},
combinator::opt,
multi::many1,
sequence::{delimited, pair, tuple},
IResult,
};
use crate::error::IMFError;
use crate::fragments::lazy;
use crate::fragments::whitespace::cfws;
use crate::fragments::words::dot_atom_text;
use crate::fragments::mailbox::is_dtext;
use crate::fragments::model::{MessageId, MessageIdList};
use crate::error::IMFError;
use crate::fragments::whitespace::cfws;
use crate::fragments::words::dot_atom_text;
impl<'a> TryFrom<&'a lazy::Identifier<'a>> for MessageId<'a> {
type Error = IMFError<'a>;
@ -45,12 +45,12 @@ pub fn msg_id(input: &str) -> IResult<&str, MessageId> {
tuple((id_left, tag("@"), id_right)),
pair(tag(">"), opt(cfws)),
)(input)?;
Ok((input, MessageId{ left, right }))
Ok((input, MessageId { left, right }))
}
// Missing obsolete
fn id_left(input: &str) -> IResult<&str, &str> {
dot_atom_text(input)
dot_atom_text(input)
}
// Missing obsolete
@ -70,7 +70,13 @@ mod tests {
fn test_msg_id() {
assert_eq!(
msg_id("<5678.21-Nov-1997@example.com>"),
Ok(("", MessageId{left: "5678.21-Nov-1997", right: "example.com"})),
Ok((
"",
MessageId {
left: "5678.21-Nov-1997",
right: "example.com"
}
)),
);
}
}

View file

@ -1,10 +1,10 @@
use std::convert::From;
use nom::{
IResult,
bytes::complete::{take_while1, tag},
bytes::complete::{tag, take_while1},
character::complete::space0,
sequence::{terminated, tuple},
IResult,
};
#[derive(Debug, PartialEq)]
@ -98,34 +98,38 @@ impl<'a> From<&'a str> for Field<'a> {
fn field_name(input: &str) -> IResult<&str, &str> {
terminated(
take_while1(|c| c >= '\x21' && c <= '\x7E' && c != '\x3A'),
tuple((space0, tag(":"), space0))
tuple((space0, tag(":"), space0)),
)(input)
}
fn correct_field(input: &str) -> IResult<&str, Field> {
field_name(input)
.map(|(rest, name)| ("", match name.to_lowercase().as_ref() {
"date" => Date(DateTime(rest)),
field_name(input).map(|(rest, name)| {
(
"",
match name.to_lowercase().as_ref() {
"date" => Date(DateTime(rest)),
"from" => From(MailboxList(rest)),
"sender" => Sender(Mailbox(rest)),
"reply-to" => ReplyTo(AddressList(rest)),
"from" => From(MailboxList(rest)),
"sender" => Sender(Mailbox(rest)),
"reply-to" => ReplyTo(AddressList(rest)),
"to" => To(AddressList(rest)),
"cc" => Cc(AddressList(rest)),
"bcc" => Bcc(NullableAddressList(rest)),
"to" => To(AddressList(rest)),
"cc" => Cc(AddressList(rest)),
"bcc" => Bcc(NullableAddressList(rest)),
"message-id" => MessageID(Identifier(rest)),
"in-reply-to" => InReplyTo(IdentifierList(rest)),
"references" => References(IdentifierList(rest)),
"message-id" => MessageID(Identifier(rest)),
"in-reply-to" => InReplyTo(IdentifierList(rest)),
"references" => References(IdentifierList(rest)),
"subject" => Subject(Unstructured(rest)),
"comments" => Comments(Unstructured(rest)),
"keywords" => Keywords(PhraseList(rest)),
"subject" => Subject(Unstructured(rest)),
"comments" => Comments(Unstructured(rest)),
"keywords" => Keywords(PhraseList(rest)),
"return-path" => ReturnPath(Mailbox(rest)),
"received" => Received(ReceivedLog(rest)),
"return-path" => ReturnPath(Mailbox(rest)),
"received" => Received(ReceivedLog(rest)),
_ => Optional(name, Unstructured(rest)),
}))
_ => Optional(name, Unstructured(rest)),
},
)
})
}

View file

@ -1,19 +1,19 @@
use std::borrow::Cow;
use nom::{
IResult,
branch::alt,
bytes::complete::{tag, is_a},
bytes::complete::{is_a, tag},
character::complete::satisfy,
combinator::{into,map,opt,recognize},
multi::{separated_list1, fold_many0, many0},
sequence::{delimited,pair,preceded,terminated,tuple},
combinator::{into, map, opt, recognize},
multi::{fold_many0, many0, separated_list1},
sequence::{delimited, pair, preceded, terminated, tuple},
IResult,
};
use std::borrow::Cow;
use crate::fragments::model::{MailboxRef, AddrSpec};
use crate::fragments::misc_token::{phrase, word};
use crate::fragments::model::{AddrSpec, MailboxRef};
use crate::fragments::quoted::quoted_string;
use crate::fragments::whitespace::{cfws, fws, is_obs_no_ws_ctl};
use crate::fragments::words::{atom, dot_atom};
use crate::fragments::quoted::quoted_string;
/// Mailbox
///
@ -43,10 +43,10 @@ fn name_addr(input: &str) -> IResult<&str, MailboxRef> {
/// obs-angle-addr
/// ```
pub fn angle_addr(input: &str) -> IResult<&str, MailboxRef> {
delimited(
tuple((opt(cfws), tag("<"), opt(obs_route))),
into(addr_spec),
pair(tag(">"), opt(cfws)),
delimited(
tuple((opt(cfws), tag("<"), opt(obs_route))),
into(addr_spec),
pair(tag(">"), opt(cfws)),
)(input)
}
@ -61,7 +61,10 @@ fn obs_route(input: &str) -> IResult<&str, Vec<String>> {
/// ```
fn obs_domain_list(input: &str) -> IResult<&str, Vec<String>> {
//@FIXME complexity is O(n) in terms of domains here.
let (input, head) = preceded(pair(many0(alt((recognize(cfws), tag(",")))), tag("@")), obs_domain)(input)?;
let (input, head) = preceded(
pair(many0(alt((recognize(cfws), tag(",")))), tag("@")),
obs_domain,
)(input)?;
let (input, mut rest) = obs_domain_list_rest(input)?;
rest.insert(0, head);
Ok((input, rest))
@ -73,7 +76,7 @@ fn obs_domain_list_rest(input: &str) -> IResult<&str, Vec<String>> {
pair(tag(","), opt(cfws)),
opt(preceded(tag("@"), obs_domain)),
)),
|v: Vec<Option<String>>| v.into_iter().flatten().collect()
|v: Vec<Option<String>>| v.into_iter().flatten().collect(),
)(input)
}
@ -86,9 +89,13 @@ fn obs_domain_list_rest(input: &str) -> IResult<&str, Vec<String>> {
/// so I force obsolete for now...
pub fn addr_spec(input: &str) -> IResult<&str, AddrSpec> {
map(
tuple((obs_local_part, tag("@"), obs_domain, many0(pair(tag("@"), obs_domain)))),
|(local_part, _, domain, _)|
AddrSpec { local_part, domain },
tuple((
obs_local_part,
tag("@"),
obs_domain,
many0(pair(tag("@"), obs_domain)),
)),
|(local_part, _, domain, _)| AddrSpec { local_part, domain },
)(input)
}
@ -108,7 +115,7 @@ fn strict_local_part(input: &str) -> IResult<&str, String> {
/// This is found in Enron emails and supported by Gmail.
///
/// Obsolete local part is a superset of strict_local_part:
/// anything that is parsed by strict_local_part will be parsed by
/// anything that is parsed by strict_local_part will be parsed by
/// obs_local_part.
///
/// ```abnf
@ -118,7 +125,8 @@ fn obs_local_part(input: &str) -> IResult<&str, String> {
fold_many0(
alt((map(is_a("."), Cow::Borrowed), word)),
String::new,
|acc, chunk| acc + &chunk)(input)
|acc, chunk| acc + &chunk,
)(input)
}
/// Domain
@ -140,7 +148,10 @@ pub fn strict_domain(input: &str) -> IResult<&str, String> {
/// obs-domain = atom *("." atom) / domain-literal
/// ```
pub fn obs_domain(input: &str) -> IResult<&str, String> {
alt((map(separated_list1(tag("."), atom), |v| v.join(".")), domain_litteral))(input)
alt((
map(separated_list1(tag("."), atom), |v| v.join(".")),
domain_litteral,
))(input)
}
/// Domain literal
@ -152,15 +163,16 @@ fn domain_litteral(input: &str) -> IResult<&str, String> {
delimited(
pair(opt(cfws), tag("[")),
inner_domain_litteral,
pair(tag("]"), opt(cfws))
pair(tag("]"), opt(cfws)),
)(input)
}
fn inner_domain_litteral(input: &str) -> IResult<&str, String> {
let (input, (cvec, maybe_wsp)) = pair(many0(pair(opt(fws), satisfy(is_dtext))), opt(fws))(input)?;
let mut domain = cvec.iter().fold(
String::with_capacity(16),
|mut acc, (maybe_wsp, c)| {
let (input, (cvec, maybe_wsp)) =
pair(many0(pair(opt(fws), satisfy(is_dtext))), opt(fws))(input)?;
let mut domain = cvec
.iter()
.fold(String::with_capacity(16), |mut acc, (maybe_wsp, c)| {
if let Some(wsp) = maybe_wsp {
acc.push(*wsp);
}
@ -174,7 +186,6 @@ fn inner_domain_litteral(input: &str) -> IResult<&str, String> {
Ok((input, domain))
}
/// Tells whether `c` is accepted as a strict `dtext` character.
///
/// Accepted are the printable US-ASCII ranges `0x21..=0x5A` and `0x5E..=0x7E`
/// (every printable character except `[`, `\` and `]`), plus any non-ASCII
/// character.
fn is_strict_dtext(c: char) -> bool {
    // Non-ASCII characters are always let through.
    if !c.is_ascii() {
        return true;
    }
    matches!(c, '\x21'..='\x5A' | '\x5E'..='\x7E')
}
@ -188,7 +199,7 @@ fn is_strict_dtext(c: char) -> bool {
/// obs-dtext = obs-NO-WS-CTL / quoted-pair
/// ```
pub fn is_dtext(c: char) -> bool {
is_strict_dtext(c) || is_obs_no_ws_ctl(c)
is_strict_dtext(c) || is_obs_no_ws_ctl(c)
//@FIXME does not support quoted pair yet while RFC requires it
}
@ -198,89 +209,213 @@ mod tests {
#[test]
fn test_addr_spec() {
assert_eq!(addr_spec("alice@example.com"), Ok(("", AddrSpec{local_part: "alice".into(), domain: "example.com".into() })));
assert_eq!(
addr_spec("alice@example.com"),
Ok((
"",
AddrSpec {
local_part: "alice".into(),
domain: "example.com".into()
}
))
);
assert_eq!(addr_spec("jsmith@[192.168.2.1]"), Ok(("", AddrSpec{local_part: "jsmith".into(), domain: "192.168.2.1".into() })));
assert_eq!(addr_spec("jsmith@[IPv6:2001:db8::1]"), Ok(("", AddrSpec{local_part: "jsmith".into(), domain: "IPv6:2001:db8::1".into() })));
assert_eq!(
addr_spec("jsmith@[192.168.2.1]"),
Ok((
"",
AddrSpec {
local_part: "jsmith".into(),
domain: "192.168.2.1".into()
}
))
);
assert_eq!(
addr_spec("jsmith@[IPv6:2001:db8::1]"),
Ok((
"",
AddrSpec {
local_part: "jsmith".into(),
domain: "IPv6:2001:db8::1".into()
}
))
);
// UTF-8
assert_eq!(addr_spec("用户@例子.广告"), Ok(("", AddrSpec{local_part: "用户".into(), domain: "例子.广告".into()})));
assert_eq!(
addr_spec("用户@例子.广告"),
Ok((
"",
AddrSpec {
local_part: "用户".into(),
domain: "例子.广告".into()
}
))
);
// ASCII Edge cases
assert_eq!(
addr_spec("user+mailbox/department=shipping@example.com"),
Ok(("", AddrSpec{local_part: "user+mailbox/department=shipping".into(), domain: "example.com".into()})));
Ok((
"",
AddrSpec {
local_part: "user+mailbox/department=shipping".into(),
domain: "example.com".into()
}
))
);
assert_eq!(
addr_spec("!#$%&'*+-/=?^_`.{|}~@example.com"),
Ok(("", AddrSpec{local_part: "!#$%&'*+-/=?^_`.{|}~".into(), domain: "example.com".into()})));
Ok((
"",
AddrSpec {
local_part: "!#$%&'*+-/=?^_`.{|}~".into(),
domain: "example.com".into()
}
))
);
assert_eq!(
addr_spec(r#""Abc@def"@example.com"#),
Ok(("", AddrSpec{local_part: "Abc@def".into(), domain: "example.com".into()})));
assert_eq!(addr_spec(r#""Fred\ Bloggs"@example.com"#), Ok(("", AddrSpec{local_part: "Fred Bloggs".into(), domain: "example.com".into()})));
assert_eq!(addr_spec(r#""Joe.\\Blow"@example.com"#), Ok(("", AddrSpec{local_part: r#"Joe.\Blow"#.into(), domain: "example.com".into()})));
Ok((
"",
AddrSpec {
local_part: "Abc@def".into(),
domain: "example.com".into()
}
))
);
assert_eq!(
addr_spec(r#""Fred\ Bloggs"@example.com"#),
Ok((
"",
AddrSpec {
local_part: "Fred Bloggs".into(),
domain: "example.com".into()
}
))
);
assert_eq!(
addr_spec(r#""Joe.\\Blow"@example.com"#),
Ok((
"",
AddrSpec {
local_part: r#"Joe.\Blow"#.into(),
domain: "example.com".into()
}
))
);
}
#[test]
fn test_mailbox() {
assert_eq!(mailbox(r#""Joe Q. Public" <john.q.public@example.com>"#), Ok(("", MailboxRef {
name: Some("Joe Q. Public".into()),
addrspec: AddrSpec {
local_part: "john.q.public".into(),
domain: "example.com".into(),
}
})));
assert_eq!(
mailbox(r#""Joe Q. Public" <john.q.public@example.com>"#),
Ok((
"",
MailboxRef {
name: Some("Joe Q. Public".into()),
addrspec: AddrSpec {
local_part: "john.q.public".into(),
domain: "example.com".into(),
}
}
))
);
assert_eq!(mailbox(r#"Mary Smith <mary@x.test>"#), Ok(("", MailboxRef {
name: Some("Mary Smith".into()),
addrspec: AddrSpec {
local_part: "mary".into(),
domain: "x.test".into(),
}
})));
assert_eq!(
mailbox(r#"Mary Smith <mary@x.test>"#),
Ok((
"",
MailboxRef {
name: Some("Mary Smith".into()),
addrspec: AddrSpec {
local_part: "mary".into(),
domain: "x.test".into(),
}
}
))
);
assert_eq!(mailbox(r#"jdoe@example.org"#), Ok(("", MailboxRef {
name: None,
addrspec: AddrSpec {
local_part: "jdoe".into(),
domain: "example.org".into(),
}
})));
assert_eq!(
mailbox(r#"jdoe@example.org"#),
Ok((
"",
MailboxRef {
name: None,
addrspec: AddrSpec {
local_part: "jdoe".into(),
domain: "example.org".into(),
}
}
))
);
assert_eq!(mailbox(r#"Who? <one@y.test>"#), Ok(("", MailboxRef {
name: Some("Who?".into()),
addrspec: AddrSpec {
local_part: "one".into(),
domain: "y.test".into(),
}
})));
assert_eq!(
mailbox(r#"Who? <one@y.test>"#),
Ok((
"",
MailboxRef {
name: Some("Who?".into()),
addrspec: AddrSpec {
local_part: "one".into(),
domain: "y.test".into(),
}
}
))
);
assert_eq!(mailbox(r#"<boss@nil.test>"#), Ok(("", MailboxRef {
name: None,
addrspec: AddrSpec {
local_part: "boss".into(),
domain: "nil.test".into(),
}
})));
assert_eq!(
mailbox(r#"<boss@nil.test>"#),
Ok((
"",
MailboxRef {
name: None,
addrspec: AddrSpec {
local_part: "boss".into(),
domain: "nil.test".into(),
}
}
))
);
assert_eq!(mailbox(r#""Giant; \"Big\" Box" <sysservices@example.net>"#), Ok(("", MailboxRef {
name: Some(r#"Giant; "Big" Box"#.into()),
addrspec: AddrSpec {
local_part: "sysservices".into(),
domain: "example.net".into(),
}
})));
assert_eq!(
mailbox(r#""Giant; \"Big\" Box" <sysservices@example.net>"#),
Ok((
"",
MailboxRef {
name: Some(r#"Giant; "Big" Box"#.into()),
addrspec: AddrSpec {
local_part: "sysservices".into(),
domain: "example.net".into(),
}
}
))
);
}
#[test]
fn test_obs_domain_list() {
assert_eq!(obs_domain_list(r#"(shhh it's coming)
assert_eq!(
obs_domain_list(
r#"(shhh it's coming)
,
(not yet)
@33+4.com,,,,
,,,,
(again)
@example.com,@yep.com,@a,@b,,,@c"#),
Ok(("", vec!["33+4.com".into(), "example.com".into(), "yep.com".into(), "a".into(), "b".into(), "c".into()]))
@example.com,@yep.com,@a,@b,,,@c"#
),
Ok((
"",
vec![
"33+4.com".into(),
"example.com".into(),
"yep.com".into(),
"a".into(),
"b".into(),
"c".into()
]
))
);
}
@ -288,10 +423,13 @@ mod tests {
fn test_enron1() {
assert_eq!(
addr_spec("a..howard@enron.com"),
Ok(("", AddrSpec {
local_part: "a..howard".into(),
domain: "enron.com".into(),
}))
Ok((
"",
AddrSpec {
local_part: "a..howard".into(),
domain: "enron.com".into(),
}
))
);
}
@ -299,10 +437,13 @@ mod tests {
fn test_enron2() {
assert_eq!(
addr_spec(".nelson@enron.com"),
Ok(("", AddrSpec {
local_part: ".nelson".into(),
domain: "enron.com".into(),
}))
Ok((
"",
AddrSpec {
local_part: ".nelson".into(),
domain: "enron.com".into(),
}
))
);
}
@ -310,25 +451,30 @@ mod tests {
fn test_enron3() {
assert_eq!(
addr_spec("ecn2760.conf.@enron.com"),
Ok(("", AddrSpec {
local_part: "ecn2760.conf.".into(),
domain: "enron.com".into(),
}))
Ok((
"",
AddrSpec {
local_part: "ecn2760.conf.".into(),
domain: "enron.com".into(),
}
))
);
}
#[test]
fn test_enron4() {
assert_eq!(
mailbox(r#"<"mark_kopinski/intl/acim/americancentury"@americancentury.com@enron.com>"#),
Ok(("", MailboxRef {
name: None,
addrspec: AddrSpec {
local_part: "mark_kopinski/intl/acim/americancentury".into(),
domain: "americancentury.com".into(),
Ok((
"",
MailboxRef {
name: None,
addrspec: AddrSpec {
local_part: "mark_kopinski/intl/acim/americancentury".into(),
domain: "americancentury.com".into(),
}
}
}))
))
);
}
}

View file

@ -1,19 +1,19 @@
use std::borrow::Cow;
use nom::{
IResult,
branch::alt,
bytes::complete::{take_while1, tag},
bytes::complete::{tag, take_while1},
character::complete::space0,
combinator::{into, opt},
multi::{many0, many1, separated_list1},
sequence::tuple,
IResult,
};
use std::borrow::Cow;
use crate::error::IMFError;
use crate::fragments::lazy;
use crate::fragments::quoted::quoted_string;
use crate::fragments::whitespace::{fws, is_obs_no_ws_ctl};
use crate::fragments::words::{atom, is_vchar};
use crate::error::IMFError;
#[derive(Debug, PartialEq, Default)]
pub struct Unstructured(pub String);
@ -28,7 +28,7 @@ impl<'a> TryFrom<&'a lazy::Unstructured<'a>> for Unstructured {
unstructured(input.0)
.map(|(_, v)| Unstructured(v))
.map_err(|e| IMFError::Unstructured(e))
}
}
}
impl<'a> TryFrom<&'a lazy::PhraseList<'a>> for PhraseList {
@ -101,7 +101,13 @@ mod tests {
#[test]
fn test_phrase() {
assert_eq!(phrase("hello world"), Ok(("", "hello world".into())));
assert_eq!(phrase("salut \"le\" monde"), Ok(("", "salut le monde".into())));
assert_eq!(phrase("fin\r\n du\r\nmonde"), Ok(("\r\nmonde", "fin du".into())));
assert_eq!(
phrase("salut \"le\" monde"),
Ok(("", "salut le monde".into()))
);
assert_eq!(
phrase("fin\r\n du\r\nmonde"),
Ok(("\r\nmonde", "fin du".into()))
);
}
}

View file

@ -2,17 +2,17 @@
pub mod model;
// Generic
pub mod misc_token;
mod quoted;
pub mod whitespace;
mod words;
mod quoted;
pub mod misc_token;
// Header specific
mod mailbox;
mod address;
mod identification;
pub mod trace;
mod datetime;
pub mod lazy;
pub mod eager;
mod identification;
pub mod lazy;
mod mailbox;
pub mod section;
pub mod trace;

View file

@ -1,5 +1,5 @@
use chrono::{DateTime, FixedOffset};
use std::collections::HashMap;
use chrono::{DateTime,FixedOffset};
#[derive(Debug, PartialEq)]
pub struct AddrSpec {
@ -126,7 +126,7 @@ pub struct HeaderSection<'a> {
pub msg_id: Option<MessageId<'a>>,
pub in_reply_to: Vec<MessageId<'a>>,
pub references: Vec<MessageId<'a>>,
// 3.6.5. Informational Fields
pub subject: Option<String>,
pub comments: Vec<String>,

View file

@ -1,14 +1,14 @@
use nom::{
IResult,
branch::alt,
bytes::complete::tag,
character::complete::{anychar, satisfy},
combinator::opt,
multi::many0,
sequence::{pair, preceded},
IResult,
};
use crate::fragments::whitespace::{fws, cfws, is_obs_no_ws_ctl};
use crate::fragments::whitespace::{cfws, fws, is_obs_no_ws_ctl};
/// Quoted pair
///
@ -53,29 +53,29 @@ fn qcontent(input: &str) -> IResult<&str, char> {
/// [CFWS]
/// ```
pub fn quoted_string(input: &str) -> IResult<&str, String> {
let (input, _) = opt(cfws)(input)?;
let (input, _) = tag("\"")(input)?;
let (input, content) = many0(pair(opt(fws), qcontent))(input)?;
let (input, _) = opt(cfws)(input)?;
let (input, _) = tag("\"")(input)?;
let (input, content) = many0(pair(opt(fws), qcontent))(input)?;
// Rebuild string
let mut qstring = content.iter().fold(
String::with_capacity(16),
|mut acc, (maybe_wsp, c)| {
if let Some(wsp) = maybe_wsp {
acc.push(*wsp);
}
acc.push(*c);
acc
});
// Rebuild string
let mut qstring = content
.iter()
.fold(String::with_capacity(16), |mut acc, (maybe_wsp, c)| {
if let Some(wsp) = maybe_wsp {
acc.push(*wsp);
}
acc.push(*c);
acc
});
let (input, maybe_wsp) = opt(fws)(input)?;
if let Some(wsp) = maybe_wsp {
qstring.push(wsp);
}
let (input, maybe_wsp) = opt(fws)(input)?;
if let Some(wsp) = maybe_wsp {
qstring.push(wsp);
}
let (input, _) = tag("\"")(input)?;
let (input, _) = opt(cfws)(input)?;
Ok((input, qstring))
let (input, _) = tag("\"")(input)?;
let (input, _) = opt(cfws)(input)?;
Ok((input, qstring))
}
#[cfg(test)]
@ -84,7 +84,13 @@ mod tests {
#[test]
fn test_quoted_string() {
assert_eq!(quoted_string(" \"hello\\\"world\" "), Ok(("", "hello\"world".to_string())));
assert_eq!(quoted_string("\"hello\r\n world\""), Ok(("", "hello world".to_string())));
assert_eq!(
quoted_string(" \"hello\\\"world\" "),
Ok(("", "hello\"world".to_string()))
);
assert_eq!(
quoted_string("\"hello\r\n world\""),
Ok(("", "hello world".to_string()))
);
}
}

View file

@ -1,11 +1,11 @@
use std::collections::HashMap;
use chrono::{DateTime, FixedOffset};
use crate::fragments::model::{MailboxRef,MessageId, AddressRef};
use crate::fragments::misc_token::{Unstructured, PhraseList};
use crate::fragments::trace::ReceivedLog;
use crate::fragments::eager::Field;
use crate::fragments::lazy;
use crate::fragments::misc_token::{PhraseList, Unstructured};
use crate::fragments::model::{AddressRef, MailboxRef, MessageId};
use crate::fragments::trace::ReceivedLog;
use chrono::{DateTime, FixedOffset};
#[derive(Debug, PartialEq, Default)]
pub struct Section<'a> {
@ -26,7 +26,7 @@ pub struct Section<'a> {
pub msg_id: Option<&'a MessageId<'a>>,
pub in_reply_to: Vec<&'a MessageId<'a>>,
pub references: Vec<&'a MessageId<'a>>,
// 3.6.5. Informational Fields
pub subject: Option<&'a Unstructured>,
pub comments: Vec<&'a Unstructured>,
@ -48,7 +48,7 @@ pub struct Section<'a> {
//@FIXME min and max limits are not enforced,
// which may result in missing data or silently overridden data.
impl<'a> FromIterator<&'a Field<'a>> for Section<'a> {
fn from_iter<I: IntoIterator<Item=&'a Field<'a>>>(iter: I) -> Self {
fn from_iter<I: IntoIterator<Item = &'a Field<'a>>>(iter: I) -> Self {
let mut section = Section::default();
for field in iter {
match field {
@ -67,11 +67,12 @@ impl<'a> FromIterator<&'a Field<'a>> for Section<'a> {
Field::Keywords(v) => section.keywords.push(v),
Field::ReturnPath(v) => section.return_path.push(v),
Field::Received(v) => section.received.push(v),
Field::Optional(k, v) => { section.optional.insert(k, v); },
Field::Optional(k, v) => {
section.optional.insert(k, v);
}
Field::Rescue(v) => section.unparsed.push(v),
}
}
section
}
}

View file

@ -1,13 +1,13 @@
use crate::error::IMFError;
use crate::fragments::{datetime, lazy, mailbox, misc_token, model, whitespace};
use nom::{
IResult,
branch::alt,
bytes::complete::tag,
combinator::{map, opt, recognize},
multi::many0,
sequence::tuple,
IResult,
};
use crate::fragments::{datetime, mailbox, model, misc_token, whitespace, lazy};
use crate::error::IMFError;
#[derive(Debug, PartialEq)]
pub struct ReceivedLog<'a>(pub &'a str);
@ -29,15 +29,12 @@ pub fn received_body(input: &str) -> IResult<&str, &str> {
tag(";"),
datetime::section,
)),
|(tokens, _, _)| tokens,
|(tokens, _, _)| tokens,
)(input)
}
pub fn return_path_body(input: &str) -> IResult<&str, Option<model::MailboxRef>> {
alt((
map(mailbox::angle_addr, |a| Some(a)),
empty_path
))(input)
alt((map(mailbox::angle_addr, |a| Some(a)), empty_path))(input)
}
fn empty_path(input: &str) -> IResult<&str, Option<model::MailboxRef>> {
@ -57,11 +54,10 @@ fn received_tokens(input: &str) -> IResult<&str, &str> {
recognize(mailbox::angle_addr),
recognize(mailbox::addr_spec),
recognize(mailbox::obs_domain),
recognize(misc_token::word),
recognize(misc_token::word),
))(input)
}
#[cfg(test)]
mod tests {
use super::*;
@ -76,11 +72,14 @@ mod tests {
assert_eq!(
received_body(hdrs),
Ok(("", r#"from smtp.example.com ([10.83.2.2])
Ok((
"",
r#"from smtp.example.com ([10.83.2.2])
by server with LMTP
id xxxxxxxxx
(envelope-from <gitlab@example.com>)
for <me@example.com>"#))
for <me@example.com>"#
))
);
}
}

View file

@ -1,13 +1,13 @@
use crate::fragments::quoted::quoted_pair;
use nom::{
IResult,
branch::alt,
bytes::complete::tag,
character::complete::{crlf, satisfy, space0, space1},
combinator::{recognize, opt},
combinator::{opt, recognize},
multi::{many0, many1},
sequence::tuple,
IResult,
};
use crate::fragments::quoted::quoted_pair;
// --- whitespaces and comments
@ -35,12 +35,11 @@ pub fn fws(input: &str) -> IResult<&str, char> {
Ok((input, ' '))
}
fn fold_marker(input: &str) -> IResult<&str, &str> {
let (input, _) = space0(input)?;
let (input, _) = perm_crlf(input)?;
space1(input)
let (input, _) = space0(input)?;
let (input, _) = perm_crlf(input)?;
space1(input)
}
/// Folding White Space with Comment
///
/// Note: we drop the comments for now...
@ -76,7 +75,7 @@ pub fn comment(input: &str) -> IResult<&str, ()> {
}
pub fn ccontent(input: &str) -> IResult<&str, &str> {
alt((recognize(ctext), recognize(quoted_pair), recognize(comment)))(input)
alt((recognize(ctext), recognize(quoted_pair), recognize(comment)))(input)
}
pub fn ctext(input: &str) -> IResult<&str, char> {
@ -92,14 +91,17 @@ pub fn ctext(input: &str) -> IResult<&str, char> {
/// obs-ctext
///```
pub fn is_restr_ctext(c: char) -> bool {
    // Accepted set:
    //   %d33-39  ('!' ..= '\'')  -- stops right before '('
    //   %d42-91  ('*' ..= '[')   -- resumes right after ')', stops before '\'
    //   %d93-126 (']' ..= '~')   -- resumes right after '\'
    // plus every non-ASCII character.
    // In other words: printable US-ASCII minus '(', ')' and '\'.
    matches!(c, '\x21'..='\x27' | '\x2A'..='\x5B' | '\x5D'..='\x7E') || !c.is_ascii()
}
pub fn is_ctext(c: char) -> bool {
is_restr_ctext(c) || is_obs_no_ws_ctl(c)
}
/// US ASCII control characters without effect
/// US ASCII control characters without effect
///
/// ```abnf
/// obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
@ -109,7 +111,11 @@ pub fn is_ctext(c: char) -> bool {
/// %d127 ; white space characters
/// ```
pub fn is_obs_no_ws_ctl(c: char) -> bool {
    // RFC 5322 obs-NO-WS-CTL = %d1-8 / %d11 / %d12 / %d14-31 / %d127.
    // The excluded control codes are the whitespace ones: HT (%d9), LF (%d10)
    // and CR (%d13).
    //
    // The previous expression tested '\x0b' twice and never tested '\x0c'
    // (%d12, form feed), so form feed was wrongly rejected.
    matches!(
        c,
        '\x01'..='\x08' | '\x0b' | '\x0c' | '\x0e'..='\x1f' | '\x7f'
    )
}
#[cfg(test)]
@ -133,8 +139,20 @@ mod tests {
#[test]
fn test_cfws() {
assert_eq!(cfws("(A nice \\) chap) <pete(his account)@silly.test(his host)>"), Ok(("<pete(his account)@silly.test(his host)>", "(A nice \\) chap) ")));
assert_eq!(cfws("(Chris's host.)public.example>,"), Ok(("public.example>,", "(Chris's host.)")));
assert_eq!(cfws("(double (comment) is fun) wouch"), Ok(("wouch", "(double (comment) is fun) ")));
assert_eq!(
cfws("(A nice \\) chap) <pete(his account)@silly.test(his host)>"),
Ok((
"<pete(his account)@silly.test(his host)>",
"(A nice \\) chap) "
))
);
assert_eq!(
cfws("(Chris's host.)public.example>,"),
Ok(("public.example>,", "(Chris's host.)"))
);
assert_eq!(
cfws("(double (comment) is fun) wouch"),
Ok(("wouch", "(double (comment) is fun) "))
);
}
}

View file

@ -1,16 +1,15 @@
use crate::fragments::whitespace::cfws;
use nom::{
IResult,
bytes::complete::{tag, take_while1},
combinator::{recognize, opt},
combinator::{opt, recognize},
multi::many0,
sequence::{delimited, pair},
IResult,
};
use crate::fragments::whitespace::cfws;
/// VCHAR definition
pub fn is_vchar(c: char) -> bool {
    // Printable US-ASCII (%d33-126, i.e. '!' ..= '~'); space and control
    // characters are rejected. Any non-ASCII character is accepted as well.
    matches!(c, '\x21'..='\x7E') || !c.is_ascii()
}
/// Sequence of visible chars with the UTF-8 extension
@ -23,7 +22,7 @@ pub fn is_vchar(c: char) -> bool {
///```
#[allow(dead_code)]
pub fn vchar_seq(input: &str) -> IResult<&str, &str> {
take_while1(is_vchar)(input)
take_while1(is_vchar)(input)
}
/// Atom allowed characters
@ -31,7 +30,7 @@ fn is_atext(c: char) -> bool {
c.is_ascii_alphanumeric() || "!#$%&'*+-/=?^_`{|}~".contains(c) || !c.is_ascii()
}
/// Atom
/// Atom
///
/// `[CFWS] 1*atext [CFWS]`
pub fn atom(input: &str) -> IResult<&str, &str> {
@ -42,7 +41,10 @@ pub fn atom(input: &str) -> IResult<&str, &str> {
///
/// `1*atext *("." 1*atext)`
pub fn dot_atom_text(input: &str) -> IResult<&str, &str> {
recognize(pair(take_while1(is_atext), many0(pair(tag("."), take_while1(is_atext)))))(input)
recognize(pair(
take_while1(is_atext),
many0(pair(tag("."), take_while1(is_atext))),
))(input)
}
/// dot-atom
@ -54,13 +56,19 @@ pub fn dot_atom(input: &str) -> IResult<&str, &str> {
#[allow(dead_code)]
pub fn is_special(c: char) -> bool {
    // The thirteen characters tested here: ( ) < > [ ] : ; @ \ , . "
    // A single pattern replaces the long `c == x || c == y || ...` chain.
    matches!(
        c,
        '(' | ')' | '<' | '>' | '[' | ']' | ':' | ';' | '@' | '\\' | ',' | '.' | '"'
    )
}
#[cfg(test)]
@ -84,16 +92,25 @@ mod tests {
#[test]
fn test_atom() {
assert_eq!(atom("(skip) imf_codec (hidden) aerogramme"), Ok(("aerogramme", "imf_codec")));
assert_eq!(
atom("(skip) imf_codec (hidden) aerogramme"),
Ok(("aerogramme", "imf_codec"))
);
}
#[test]
fn test_dot_atom_text() {
assert_eq!(dot_atom_text("quentin.dufour.io abcdef"), Ok((" abcdef", "quentin.dufour.io")));
assert_eq!(
dot_atom_text("quentin.dufour.io abcdef"),
Ok((" abcdef", "quentin.dufour.io"))
);
}
#[test]
fn test_dot_atom() {
assert_eq!(dot_atom(" (skip) quentin.dufour.io abcdef"), Ok(("abcdef", "quentin.dufour.io")));
assert_eq!(
dot_atom(" (skip) quentin.dufour.io abcdef"),
Ok(("abcdef", "quentin.dufour.io"))
);
}
}

View file

@ -1,16 +1,16 @@
use nom::{
IResult,
character::complete::space1,
bytes::complete::is_not,
character::complete::space1,
combinator::{all_consuming, recognize},
multi::{many0, many1},
sequence::{pair, tuple},
IResult,
};
use crate::error::IMFError;
use crate::fragments::whitespace;
use crate::multipass::guess_charset;
use crate::multipass::field_lazy;
use crate::multipass::guess_charset;
#[derive(Debug, PartialEq)]
pub struct Parsed<'a> {
@ -21,7 +21,10 @@ pub struct Parsed<'a> {
pub fn new<'a>(gcha: &'a guess_charset::Parsed<'a>) -> Result<Parsed<'a>, IMFError<'a>> {
all_consuming(many0(foldable_line))(&gcha.header)
.map_err(|e| IMFError::ExtractFields(e))
.map(|(_, fields)| Parsed { fields, body: gcha.body })
.map(|(_, fields)| Parsed {
fields,
body: gcha.body,
})
}
impl<'a> Parsed<'a> {
@ -35,11 +38,12 @@ impl<'a> Parsed<'a> {
/// ```
fn foldable_line(input: &str) -> IResult<&str, &str> {
recognize(tuple((
is_not("\r\n"),
is_not("\r\n"),
many0(pair(
many1(pair(whitespace::perm_crlf, space1)),
is_not("\r\n"))),
whitespace::perm_crlf
many1(pair(whitespace::perm_crlf, space1)),
is_not("\r\n"),
)),
whitespace::perm_crlf,
)))(input)
}

View file

@ -10,7 +10,8 @@ pub struct Parsed<'a> {
pub fn new<'a>(p: &'a field_lazy::Parsed<'a>) -> Parsed<'a> {
Parsed {
fields: p.fields
fields: p
.fields
.iter()
.filter_map(|entry| entry.try_into().ok())
.collect(),
@ -33,47 +34,56 @@ mod tests {
#[test]
fn test_field_body() {
assert_eq!(new(&field_lazy::Parsed {
fields: vec![
lazy::Field::From(lazy::MailboxList("hello@world.com,\r\n\talice@wonderlands.com\r\n")),
lazy::Field::Date(lazy::DateTime("12 Mar 1997 07:33:25 Z\r\n")),
],
body: b"Hello world!",
}),
Parsed {
fields: vec![
eager::Field::From(vec![
model::MailboxRef {
name: None,
addrspec: model::AddrSpec {
local_part: "hello".into(),
domain: "world.com".into()
}
},
model::MailboxRef {
name: None,
addrspec: model::AddrSpec {
local_part: "alice".into(),
domain: "wonderlands.com".into()
}
},
]),
eager::Field::Date(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(1997, 03, 12, 7, 33, 25)
.unwrap()
),
],
body: b"Hello world!",
});
assert_eq!(
new(&field_lazy::Parsed {
fields: vec![
lazy::Field::From(lazy::MailboxList(
"hello@world.com,\r\n\talice@wonderlands.com\r\n"
)),
lazy::Field::Date(lazy::DateTime("12 Mar 1997 07:33:25 Z\r\n")),
],
body: b"Hello world!",
}),
Parsed {
fields: vec![
eager::Field::From(vec![
model::MailboxRef {
name: None,
addrspec: model::AddrSpec {
local_part: "hello".into(),
domain: "world.com".into()
}
},
model::MailboxRef {
name: None,
addrspec: model::AddrSpec {
local_part: "alice".into(),
domain: "wonderlands.com".into()
}
},
]),
eager::Field::Date(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(1997, 03, 12, 7, 33, 25)
.unwrap()
),
],
body: b"Hello world!",
}
);
}
use crate::multipass::extract_fields;
use crate::fragments::misc_token;
use crate::multipass::extract_fields;
fn lazy_eager<F>(input: &str, func: F)
where F: Fn(&eager::Field) {
let field = extract_fields::Parsed { fields: vec![input], body: b""};
where
F: Fn(&eager::Field),
{
let field = extract_fields::Parsed {
fields: vec![input],
body: b"",
};
let lazy = field_lazy::new(&field);
let eager = new(&lazy);
func(eager.fields.first().unwrap())
@ -83,16 +93,18 @@ mod tests {
fn test_from() {
lazy_eager(
"From: \"Joe Q. Public\" <john.q.public@example.com>\r\n",
|from| assert_eq!(
from,
&eager::Field::From(vec![model::MailboxRef {
name: Some("Joe Q. Public".into()),
addrspec: model::AddrSpec {
local_part: "john.q.public".into(),
domain: "example.com".into(),
}
}])
)
|from| {
assert_eq!(
from,
&eager::Field::From(vec![model::MailboxRef {
name: Some("Joe Q. Public".into()),
addrspec: model::AddrSpec {
local_part: "john.q.public".into(),
domain: "example.com".into(),
}
}])
)
},
);
}
@ -100,16 +112,18 @@ mod tests {
fn test_sender() {
lazy_eager(
"Sender: Michael Jones <mjones@machine.example>\r\n",
|sender| assert_eq!(
sender,
&eager::Field::Sender(model::MailboxRef {
name: Some("Michael Jones".into()),
addrspec: model::AddrSpec {
local_part: "mjones".into(),
domain: "machine.example".into(),
},
})
)
|sender| {
assert_eq!(
sender,
&eager::Field::Sender(model::MailboxRef {
name: Some("Michael Jones".into()),
addrspec: model::AddrSpec {
local_part: "mjones".into(),
domain: "machine.example".into(),
},
})
)
},
);
}
@ -117,18 +131,18 @@ mod tests {
fn test_reply_to() {
lazy_eager(
"Reply-To: \"Mary Smith: Personal Account\" <smith@home.example>\r\n",
|reply_to| assert_eq!(
reply_to,
&eager::Field::ReplyTo(
vec![model::AddressRef::Single(model::MailboxRef {
|reply_to| {
assert_eq!(
reply_to,
&eager::Field::ReplyTo(vec![model::AddressRef::Single(model::MailboxRef {
name: Some("Mary Smith: Personal Account".into()),
addrspec: model::AddrSpec {
local_part: "smith".into(),
domain: "home.example".into(),
},
})]
})])
)
)
},
)
}
@ -136,177 +150,187 @@ mod tests {
fn test_to() {
lazy_eager(
"To: A Group:Ed Jones <c@a.test>,joe@where.test,John <jdoe@one.test>;\r\n",
|to| assert_eq!(
to,
&eager::Field::To(vec![model::AddressRef::Many(model::GroupRef {
name: "A Group".into(),
participants: vec![
model::MailboxRef {
name: Some("Ed Jones".into()),
addrspec: model::AddrSpec { local_part: "c".into(), domain: "a.test".into() },
},
model::MailboxRef {
name: None,
addrspec: model::AddrSpec { local_part: "joe".into(), domain: "where.test".into() },
},
model::MailboxRef {
name: Some("John".into()),
addrspec: model::AddrSpec { local_part: "jdoe".into(), domain: "one.test".into() },
},
]
})])
)
|to| {
assert_eq!(
to,
&eager::Field::To(vec![model::AddressRef::Many(model::GroupRef {
name: "A Group".into(),
participants: vec![
model::MailboxRef {
name: Some("Ed Jones".into()),
addrspec: model::AddrSpec {
local_part: "c".into(),
domain: "a.test".into()
},
},
model::MailboxRef {
name: None,
addrspec: model::AddrSpec {
local_part: "joe".into(),
domain: "where.test".into()
},
},
model::MailboxRef {
name: Some("John".into()),
addrspec: model::AddrSpec {
local_part: "jdoe".into(),
domain: "one.test".into()
},
},
]
})])
)
},
)
}
#[test]
fn test_cc() {
lazy_eager(
"Cc: Undisclosed recipients:;\r\n",
|cc| assert_eq!(
cc,
&eager::Field::Cc(vec![model::AddressRef::Many(model::GroupRef {
name: "Undisclosed recipients".into(),
participants: vec![],
})]),
lazy_eager("Cc: Undisclosed recipients:;\r\n", |cc| {
assert_eq!(
cc,
&eager::Field::Cc(vec![model::AddressRef::Many(model::GroupRef {
name: "Undisclosed recipients".into(),
participants: vec![],
})]),
)
)
})
}
#[test]
fn test_bcc() {
lazy_eager(
"Bcc: (empty)\r\n",
|bcc| assert_eq!(
bcc,
&eager::Field::Bcc(vec![]),
)
);
lazy_eager("Bcc: (empty)\r\n", |bcc| {
assert_eq!(bcc, &eager::Field::Bcc(vec![]),)
});
lazy_eager(
"Bcc: \r\n",
|bcc| assert_eq!(
bcc,
&eager::Field::Bcc(vec![]),
)
);
lazy_eager("Bcc: \r\n", |bcc| {
assert_eq!(bcc, &eager::Field::Bcc(vec![]),)
});
}
#[test]
fn test_message_id() {
lazy_eager(
"Message-ID: <310@[127.0.0.1]>\r\n",
|msg_id| assert_eq!(
msg_id,
&eager::Field::MessageID(
model::MessageId { left: "310", right: "127.0.0.1" },
)
lazy_eager("Message-ID: <310@[127.0.0.1]>\r\n", |msg_id| {
assert_eq!(
msg_id,
&eager::Field::MessageID(model::MessageId {
left: "310",
right: "127.0.0.1"
},)
)
)
})
}
#[test]
fn test_in_reply_to() {
lazy_eager(
"In-Reply-To: <a@b> <c@example.com>\r\n",
|irt| assert_eq!(
lazy_eager("In-Reply-To: <a@b> <c@example.com>\r\n", |irt| {
assert_eq!(
irt,
&eager::Field::InReplyTo(
vec![
model::MessageId { left: "a", right: "b" },
model::MessageId { left: "c", right: "example.com" },
]
)
&eager::Field::InReplyTo(vec![
model::MessageId {
left: "a",
right: "b"
},
model::MessageId {
left: "c",
right: "example.com"
},
])
)
)
})
}
#[test]
fn test_references() {
lazy_eager(
"References: <1234@local.machine.example> <3456@example.net>\r\n",
|refer| assert_eq!(
refer,
&eager::Field::References(
vec![
model::MessageId { left: "1234", right: "local.machine.example" },
model::MessageId { left: "3456", right: "example.net" },
]
lazy_eager(
"References: <1234@local.machine.example> <3456@example.net>\r\n",
|refer| {
assert_eq!(
refer,
&eager::Field::References(vec![
model::MessageId {
left: "1234",
right: "local.machine.example"
},
model::MessageId {
left: "3456",
right: "example.net"
},
])
)
)
},
)
}
#[test]
fn test_subject() {
lazy_eager(
"Subject: Aérogramme\r\n",
|subject| assert_eq!(
subject,
&eager::Field::Subject(misc_token::Unstructured("Aérogramme".into()))
lazy_eager("Subject: Aérogramme\r\n", |subject| {
assert_eq!(
subject,
&eager::Field::Subject(misc_token::Unstructured("Aérogramme".into()))
)
)
})
}
#[test]
fn test_comments() {
lazy_eager(
"Comments: 😛 easter egg!\r\n",
|comments| assert_eq!(
comments,
&eager::Field::Comments(misc_token::Unstructured("😛 easter egg!".into())),
lazy_eager("Comments: 😛 easter egg!\r\n", |comments| {
assert_eq!(
comments,
&eager::Field::Comments(misc_token::Unstructured("😛 easter egg!".into())),
)
)
})
}
#[test]
fn test_keywords() {
lazy_eager(
"Keywords: fantasque, farfelu, fanfreluche\r\n",
|keywords| assert_eq!(
keywords,
&eager::Field::Keywords(misc_token::PhraseList(vec![
"fantasque".into(),
"farfelu".into(),
"fanfreluche".into()
]))
)
lazy_eager(
"Keywords: fantasque, farfelu, fanfreluche\r\n",
|keywords| {
assert_eq!(
keywords,
&eager::Field::Keywords(misc_token::PhraseList(vec![
"fantasque".into(),
"farfelu".into(),
"fanfreluche".into()
]))
)
},
)
}
//@FIXME non-ported tests:
/*
#[test]
fn test_invalid_field_name() {
assert!(known_field("Unknown: unknown\r\n").is_err());
}
#[test]
fn test_invalid_field_name() {
assert!(known_field("Unknown: unknown\r\n").is_err());
}
#[test]
fn test_rescue_field() {
assert_eq!(
rescue_field("Héron: élan\r\n\tnoël: test\r\nFrom: ..."),
Ok(("From: ...", Field::Rescue("Héron: élan\r\n\tnoël: test"))),
);
}
#[test]
fn test_rescue_field() {
assert_eq!(
rescue_field("Héron: élan\r\n\tnoël: test\r\nFrom: ..."),
Ok(("From: ...", Field::Rescue("Héron: élan\r\n\tnoël: test"))),
);
}
#[test]
fn test_wrong_fields() {
let fullmail = r#"Return-Path: xoxo
From: !!!!
#[test]
fn test_wrong_fields() {
let fullmail = r#"Return-Path: xoxo
From: !!!!
Hello world"#;
assert_eq!(
section(fullmail),
Ok(("Hello world", HeaderSection {
bad_fields: vec![
Field::ReturnPath(FieldBody::Failed("xoxo")),
Field::From(FieldBody::Failed("!!!!")),
],
..Default::default()
}))
);
}
*/
Hello world"#;
assert_eq!(
section(fullmail),
Ok(("Hello world", HeaderSection {
bad_fields: vec![
Field::ReturnPath(FieldBody::Failed("xoxo")),
Field::From(FieldBody::Failed("!!!!")),
],
..Default::default()
}))
);
}
*/
}

View file

@ -27,19 +27,23 @@ mod tests {
#[test]
fn test_field_name() {
assert_eq!(new(&extract_fields::Parsed {
fields: vec![
"From: hello@world.com,\r\n\talice@wonderlands.com\r\n",
"Date: 12 Mar 1997 07:33:25 Z\r\n",
],
body: b"Hello world!",
}),
Parsed {
fields: vec![
lazy::Field::From(lazy::MailboxList("hello@world.com,\r\n\talice@wonderlands.com\r\n")),
lazy::Field::Date(lazy::DateTime("12 Mar 1997 07:33:25 Z\r\n")),
],
body: b"Hello world!",
});
assert_eq!(
new(&extract_fields::Parsed {
fields: vec![
"From: hello@world.com,\r\n\talice@wonderlands.com\r\n",
"Date: 12 Mar 1997 07:33:25 Z\r\n",
],
body: b"Hello world!",
}),
Parsed {
fields: vec![
lazy::Field::From(lazy::MailboxList(
"hello@world.com,\r\n\talice@wonderlands.com\r\n"
)),
lazy::Field::Date(lazy::DateTime("12 Mar 1997 07:33:25 Z\r\n")),
],
body: b"Hello world!",
}
);
}
}

View file

@ -1,10 +1,10 @@
use std::borrow::Cow;
use chardetng::EncodingDetector;
use encoding_rs::Encoding;
use std::borrow::Cow;
use crate::error::IMFError;
use crate::multipass::segment;
use crate::multipass::extract_fields;
use crate::multipass::segment;
#[derive(Debug, PartialEq)]
pub struct Parsed<'a> {
@ -26,11 +26,11 @@ pub fn new<'a>(seg: &'a segment::Parsed<'a>) -> Parsed<'a> {
// Get encoding
let enc: &Encoding = detector.guess(NO_TLD, ALLOW_UTF8);
let (header, encoding, malformed) = enc.decode(&seg.header);
Parsed {
header,
encoding,
malformed,
body: seg.body
Parsed {
header,
encoding,
malformed,
body: seg.body,
}
}
@ -48,15 +48,15 @@ mod tests {
fn test_charset() {
assert_eq!(
new(&segment::Parsed {
body: b"Hello world!",
header: b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n",
}
),
body: b"Hello world!",
header: b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n",
}),
Parsed {
header: "From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n".into(),
encoding: encoding_rs::UTF_8,
malformed: false,
body: b"Hello world!",
});
}
);
}
}

View file

@ -23,60 +23,64 @@ mod tests {
#[test]
fn test_section() {
assert_eq!(new(&field_eager::Parsed {
fields: vec![
eager::Field::From(vec![
model::MailboxRef {
name: None,
addrspec: model::AddrSpec {
local_part: "hello".into(),
domain: "world.com".into()
}
},
model::MailboxRef {
name: None,
addrspec: model::AddrSpec {
local_part: "alice".into(),
domain: "wonderlands.com".into()
}
},
]),
eager::Field::Date(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(1997, 03, 12, 7, 33, 25)
.unwrap()
),
],
body: b"Hello world!",
}),
Parsed {
fields: Section {
from: vec![
&model::MailboxRef {
name: None,
addrspec: model::AddrSpec {
local_part: "hello".into(),
domain: "world.com".into()
}
},
&model::MailboxRef {
name: None,
addrspec: model::AddrSpec {
local_part: "alice".into(),
domain: "wonderlands.com".into()
}
},
assert_eq!(
new(&field_eager::Parsed {
fields: vec![
eager::Field::From(vec![
model::MailboxRef {
name: None,
addrspec: model::AddrSpec {
local_part: "hello".into(),
domain: "world.com".into()
}
},
model::MailboxRef {
name: None,
addrspec: model::AddrSpec {
local_part: "alice".into(),
domain: "wonderlands.com".into()
}
},
]),
eager::Field::Date(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(1997, 03, 12, 7, 33, 25)
.unwrap()
),
],
body: b"Hello world!",
}),
Parsed {
fields: Section {
from: vec![
&model::MailboxRef {
name: None,
addrspec: model::AddrSpec {
local_part: "hello".into(),
domain: "world.com".into()
}
},
&model::MailboxRef {
name: None,
addrspec: model::AddrSpec {
local_part: "alice".into(),
domain: "wonderlands.com".into()
}
},
],
date: Some(&FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(1997, 03, 12, 7, 33, 25)
.unwrap()),
date: Some(
&FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(1997, 03, 12, 7, 33, 25)
.unwrap()
),
..Default::default()
},
body: b"Hello world!",
});
..Default::default()
},
body: b"Hello world!",
}
);
}
}

View file

@ -1,6 +1,6 @@
pub mod segment;
pub mod guess_charset;
pub mod extract_fields;
pub mod field_lazy;
pub mod field_eager;
pub mod field_lazy;
pub mod guess_charset;
pub mod header_section;
pub mod segment;

View file

@ -1,14 +1,14 @@
use nom::{
IResult,
branch::alt,
bytes::complete::{is_not, tag},
combinator::recognize,
sequence::{pair, terminated},
multi::many0,
sequence::{pair, terminated},
IResult,
};
use crate::multipass::guess_charset;
use crate::error::IMFError;
use crate::multipass::guess_charset;
#[derive(Debug, PartialEq)]
pub struct Parsed<'a> {
@ -21,10 +21,7 @@ const LF: u8 = 0x0A;
const CRLF: &[u8] = &[CR, LF];
pub fn new<'a>(buffer: &'a [u8]) -> Result<Parsed<'a>, IMFError<'a>> {
terminated(
recognize(many0(line)),
obs_crlf
)(buffer)
terminated(recognize(many0(line)), obs_crlf)(buffer)
.map_err(|e| IMFError::Segment(e))
.map(|(body, header)| Parsed { header, body })
}
@ -36,10 +33,7 @@ impl<'a> Parsed<'a> {
}
fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
pair(
is_not(CRLF),
obs_crlf,
)(input)
pair(is_not(CRLF), obs_crlf)(input)
}
fn obs_crlf(input: &[u8]) -> IResult<&[u8], &[u8]> {
@ -56,7 +50,7 @@ mod tests {
new(&b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n\r\nHello world!"[..]),
Ok(Parsed {
header: b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n",
body: b"Hello world!",
body: b"Hello world!",
})
);
}

View file

@ -1,10 +1,12 @@
use imf_codec::multipass::segment;
use imf_codec::fragments::section::Section;
use imf_codec::multipass::segment;
use std::io;
use std::io::Read;
fn parser<'a, F>(input: &'a [u8], func: F) -> ()
where F: FnOnce(&Section) -> () {
where
F: FnOnce(&Section) -> (),
{
let seg = segment::new(input).unwrap();
let charset = seg.charset();
let fields = charset.fields().unwrap();

View file

@ -1,13 +1,15 @@
use imf_codec::fragments::section;
use imf_codec::multipass;
use std::collections::HashSet;
use std::path::PathBuf;
use std::fs::File;
use std::io::Read;
use imf_codec::multipass;
use imf_codec::fragments::section;
use std::path::PathBuf;
use walkdir::WalkDir;
fn parser<'a, F>(input: &'a [u8], func: F) -> ()
where F: FnOnce(&section::Section) -> () {
where
F: FnOnce(&section::Section) -> (),
{
let seg = multipass::segment::new(input).unwrap();
let charset = seg.charset();
let fields = charset.fields().unwrap();
@ -27,51 +29,44 @@ fn test_enron500k() {
//d.push("williams-w3/");
let known_bad_fields = HashSet::from([
"white-s/calendar/113.", // To: east <7..>
"skilling-j/inbox/223.", // From: pep <performance.>
"white-s/calendar/113.", // To: east <7..>
"skilling-j/inbox/223.", // From: pep <performance.>
"jones-t/all_documents/9806.", // To: <"tibor.vizkelety":@enron.com>
"jones-t/notes_inbox/3303.", // To: <"tibor.vizkelety":@enron.com>
"lokey-t/calendar/33.", // A second Date entry for the calendar containing
// Date: Monday, March 12
"zipper-a/inbox/199.", // To: e-mail <mari.>
"dasovich-j/deleted_items/128.", // To: f62489 <g>
"dasovich-j/all_documents/677.", // To: w/assts <govt.>
"dasovich-j/all_documents/8984.", // To: <"ft.com.users":@enron.com>
"dasovich-j/all_documents/3514.", // To: <"ft.com.users":@enron.com>
"dasovich-j/all_documents/4467.", // To: <"ft.com.users":@enron.com>
"dasovich-j/all_documents/578.", // To: w/assts <govt.>
"dasovich-j/all_documents/3148.", // To: <"economist.com.readers":@enron.com>
"dasovich-j/all_documents/9953.", // To: <"economist.com.reader":@enron.com>
"dasovich-j/risk_analytics/3.", // To: w/assts <govt.>
"dasovich-j/notes_inbox/5391.", // To: <"ft.com.users":@enron.com>
"dasovich-j/notes_inbox/4952.", // To: <"economist.com.reader":@enron.com>
"dasovich-j/notes_inbox/2386.", // To: <"ft.com.users":@enron.com>
"dasovich-j/notes_inbox/1706.", // To: <"ft.com.users":@enron.com>
"dasovich-j/notes_inbox/1489.", // To: <"economist.com.readers":@enron.com>
"dasovich-j/notes_inbox/5.", // To: w/assts <govt.>
"kaminski-v/sites/19.", // To: <"the.desk":@enron.com>
"kaminski-v/sites/1.", // To: <"the.desk":@enron.com>
"kaminski-v/discussion_threads/5082.", // To: <"ft.com.users":@enron.com>
"kaminski-v/discussion_threads/4046.", // To: <"the.desk":@enron.com>
"kaminski-v/discussion_threads/4187.", // To: <"the.desk":@enron.com>
"jones-t/notes_inbox/3303.", // To: <"tibor.vizkelety":@enron.com>
"lokey-t/calendar/33.", // A second Date entry for the calendar containing
// Date: Monday, March 12
"zipper-a/inbox/199.", // To: e-mail <mari.>
"dasovich-j/deleted_items/128.", // To: f62489 <g>
"dasovich-j/all_documents/677.", // To: w/assts <govt.>
"dasovich-j/all_documents/8984.", // To: <"ft.com.users":@enron.com>
"dasovich-j/all_documents/3514.", // To: <"ft.com.users":@enron.com>
"dasovich-j/all_documents/4467.", // To: <"ft.com.users":@enron.com>
"dasovich-j/all_documents/578.", // To: w/assts <govt.>
"dasovich-j/all_documents/3148.", // To: <"economist.com.readers":@enron.com>
"dasovich-j/all_documents/9953.", // To: <"economist.com.reader":@enron.com>
"dasovich-j/risk_analytics/3.", // To: w/assts <govt.>
"dasovich-j/notes_inbox/5391.", // To: <"ft.com.users":@enron.com>
"dasovich-j/notes_inbox/4952.", // To: <"economist.com.reader":@enron.com>
"dasovich-j/notes_inbox/2386.", // To: <"ft.com.users":@enron.com>
"dasovich-j/notes_inbox/1706.", // To: <"ft.com.users":@enron.com>
"dasovich-j/notes_inbox/1489.", // To: <"economist.com.readers":@enron.com>
"dasovich-j/notes_inbox/5.", // To: w/assts <govt.>
"kaminski-v/sites/19.", // To: <"the.desk":@enron.com>
"kaminski-v/sites/1.", // To: <"the.desk":@enron.com>
"kaminski-v/discussion_threads/5082.", // To: <"ft.com.users":@enron.com>
"kaminski-v/discussion_threads/4046.", // To: <"the.desk":@enron.com>
"kaminski-v/discussion_threads/4187.", // To: <"the.desk":@enron.com>
"kaminski-v/discussion_threads/8068.", // To: cats <breaktkhrough.>, risk <breakthrough.>, leaders <breaktkhrough.>
"kaminski-v/discussion_threads/7980.", // To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
"kaminski-v/all_documents/5970.", // To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
"kaminski-v/all_documents/5838.", // To + Cc: dogs <breakthrough.>, breakthrough.adm@enron.com, breakthrough.adm@enron.com,\r\n\tbreakthrough.adm@enron.com
"kaminski-v/all_documents/10070.", // To: <"ft.com.users":@enron.com>
"kaminski-v/all_documents/92.", // To: <"the.desk":@enron.com>
"kaminski-v/all_documents/276.", // To: <"the.desk":@enron.com>
"kaminski-v/technical/1.", // To: <"the.desk":@enron.com>
"kaminski-v/technical/7.", // To: <"the.desk":@enron.com>
"kaminski-v/all_documents/92.", // To: <"the.desk":@enron.com>
"kaminski-v/all_documents/276.", // To: <"the.desk":@enron.com>
"kaminski-v/technical/1.", // To: <"the.desk":@enron.com>
"kaminski-v/technical/7.", // To: <"the.desk":@enron.com>
"kaminski-v/notes_inbox/140.", // To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
"kaminski-v/notes_inbox/95.", // To + CC failed: cats <breaktkhrough.>, risk <breakthrough.>, leaders <breaktkhrough.>
"kean-s/archiving/untitled/1232.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
"kean-s/archiving/untitled/1688.", // To: w/assts <govt.>
"kean-s/sent/198.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
@ -79,11 +74,10 @@ fn test_enron500k() {
"kean-s/discussion_threads/950.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
"kean-s/discussion_threads/577.", // To: w/assts <govt.>
"kean-s/calendar/untitled/1096.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
"kean-s/calendar/untitled/640.", // To: w/assts <govt.>
"kean-s/all_documents/640.", // To: w/assts <govt.>
"kean-s/all_documents/1095.", // To: w/assts <govt.>
"kean-s/attachments/2030.", // To: w/assts <govt.>
"kean-s/calendar/untitled/640.", // To: w/assts <govt.>
"kean-s/all_documents/640.", // To: w/assts <govt.>
"kean-s/all_documents/1095.", // To: w/assts <govt.>
"kean-s/attachments/2030.", // To: w/assts <govt.>
"williams-w3/operations_committee_isas/10.", // To: z34655 <m>
]);
@ -92,7 +86,10 @@ fn test_enron500k() {
]);
let mut i = 0;
for entry in WalkDir::new(d.as_path()).into_iter().filter_map(|file| file.ok()) {
for entry in WalkDir::new(d.as_path())
.into_iter()
.filter_map(|file| file.ok())
{
if entry.metadata().unwrap().is_file() {
let mail_path = entry.path();
let suffix = &mail_path.to_str().unwrap()[prefix_sz..];
@ -127,6 +124,6 @@ fn test_enron500k() {
println!("Analyzed emails: {}", i);
}
})
}
}
}
}

View file

@ -1,10 +1,12 @@
use chrono::{FixedOffset, TimeZone};
use std::collections::HashMap;
use imf_codec::fragments::{misc_token, model, section, trace};
use imf_codec::multipass;
use imf_codec::fragments::{model, misc_token, trace, section};
use std::collections::HashMap;
fn parser<'a, F>(input: &'a [u8], func: F) -> ()
where F: FnOnce(&section::Section) -> () {
where
F: FnOnce(&section::Section) -> (),
{
let seg = multipass::segment::new(input).unwrap();
let charset = seg.charset();
let fields = charset.fields().unwrap();
@ -48,29 +50,35 @@ References: <1234@local.machine.example>
Unknown: unknown
This is a reply to your hello.
"#.as_bytes();
parser(fullmail, |parsed_section|
"#
.as_bytes();
parser(fullmail, |parsed_section| {
assert_eq!(
parsed_section,
&section::Section {
date: Some(&FixedOffset::east_opt(2 * 3600)
.unwrap()
.with_ymd_and_hms(2023, 06, 13, 10, 01, 10)
.unwrap()),
date: Some(
&FixedOffset::east_opt(2 * 3600)
.unwrap()
.with_ymd_and_hms(2023, 06, 13, 10, 01, 10)
.unwrap()
),
from: vec![&model::MailboxRef {
name: Some("Mary Smith".into()),
addrspec: model::AddrSpec {
local_part: "mary".into(),
domain: "example.net".into(),
from: vec![
&model::MailboxRef {
name: Some("Mary Smith".into()),
addrspec: model::AddrSpec {
local_part: "mary".into(),
domain: "example.net".into(),
}
},
&model::MailboxRef {
name: Some("Alan".into()),
addrspec: model::AddrSpec {
local_part: "alan".into(),
domain: "example".into(),
}
}
}, &model::MailboxRef {
name: Some("Alan".into()),
addrspec: model::AddrSpec {
local_part: "alan".into(),
domain: "example".into(),
}
}],
],
sender: Some(&model::MailboxRef {
name: None,
@ -106,33 +114,41 @@ This is a reply to your hello.
bcc: vec![],
msg_id: Some(&model::MessageId { left: "3456", right: "example.net" }),
in_reply_to: vec![&model::MessageId { left: "1234", right: "local.machine.example" }],
references: vec![&model::MessageId { left: "1234", right: "local.machine.example" }],
msg_id: Some(&model::MessageId {
left: "3456",
right: "example.net"
}),
in_reply_to: vec![&model::MessageId {
left: "1234",
right: "local.machine.example"
}],
references: vec![&model::MessageId {
left: "1234",
right: "local.machine.example"
}],
subject: Some(&misc_token::Unstructured("Re: Saying Hello".into())),
comments: vec![
&misc_token::Unstructured("A simple message".into()),
&misc_token::Unstructured("Not that complicated".into()),
&misc_token::Unstructured("not valid header name but should be accepted by the parser.".into()),
&misc_token::Unstructured(
"not valid header name but should be accepted by the parser.".into()
),
],
keywords: vec![
&misc_token::PhraseList(vec![
"hello".into(),
"world".into(),
]),
&misc_token::PhraseList(vec![
"salut".into(),
"le".into(),
"monde".into(),
]),
&misc_token::PhraseList(vec!["hello".into(), "world".into(),]),
&misc_token::PhraseList(vec!["salut".into(), "le".into(), "monde".into(),]),
],
received: vec![
&trace::ReceivedLog("from smtp.example.com ([10.83.2.2])\n\tby doradille with LMTP\n\tid xyzabcd\n\t(envelope-from <gitlab@example.com>)\n\tfor <quentin@example.com>")
],
received: vec![&trace::ReceivedLog(
r#"from smtp.example.com ([10.83.2.2])
by doradille with LMTP
id xyzabcd
(envelope-from <gitlab@example.com>)
for <quentin@example.com>"#
)],
return_path: vec![&model::MailboxRef {
name: None,
@ -143,8 +159,11 @@ This is a reply to your hello.
}],
optional: HashMap::from([
("Delivered-To", &misc_token::Unstructured("quentin@example.com".into())),
("Unknown", &misc_token::Unstructured("unknown".into())),
(
"Delivered-To",
&misc_token::Unstructured("quentin@example.com".into())
),
("Unknown", &misc_token::Unstructured("unknown".into())),
]),
bad_fields: vec![],
@ -155,5 +174,5 @@ This is a reply to your hello.
],
}
)
)
})
}