wip, still broken

This commit is contained in:
Quentin 2023-07-19 22:27:59 +02:00
parent b3bec8656d
commit cd5289c8c1
Signed by: quentin
GPG key ID: E9602264D639FF68
12 changed files with 640 additions and 103 deletions

View file

@ -29,19 +29,7 @@ Current known limitations/bugs:
## Design ## Design
Multipass design: each pass is in charge of a specific work. *Todo*
*Having multiple passes does not necessarily lead to abysmal performance.
For example, the [Chez Scheme compiler](https://legacy.cs.indiana.edu/~dyb/pubs/commercial-nanopass.pdf)
pioneered the "Nanopass" concept and showcases excellent performance.*
Currently, you can use the following passes:
- `segment.rs` - Extract the header section by finding the `CRLFCRLF` token.
- `guess_charset.rs` - Find the header section encoding (should be ASCII or UTF-8, but some corpora contain ISO-8859-1 headers)
- `extract_fields.rs` - Extract the headers line by line, taking into account Folding White Space.
- `field_lazy.rs` - Try to recognize the header fields (`From`, `To`, `Date`, etc.) but do not parse their value.
- `field_eager.rs` - Parse the value of each known header field.
- `header_section.rs` - Aggregate the various fields in a single structure.
## Testing strategy ## Testing strategy

129
ignore.test/enron.rs Normal file
View file

@ -0,0 +1,129 @@
use imf_codec::fragments::section;
use imf_codec::multipass;
use std::collections::HashSet;
use std::fs::File;
use std::io::Read;
use std::path::PathBuf;
use walkdir::WalkDir;
/// Runs the multipass pipeline on `input` and hands the aggregated header
/// fields to `func`.
///
/// The passes are chained in order: `segment::new`, `charset`, `fields`,
/// `names`, `body` and `section`. Each intermediate result is kept in its own
/// binding so that it outlives the pass derived from it.
///
/// # Panics
///
/// Panics when `segment::new` or `fields` fails, since both are unwrapped.
fn parser<'a, F>(input: &'a [u8], func: F) -> ()
where
    F: FnOnce(&section::Section) -> (),
{
    let segmented = multipass::segment::new(input).unwrap();
    let decoded = segmented.charset();
    let raw_fields = decoded.fields().unwrap();
    let named = raw_fields.names();
    let parsed = named.body();
    let aggregated = parsed.section();
    func(&aggregated.fields);
}
/// Parses every file found under `resources/enron/maildir/` (relative to the
/// crate manifest directory) and checks the parsed headers.
///
/// For each mail the test requires:
/// - a parsed `Date` field, always;
/// - at least one `From` mailbox, unless the mail is listed in `known_bad_from`;
/// - no entry in `bad_fields`, unless the mail is listed in `known_bad_fields`.
///
/// Mails are identified by their path relative to the maildir root. The test
/// is `#[ignore]`d, so it only runs when ignored tests are requested.
#[test]
#[ignore]
fn test_enron500k() {
    let mut d = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    d.push("resources/enron/maildir/");
    // Length of the maildir root, used below to strip it from each mail path.
    let prefix_sz = d.as_path().to_str().unwrap().len();
    //d.push("williams-w3/");

    // Mails whose headers are known to contain fields the parser rejects; the
    // trailing comment shows the offending header value.
    let known_bad_fields = HashSet::from([
        "white-s/calendar/113.", // To: east <7..>
        "skilling-j/inbox/223.", // From: pep <performance.>
        "jones-t/all_documents/9806.", // To: <"tibor.vizkelety":@enron.com>
        "jones-t/notes_inbox/3303.", // To: <"tibor.vizkelety":@enron.com>
        "lokey-t/calendar/33.", // A second Date entry for the calendar containing
        // Date: Monday, March 12
        "zipper-a/inbox/199.", // To: e-mail <mari.>
        "dasovich-j/deleted_items/128.", // To: f62489 <g>
        "dasovich-j/all_documents/677.", // To: w/assts <govt.>
        "dasovich-j/all_documents/8984.", // To: <"ft.com.users":@enron.com>
        "dasovich-j/all_documents/3514.", // To: <"ft.com.users":@enron.com>
        "dasovich-j/all_documents/4467.", // To: <"ft.com.users":@enron.com>
        "dasovich-j/all_documents/578.", // To: w/assts <govt.>
        "dasovich-j/all_documents/3148.", // To: <"economist.com.readers":@enron.com>
        "dasovich-j/all_documents/9953.", // To: <"economist.com.reader":@enron.com>
        "dasovich-j/risk_analytics/3.", // To: w/assts <govt.>
        "dasovich-j/notes_inbox/5391.", // To: <"ft.com.users":@enron.com>
        "dasovich-j/notes_inbox/4952.", // To: <"economist.com.reader":@enron.com>
        "dasovich-j/notes_inbox/2386.", // To: <"ft.com.users":@enron.com>
        "dasovich-j/notes_inbox/1706.", // To: <"ft.com.users":@enron.com>
        "dasovich-j/notes_inbox/1489.", // To: <"economist.com.readers":@enron.com>
        "dasovich-j/notes_inbox/5.", // To: w/assts <govt.>
        "kaminski-v/sites/19.", // To: <"the.desk":@enron.com>
        "kaminski-v/sites/1.", // To: <"the.desk":@enron.com>
        "kaminski-v/discussion_threads/5082.", // To: <"ft.com.users":@enron.com>
        "kaminski-v/discussion_threads/4046.", // To: <"the.desk":@enron.com>
        "kaminski-v/discussion_threads/4187.", // To: <"the.desk":@enron.com>
        "kaminski-v/discussion_threads/8068.", // To: cats <breaktkhrough.>, risk <breakthrough.>, leaders <breaktkhrough.>
        "kaminski-v/discussion_threads/7980.", // To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
        "kaminski-v/all_documents/5970.", //To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
        "kaminski-v/all_documents/5838.", // To + Cc: dogs <breakthrough.>, breakthrough.adm@enron.com, breakthrough.adm@enron.com,\r\n\tbreakthrough.adm@enron.com
        "kaminski-v/all_documents/10070.", // To: <"ft.com.users":@enron.com>
        "kaminski-v/all_documents/92.", // To: <"the.desk":@enron.com>
        "kaminski-v/all_documents/276.", // To: <"the.desk":@enron.com>
        "kaminski-v/technical/1.", // To: <"the.desk":@enron.com>
        "kaminski-v/technical/7.", // To: <"the.desk":@enron.com>
        "kaminski-v/notes_inbox/140.", // To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
        "kaminski-v/notes_inbox/95.", // To + CC failed: cats <breaktkhrough.>, risk <breakthrough.>, leaders <breaktkhrough.>
        "kean-s/archiving/untitled/1232.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
        "kean-s/archiving/untitled/1688.", // To: w/assts <govt.>
        "kean-s/sent/198.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
        "kean-s/reg_risk/9.", // To: w/assts <govt.>
        "kean-s/discussion_threads/950.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
        "kean-s/discussion_threads/577.", // To: w/assts <govt.>
        "kean-s/calendar/untitled/1096.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
        "kean-s/calendar/untitled/640.", // To: w/assts <govt.>
        "kean-s/all_documents/640.", // To: w/assts <govt.>
        "kean-s/all_documents/1095.", // To: w/assts <govt.>
        "kean-s/attachments/2030.", // To: w/assts <govt.>
        "williams-w3/operations_committee_isas/10.", // To: z34655 <m>
    ]);

    // Mails whose `From` header is known not to yield any mailbox.
    let known_bad_from = HashSet::from([
        "skilling-j/inbox/223.", // From: pep <performance.>
    ]);

    let mut i = 0;
    for entry in WalkDir::new(d.as_path())
        .into_iter()
        .filter_map(|file| file.ok())
    {
        // Directories are walked too; only regular files are mails.
        if !entry.metadata().unwrap().is_file() {
            continue;
        }

        let mail_path = entry.path();
        let suffix = &mail_path.to_str().unwrap()[prefix_sz..];

        // read file
        let mut raw = Vec::new();
        let mut f = File::open(mail_path).unwrap();
        f.read_to_end(&mut raw).unwrap();

        // parse
        parser(&raw, |hdrs| {
            let ok_date = hdrs.date.is_some();
            let ok_from = !hdrs.from.is_empty();
            let ok_fields = hdrs.bad_fields.is_empty();

            if !ok_date || !ok_from || !ok_fields {
                println!("Issue with: {}", suffix);
            }

            // Name the mail in every assertion so a failure is diagnosable
            // without scrolling back through the progress output.
            assert!(ok_date, "no Date field parsed in {}", suffix);

            if !known_bad_from.contains(suffix) {
                assert!(ok_from, "no From mailbox parsed in {}", suffix);
            }

            if !known_bad_fields.contains(suffix) {
                assert!(ok_fields, "unexpected bad fields in {}", suffix);
            }

            i += 1;
            if i % 1000 == 0 {
                println!("Analyzed emails: {}", i);
            }
        })
    }
}

340
ignore.test/known.rs Normal file
View file

@ -0,0 +1,340 @@
use chrono::{FixedOffset, TimeZone};
use imf_codec::fragments::{misc_token, model, section, part, trace};
use imf_codec::multipass;
use std::collections::HashMap;
/// Drives the multipass pipeline over `input` and passes the aggregated
/// header fields to `func`.
///
/// Passes run in this order: `segment::new`, `charset`, `fields`, `names`,
/// `body`, `section`. Every intermediate value is bound to a local so it stays
/// alive while the following pass uses it.
///
/// # Panics
///
/// Panics if `segment::new` or `fields` fails (both results are unwrapped).
fn parser<'a, F>(input: &'a [u8], func: F) -> ()
where
    F: FnOnce(&section::Section) -> (),
{
    let segmented = multipass::segment::new(input).unwrap();
    let decoded = segmented.charset();
    let raw_fields = decoded.fields().unwrap();
    let named = raw_fields.names();
    let parsed = named.body();
    let aggregated = parsed.section();
    func(&aggregated.fields);
}
/// End-to-end check of the header pipeline on a hand-written message.
///
/// The fixture mixes well-formed RFC 5322 fields, a header name followed by a
/// space before the colon (`comments :`), a non-ASCII header name (`Héron`)
/// and a line that is not a header at all. The parsed section is compared
/// field by field with the expected structure below.
#[test]
fn test_headers() {
// NOTE: the raw string is the fixture itself; its whitespace is significant.
let fullmail: &[u8] = r#"Return-Path: <gitlab@example.com>
Delivered-To: quentin@example.com
Received: from smtp.example.com ([10.83.2.2])
by doradille with LMTP
id xyzabcd
(envelope-from <gitlab@example.com>)
for <quentin@example.com>; Tue, 13 Jun 2023 19:01:08 +0000
Date: Tue, 13 Jun 2023 10:01:10 +0200
From: Mary Smith
<mary@example.net>, "A\lan" <alan@example>
Sender: imf@example.com
Reply-To: "Mary Smith: Personal Account" <smith@home.example>
To: John Doe <jdoe@machine.example>
Cc: imf2@example.com
Bcc: (hidden)
Subject: Re: Saying Hello
Comments: A simple message
Comments: Not that complicated
comments : not valid header name but should be accepted
by the parser.
Keywords: hello, world
Héron: Raté
Raté raté
Keywords: salut, le, monde
Not a real header but should still recover
Message-ID: <3456@example.net>
In-Reply-To: <1234@local.machine.example>
References: <1234@local.machine.example>
Unknown: unknown
This is a reply to your hello.
"#
.as_bytes();
parser(fullmail, |parsed_section| {
assert_eq!(
parsed_section,
&section::Section {
// The `Date` field, not the timestamp of the `Received` trace, is expected here.
date: Some(
&FixedOffset::east_opt(2 * 3600)
.unwrap()
.with_ymd_and_hms(2023, 06, 13, 10, 01, 10)
.unwrap()
),
// The quoted-pair in `"A\lan"` is expected to be unescaped to `Alan`.
from: vec![
&model::MailboxRef {
name: Some("Mary Smith".into()),
addrspec: model::AddrSpec {
local_part: "mary".into(),
domain: "example.net".into(),
}
},
&model::MailboxRef {
name: Some("Alan".into()),
addrspec: model::AddrSpec {
local_part: "alan".into(),
domain: "example".into(),
}
}
],
sender: Some(&model::MailboxRef {
name: None,
addrspec: model::AddrSpec {
local_part: "imf".into(),
domain: "example.com".into(),
}
}),
reply_to: vec![&model::AddressRef::Single(model::MailboxRef {
name: Some("Mary Smith: Personal Account".into()),
addrspec: model::AddrSpec {
local_part: "smith".into(),
domain: "home.example".into(),
}
})],
to: vec![&model::AddressRef::Single(model::MailboxRef {
name: Some("John Doe".into()),
addrspec: model::AddrSpec {
local_part: "jdoe".into(),
domain: "machine.example".into(),
}
})],
cc: vec![&model::AddressRef::Single(model::MailboxRef {
name: None,
addrspec: model::AddrSpec {
local_part: "imf2".into(),
domain: "example.com".into(),
}
})],
// `Bcc: (hidden)` only carries a comment, so no address is expected.
bcc: vec![],
msg_id: Some(&model::MessageId {
left: "3456",
right: "example.net"
}),
in_reply_to: vec![&model::MessageId {
left: "1234",
right: "local.machine.example"
}],
references: vec![&model::MessageId {
left: "1234",
right: "local.machine.example"
}],
subject: Some(&misc_token::Unstructured("Re: Saying Hello".into())),
// The third entry comes from the `comments :` header (space before the
// colon, folded over two lines).
comments: vec![
&misc_token::Unstructured("A simple message".into()),
&misc_token::Unstructured("Not that complicated".into()),
&misc_token::Unstructured(
"not valid header name but should be accepted by the parser.".into()
),
],
// One `PhraseList` per `Keywords` header, in order of appearance.
keywords: vec![
&misc_token::PhraseList(vec!["hello".into(), "world".into(),]),
&misc_token::PhraseList(vec!["salut".into(), "le".into(), "monde".into(),]),
],
// The expected log stops before the `;` that introduces the trace date.
received: vec![&trace::ReceivedLog(
r#"from smtp.example.com ([10.83.2.2])
by doradille with LMTP
id xyzabcd
(envelope-from <gitlab@example.com>)
for <quentin@example.com>"#
)],
return_path: vec![&model::MailboxRef {
name: None,
addrspec: model::AddrSpec {
local_part: "gitlab".into(),
domain: "example.com".into(),
}
}],
// Well-formed fields with an unrecognised name are kept as unstructured text.
optional: HashMap::from([
(
"Delivered-To",
&misc_token::Unstructured("quentin@example.com".into())
),
("Unknown", &misc_token::Unstructured("unknown".into())),
]),
bad_fields: vec![],
// Lines that could not be read as header fields are expected verbatim here.
unparsed: vec![
"Héron: Raté\n Raté raté\n",
"Not a real header but should still recover\n",
],
..section::Section::default()
}
)
})
}
/// Checks header parsing on a message that uses encoded-words
/// (`=?charset?encoding?text?=`) and MIME header fields.
///
/// The display names and the subject are expected to come out decoded, and the
/// `MIME-Version` / `Content-*` fields are expected in `mime_version` and in
/// the nested `mime` section.
#[test]
fn test_headers_mime() {
use imf_codec::fragments::mime;
// NOTE: the raw string is the fixture itself; its whitespace is significant.
let fullmail: &[u8] = r#"From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>
To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>
CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>
Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
MIME-Version: 1.0
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: quoted-printable
Content-ID: <a@example.com>
Content-Description: hello
Now's the time =
for all folk to come=
to the aid of their country.
"#
.as_bytes();
parser(fullmail, |parsed_section| {
assert_eq!(
parsed_section,
&section::Section {
from: vec![
&model::MailboxRef {
name: Some("Keith Moore".into()),
addrspec: model::AddrSpec {
local_part: "moore".into(),
domain: "cs.utk.edu".into(),
}
},
],
to: vec![&model::AddressRef::Single(model::MailboxRef {
name: Some("Keld Jørn Simonsen".into()),
addrspec: model::AddrSpec {
local_part: "keld".into(),
domain: "dkuug.dk".into(),
}
})],
// An encoded-word followed by a plain word forms a single display name.
cc: vec![&model::AddressRef::Single(model::MailboxRef {
name: Some("André Pirard".into()),
addrspec: model::AddrSpec {
local_part: "PIRARD".into(),
domain: "vm1.ulg.ac.be".into(),
}
})],
// The two adjacent encoded-words of the subject are expected to be joined
// without any separator between them.
subject: Some(&misc_token::Unstructured("If you can read this you understand the example.".into())),
mime_version: Some(&mime::Version{ major: 1, minor: 0 }),
mime: section::MIMESection {
content_type: Some(&mime::Type::Text(mime::TextDesc {
charset: Some(mime::EmailCharset::ISO_8859_1),
subtype: mime::TextSubtype::Plain,
unknown_parameters: vec![]
})),
content_transfer_encoding: Some(&mime::Mechanism::QuotedPrintable),
content_id: Some(&model::MessageId {
left: "a",
right: "example.com"
}),
content_description: Some(&misc_token::Unstructured("hello".into())),
..section::MIMESection::default()
},
..section::Section::default()
}
);
})
}
/// Runs the multipass pipeline up to the body-structure pass and hands the
/// resulting part tree to `func`.
///
/// The chain is `segment::new`, `charset`, `fields`, `names`, `body`,
/// `section`, then `body_structure`. Each intermediate value is bound to a
/// local so it stays alive while the next pass uses it.
///
/// # Panics
///
/// Panics if `segment::new` or `fields` fails (both results are unwrapped).
fn parser_bodystruct<'a, F>(input: &'a [u8], func: F) -> ()
where
    F: FnOnce(&part::PartNode) -> (),
{
    let segmented = multipass::segment::new(input).unwrap();
    let decoded = segmented.charset();
    let raw_fields = decoded.fields().unwrap();
    let named = raw_fields.names();
    let parsed = named.body();
    let aggregated = parsed.section();
    let structure = aggregated.body_structure();
    func(&structure.body);
}
/// Checks that a `multipart/alternative` message is split into a composite
/// node holding one discrete node per body part.
///
/// The expected tree has two children, whose bodies are the text/plain and
/// text/html payloads of the fixture.
// NOTE(review): every expected `PartHeader` is `PartHeader::default()` even
// though the fixture parts carry `Content-Type` headers — presumably a
// placeholder while the feature is in progress; confirm.
#[test]
fn test_multipart() {
// NOTE: the raw strings below are fixtures; their whitespace is significant.
let fullmail: &[u8] = r#"Date: Sat, 8 Jul 2023 07:14:29 +0200
From: Grrrnd Zero <grrrndzero@example.org>
To: John Doe <jdoe@machine.example>
Subject: Re: Saying Hello
Message-ID: <NTAxNzA2AC47634Y366BAMTY4ODc5MzQyODY0ODY5@www.grrrndzero.org>
MIME-Version: 1.0
Content-Type: multipart/alternative;
boundary="b1_e376dc71bafc953c0b0fdeb9983a9956"
Content-Transfer-Encoding: 7bit
This is a multi-part message in MIME format.
--b1_e376dc71bafc953c0b0fdeb9983a9956
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: quoted-printable
GZ
OoOoO
oOoOoOoOo
oOoOoOoOoOoOoOoOo
oOoOoOoOoOoOoOoOoOoOoOo
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
--b1_e376dc71bafc953c0b0fdeb9983a9956
Content-Type: text/html; charset=us-ascii
<div style="text-align: center;"><strong>GZ</strong><br />
OoOoO<br />
oOoOoOoOo<br />
oOoOoOoOoOoOoOoOo<br />
oOoOoOoOoOoOoOoOoOoOoOo<br />
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo<br />
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
--b1_e376dc71bafc953c0b0fdeb9983a9956--
"#.as_bytes();
parser_bodystruct(fullmail, |part| {
// The preamble line before the first boundary does not appear in the expected tree.
assert_eq!(part, &part::PartNode::Composite(
part::PartHeader {
..part::PartHeader::default()
},
vec![
// First alternative: the plain-text payload.
part::PartNode::Discrete(
part::PartHeader {
..part::PartHeader::default()
},
r#"GZ
OoOoO
oOoOoOoOo
oOoOoOoOoOoOoOoOo
oOoOoOoOoOoOoOoOoOoOoOo
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO"#.as_bytes()
),
// Second alternative: the HTML payload.
part::PartNode::Discrete(
part::PartHeader {
..part::PartHeader::default()
},
r#"<div style="text-align: center;"><strong>GZ</strong><br />
OoOoO<br />
oOoOoOoOo<br />
oOoOoOoOoOoOoOoOo<br />
oOoOoOoOoOoOoOoOoOoOoOo<br />
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo<br />
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />"#.as_bytes()
),
]));
});
}

View file

@ -15,12 +15,6 @@ use crate::fragments::lazy;
use crate::fragments::whitespace::cfws; use crate::fragments::whitespace::cfws;
use crate::fragments::quoted::quoted_string; use crate::fragments::quoted::quoted_string;
/// Version number made of a major and a minor component, written
/// `major.minor` in its textual form.
#[derive(Debug, PartialEq)]
pub struct Version {
    /// Component written before the dot.
    pub major: u32,
    /// Component written after the dot.
    pub minor: u32,
}
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum Type<'a> { pub enum Type<'a> {
// Composite types // Composite types
@ -278,20 +272,6 @@ impl<'a> From<&'a str> for TextSubtype<'a> {
} }
} }
/// Parses a version value of the form `major "." minor`.
///
/// Optional CFWS is accepted before the major number, on both sides of the
/// dot, and after the minor number. Both numbers are read as `u32`.
pub fn version(input: &str) -> IResult<&str, Version> {
    // Positions 1 and 5 hold the two numbers; every other slot is either
    // optional CFWS or the dot and is discarded.
    let mut tokens = tuple((
        opt(cfws),
        character::u32,
        opt(cfws),
        tag("."),
        opt(cfws),
        character::u32,
        opt(cfws),
    ));
    let (rest, parsed) = tokens(input)?;
    Ok((
        rest,
        Version {
            major: parsed.1,
            minor: parsed.5,
        },
    ))
}
pub fn parameter(input: &str) -> IResult<&str, Parameter> { pub fn parameter(input: &str) -> IResult<&str, Parameter> {
let (rest, (pname, _, pvalue)) = tuple(( let (rest, (pname, _, pvalue)) = tuple((
token, token,

28
src/mime/section.rs Normal file
View file

@ -0,0 +1,28 @@
/// Aggregated view of the MIME header fields of one section.
///
/// Every field borrows the value it refers to for `'a`; nothing is copied.
/// `Default` yields a section with no field set.
#[derive(Debug, PartialEq, Default)]
pub struct MIMESection<'a> {
/// Value taken from a `MIMEField::ContentType` entry, if one was seen.
pub content_type: Option<&'a Type<'a>>,
/// Value taken from a `MIMEField::ContentTransferEncoding` entry, if one was seen.
pub content_transfer_encoding: Option<&'a Mechanism<'a>>,
/// Value taken from a `MIMEField::ContentID` entry, if one was seen.
pub content_id: Option<&'a MessageId<'a>>,
/// Value taken from a `MIMEField::ContentDescription` entry, if one was seen.
pub content_description: Option<&'a Unstructured>,
/// Entries from `MIMEField::Optional`, keyed by the first element of the variant.
pub optional: HashMap<&'a str, &'a Unstructured>,
/// Raw text carried by `MIMEField::Rescue` entries, in iteration order.
pub unparsed: Vec<&'a str>,
}
/// Builds a [`MIMESection`] by folding a sequence of MIME fields into it.
///
/// Single-valued fields keep the last value seen, `Optional` entries are
/// inserted into the map (a repeated key replaces the earlier value) and
/// `Rescue` entries are appended in order.
impl<'a> FromIterator<&'a MIMEField<'a>> for MIMESection<'a> {
    fn from_iter<I: IntoIterator<Item = &'a MIMEField<'a>>>(iter: I) -> Self {
        iter.into_iter()
            .fold(MIMESection::default(), |mut acc, field| {
                match field {
                    MIMEField::ContentType(v) => acc.content_type = Some(v),
                    MIMEField::ContentTransferEncoding(v) => {
                        acc.content_transfer_encoding = Some(v)
                    }
                    MIMEField::ContentID(v) => acc.content_id = Some(v),
                    MIMEField::ContentDescription(v) => acc.content_description = Some(v),
                    MIMEField::Optional(k, v) => {
                        acc.optional.insert(k, v);
                    }
                    MIMEField::Rescue(v) => acc.unparsed.push(v),
                };
                acc
            })
    }
}

View file

@ -140,6 +140,13 @@ pub fn address_list_cfws(input: &[u8]) -> IResult<&[u8], Vec<AddressRef>> {
Ok((input, vec![])) Ok((input, vec![]))
} }
pub fn nullable_address_list(input: &[u8]) -> IResult<&[u8], Vec<>> {
map(
opt(alt((address_list, address_list_cfws))),
|v| v.unwrap_or(vec![]),
)(input)
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;

88
src/rfc5322/field.rs Normal file
View file

@ -0,0 +1,88 @@
use nom::{
IResult,
};
use crate::rfc5322::address::{MailboxList, AddressList};
use crate::rfc5322::mailbox::MailboxRef;
use crate::rfc5322::identification::{MessageId, MessageIdList};
use crate::rfc5322::trace::ReceivedLog;
use crate::text::misc_token::{Unstructured, PhraseList};
/// One parsed header field, tagged by the kind of field it came from.
///
/// The section comments refer to the sections of RFC 5322 that define the
/// corresponding fields.
// NOTE(review): `DateTime`, `Mailbox`, `NullableAddressList`, `Identifier`,
// `IdentifierList`, `AddrSpec` and `Version` are not brought into scope by the
// `use` lines visible at the top of this file, which import `MailboxRef`,
// `MessageId` and `MessageIdList` instead — confirm the intended names.
#[derive(Debug, PartialEq)]
pub enum Field<'a> {
// 3.6.1. The Origination Date Field
Date(DateTime<'a>),
// 3.6.2. Originator Fields
From(MailboxList<'a>),
Sender(Mailbox<'a>),
ReplyTo(AddressList<'a>),
// 3.6.3. Destination Address Fields
To(AddressList<'a>),
Cc(AddressList<'a>),
Bcc(NullableAddressList<'a>),
// 3.6.4. Identification Fields
MessageID(Identifier<'a>),
InReplyTo(IdentifierList<'a>),
References(IdentifierList<'a>),
// 3.6.5. Informational Fields
Subject(Unstructured<'a>),
Comments(Unstructured<'a>),
Keywords(PhraseList<'a>),
// 3.6.6 Resent Fields (not implemented)
// 3.6.7 Trace Fields
Received(ReceivedLog<'a>),
// `None` presumably stands for an empty return path — confirm against the
// `return_path` parser.
ReturnPath(Option<AddrSpec<'a>>),
// MIME-Version header field.
MIMEVersion(Version<'a>),
// Any other field: the raw field name, then its value as unstructured text.
Optional(&'a [u8], Unstructured<'a>),
}
pub fn field(input: &[u8]) -> IResult<&[u8], Field<'a>> {
let (name, rest) = field_name(input)?;
match name.to_lowercase().as_ref() {
"date" => datetime::section(rest).map(Field::Date),
"from" => mailbox_list(rest).map(Field::From),
"sender" => mailbox(rest).map(Field::Sender),
"reply-to" => address_list(rest).map(Field::ReplyTo),
"to" => address_list(rest).map(Field::To),
"cc" => address_list(rest).map(Field::Cc),
"bcc" => nullable_address_list(rest).map(Field::Bcc),
"message-id" => msg_id(rest).map(Field::MessageID),
"in-reply-to" => msg_list(rest).map(Field::InReplyTo),
"references" => msg_list(rest).map(Field::References),
"subject" => unstructured(rest).map(Field::Subject),
"comments" => unstructured(rest).map(Field::Comments),
"keywords" => phrase_list(rest).map(Field::Keywords),
"return-path" => return_path(rest).map(Field::ReturnPath),
"received" => received_log(rest).map(Field::ReceivedLog),
"mime-version" => version(rest).map(Field::MIMEVersion),
_ => unstructured(rest).map(|v| Field::Optional(name, v)),
}
}
/// Parses a header field name together with the `:` that follows it.
///
/// The returned value is the name only. The colon and the `space0` runs on
/// either side of it are consumed and discarded.
///
/// ```abnf
/// field = field-name ":" unstructured CRLF
/// field-name = 1*ftext
/// ftext = %d33-57 / ; Printable US-ASCII
/// %d59-126 ; characters not including
/// ; ":".
/// ```
fn field_name(input: &[u8]) -> IResult<&[u8], &[u8]> {
    // Printable US-ASCII except ':' (0x3A), i.e. `ftext` in the grammar above.
    let is_ftext = |c: u8| matches!(c, 0x21..=0x39 | 0x3B..=0x7E);
    let separator = tuple((space0, tag(b":"), space0));
    terminated(take_while1(is_ftext), separator)(input)
}

View file

@ -18,27 +18,6 @@ pub struct MessageId<'a> {
} }
pub type MessageIdList<'a> = Vec<MessageId<'a>>; pub type MessageIdList<'a> = Vec<MessageId<'a>>;
/*
impl<'a> TryFrom<&'a lazy::Identifier<'a>> for MessageId<'a> {
type Error = IMFError<'a>;
fn try_from(id: &'a lazy::Identifier<'a>) -> Result<Self, Self::Error> {
msg_id(id.0)
.map(|(_, i)| i)
.map_err(|e| IMFError::MessageID(e))
}
}
impl<'a> TryFrom<&'a lazy::IdentifierList<'a>> for MessageIdList<'a> {
type Error = IMFError<'a>;
fn try_from(id: &'a lazy::IdentifierList<'a>) -> Result<Self, Self::Error> {
many1(msg_id)(id.0)
.map(|(_, i)| i)
.map_err(|e| IMFError::MessageIDList(e))
}
}*/
/// Message identifier /// Message identifier
/// ///
/// ```abnf /// ```abnf
@ -53,6 +32,10 @@ pub fn msg_id(input: &[u8]) -> IResult<&[u8], MessageId> {
Ok((input, MessageId { left, right })) Ok((input, MessageId { left, right }))
} }
/// Parses one or more consecutive message identifiers.
///
/// Fails when not even one identifier can be read.
pub fn msg_list(input: &[u8]) -> IResult<&[u8], MessageIdList> {
    let mut identifiers = many1(msg_id);
    identifiers(input)
}
// @FIXME Missing obsolete // @FIXME Missing obsolete
fn id_left(input: &[u8]) -> IResult<&[u8], &[u8]> { fn id_left(input: &[u8]) -> IResult<&[u8], &[u8]> {
dot_atom_text(input) dot_atom_text(input)

View file

@ -9,57 +9,63 @@ use crate::fragments::trace::ReceivedLog;
use chrono::{DateTime, FixedOffset}; use chrono::{DateTime, FixedOffset};
#[derive(Debug, PartialEq, Default)] #[derive(Debug, PartialEq, Default)]
pub struct Section<'a> { pub struct Message<'a> {
// 3.6.1. The Origination Date Field // 3.6.1. The Origination Date Field
pub date: Option<&'a DateTime<FixedOffset>>, pub date: Option<DateTime<FixedOffset>>,
// 3.6.2. Originator Fields // 3.6.2. Originator Fields
pub from: Vec<&'a MailboxRef>, pub from: Vec<MailboxRef<'a>>,
pub sender: Option<&'a MailboxRef>, pub sender: Option<MailboxRef<'a>>,
pub reply_to: Vec<&'a AddressRef>, pub reply_to: Vec<AddressRef<'a>>,
// 3.6.3. Destination Address Fields // 3.6.3. Destination Address Fields
pub to: Vec<&'a AddressRef>, pub to: Vec<AddressRef<'a>>,
pub cc: Vec<&'a AddressRef>, pub cc: Vec<AddressRef<'a>>,
pub bcc: Vec<&'a AddressRef>, pub bcc: Vec<AddressRef<'a>>,
// 3.6.4. Identification Fields // 3.6.4. Identification Fields
pub msg_id: Option<&'a MessageId<'a>>, pub msg_id: Option<MessageId<'a>>,
pub in_reply_to: Vec<&'a MessageId<'a>>, pub in_reply_to: Vec<MessageId<'a>>,
pub references: Vec<&'a MessageId<'a>>, pub references: Vec<MessageId<'a>>,
// 3.6.5. Informational Fields // 3.6.5. Informational Fields
pub subject: Option<&'a Unstructured>, pub subject: Option<Unstructured<'a>>,
pub comments: Vec<&'a Unstructured>, pub comments: Vec<Unstructured<'a>>,
pub keywords: Vec<&'a PhraseList>, pub keywords: Vec<PhraseList<'a>>,
// 3.6.6 Not implemented // 3.6.6 Not implemented
// 3.6.7 Trace Fields // 3.6.7 Trace Fields
pub return_path: Vec<&'a MailboxRef>, pub return_path: Vec<MailboxRef<'a>>,
pub received: Vec<&'a ReceivedLog<'a>>, pub received: Vec<ReceivedLog<'a>>,
// 3.6.8. Optional Fields // 3.6.8. Optional Fields
pub optional: HashMap<&'a str, &'a Unstructured>, pub optional: HashMap<&'a [u8], Unstructured<'a>>,
// MIME
pub mime_version: Option<&'a Version>,
pub mime: MIMESection<'a>,
// Recovery // Recovery
pub bad_fields: Vec<&'a lazy::Field<'a>>, pub unparsed: Vec<&'a [u8]>,
pub unparsed: Vec<&'a str>,
} }
#[derive(Debug, PartialEq, Default)] impl<'a> FromIterator<&'a [u8]> for Message<'a> {
pub struct MIMESection<'a> { fn from_iter<I: IntoIterator<Item = &'a [u8]>>(iter: I) -> Self {
pub content_type: Option<&'a Type<'a>>, iter.fold(
pub content_transfer_encoding: Option<&'a Mechanism<'a>>, Message::default(),
pub content_id: Option<&'a MessageId<'a>>, |mut msg, field| {
pub content_description: Option<&'a Unstructured>, match field_name(field) {
pub optional: HashMap<&'a str, &'a Unstructured>, Ok((name, value)) => xx,
pub unparsed: Vec<&'a str>,
} }
match field {
}
msg
}
)
}
}
//@FIXME min and max limits are not enforced, //@FIXME min and max limits are not enforced,
// it may result in missing data or silently overriden data. // it may result in missing data or silently overriden data.
impl<'a> FromIterator<&'a Field<'a>> for Section<'a> { impl<'a> FromIterator<&'a Field<'a>> for Section<'a> {
@ -104,19 +110,3 @@ impl<'a> FromIterator<&'a Field<'a>> for Section<'a> {
} }
} }
/// Builds a [`MIMESection`] by folding a sequence of MIME fields into it.
///
/// Single-valued fields keep the last value seen, `Optional` entries are
/// inserted into the map (a repeated key replaces the earlier value) and
/// `Rescue` entries are appended in order.
impl<'a> FromIterator<&'a MIMEField<'a>> for MIMESection<'a> {
    fn from_iter<I: IntoIterator<Item = &'a MIMEField<'a>>>(iter: I) -> Self {
        iter.into_iter()
            .fold(MIMESection::default(), |mut acc, field| {
                match field {
                    MIMEField::ContentType(v) => acc.content_type = Some(v),
                    MIMEField::ContentTransferEncoding(v) => {
                        acc.content_transfer_encoding = Some(v)
                    }
                    MIMEField::ContentID(v) => acc.content_id = Some(v),
                    MIMEField::ContentDescription(v) => acc.content_description = Some(v),
                    MIMEField::Optional(k, v) => {
                        acc.optional.insert(k, v);
                    }
                    MIMEField::Rescue(v) => acc.unparsed.push(v),
                };
                acc
            })
    }
}

View file

@ -4,3 +4,4 @@ pub mod datetime;
pub mod trace; pub mod trace;
pub mod identification; pub mod identification;
pub mod mime; pub mod mime;
pub mod field;

View file

@ -35,7 +35,7 @@ impl<'a> TryFrom<&'a lazy::ReceivedLog<'a>> for ReceivedLog<'a> {
} }
}*/ }*/
pub fn received_body(input: &[u8]) -> IResult<&[u8], ReceivedLog> { pub fn received_log(input: &[u8]) -> IResult<&[u8], ReceivedLog> {
map( map(
tuple(( tuple((
many0(received_tokens), many0(received_tokens),
@ -46,7 +46,7 @@ pub fn received_body(input: &[u8]) -> IResult<&[u8], ReceivedLog> {
)(input) )(input)
} }
pub fn return_path_body(input: &[u8]) -> IResult<&[u8], Option<mailbox::AddrSpec>> { pub fn return_path(input: &[u8]) -> IResult<&[u8], Option<mailbox::AddrSpec>> {
alt((map(mailbox::angle_addr, |a| Some(a)), empty_path))(input) alt((map(mailbox::angle_addr, |a| Some(a)), empty_path))(input)
} }

View file

@ -18,6 +18,9 @@ use crate::text::{
#[derive(Debug, PartialEq, Default)] #[derive(Debug, PartialEq, Default)]
pub struct PhraseList(pub Vec<String>); pub struct PhraseList(pub Vec<String>);
pub fn phrase_list(input: &'a [u8]) -> IResult<&[u8], PhraseList> {
separated_list1(tag(","), phrase)(input)
}
/* /*
impl<'a> TryFrom<&'a lazy::Unstructured<'a>> for Unstructured { impl<'a> TryFrom<&'a lazy::Unstructured<'a>> for Unstructured {