wip, still broken
This commit is contained in:
parent
b3bec8656d
commit
cd5289c8c1
12 changed files with 640 additions and 103 deletions
14
README.md
14
README.md
|
@ -29,19 +29,7 @@ Current known limitations/bugs:
|
||||||
|
|
||||||
## Design
|
## Design
|
||||||
|
|
||||||
Multipass design: each pass is in charge of a specific task.
|
*Todo*
|
||||||
*Having multiple passes does not necessarily lead to abysmal performance.
|
|
||||||
For example, the [Chez Scheme compiler](https://legacy.cs.indiana.edu/~dyb/pubs/commercial-nanopass.pdf)
|
|
||||||
pioneered the "Nanopass" concept and showcases excellent performance.*
|
|
||||||
|
|
||||||
Currently, you can use the following passes:
|
|
||||||
- `segment.rs` - Extract the header section by finding the `CRLFCRLF` token.
|
|
||||||
- `guess_charset.rs` - Find the header section encoding (it should be ASCII or UTF-8, but some corpora contain ISO-8859-1 headers).
|
|
||||||
- `extract_fields.rs` - Extract the headers line by line, taking into account Folding White Space.
|
|
||||||
- `field_lazy.rs` - Try to recognize the header fields (`From`, `To`, `Date`, etc.) but do not parse their values.
|
|
||||||
- `field_eager.rs` - Parse the value of each known header field.
|
|
||||||
- `header_section.rs` - Aggregate the various fields into a single structure.
|
|
||||||
|
|
||||||
|
|
||||||
## Testing strategy
|
## Testing strategy
|
||||||
|
|
||||||
|
|
129
ignore.test/enron.rs
Normal file
129
ignore.test/enron.rs
Normal file
|
@ -0,0 +1,129 @@
|
||||||
|
use imf_codec::fragments::section;
|
||||||
|
use imf_codec::multipass;
|
||||||
|
use std::collections::HashSet;
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io::Read;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
use walkdir::WalkDir;
|
||||||
|
|
||||||
|
fn parser<'a, F>(input: &'a [u8], func: F) -> ()
|
||||||
|
where
|
||||||
|
F: FnOnce(&section::Section) -> (),
|
||||||
|
{
|
||||||
|
let seg = multipass::segment::new(input).unwrap();
|
||||||
|
let charset = seg.charset();
|
||||||
|
let fields = charset.fields().unwrap();
|
||||||
|
let field_names = fields.names();
|
||||||
|
let field_body = field_names.body();
|
||||||
|
let section = field_body.section();
|
||||||
|
|
||||||
|
func(&section.fields);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
#[ignore]
|
||||||
|
fn test_enron500k() {
|
||||||
|
let mut d = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
||||||
|
d.push("resources/enron/maildir/");
|
||||||
|
let prefix_sz = d.as_path().to_str().unwrap().len();
|
||||||
|
//d.push("williams-w3/");
|
||||||
|
|
||||||
|
let known_bad_fields = HashSet::from([
|
||||||
|
"white-s/calendar/113.", // To: east <7..>
|
||||||
|
"skilling-j/inbox/223.", // From: pep <performance.>
|
||||||
|
"jones-t/all_documents/9806.", // To: <"tibor.vizkelety":@enron.com>
|
||||||
|
"jones-t/notes_inbox/3303.", // To: <"tibor.vizkelety":@enron.com>
|
||||||
|
"lokey-t/calendar/33.", // A second Date entry for the calendar containing
|
||||||
|
// Date: Monday, March 12
|
||||||
|
"zipper-a/inbox/199.", // To: e-mail <mari.>
|
||||||
|
"dasovich-j/deleted_items/128.", // To: f62489 <g>
|
||||||
|
"dasovich-j/all_documents/677.", // To: w/assts <govt.>
|
||||||
|
"dasovich-j/all_documents/8984.", // To: <"ft.com.users":@enron.com>
|
||||||
|
"dasovich-j/all_documents/3514.", // To: <"ft.com.users":@enron.com>
|
||||||
|
"dasovich-j/all_documents/4467.", // To: <"ft.com.users":@enron.com>
|
||||||
|
"dasovich-j/all_documents/578.", // To: w/assts <govt.>
|
||||||
|
"dasovich-j/all_documents/3148.", // To: <"economist.com.readers":@enron.com>
|
||||||
|
"dasovich-j/all_documents/9953.", // To: <"economist.com.reader":@enron.com>
|
||||||
|
"dasovich-j/risk_analytics/3.", // To: w/assts <govt.>
|
||||||
|
"dasovich-j/notes_inbox/5391.", // To: <"ft.com.users":@enron.com>
|
||||||
|
"dasovich-j/notes_inbox/4952.", // To: <"economist.com.reader":@enron.com>
|
||||||
|
"dasovich-j/notes_inbox/2386.", // To: <"ft.com.users":@enron.com>
|
||||||
|
"dasovich-j/notes_inbox/1706.", // To: <"ft.com.users":@enron.com>
|
||||||
|
"dasovich-j/notes_inbox/1489.", // To: <"economist.com.readers":@enron.com>
|
||||||
|
"dasovich-j/notes_inbox/5.", // To: w/assts <govt.>
|
||||||
|
"kaminski-v/sites/19.", // To: <"the.desk":@enron.com>
|
||||||
|
"kaminski-v/sites/1.", // To: <"the.desk":@enron.com>
|
||||||
|
"kaminski-v/discussion_threads/5082.", // To: <"ft.com.users":@enron.com>
|
||||||
|
"kaminski-v/discussion_threads/4046.", // To: <"the.desk":@enron.com>
|
||||||
|
"kaminski-v/discussion_threads/4187.", // To: <"the.desk":@enron.com>
|
||||||
|
"kaminski-v/discussion_threads/8068.", // To: cats <breaktkhrough.>, risk <breakthrough.>, leaders <breaktkhrough.>
|
||||||
|
"kaminski-v/discussion_threads/7980.", // To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
|
||||||
|
"kaminski-v/all_documents/5970.", //To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
|
||||||
|
"kaminski-v/all_documents/5838.", // To + Cc: dogs <breakthrough.>, breakthrough.adm@enron.com, breakthrough.adm@enron.com,\r\n\tbreakthrough.adm@enron.com
|
||||||
|
"kaminski-v/all_documents/10070.", // To: <"ft.com.users":@enron.com>
|
||||||
|
"kaminski-v/all_documents/92.", // To: <"the.desk":@enron.com>
|
||||||
|
"kaminski-v/all_documents/276.", // To: <"the.desk":@enron.com>
|
||||||
|
"kaminski-v/technical/1.", // To: <"the.desk":@enron.com>
|
||||||
|
"kaminski-v/technical/7.", // To: <"the.desk":@enron.com>
|
||||||
|
"kaminski-v/notes_inbox/140.", // To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
|
||||||
|
"kaminski-v/notes_inbox/95.", // To + CC failed: cats <breaktkhrough.>, risk <breakthrough.>, leaders <breaktkhrough.>
|
||||||
|
"kean-s/archiving/untitled/1232.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
|
||||||
|
"kean-s/archiving/untitled/1688.", // To: w/assts <govt.>
|
||||||
|
"kean-s/sent/198.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
|
||||||
|
"kean-s/reg_risk/9.", // To: w/assts <govt.>
|
||||||
|
"kean-s/discussion_threads/950.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
|
||||||
|
"kean-s/discussion_threads/577.", // To: w/assts <govt.>
|
||||||
|
"kean-s/calendar/untitled/1096.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
|
||||||
|
"kean-s/calendar/untitled/640.", // To: w/assts <govt.>
|
||||||
|
"kean-s/all_documents/640.", // To: w/assts <govt.>
|
||||||
|
"kean-s/all_documents/1095.", // To: w/assts <govt.>
|
||||||
|
"kean-s/attachments/2030.", // To: w/assts <govt.>
|
||||||
|
"williams-w3/operations_committee_isas/10.", // To: z34655 <m>
|
||||||
|
]);
|
||||||
|
|
||||||
|
let known_bad_from = HashSet::from([
|
||||||
|
"skilling-j/inbox/223.", // From: pep <performance.>
|
||||||
|
]);
|
||||||
|
|
||||||
|
let mut i = 0;
|
||||||
|
for entry in WalkDir::new(d.as_path())
|
||||||
|
.into_iter()
|
||||||
|
.filter_map(|file| file.ok())
|
||||||
|
{
|
||||||
|
if entry.metadata().unwrap().is_file() {
|
||||||
|
let mail_path = entry.path();
|
||||||
|
let suffix = &mail_path.to_str().unwrap()[prefix_sz..];
|
||||||
|
|
||||||
|
// read file
|
||||||
|
let mut raw = Vec::new();
|
||||||
|
let mut f = File::open(mail_path).unwrap();
|
||||||
|
f.read_to_end(&mut raw).unwrap();
|
||||||
|
|
||||||
|
// parse
|
||||||
|
parser(&raw, |hdrs| {
|
||||||
|
let ok_date = hdrs.date.is_some();
|
||||||
|
let ok_from = hdrs.from.len() > 0;
|
||||||
|
let ok_fields = hdrs.bad_fields.len() == 0;
|
||||||
|
|
||||||
|
if !ok_date || !ok_from || !ok_fields {
|
||||||
|
println!("Issue with: {}", suffix);
|
||||||
|
}
|
||||||
|
|
||||||
|
assert!(ok_date);
|
||||||
|
|
||||||
|
if !known_bad_from.contains(suffix) {
|
||||||
|
assert!(ok_from);
|
||||||
|
}
|
||||||
|
|
||||||
|
if !known_bad_fields.contains(suffix) {
|
||||||
|
assert!(ok_fields);
|
||||||
|
}
|
||||||
|
|
||||||
|
i += 1;
|
||||||
|
if i % 1000 == 0 {
|
||||||
|
println!("Analyzed emails: {}", i);
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
340
ignore.test/known.rs
Normal file
340
ignore.test/known.rs
Normal file
|
@ -0,0 +1,340 @@
|
||||||
|
use chrono::{FixedOffset, TimeZone};
|
||||||
|
use imf_codec::fragments::{misc_token, model, section, part, trace};
|
||||||
|
use imf_codec::multipass;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
fn parser<'a, F>(input: &'a [u8], func: F) -> ()
|
||||||
|
where
|
||||||
|
F: FnOnce(&section::Section) -> (),
|
||||||
|
{
|
||||||
|
let seg = multipass::segment::new(input).unwrap();
|
||||||
|
let charset = seg.charset();
|
||||||
|
let fields = charset.fields().unwrap();
|
||||||
|
let field_names = fields.names();
|
||||||
|
let field_body = field_names.body();
|
||||||
|
let section = field_body.section();
|
||||||
|
|
||||||
|
func(&section.fields);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_headers() {
|
||||||
|
let fullmail: &[u8] = r#"Return-Path: <gitlab@example.com>
|
||||||
|
Delivered-To: quentin@example.com
|
||||||
|
Received: from smtp.example.com ([10.83.2.2])
|
||||||
|
by doradille with LMTP
|
||||||
|
id xyzabcd
|
||||||
|
(envelope-from <gitlab@example.com>)
|
||||||
|
for <quentin@example.com>; Tue, 13 Jun 2023 19:01:08 +0000
|
||||||
|
Date: Tue, 13 Jun 2023 10:01:10 +0200
|
||||||
|
From: Mary Smith
|
||||||
|
<mary@example.net>, "A\lan" <alan@example>
|
||||||
|
Sender: imf@example.com
|
||||||
|
Reply-To: "Mary Smith: Personal Account" <smith@home.example>
|
||||||
|
To: John Doe <jdoe@machine.example>
|
||||||
|
Cc: imf2@example.com
|
||||||
|
Bcc: (hidden)
|
||||||
|
Subject: Re: Saying Hello
|
||||||
|
Comments: A simple message
|
||||||
|
Comments: Not that complicated
|
||||||
|
comments : not valid header name but should be accepted
|
||||||
|
by the parser.
|
||||||
|
Keywords: hello, world
|
||||||
|
Héron: Raté
|
||||||
|
Raté raté
|
||||||
|
Keywords: salut, le, monde
|
||||||
|
Not a real header but should still recover
|
||||||
|
Message-ID: <3456@example.net>
|
||||||
|
In-Reply-To: <1234@local.machine.example>
|
||||||
|
References: <1234@local.machine.example>
|
||||||
|
Unknown: unknown
|
||||||
|
|
||||||
|
This is a reply to your hello.
|
||||||
|
"#
|
||||||
|
.as_bytes();
|
||||||
|
parser(fullmail, |parsed_section| {
|
||||||
|
assert_eq!(
|
||||||
|
parsed_section,
|
||||||
|
&section::Section {
|
||||||
|
date: Some(
|
||||||
|
&FixedOffset::east_opt(2 * 3600)
|
||||||
|
.unwrap()
|
||||||
|
.with_ymd_and_hms(2023, 06, 13, 10, 01, 10)
|
||||||
|
.unwrap()
|
||||||
|
),
|
||||||
|
|
||||||
|
from: vec![
|
||||||
|
&model::MailboxRef {
|
||||||
|
name: Some("Mary Smith".into()),
|
||||||
|
addrspec: model::AddrSpec {
|
||||||
|
local_part: "mary".into(),
|
||||||
|
domain: "example.net".into(),
|
||||||
|
}
|
||||||
|
},
|
||||||
|
&model::MailboxRef {
|
||||||
|
name: Some("Alan".into()),
|
||||||
|
addrspec: model::AddrSpec {
|
||||||
|
local_part: "alan".into(),
|
||||||
|
domain: "example".into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
|
||||||
|
sender: Some(&model::MailboxRef {
|
||||||
|
name: None,
|
||||||
|
addrspec: model::AddrSpec {
|
||||||
|
local_part: "imf".into(),
|
||||||
|
domain: "example.com".into(),
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
|
||||||
|
reply_to: vec![&model::AddressRef::Single(model::MailboxRef {
|
||||||
|
name: Some("Mary Smith: Personal Account".into()),
|
||||||
|
addrspec: model::AddrSpec {
|
||||||
|
local_part: "smith".into(),
|
||||||
|
domain: "home.example".into(),
|
||||||
|
}
|
||||||
|
})],
|
||||||
|
|
||||||
|
to: vec![&model::AddressRef::Single(model::MailboxRef {
|
||||||
|
name: Some("John Doe".into()),
|
||||||
|
addrspec: model::AddrSpec {
|
||||||
|
local_part: "jdoe".into(),
|
||||||
|
domain: "machine.example".into(),
|
||||||
|
}
|
||||||
|
})],
|
||||||
|
|
||||||
|
cc: vec![&model::AddressRef::Single(model::MailboxRef {
|
||||||
|
name: None,
|
||||||
|
addrspec: model::AddrSpec {
|
||||||
|
local_part: "imf2".into(),
|
||||||
|
domain: "example.com".into(),
|
||||||
|
}
|
||||||
|
})],
|
||||||
|
|
||||||
|
bcc: vec![],
|
||||||
|
|
||||||
|
msg_id: Some(&model::MessageId {
|
||||||
|
left: "3456",
|
||||||
|
right: "example.net"
|
||||||
|
}),
|
||||||
|
in_reply_to: vec![&model::MessageId {
|
||||||
|
left: "1234",
|
||||||
|
right: "local.machine.example"
|
||||||
|
}],
|
||||||
|
references: vec![&model::MessageId {
|
||||||
|
left: "1234",
|
||||||
|
right: "local.machine.example"
|
||||||
|
}],
|
||||||
|
|
||||||
|
subject: Some(&misc_token::Unstructured("Re: Saying Hello".into())),
|
||||||
|
|
||||||
|
comments: vec![
|
||||||
|
&misc_token::Unstructured("A simple message".into()),
|
||||||
|
&misc_token::Unstructured("Not that complicated".into()),
|
||||||
|
&misc_token::Unstructured(
|
||||||
|
"not valid header name but should be accepted by the parser.".into()
|
||||||
|
),
|
||||||
|
],
|
||||||
|
|
||||||
|
keywords: vec![
|
||||||
|
&misc_token::PhraseList(vec!["hello".into(), "world".into(),]),
|
||||||
|
&misc_token::PhraseList(vec!["salut".into(), "le".into(), "monde".into(),]),
|
||||||
|
],
|
||||||
|
|
||||||
|
received: vec![&trace::ReceivedLog(
|
||||||
|
r#"from smtp.example.com ([10.83.2.2])
|
||||||
|
by doradille with LMTP
|
||||||
|
id xyzabcd
|
||||||
|
(envelope-from <gitlab@example.com>)
|
||||||
|
for <quentin@example.com>"#
|
||||||
|
)],
|
||||||
|
|
||||||
|
return_path: vec![&model::MailboxRef {
|
||||||
|
name: None,
|
||||||
|
addrspec: model::AddrSpec {
|
||||||
|
local_part: "gitlab".into(),
|
||||||
|
domain: "example.com".into(),
|
||||||
|
}
|
||||||
|
}],
|
||||||
|
|
||||||
|
optional: HashMap::from([
|
||||||
|
(
|
||||||
|
"Delivered-To",
|
||||||
|
&misc_token::Unstructured("quentin@example.com".into())
|
||||||
|
),
|
||||||
|
("Unknown", &misc_token::Unstructured("unknown".into())),
|
||||||
|
]),
|
||||||
|
|
||||||
|
bad_fields: vec![],
|
||||||
|
|
||||||
|
unparsed: vec![
|
||||||
|
"Héron: Raté\n Raté raté\n",
|
||||||
|
"Not a real header but should still recover\n",
|
||||||
|
],
|
||||||
|
..section::Section::default()
|
||||||
|
}
|
||||||
|
)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_headers_mime() {
|
||||||
|
use imf_codec::fragments::mime;
|
||||||
|
let fullmail: &[u8] = r#"From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>
|
||||||
|
To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>
|
||||||
|
CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>
|
||||||
|
Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
|
||||||
|
=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=ISO-8859-1
|
||||||
|
Content-Transfer-Encoding: quoted-printable
|
||||||
|
Content-ID: <a@example.com>
|
||||||
|
Content-Description: hello
|
||||||
|
|
||||||
|
Now's the time =
|
||||||
|
for all folk to come=
|
||||||
|
to the aid of their country.
|
||||||
|
"#
|
||||||
|
.as_bytes();
|
||||||
|
|
||||||
|
parser(fullmail, |parsed_section| {
|
||||||
|
assert_eq!(
|
||||||
|
parsed_section,
|
||||||
|
&section::Section {
|
||||||
|
from: vec![
|
||||||
|
&model::MailboxRef {
|
||||||
|
name: Some("Keith Moore".into()),
|
||||||
|
addrspec: model::AddrSpec {
|
||||||
|
local_part: "moore".into(),
|
||||||
|
domain: "cs.utk.edu".into(),
|
||||||
|
}
|
||||||
|
},
|
||||||
|
],
|
||||||
|
|
||||||
|
to: vec![&model::AddressRef::Single(model::MailboxRef {
|
||||||
|
name: Some("Keld Jørn Simonsen".into()),
|
||||||
|
addrspec: model::AddrSpec {
|
||||||
|
local_part: "keld".into(),
|
||||||
|
domain: "dkuug.dk".into(),
|
||||||
|
}
|
||||||
|
})],
|
||||||
|
|
||||||
|
cc: vec![&model::AddressRef::Single(model::MailboxRef {
|
||||||
|
name: Some("André Pirard".into()),
|
||||||
|
addrspec: model::AddrSpec {
|
||||||
|
local_part: "PIRARD".into(),
|
||||||
|
domain: "vm1.ulg.ac.be".into(),
|
||||||
|
}
|
||||||
|
})],
|
||||||
|
|
||||||
|
subject: Some(&misc_token::Unstructured("If you can read this you understand the example.".into())),
|
||||||
|
mime_version: Some(&mime::Version{ major: 1, minor: 0 }),
|
||||||
|
mime: section::MIMESection {
|
||||||
|
content_type: Some(&mime::Type::Text(mime::TextDesc {
|
||||||
|
charset: Some(mime::EmailCharset::ISO_8859_1),
|
||||||
|
subtype: mime::TextSubtype::Plain,
|
||||||
|
unknown_parameters: vec![]
|
||||||
|
})),
|
||||||
|
content_transfer_encoding: Some(&mime::Mechanism::QuotedPrintable),
|
||||||
|
content_id: Some(&model::MessageId {
|
||||||
|
left: "a",
|
||||||
|
right: "example.com"
|
||||||
|
}),
|
||||||
|
content_description: Some(&misc_token::Unstructured("hello".into())),
|
||||||
|
..section::MIMESection::default()
|
||||||
|
},
|
||||||
|
..section::Section::default()
|
||||||
|
}
|
||||||
|
);
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parser_bodystruct<'a, F>(input: &'a [u8], func: F) -> ()
|
||||||
|
where
|
||||||
|
F: FnOnce(&part::PartNode) -> (),
|
||||||
|
{
|
||||||
|
let seg = multipass::segment::new(input).unwrap();
|
||||||
|
let charset = seg.charset();
|
||||||
|
let fields = charset.fields().unwrap();
|
||||||
|
let field_names = fields.names();
|
||||||
|
let field_body = field_names.body();
|
||||||
|
let section = field_body.section();
|
||||||
|
let bodystruct = section.body_structure();
|
||||||
|
|
||||||
|
func(&bodystruct.body);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_multipart() {
|
||||||
|
let fullmail: &[u8] = r#"Date: Sat, 8 Jul 2023 07:14:29 +0200
|
||||||
|
From: Grrrnd Zero <grrrndzero@example.org>
|
||||||
|
To: John Doe <jdoe@machine.example>
|
||||||
|
Subject: Re: Saying Hello
|
||||||
|
Message-ID: <NTAxNzA2AC47634Y366BAMTY4ODc5MzQyODY0ODY5@www.grrrndzero.org>
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: multipart/alternative;
|
||||||
|
boundary="b1_e376dc71bafc953c0b0fdeb9983a9956"
|
||||||
|
Content-Transfer-Encoding: 7bit
|
||||||
|
|
||||||
|
This is a multi-part message in MIME format.
|
||||||
|
|
||||||
|
--b1_e376dc71bafc953c0b0fdeb9983a9956
|
||||||
|
Content-Type: text/plain; charset=utf-8
|
||||||
|
Content-Transfer-Encoding: quoted-printable
|
||||||
|
|
||||||
|
GZ
|
||||||
|
OoOoO
|
||||||
|
oOoOoOoOo
|
||||||
|
oOoOoOoOoOoOoOoOo
|
||||||
|
oOoOoOoOoOoOoOoOoOoOoOo
|
||||||
|
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo
|
||||||
|
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
|
||||||
|
|
||||||
|
--b1_e376dc71bafc953c0b0fdeb9983a9956
|
||||||
|
Content-Type: text/html; charset=us-ascii
|
||||||
|
|
||||||
|
<div style="text-align: center;"><strong>GZ</strong><br />
|
||||||
|
OoOoO<br />
|
||||||
|
oOoOoOoOo<br />
|
||||||
|
oOoOoOoOoOoOoOoOo<br />
|
||||||
|
oOoOoOoOoOoOoOoOoOoOoOo<br />
|
||||||
|
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo<br />
|
||||||
|
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
|
||||||
|
|
||||||
|
--b1_e376dc71bafc953c0b0fdeb9983a9956--
|
||||||
|
"#.as_bytes();
|
||||||
|
|
||||||
|
parser_bodystruct(fullmail, |part| {
|
||||||
|
assert_eq!(part, &part::PartNode::Composite(
|
||||||
|
part::PartHeader {
|
||||||
|
..part::PartHeader::default()
|
||||||
|
},
|
||||||
|
vec![
|
||||||
|
part::PartNode::Discrete(
|
||||||
|
part::PartHeader {
|
||||||
|
..part::PartHeader::default()
|
||||||
|
},
|
||||||
|
r#"GZ
|
||||||
|
OoOoO
|
||||||
|
oOoOoOoOo
|
||||||
|
oOoOoOoOoOoOoOoOo
|
||||||
|
oOoOoOoOoOoOoOoOoOoOoOo
|
||||||
|
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo
|
||||||
|
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO"#.as_bytes()
|
||||||
|
),
|
||||||
|
part::PartNode::Discrete(
|
||||||
|
part::PartHeader {
|
||||||
|
..part::PartHeader::default()
|
||||||
|
},
|
||||||
|
r#"<div style="text-align: center;"><strong>GZ</strong><br />
|
||||||
|
OoOoO<br />
|
||||||
|
oOoOoOoOo<br />
|
||||||
|
oOoOoOoOoOoOoOoOo<br />
|
||||||
|
oOoOoOoOoOoOoOoOoOoOoOo<br />
|
||||||
|
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo<br />
|
||||||
|
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />"#.as_bytes()
|
||||||
|
),
|
||||||
|
]));
|
||||||
|
});
|
||||||
|
}
|
|
@ -15,12 +15,6 @@ use crate::fragments::lazy;
|
||||||
use crate::fragments::whitespace::cfws;
|
use crate::fragments::whitespace::cfws;
|
||||||
use crate::fragments::quoted::quoted_string;
|
use crate::fragments::quoted::quoted_string;
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
|
||||||
pub struct Version {
|
|
||||||
pub major: u32,
|
|
||||||
pub minor: u32,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub enum Type<'a> {
|
pub enum Type<'a> {
|
||||||
// Composite types
|
// Composite types
|
||||||
|
@ -278,20 +272,6 @@ impl<'a> From<&'a str> for TextSubtype<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
pub fn version(input: &str) -> IResult<&str, Version> {
|
|
||||||
let (rest, (_, major, _, _, _, minor, _)) = tuple((
|
|
||||||
opt(cfws),
|
|
||||||
character::u32,
|
|
||||||
opt(cfws),
|
|
||||||
tag("."),
|
|
||||||
opt(cfws),
|
|
||||||
character::u32,
|
|
||||||
opt(cfws),
|
|
||||||
))(input)?;
|
|
||||||
Ok((rest, Version { major, minor }))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn parameter(input: &str) -> IResult<&str, Parameter> {
|
pub fn parameter(input: &str) -> IResult<&str, Parameter> {
|
||||||
let (rest, (pname, _, pvalue)) = tuple((
|
let (rest, (pname, _, pvalue)) = tuple((
|
||||||
token,
|
token,
|
||||||
|
|
28
src/mime/section.rs
Normal file
28
src/mime/section.rs
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Default)]
|
||||||
|
pub struct MIMESection<'a> {
|
||||||
|
pub content_type: Option<&'a Type<'a>>,
|
||||||
|
pub content_transfer_encoding: Option<&'a Mechanism<'a>>,
|
||||||
|
pub content_id: Option<&'a MessageId<'a>>,
|
||||||
|
pub content_description: Option<&'a Unstructured>,
|
||||||
|
pub optional: HashMap<&'a str, &'a Unstructured>,
|
||||||
|
pub unparsed: Vec<&'a str>,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
impl<'a> FromIterator<&'a MIMEField<'a>> for MIMESection<'a> {
|
||||||
|
fn from_iter<I: IntoIterator<Item = &'a MIMEField<'a>>>(iter: I) -> Self {
|
||||||
|
let mut section = MIMESection::default();
|
||||||
|
for field in iter {
|
||||||
|
match field {
|
||||||
|
MIMEField::ContentType(v) => section.content_type = Some(v),
|
||||||
|
MIMEField::ContentTransferEncoding(v) => section.content_transfer_encoding = Some(v),
|
||||||
|
MIMEField::ContentID(v) => section.content_id = Some(v),
|
||||||
|
MIMEField::ContentDescription(v) => section.content_description = Some(v),
|
||||||
|
MIMEField::Optional(k, v) => { section.optional.insert(k, v); },
|
||||||
|
MIMEField::Rescue(v) => section.unparsed.push(v),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
section
|
||||||
|
}
|
||||||
|
}
|
|
@ -140,6 +140,13 @@ pub fn address_list_cfws(input: &[u8]) -> IResult<&[u8], Vec<AddressRef>> {
|
||||||
Ok((input, vec![]))
|
Ok((input, vec![]))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn nullable_address_list(input: &[u8]) -> IResult<&[u8], Vec<>> {
|
||||||
|
map(
|
||||||
|
opt(alt((address_list, address_list_cfws))),
|
||||||
|
|v| v.unwrap_or(vec![]),
|
||||||
|
)(input)
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
88
src/rfc5322/field.rs
Normal file
88
src/rfc5322/field.rs
Normal file
|
@ -0,0 +1,88 @@
|
||||||
|
use nom::{
|
||||||
|
IResult,
|
||||||
|
};
|
||||||
|
|
||||||
|
use crate::rfc5322::address::{MailboxList, AddressList};
|
||||||
|
use crate::rfc5322::mailbox::MailboxRef;
|
||||||
|
use crate::rfc5322::identification::{MessageId, MessageIdList};
|
||||||
|
use crate::rfc5322::trace::ReceivedLog;
|
||||||
|
use crate::text::misc_token::{Unstructured, PhraseList};
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
pub enum Field<'a> {
|
||||||
|
// 3.6.1. The Origination Date Field
|
||||||
|
Date(DateTime<'a>),
|
||||||
|
|
||||||
|
// 3.6.2. Originator Fields
|
||||||
|
From(MailboxList<'a>),
|
||||||
|
Sender(Mailbox<'a>),
|
||||||
|
ReplyTo(AddressList<'a>),
|
||||||
|
|
||||||
|
// 3.6.3. Destination Address Fields
|
||||||
|
To(AddressList<'a>),
|
||||||
|
Cc(AddressList<'a>),
|
||||||
|
Bcc(NullableAddressList<'a>),
|
||||||
|
|
||||||
|
// 3.6.4. Identification Fields
|
||||||
|
MessageID(Identifier<'a>),
|
||||||
|
InReplyTo(IdentifierList<'a>),
|
||||||
|
References(IdentifierList<'a>),
|
||||||
|
|
||||||
|
// 3.6.5. Informational Fields
|
||||||
|
Subject(Unstructured<'a>),
|
||||||
|
Comments(Unstructured<'a>),
|
||||||
|
Keywords(PhraseList<'a>),
|
||||||
|
|
||||||
|
// 3.6.6 Resent Fields (not implemented)
|
||||||
|
// 3.6.7 Trace Fields
|
||||||
|
Received(ReceivedLog<'a>),
|
||||||
|
ReturnPath(Option<AddrSpec<'a>>),
|
||||||
|
|
||||||
|
MIMEVersion(Version<'a>),
|
||||||
|
Optional(&'a [u8], Unstructured<'a>),
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn field(input: &[u8]) -> IResult<&[u8], Field<'a>> {
|
||||||
|
let (name, rest) = field_name(input)?;
|
||||||
|
match name.to_lowercase().as_ref() {
|
||||||
|
"date" => datetime::section(rest).map(Field::Date),
|
||||||
|
"from" => mailbox_list(rest).map(Field::From),
|
||||||
|
"sender" => mailbox(rest).map(Field::Sender),
|
||||||
|
"reply-to" => address_list(rest).map(Field::ReplyTo),
|
||||||
|
|
||||||
|
"to" => address_list(rest).map(Field::To),
|
||||||
|
"cc" => address_list(rest).map(Field::Cc),
|
||||||
|
"bcc" => nullable_address_list(rest).map(Field::Bcc),
|
||||||
|
|
||||||
|
"message-id" => msg_id(rest).map(Field::MessageID),
|
||||||
|
"in-reply-to" => msg_list(rest).map(Field::InReplyTo),
|
||||||
|
"references" => msg_list(rest).map(Field::References),
|
||||||
|
|
||||||
|
"subject" => unstructured(rest).map(Field::Subject),
|
||||||
|
"comments" => unstructured(rest).map(Field::Comments),
|
||||||
|
"keywords" => phrase_list(rest).map(Field::Keywords),
|
||||||
|
|
||||||
|
"return-path" => return_path(rest).map(Field::ReturnPath),
|
||||||
|
"received" => received_log(rest).map(Field::ReceivedLog),
|
||||||
|
|
||||||
|
"mime-version" => version(rest).map(Field::MIMEVersion),
|
||||||
|
_ => unstructured(rest).map(|v| Field::Optional(name, v)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Optional field
|
||||||
|
///
|
||||||
|
/// ```abnf
|
||||||
|
/// field = field-name ":" unstructured CRLF
|
||||||
|
/// field-name = 1*ftext
|
||||||
|
/// ftext = %d33-57 / ; Printable US-ASCII
|
||||||
|
/// %d59-126 ; characters not including
|
||||||
|
/// ; ":".
|
||||||
|
/// ```
|
||||||
|
fn field_name(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
||||||
|
terminated(
|
||||||
|
take_while1(|c| c >= 0x21 && c <= 0x7E && c != 0x3A),
|
||||||
|
tuple((space0, tag(b":"), space0)),
|
||||||
|
)(input)
|
||||||
|
}
|
|
@ -18,27 +18,6 @@ pub struct MessageId<'a> {
|
||||||
}
|
}
|
||||||
pub type MessageIdList<'a> = Vec<MessageId<'a>>;
|
pub type MessageIdList<'a> = Vec<MessageId<'a>>;
|
||||||
|
|
||||||
/*
|
|
||||||
impl<'a> TryFrom<&'a lazy::Identifier<'a>> for MessageId<'a> {
|
|
||||||
type Error = IMFError<'a>;
|
|
||||||
|
|
||||||
fn try_from(id: &'a lazy::Identifier<'a>) -> Result<Self, Self::Error> {
|
|
||||||
msg_id(id.0)
|
|
||||||
.map(|(_, i)| i)
|
|
||||||
.map_err(|e| IMFError::MessageID(e))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> TryFrom<&'a lazy::IdentifierList<'a>> for MessageIdList<'a> {
|
|
||||||
type Error = IMFError<'a>;
|
|
||||||
|
|
||||||
fn try_from(id: &'a lazy::IdentifierList<'a>) -> Result<Self, Self::Error> {
|
|
||||||
many1(msg_id)(id.0)
|
|
||||||
.map(|(_, i)| i)
|
|
||||||
.map_err(|e| IMFError::MessageIDList(e))
|
|
||||||
}
|
|
||||||
}*/
|
|
||||||
|
|
||||||
/// Message identifier
|
/// Message identifier
|
||||||
///
|
///
|
||||||
/// ```abnf
|
/// ```abnf
|
||||||
|
@ -53,6 +32,10 @@ pub fn msg_id(input: &[u8]) -> IResult<&[u8], MessageId> {
|
||||||
Ok((input, MessageId { left, right }))
|
Ok((input, MessageId { left, right }))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn msg_list(input: &[u8]) -> IResult<&[u8], MessageIdList> {
|
||||||
|
many1(msg_id)(input)
|
||||||
|
}
|
||||||
|
|
||||||
// @FIXME Missing obsolete
|
// @FIXME Missing obsolete
|
||||||
fn id_left(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
fn id_left(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
||||||
dot_atom_text(input)
|
dot_atom_text(input)
|
||||||
|
|
|
@ -9,57 +9,63 @@ use crate::fragments::trace::ReceivedLog;
|
||||||
use chrono::{DateTime, FixedOffset};
|
use chrono::{DateTime, FixedOffset};
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Default)]
|
#[derive(Debug, PartialEq, Default)]
|
||||||
pub struct Section<'a> {
|
pub struct Message<'a> {
|
||||||
// 3.6.1. The Origination Date Field
|
// 3.6.1. The Origination Date Field
|
||||||
pub date: Option<&'a DateTime<FixedOffset>>,
|
pub date: Option<DateTime<FixedOffset>>,
|
||||||
|
|
||||||
// 3.6.2. Originator Fields
|
// 3.6.2. Originator Fields
|
||||||
pub from: Vec<&'a MailboxRef>,
|
pub from: Vec<MailboxRef<'a>>,
|
||||||
pub sender: Option<&'a MailboxRef>,
|
pub sender: Option<MailboxRef<'a>>,
|
||||||
pub reply_to: Vec<&'a AddressRef>,
|
pub reply_to: Vec<AddressRef<'a>>,
|
||||||
|
|
||||||
// 3.6.3. Destination Address Fields
|
// 3.6.3. Destination Address Fields
|
||||||
pub to: Vec<&'a AddressRef>,
|
pub to: Vec<AddressRef<'a>>,
|
||||||
pub cc: Vec<&'a AddressRef>,
|
pub cc: Vec<AddressRef<'a>>,
|
||||||
pub bcc: Vec<&'a AddressRef>,
|
pub bcc: Vec<AddressRef<'a>>,
|
||||||
|
|
||||||
// 3.6.4. Identification Fields
|
// 3.6.4. Identification Fields
|
||||||
pub msg_id: Option<&'a MessageId<'a>>,
|
pub msg_id: Option<MessageId<'a>>,
|
||||||
pub in_reply_to: Vec<&'a MessageId<'a>>,
|
pub in_reply_to: Vec<MessageId<'a>>,
|
||||||
pub references: Vec<&'a MessageId<'a>>,
|
pub references: Vec<MessageId<'a>>,
|
||||||
|
|
||||||
// 3.6.5. Informational Fields
|
// 3.6.5. Informational Fields
|
||||||
pub subject: Option<&'a Unstructured>,
|
pub subject: Option<Unstructured<'a>>,
|
||||||
pub comments: Vec<&'a Unstructured>,
|
pub comments: Vec<Unstructured<'a>>,
|
||||||
pub keywords: Vec<&'a PhraseList>,
|
pub keywords: Vec<PhraseList<'a>>,
|
||||||
|
|
||||||
// 3.6.6 Not implemented
|
// 3.6.6 Not implemented
|
||||||
// 3.6.7 Trace Fields
|
// 3.6.7 Trace Fields
|
||||||
pub return_path: Vec<&'a MailboxRef>,
|
pub return_path: Vec<MailboxRef<'a>>,
|
||||||
pub received: Vec<&'a ReceivedLog<'a>>,
|
pub received: Vec<ReceivedLog<'a>>,
|
||||||
|
|
||||||
// 3.6.8. Optional Fields
|
// 3.6.8. Optional Fields
|
||||||
pub optional: HashMap<&'a str, &'a Unstructured>,
|
pub optional: HashMap<&'a [u8], Unstructured<'a>>,
|
||||||
|
|
||||||
// MIME
|
|
||||||
pub mime_version: Option<&'a Version>,
|
|
||||||
pub mime: MIMESection<'a>,
|
|
||||||
|
|
||||||
// Recovery
|
// Recovery
|
||||||
pub bad_fields: Vec<&'a lazy::Field<'a>>,
|
pub unparsed: Vec<&'a [u8]>,
|
||||||
pub unparsed: Vec<&'a str>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Default)]
|
impl<'a> FromIterator<&'a [u8]> for Message<'a> {
|
||||||
pub struct MIMESection<'a> {
|
fn from_iter<I: IntoIterator<Item = &'a [u8]>>(iter: I) -> Self {
|
||||||
pub content_type: Option<&'a Type<'a>>,
|
iter.fold(
|
||||||
pub content_transfer_encoding: Option<&'a Mechanism<'a>>,
|
Message::default(),
|
||||||
pub content_id: Option<&'a MessageId<'a>>,
|
|mut msg, field| {
|
||||||
pub content_description: Option<&'a Unstructured>,
|
match field_name(field) {
|
||||||
pub optional: HashMap<&'a str, &'a Unstructured>,
|
Ok((name, value)) => xx,
|
||||||
pub unparsed: Vec<&'a str>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
match field {
|
||||||
|
|
||||||
|
}
|
||||||
|
msg
|
||||||
|
}
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//@FIXME min and max limits are not enforced,
|
//@FIXME min and max limits are not enforced,
|
||||||
// it may result in missing data or silently overridden data.
|
// it may result in missing data or silently overridden data.
|
||||||
impl<'a> FromIterator<&'a Field<'a>> for Section<'a> {
|
impl<'a> FromIterator<&'a Field<'a>> for Section<'a> {
|
||||||
|
@ -104,19 +110,3 @@ impl<'a> FromIterator<&'a Field<'a>> for Section<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> FromIterator<&'a MIMEField<'a>> for MIMESection<'a> {
|
|
||||||
fn from_iter<I: IntoIterator<Item = &'a MIMEField<'a>>>(iter: I) -> Self {
|
|
||||||
let mut section = MIMESection::default();
|
|
||||||
for field in iter {
|
|
||||||
match field {
|
|
||||||
MIMEField::ContentType(v) => section.content_type = Some(v),
|
|
||||||
MIMEField::ContentTransferEncoding(v) => section.content_transfer_encoding = Some(v),
|
|
||||||
MIMEField::ContentID(v) => section.content_id = Some(v),
|
|
||||||
MIMEField::ContentDescription(v) => section.content_description = Some(v),
|
|
||||||
MIMEField::Optional(k, v) => { section.optional.insert(k, v); },
|
|
||||||
MIMEField::Rescue(v) => section.unparsed.push(v),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
section
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
@ -4,3 +4,4 @@ pub mod datetime;
|
||||||
pub mod trace;
|
pub mod trace;
|
||||||
pub mod identification;
|
pub mod identification;
|
||||||
pub mod mime;
|
pub mod mime;
|
||||||
|
pub mod field;
|
||||||
|
|
|
@ -35,7 +35,7 @@ impl<'a> TryFrom<&'a lazy::ReceivedLog<'a>> for ReceivedLog<'a> {
|
||||||
}
|
}
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
pub fn received_body(input: &[u8]) -> IResult<&[u8], ReceivedLog> {
|
pub fn received_log(input: &[u8]) -> IResult<&[u8], ReceivedLog> {
|
||||||
map(
|
map(
|
||||||
tuple((
|
tuple((
|
||||||
many0(received_tokens),
|
many0(received_tokens),
|
||||||
|
@ -46,7 +46,7 @@ pub fn received_body(input: &[u8]) -> IResult<&[u8], ReceivedLog> {
|
||||||
)(input)
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn return_path_body(input: &[u8]) -> IResult<&[u8], Option<mailbox::AddrSpec>> {
|
pub fn return_path(input: &[u8]) -> IResult<&[u8], Option<mailbox::AddrSpec>> {
|
||||||
alt((map(mailbox::angle_addr, |a| Some(a)), empty_path))(input)
|
alt((map(mailbox::angle_addr, |a| Some(a)), empty_path))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,9 @@ use crate::text::{
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Default)]
|
#[derive(Debug, PartialEq, Default)]
|
||||||
pub struct PhraseList(pub Vec<String>);
|
pub struct PhraseList(pub Vec<String>);
|
||||||
|
pub fn phrase_list(input: &'a [u8]) -> IResult<&[u8], PhraseList> {
|
||||||
|
separated_list1(tag(","), phrase)(input)
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
impl<'a> TryFrom<&'a lazy::Unstructured<'a>> for Unstructured {
|
impl<'a> TryFrom<&'a lazy::Unstructured<'a>> for Unstructured {
|
||||||
|
|
Loading…
Reference in a new issue