wip, still broken

2023-07-19 22:27:59 +02:00 · 2023-07-19 22:27:59 +02:00 · cd5289c8c1
commit cd5289c8c1
parent b3bec8656d
12 changed files with 640 additions and 103 deletions
--- a/README.md
+++ b/README.md
@ -29,19 +29,7 @@ Current known limitations/bugs:

 ## Design

-Multipass design: each pass is in charge of a specific work.
-*Having multiple pass does not necessarily lead to abyssmal performances.
-For example, the [Chez Scheme compiler](https://legacy.cs.indiana.edu/~dyb/pubs/commercial-nanopass.pdf) 
-pioneered the "Nanopass" concept and showcases excellent performances.*
-
-Currently, you can use the following passes:
- - `segment.rs` - Extract the header section by finding the `CRLFCRLF` token.
- - `guess_charset.rs` - Find the header section encoding (should be ASCII or UTF8 but some corpus contains ISO-8859-1 headers)
- - `extract_fields.rs` - Extract the headers line by lines, taking into account Foldable White Space.
- - `field_lazy.rs` - Try to recognize the header fields (`From`, `To`, `Date`, etc.) but do not parse their value.  
- - `field_eager.rs` - Parse the value of each known header fields.  
- - `header_section.rs` - Aggregate the various fields in a single structure.  
-
+*Todo*

 ## Testing strategy

--- a/ignore.test/enron.rs
+++ b/ignore.test/enron.rs
@ -0,0 +1,129 @@
+use imf_codec::fragments::section;
+use imf_codec::multipass;
+use std::collections::HashSet;
+use std::fs::File;
+use std::io::Read;
+use std::path::PathBuf;
+use walkdir::WalkDir;
+
+/// Runs every multipass stage on `input` and hands the resulting header
+/// section to `func`.
+///
+/// Panics if segmentation or field extraction fails.
+fn parser<'a, F>(input: &'a [u8], func: F) -> ()
+where
+    F: FnOnce(&section::Section) -> (),
+{
+    // Each stage is built from the value produced by the previous one.
+    let segmented = multipass::segment::new(input).unwrap();
+    let decoded = segmented.charset();
+    let extracted = decoded.fields().unwrap();
+    let named = extracted.names();
+    let parsed = named.body();
+    let aggregated = parsed.section();
+
+    func(&aggregated.fields);
+}
+
+/// Parses every file found under `resources/enron/maildir/` and checks that a
+/// date, at least one `From` mailbox and no bad field are reported, except for
+/// the mails listed in `known_bad_fields` / `known_bad_from`.
+///
+/// Ignored by default; run it explicitly with `cargo test -- --ignored`.
+#[test]
+#[ignore]
+fn test_enron500k() {
+    let mut d = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    d.push("resources/enron/maildir/");
+    // Length of the corpus root, used to turn absolute paths into corpus-relative ones.
+    let prefix_sz = d.as_path().to_str().unwrap().len();
+    //d.push("williams-w3/");
+
+    let known_bad_fields = HashSet::from([
+        "white-s/calendar/113.",       // To: east <7..>
+        "skilling-j/inbox/223.",       // From: pep <performance.>
+        "jones-t/all_documents/9806.", // To: <"tibor.vizkelety":@enron.com>
+        "jones-t/notes_inbox/3303.",   // To: <"tibor.vizkelety":@enron.com>
+        "lokey-t/calendar/33.",        // A second Date entry for the calendar containing
+        // Date:       Monday, March 12
+        "zipper-a/inbox/199.",                       // To: e-mail <mari.>
+        "dasovich-j/deleted_items/128.",             // To: f62489 <g>
+        "dasovich-j/all_documents/677.",             // To: w/assts <govt.>
+        "dasovich-j/all_documents/8984.",            // To: <"ft.com.users":@enron.com>
+        "dasovich-j/all_documents/3514.",            // To: <"ft.com.users":@enron.com>
+        "dasovich-j/all_documents/4467.",            // To: <"ft.com.users":@enron.com>
+        "dasovich-j/all_documents/578.",             // To: w/assts <govt.>
+        "dasovich-j/all_documents/3148.",            // To: <"economist.com.readers":@enron.com>
+        "dasovich-j/all_documents/9953.",            // To: <"economist.com.reader":@enron.com>
+        "dasovich-j/risk_analytics/3.",              // To: w/assts <govt.>
+        "dasovich-j/notes_inbox/5391.",              // To: <"ft.com.users":@enron.com>
+        "dasovich-j/notes_inbox/4952.",              // To: <"economist.com.reader":@enron.com>
+        "dasovich-j/notes_inbox/2386.",              // To: <"ft.com.users":@enron.com>
+        "dasovich-j/notes_inbox/1706.",              // To: <"ft.com.users":@enron.com>
+        "dasovich-j/notes_inbox/1489.",              // To: <"economist.com.readers":@enron.com>
+        "dasovich-j/notes_inbox/5.",                 // To: w/assts <govt.>
+        "kaminski-v/sites/19.",                      // To: <"the.desk":@enron.com>
+        "kaminski-v/sites/1.",                       // To: <"the.desk":@enron.com>
+        "kaminski-v/discussion_threads/5082.",       // To: <"ft.com.users":@enron.com>
+        "kaminski-v/discussion_threads/4046.",       // To: <"the.desk":@enron.com>
+        "kaminski-v/discussion_threads/4187.",       // To: <"the.desk":@enron.com>
+        "kaminski-v/discussion_threads/8068.", // To: cats <breaktkhrough.>, risk <breakthrough.>, leaders <breaktkhrough.>
+        "kaminski-v/discussion_threads/7980.", // To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
+        "kaminski-v/all_documents/5970.", //To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
+        "kaminski-v/all_documents/5838.", // To + Cc: dogs <breakthrough.>, breakthrough.adm@enron.com, breakthrough.adm@enron.com,\r\n\tbreakthrough.adm@enron.com
+        "kaminski-v/all_documents/10070.", // To: <"ft.com.users":@enron.com>
+        "kaminski-v/all_documents/92.",   // To: <"the.desk":@enron.com>
+        "kaminski-v/all_documents/276.",  // To: <"the.desk":@enron.com>
+        "kaminski-v/technical/1.",        // To: <"the.desk":@enron.com>
+        "kaminski-v/technical/7.",        // To: <"the.desk":@enron.com>
+        "kaminski-v/notes_inbox/140.", // To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
+        "kaminski-v/notes_inbox/95.", // To + CC failed: cats <breaktkhrough.>, risk <breakthrough.>, leaders <breaktkhrough.>
+        "kean-s/archiving/untitled/1232.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
+        "kean-s/archiving/untitled/1688.", // To: w/assts <govt.>
+        "kean-s/sent/198.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
+        "kean-s/reg_risk/9.", // To: w/assts <govt.>
+        "kean-s/discussion_threads/950.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
+        "kean-s/discussion_threads/577.", // To: w/assts <govt.>
+        "kean-s/calendar/untitled/1096.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
+        "kean-s/calendar/untitled/640.",  // To: w/assts <govt.>
+        "kean-s/all_documents/640.",      // To: w/assts <govt.>
+        "kean-s/all_documents/1095.",     // To: w/assts <govt.>
+        "kean-s/attachments/2030.",       // To: w/assts <govt.>
+        "williams-w3/operations_committee_isas/10.", // To: z34655 <m>
+    ]);
+
+    let known_bad_from = HashSet::from([
+        "skilling-j/inbox/223.", // From: pep <performance.>
+    ]);
+
+    let mut i = 0;
+    for entry in WalkDir::new(d.as_path())
+        .into_iter()
+        .filter_map(|file| file.ok())
+        // The file type comes from the directory walk itself: no extra
+        // `metadata()` call, and no panic if that call were to fail.
+        .filter(|entry| entry.file_type().is_file())
+    {
+        let mail_path = entry.path();
+        let suffix = &mail_path.to_str().unwrap()[prefix_sz..];
+
+        // read file
+        let mut raw = Vec::new();
+        let mut f = File::open(mail_path).unwrap();
+        f.read_to_end(&mut raw).unwrap();
+
+        // parse
+        parser(&raw, |hdrs| {
+            let ok_date = hdrs.date.is_some();
+            let ok_from = !hdrs.from.is_empty();
+            let ok_fields = hdrs.bad_fields.is_empty();
+
+            // Print the offending mail before any assertion aborts the run.
+            if !ok_date || !ok_from || !ok_fields {
+                println!("Issue with: {}", suffix);
+            }
+
+            assert!(ok_date);
+
+            if !known_bad_from.contains(suffix) {
+                assert!(ok_from);
+            }
+
+            if !known_bad_fields.contains(suffix) {
+                assert!(ok_fields);
+            }
+
+            i += 1;
+            if i % 1000 == 0 {
+                println!("Analyzed emails: {}", i);
+            }
+        });
+    }
+}
--- a/ignore.test/known.rs
+++ b/ignore.test/known.rs
@ -0,0 +1,340 @@
+use chrono::{FixedOffset, TimeZone};
+use imf_codec::fragments::{misc_token, model, section, part, trace};
+use imf_codec::multipass;
+use std::collections::HashMap;
+
+/// Feeds `input` through each multipass stage in turn and passes the final
+/// header section to `func`.
+///
+/// Panics if segmentation or field extraction fails.
+fn parser<'a, F>(input: &'a [u8], func: F) -> ()
+where
+    F: FnOnce(&section::Section) -> (),
+{
+    // Each stage is built from the value produced by the previous one.
+    let stage_segment = multipass::segment::new(input).unwrap();
+    let stage_charset = stage_segment.charset();
+    let stage_fields = stage_charset.fields().unwrap();
+    let stage_names = stage_fields.names();
+    let stage_body = stage_names.body();
+    let stage_section = stage_body.section();
+
+    func(&stage_section.fields);
+}
+
+/// Checks that a complete header section is parsed into the expected
+/// `section::Section`.
+///
+/// The fixture covers folded fields, repeated `Comments`/`Keywords` fields, a
+/// field name followed by a space before the colon, and two lines that are
+/// expected to end up in `unparsed`.
+#[test]
+fn test_headers() {
+    let fullmail: &[u8] = r#"Return-Path: <gitlab@example.com>
+Delivered-To: quentin@example.com
+Received: from smtp.example.com ([10.83.2.2])
+	by doradille with LMTP
+	id xyzabcd
+	(envelope-from <gitlab@example.com>)
+	for <quentin@example.com>; Tue, 13 Jun 2023 19:01:08 +0000
+Date: Tue, 13 Jun 2023 10:01:10 +0200
+From: Mary Smith
+ <mary@example.net>, "A\lan" <alan@example>
+Sender: imf@example.com
+Reply-To: "Mary Smith: Personal Account" <smith@home.example>
+To: John Doe <jdoe@machine.example>
+Cc: imf2@example.com
+Bcc: (hidden)
+Subject: Re: Saying Hello
+Comments: A simple message
+Comments: Not that complicated
+comments : not valid header name but should be accepted
+    by the parser.
+Keywords: hello, world
+Héron: Raté
+ Raté raté
+Keywords: salut, le, monde
+Not a real header but should still recover
+Message-ID: <3456@example.net>
+In-Reply-To: <1234@local.machine.example>
+References: <1234@local.machine.example>
+Unknown: unknown
+
+This is a reply to your hello.
+"#
+    .as_bytes();
+    parser(fullmail, |parsed_section| {
+        assert_eq!(
+            parsed_section,
+            &section::Section {
+                date: Some(
+                    &FixedOffset::east_opt(2 * 3600)
+                        .unwrap()
+                        .with_ymd_and_hms(2023, 06, 13, 10, 01, 10)
+                        .unwrap()
+                ),
+
+                from: vec![
+                    &model::MailboxRef {
+                        name: Some("Mary Smith".into()),
+                        addrspec: model::AddrSpec {
+                            local_part: "mary".into(),
+                            domain: "example.net".into(),
+                        }
+                    },
+                    &model::MailboxRef {
+                        name: Some("Alan".into()),
+                        addrspec: model::AddrSpec {
+                            local_part: "alan".into(),
+                            domain: "example".into(),
+                        }
+                    }
+                ],
+
+                sender: Some(&model::MailboxRef {
+                    name: None,
+                    addrspec: model::AddrSpec {
+                        local_part: "imf".into(),
+                        domain: "example.com".into(),
+                    }
+                }),
+
+                reply_to: vec![&model::AddressRef::Single(model::MailboxRef {
+                    name: Some("Mary Smith: Personal Account".into()),
+                    addrspec: model::AddrSpec {
+                        local_part: "smith".into(),
+                        domain: "home.example".into(),
+                    }
+                })],
+
+                to: vec![&model::AddressRef::Single(model::MailboxRef {
+                    name: Some("John Doe".into()),
+                    addrspec: model::AddrSpec {
+                        local_part: "jdoe".into(),
+                        domain: "machine.example".into(),
+                    }
+                })],
+
+                cc: vec![&model::AddressRef::Single(model::MailboxRef {
+                    name: None,
+                    addrspec: model::AddrSpec {
+                        local_part: "imf2".into(),
+                        domain: "example.com".into(),
+                    }
+                })],
+
+                // `Bcc: (hidden)` only carries a comment, hence no address.
+                bcc: vec![],
+
+                msg_id: Some(&model::MessageId {
+                    left: "3456",
+                    right: "example.net"
+                }),
+                in_reply_to: vec![&model::MessageId {
+                    left: "1234",
+                    right: "local.machine.example"
+                }],
+                references: vec![&model::MessageId {
+                    left: "1234",
+                    right: "local.machine.example"
+                }],
+
+                subject: Some(&misc_token::Unstructured("Re: Saying Hello".into())),
+
+                comments: vec![
+                    &misc_token::Unstructured("A simple message".into()),
+                    &misc_token::Unstructured("Not that complicated".into()),
+                    &misc_token::Unstructured(
+                        "not valid header name but should be accepted by the parser.".into()
+                    ),
+                ],
+
+                keywords: vec![
+                    &misc_token::PhraseList(vec!["hello".into(), "world".into(),]),
+                    &misc_token::PhraseList(vec!["salut".into(), "le".into(), "monde".into(),]),
+                ],
+
+                received: vec![&trace::ReceivedLog(
+                    r#"from smtp.example.com ([10.83.2.2])
+	by doradille with LMTP
+	id xyzabcd
+	(envelope-from <gitlab@example.com>)
+	for <quentin@example.com>"#
+                )],
+
+                return_path: vec![&model::MailboxRef {
+                    name: None,
+                    addrspec: model::AddrSpec {
+                        local_part: "gitlab".into(),
+                        domain: "example.com".into(),
+                    }
+                }],
+
+                optional: HashMap::from([
+                    (
+                        "Delivered-To",
+                        &misc_token::Unstructured("quentin@example.com".into())
+                    ),
+                    ("Unknown", &misc_token::Unstructured("unknown".into())),
+                ]),
+
+                bad_fields: vec![],
+
+                // Lines that could not be recognised as header fields are kept verbatim.
+                unparsed: vec![
+                    "Héron: Raté\n Raté raté\n",
+                    "Not a real header but should still recover\n",
+                ],
+                ..section::Section::default()
+            }
+        )
+    })
+}
+
+/// Checks that encoded-words (`=?charset?Q/B?...?=`) in `From`, `To`, `CC` and
+/// `Subject` are decoded to the expected text, and that the MIME header fields
+/// populate `mime_version` and the nested `mime` section.
+#[test]
+fn test_headers_mime() {
+    use imf_codec::fragments::mime;
+    let fullmail: &[u8] = r#"From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>
+To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>
+CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>
+Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
+    =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
+MIME-Version: 1.0
+Content-Type: text/plain; charset=ISO-8859-1
+Content-Transfer-Encoding: quoted-printable
+Content-ID: <a@example.com>
+Content-Description: hello
+
+Now's the time =
+for all folk to come=
+ to the aid of their country.
+"#
+    .as_bytes();
+
+   parser(fullmail, |parsed_section| {
+        assert_eq!(
+            parsed_section,
+            &section::Section {
+                from: vec![
+                    &model::MailboxRef {
+                        name: Some("Keith Moore".into()),
+                        addrspec: model::AddrSpec {
+                            local_part: "moore".into(),
+                            domain: "cs.utk.edu".into(),
+                        }
+                    },
+                ],
+
+                to: vec![&model::AddressRef::Single(model::MailboxRef {
+                    name: Some("Keld Jørn Simonsen".into()),
+                    addrspec: model::AddrSpec {
+                        local_part: "keld".into(),
+                        domain: "dkuug.dk".into(),
+                    }
+                })],
+
+                cc: vec![&model::AddressRef::Single(model::MailboxRef {
+                    name: Some("André Pirard".into()),
+                    addrspec: model::AddrSpec {
+                        local_part: "PIRARD".into(),
+                        domain: "vm1.ulg.ac.be".into(),
+                    }
+                })],
+
+                // The two folded encoded-words of the subject are expected to be joined.
+                subject: Some(&misc_token::Unstructured("If you can read this you understand the example.".into())),
+                mime_version: Some(&mime::Version{ major: 1, minor: 0 }),
+                mime: section::MIMESection {
+                    content_type: Some(&mime::Type::Text(mime::TextDesc { 
+                        charset: Some(mime::EmailCharset::ISO_8859_1), 
+                        subtype: mime::TextSubtype::Plain, 
+                        unknown_parameters: vec![]
+                    })),
+                    content_transfer_encoding: Some(&mime::Mechanism::QuotedPrintable),
+                    content_id: Some(&model::MessageId {
+                        left: "a",
+                        right: "example.com"
+                    }),
+                    content_description: Some(&misc_token::Unstructured("hello".into())),
+                    ..section::MIMESection::default()
+                },
+                ..section::Section::default()
+            }
+        );
+   })
+}
+
+/// Runs every multipass stage up to the body structure on `input` and hands
+/// the resulting part tree to `func`.
+///
+/// Panics if segmentation or field extraction fails.
+fn parser_bodystruct<'a, F>(input: &'a [u8], func: F) -> ()
+where
+    F: FnOnce(&part::PartNode) -> (),
+{
+    // Each stage is built from the value produced by the previous one.
+    let stage_segment = multipass::segment::new(input).unwrap();
+    let stage_charset = stage_segment.charset();
+    let stage_fields = stage_charset.fields().unwrap();
+    let stage_names = stage_fields.names();
+    let stage_body = stage_names.body();
+    let stage_section = stage_body.section();
+    let stage_structure = stage_section.body_structure();
+
+    func(&stage_structure.body);
+}
+
+/// Checks that a `multipart/alternative` message is split into a composite
+/// node with two discrete children holding the plain-text and HTML bodies.
+///
+/// NOTE(review): every expected `PartHeader` is the default value although the
+/// fixture parts carry `Content-Type` fields — confirm this is intended.
+#[test]
+fn test_multipart() {
+    let fullmail: &[u8] = r#"Date: Sat, 8 Jul 2023 07:14:29 +0200
+From: Grrrnd Zero <grrrndzero@example.org>
+To: John Doe <jdoe@machine.example>
+Subject: Re: Saying Hello
+Message-ID: <NTAxNzA2AC47634Y366BAMTY4ODc5MzQyODY0ODY5@www.grrrndzero.org>
+MIME-Version: 1.0
+Content-Type: multipart/alternative;
+ boundary="b1_e376dc71bafc953c0b0fdeb9983a9956"
+Content-Transfer-Encoding: 7bit
+
+This is a multi-part message in MIME format.
+
+--b1_e376dc71bafc953c0b0fdeb9983a9956
+Content-Type: text/plain; charset=utf-8
+Content-Transfer-Encoding: quoted-printable
+
+GZ
+OoOoO
+oOoOoOoOo
+oOoOoOoOoOoOoOoOo
+oOoOoOoOoOoOoOoOoOoOoOo
+oOoOoOoOoOoOoOoOoOoOoOoOoOoOo
+OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
+
+--b1_e376dc71bafc953c0b0fdeb9983a9956
+Content-Type: text/html; charset=us-ascii
+
+<div style="text-align: center;"><strong>GZ</strong><br />
+OoOoO<br />
+oOoOoOoOo<br />
+oOoOoOoOoOoOoOoOo<br />
+oOoOoOoOoOoOoOoOoOoOoOo<br />
+oOoOoOoOoOoOoOoOoOoOoOoOoOoOo<br />
+OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
+
+--b1_e376dc71bafc953c0b0fdeb9983a9956--
+"#.as_bytes();
+    
+    parser_bodystruct(fullmail, |part| {
+        assert_eq!(part, &part::PartNode::Composite(
+            part::PartHeader {
+                ..part::PartHeader::default()
+            },
+            vec![
+                // First alternative: the text/plain part.
+                part::PartNode::Discrete(
+                    part::PartHeader {
+                        ..part::PartHeader::default()
+                    },
+                    r#"GZ
+OoOoO
+oOoOoOoOo
+oOoOoOoOoOoOoOoOo
+oOoOoOoOoOoOoOoOoOoOoOo
+oOoOoOoOoOoOoOoOoOoOoOoOoOoOo
+OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO"#.as_bytes()
+                ),
+                // Second alternative: the text/html part.
+                part::PartNode::Discrete(
+                    part::PartHeader {
+                        ..part::PartHeader::default()
+                    },
+                    r#"<div style="text-align: center;"><strong>GZ</strong><br />
+OoOoO<br />
+oOoOoOoOo<br />
+oOoOoOoOoOoOoOoOo<br />
+oOoOoOoOoOoOoOoOoOoOoOo<br />
+oOoOoOoOoOoOoOoOoOoOoOoOoOoOo<br />
+OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />"#.as_bytes()
+                ),
+            ]));
+    });
+}
--- a/src/mime/content_fields.rs
+++ b/src/mime/content_fields.rs
@ -15,12 +15,6 @@ use crate::fragments::lazy;
 use crate::fragments::whitespace::cfws;
 use crate::fragments::quoted::quoted_string;

-#[derive(Debug, PartialEq)]
-pub struct Version {
-    pub major: u32,
-    pub minor: u32,
-}
-
 #[derive(Debug, PartialEq)]
 pub enum Type<'a> {
    // Composite types
@ -278,20 +272,6 @@ impl<'a> From<&'a str> for TextSubtype<'a> {
    }
 }

-
-pub fn version(input: &str) -> IResult<&str, Version> {
-    let (rest, (_, major, _, _, _, minor, _)) = tuple((
-        opt(cfws),
-        character::u32,
-        opt(cfws),
-        tag("."),
-        opt(cfws),
-        character::u32,
-        opt(cfws),
-    ))(input)?;
-    Ok((rest, Version { major, minor }))
-}
-
 pub fn parameter(input: &str) -> IResult<&str, Parameter> {
    let (rest, (pname, _, pvalue)) = tuple((
            token, 
--- a/src/mime/section.rs
+++ b/src/mime/section.rs
@ -0,0 +1,28 @@
+
+/// MIME header fields gathered for one entity; every field holds a reference
+/// to a value owned elsewhere.
+///
+/// NOTE(review): this new file has no `use` items, so `Type`, `Mechanism`,
+/// `MessageId`, `Unstructured`, `HashMap` and `MIMEField` still have to be
+/// brought into scope.
+#[derive(Debug, PartialEq, Default)]
+pub struct MIMESection<'a> {
+    /// Value of the last `ContentType` field seen, if any.
+    pub content_type: Option<&'a Type<'a>>,
+    /// Value of the last `ContentTransferEncoding` field seen, if any.
+    pub content_transfer_encoding: Option<&'a Mechanism<'a>>,
+    /// Value of the last `ContentID` field seen, if any.
+    pub content_id: Option<&'a MessageId<'a>>,
+    /// Value of the last `ContentDescription` field seen, if any.
+    pub content_description: Option<&'a Unstructured>,
+    /// Fields stored under their own name; a repeated name replaces the earlier value.
+    pub optional: HashMap<&'a str, &'a Unstructured>,
+    /// Raw text of the `Rescue` fields, in the order they were seen.
+    pub unparsed: Vec<&'a str>,
+}
+
+
+impl<'a> FromIterator<&'a MIMEField<'a>> for MIMESection<'a> {
+    /// Builds a section by folding the fields in iteration order.
+    ///
+    /// For the single-valued fields a later occurrence replaces an earlier
+    /// one; `Rescue` payloads are appended to `unparsed`.
+    fn from_iter<I: IntoIterator<Item = &'a MIMEField<'a>>>(iter: I) -> Self {
+        iter.into_iter()
+            .fold(MIMESection::default(), |mut acc, mime_field| {
+                match mime_field {
+                    MIMEField::ContentType(ctype) => acc.content_type = Some(ctype),
+                    MIMEField::ContentTransferEncoding(mechanism) => {
+                        acc.content_transfer_encoding = Some(mechanism)
+                    }
+                    MIMEField::ContentID(id) => acc.content_id = Some(id),
+                    MIMEField::ContentDescription(desc) => acc.content_description = Some(desc),
+                    MIMEField::Optional(name, value) => {
+                        acc.optional.insert(name, value);
+                    }
+                    MIMEField::Rescue(raw) => acc.unparsed.push(raw),
+                };
+                acc
+            })
+    }
+}
--- a/src/rfc5322/address.rs
+++ b/src/rfc5322/address.rs
@ -140,6 +140,13 @@ pub fn address_list_cfws(input: &[u8]) -> IResult<&[u8], Vec<AddressRef>> {
    Ok((input, vec![]))
 }

+/// Parse an address list that is allowed to be empty.
+///
+/// Tries a regular address list first, then a list made only of comments and
+/// whitespace; when neither matches, nothing is consumed and an empty vector
+/// is returned.
+pub fn nullable_address_list(input: &[u8]) -> IResult<&[u8], Vec<AddressRef>> {
+    map(
+        opt(alt((address_list, address_list_cfws))),
+        |list| list.unwrap_or_default(),
+    )(input)
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/src/rfc5322/field.rs
+++ b/src/rfc5322/field.rs
@ -0,0 +1,88 @@
+use nom::{
+    bytes::complete::{tag, take_while1},
+    character::complete::space0,
+    combinator::map,
+    sequence::{terminated, tuple},
+    IResult,
+};
+
+use crate::rfc5322::address::{nullable_address_list, AddressList, MailboxList};
+use crate::rfc5322::identification::{msg_id, msg_list, MessageId, MessageIdList};
+use crate::rfc5322::mailbox::MailboxRef;
+use crate::rfc5322::trace::{received_log, return_path, ReceivedLog};
+use crate::text::misc_token::{phrase_list, PhraseList, Unstructured};
+
+/// One parsed header field of a message.
+///
+/// Each variant carries the value produced by the parser selected for that
+/// field name in `field`.
+#[derive(Debug, PartialEq)]
+pub enum Field<'a> {
+    // 3.6.1.  The Origination Date Field
+    // NOTE(review): `DateTime` is not imported in this file — confirm the intended type.
+    Date(DateTime<'a>),
+
+    // 3.6.2.  Originator Fields
+    From(MailboxList<'a>),
+    Sender(MailboxRef<'a>),
+    ReplyTo(AddressList<'a>),
+
+    // 3.6.3.  Destination Address Fields
+    To(AddressList<'a>),
+    Cc(AddressList<'a>),
+    // May hold an empty list.
+    // NOTE(review): assumes `AddressList` is the vector type returned by
+    // `nullable_address_list` — confirm.
+    Bcc(AddressList<'a>),
+
+    // 3.6.4.  Identification Fields
+    MessageID(MessageId<'a>),
+    InReplyTo(MessageIdList<'a>),
+    References(MessageIdList<'a>),
+
+    // 3.6.5.  Informational Fields
+    Subject(Unstructured<'a>),
+    Comments(Unstructured<'a>),
+    Keywords(PhraseList<'a>),
+
+    // 3.6.6   Resent Fields (not implemented)
+    // 3.6.7   Trace Fields
+    Received(ReceivedLog<'a>),
+    // `None` stands for an empty return path.
+    // NOTE(review): `AddrSpec` is not imported in this file.
+    ReturnPath(Option<AddrSpec<'a>>),
+
+    // NOTE(review): `Version` is not imported in this file; the struct removed
+    // from `content_fields.rs` took no lifetime parameter.
+    MIMEVersion(Version<'a>),
+    // Any other field: the name as written, then its unstructured value.
+    Optional(&'a [u8], Unstructured<'a>),
+}
+
+/// Parse one header field (name, colon, then value) into a [`Field`].
+///
+/// The field name is compared case-insensitively (ASCII) against the known
+/// names; any other name falls back to [`Field::Optional`], which keeps the
+/// name exactly as written together with the unstructured value.
+///
+/// Returns an error when the name cannot be parsed or when the value parser
+/// selected for that name fails.
+pub fn field<'a>(input: &'a [u8]) -> IResult<&'a [u8], Field<'a>> {
+    // nom parsers return `(remaining input, output)`, in that order.
+    let (rest, name) = field_name(input)?;
+    // Lowercase a copy for matching; `name` itself is kept for `Optional`.
+    let lowered = name.to_ascii_lowercase();
+    match lowered.as_slice() {
+        b"date" => map(datetime::section, Field::Date)(rest),
+        b"from" => map(mailbox_list, Field::From)(rest),
+        b"sender" => map(mailbox, Field::Sender)(rest),
+        b"reply-to" => map(address_list, Field::ReplyTo)(rest),
+
+        b"to" => map(address_list, Field::To)(rest),
+        b"cc" => map(address_list, Field::Cc)(rest),
+        b"bcc" => map(nullable_address_list, Field::Bcc)(rest),
+
+        b"message-id" => map(msg_id, Field::MessageID)(rest),
+        b"in-reply-to" => map(msg_list, Field::InReplyTo)(rest),
+        b"references" => map(msg_list, Field::References)(rest),
+
+        b"subject" => map(unstructured, Field::Subject)(rest),
+        b"comments" => map(unstructured, Field::Comments)(rest),
+        b"keywords" => map(phrase_list, Field::Keywords)(rest),
+
+        b"return-path" => map(return_path, Field::ReturnPath)(rest),
+        b"received" => map(received_log, Field::Received)(rest),
+
+        b"mime-version" => map(version, Field::MIMEVersion)(rest),
+        _ => map(unstructured, |value| Field::Optional(name, value))(rest),
+    }
+}
+
+
+/// Parse a header field name together with the colon that terminates it.
+///
+/// Spaces and tabs around the colon are consumed and are not part of the
+/// returned name.
+///
+/// ```abnf
+/// field      =   field-name ":" unstructured CRLF
+/// field-name =   1*ftext
+/// ftext      =   %d33-57 /          ; Printable US-ASCII
+///                %d59-126           ;  characters not including
+///                                   ;  ":".
+/// ```
+fn field_name(input: &[u8]) -> IResult<&[u8], &[u8]> {
+    let name = take_while1(is_ftext);
+    let colon = tuple((space0, tag(b":"), space0));
+    terminated(name, colon)(input)
+}
+
+/// `ftext`: any printable US-ASCII byte except the colon.
+fn is_ftext(byte: u8) -> bool {
+    byte != b':' && (0x21..=0x7E).contains(&byte)
+}
--- a/src/rfc5322/identification.rs
+++ b/src/rfc5322/identification.rs
@ -18,27 +18,6 @@ pub struct MessageId<'a> {
 }
 pub type MessageIdList<'a> = Vec<MessageId<'a>>;

-/*
-impl<'a> TryFrom<&'a lazy::Identifier<'a>> for MessageId<'a> {
-    type Error = IMFError<'a>;
-
-    fn try_from(id: &'a lazy::Identifier<'a>) -> Result<Self, Self::Error> {
-        msg_id(id.0)
-            .map(|(_, i)| i)
-            .map_err(|e| IMFError::MessageID(e))
-    }
-}
-
-impl<'a> TryFrom<&'a lazy::IdentifierList<'a>> for MessageIdList<'a> {
-    type Error = IMFError<'a>;
-
-    fn try_from(id: &'a lazy::IdentifierList<'a>) -> Result<Self, Self::Error> {
-        many1(msg_id)(id.0)
-            .map(|(_, i)| i)
-            .map_err(|e| IMFError::MessageIDList(e))
-    }
-}*/
-
 /// Message identifier
 ///
 /// ```abnf
@ -53,6 +32,10 @@ pub fn msg_id(input: &[u8]) -> IResult<&[u8], MessageId> {
    Ok((input, MessageId { left, right }))
 }

+/// Parse one or more consecutive message identifiers.
+///
+/// Fails when not even one identifier can be parsed.
+pub fn msg_list(input: &[u8]) -> IResult<&[u8], MessageIdList> {
+    many1(msg_id)(input)
+}
+
 // @FIXME Missing obsolete
 fn id_left(input: &[u8]) -> IResult<&[u8], &[u8]> {
    dot_atom_text(input)
--- a/src/rfc5322/message.rs
+++ b/src/rfc5322/message.rs
@ -9,57 +9,63 @@ use crate::fragments::trace::ReceivedLog;
 use chrono::{DateTime, FixedOffset};

 #[derive(Debug, PartialEq, Default)]
-pub struct Section<'a> {
+pub struct Message<'a> {
    // 3.6.1.  The Origination Date Field
-    pub date: Option<&'a DateTime<FixedOffset>>,
+    pub date: Option<DateTime<FixedOffset>>,

    // 3.6.2.  Originator Fields
-    pub from: Vec<&'a MailboxRef>,
-    pub sender: Option<&'a MailboxRef>,
-    pub reply_to: Vec<&'a AddressRef>,
+    pub from: Vec<MailboxRef<'a>>,
+    pub sender: Option<MailboxRef<'a>>,
+    pub reply_to: Vec<AddressRef<'a>>,

    // 3.6.3.  Destination Address Fields
-    pub to: Vec<&'a AddressRef>,
-    pub cc: Vec<&'a AddressRef>,
-    pub bcc: Vec<&'a AddressRef>,
+    pub to: Vec<AddressRef<'a>>,
+    pub cc: Vec<AddressRef<'a>>,
+    pub bcc: Vec<AddressRef<'a>>,

    // 3.6.4.  Identification Fields
-    pub msg_id: Option<&'a MessageId<'a>>,
-    pub in_reply_to: Vec<&'a MessageId<'a>>,
-    pub references: Vec<&'a MessageId<'a>>,
+    pub msg_id: Option<MessageId<'a>>,
+    pub in_reply_to: Vec<MessageId<'a>>,
+    pub references: Vec<MessageId<'a>>,

    // 3.6.5.  Informational Fields
-    pub subject: Option<&'a Unstructured>,
-    pub comments: Vec<&'a Unstructured>,
-    pub keywords: Vec<&'a PhraseList>,
+    pub subject: Option<Unstructured<'a>>,
+    pub comments: Vec<Unstructured<'a>>,
+    pub keywords: Vec<PhraseList<'a>>,

    // 3.6.6 Not implemented
    // 3.6.7 Trace Fields
-    pub return_path: Vec<&'a MailboxRef>,
-    pub received: Vec<&'a ReceivedLog<'a>>,
+    pub return_path: Vec<MailboxRef<'a>>,
+    pub received: Vec<ReceivedLog<'a>>,

    // 3.6.8.  Optional Fields
-    pub optional: HashMap<&'a str, &'a Unstructured>,
-
-    // MIME
-    pub mime_version: Option<&'a Version>,
-    pub mime: MIMESection<'a>,
+    pub optional: HashMap<&'a [u8], Unstructured<'a>>,

    // Recovery
-    pub bad_fields: Vec<&'a lazy::Field<'a>>,
-    pub unparsed: Vec<&'a str>,
+    pub unparsed: Vec<&'a [u8]>,
 }

-#[derive(Debug, PartialEq, Default)]
-pub struct MIMESection<'a> {
-    pub content_type: Option<&'a Type<'a>>,
-    pub content_transfer_encoding: Option<&'a Mechanism<'a>>,
-    pub content_id: Option<&'a MessageId<'a>>,
-    pub content_description: Option<&'a Unstructured>,
-    pub optional: HashMap<&'a str, &'a Unstructured>,
-    pub unparsed: Vec<&'a str>,
+// NOTE(review): work in progress — this impl cannot compile as written:
+//  - `iter` is only an `IntoIterator`, so `fold` needs `iter.into_iter()` first;
+//  - `xx` is a placeholder and the `Err` case of `field_name` is not handled;
+//  - the second `match field` has no arms;
+//  - `field_name` is a private function of `rfc5322/field.rs`, and its result
+//    looks like `(remaining input, name)` rather than `(name, value)` — confirm.
+impl<'a> FromIterator<&'a [u8]> for Message<'a> {
+    /// Intended to fold raw header fields into a `Message`, starting from
+    /// `Message::default()`.
+    fn from_iter<I: IntoIterator<Item = &'a [u8]>>(iter: I) -> Self {
+        iter.fold(
+            Message::default(),
+            |mut msg, field| {
+                match field_name(field) {
+                    Ok((name, value)) => xx,
+
+                }
+
+                match field {
+
+                }
+                msg
+            }
+        )
+    }
 }

+
+
 //@FIXME min and max limits are not enforced,
 // it may result in missing data or silently overriden data.
 impl<'a> FromIterator<&'a Field<'a>> for Section<'a> {
@ -104,19 +110,3 @@ impl<'a> FromIterator<&'a Field<'a>> for Section<'a> {
    }
 }

-impl<'a> FromIterator<&'a MIMEField<'a>> for MIMESection<'a> {
-    fn from_iter<I: IntoIterator<Item = &'a MIMEField<'a>>>(iter: I) -> Self {
-        let mut section = MIMESection::default();
-        for field in iter {
-            match field {
-                MIMEField::ContentType(v) => section.content_type = Some(v),
-                MIMEField::ContentTransferEncoding(v) => section.content_transfer_encoding = Some(v),
-                MIMEField::ContentID(v) => section.content_id = Some(v),
-                MIMEField::ContentDescription(v) => section.content_description = Some(v),
-                MIMEField::Optional(k, v) => { section.optional.insert(k, v); },
-                MIMEField::Rescue(v) => section.unparsed.push(v),
-            };
-        }
-        section
-    }
-}
--- a/src/rfc5322/mod.rs
+++ b/src/rfc5322/mod.rs
@ -4,3 +4,4 @@ pub mod datetime;
 pub mod trace;
 pub mod identification;
 pub mod mime;
+pub mod field;
--- a/src/rfc5322/trace.rs
+++ b/src/rfc5322/trace.rs
@ -35,7 +35,7 @@ impl<'a> TryFrom<&'a lazy::ReceivedLog<'a>> for ReceivedLog<'a> {
    }
 }*/

-pub fn received_body(input: &[u8]) -> IResult<&[u8], ReceivedLog> {
+pub fn received_log(input: &[u8]) -> IResult<&[u8], ReceivedLog> {
    map(
        tuple((
            many0(received_tokens),
@ -46,7 +46,7 @@ pub fn received_body(input: &[u8]) -> IResult<&[u8], ReceivedLog> {
    )(input)
 }

-pub fn return_path_body(input: &[u8]) -> IResult<&[u8], Option<mailbox::AddrSpec>> {
+pub fn return_path(input: &[u8]) -> IResult<&[u8], Option<mailbox::AddrSpec>> {
    alt((map(mailbox::angle_addr, |a| Some(a)), empty_path))(input)
 }

--- a/src/text/misc_token.rs
+++ b/src/text/misc_token.rs
@ -18,6 +18,9 @@ use crate::text::{

 #[derive(Debug, PartialEq, Default)]
 pub struct PhraseList(pub Vec<String>);
+/// Parse a comma-separated list of one or more phrases.
+///
+/// NOTE(review): assumes `phrase` yields the element type stored in
+/// `PhraseList` — confirm.
+pub fn phrase_list(input: &[u8]) -> IResult<&[u8], PhraseList> {
+    let (rest, phrases) = separated_list1(tag(","), phrase)(input)?;
+    Ok((rest, PhraseList(phrases)))
+}

 /*
 impl<'a> TryFrom<&'a lazy::Unstructured<'a>> for Unstructured {