wip, still broken

2023-07-19 22:27:59 +02:00 · 2023-07-19 22:27:59 +02:00 · cd5289c8c1
commit cd5289c8c1
parent b3bec8656d
12 changed files with 640 additions and 103 deletions
--- a/README.md
+++ b/README.md
@ -29,19 +29,7 @@ Current known limitations/bugs:
 ## Design
-Multipass design: each pass is in charge of a specific work.
+*Todo*
 *Having multiple passes does not necessarily lead to abysmal performance.
 For example, the [Chez Scheme compiler](https://legacy.cs.indiana.edu/~dyb/pubs/commercial-nanopass.pdf) 
 pioneered the "Nanopass" concept and showcases excellent performance.*
 Currently, you can use the following passes:
 - `segment.rs` - Extract the header section by finding the `CRLFCRLF` token.
 - `guess_charset.rs` - Find the header section encoding (should be ASCII or UTF8 but some corpus contains ISO-8859-1 headers)
 - `extract_fields.rs` - Extract the headers line by line, taking into account Folding White Space (FWS).
 - `field_lazy.rs` - Try to recognize the header fields (`From`, `To`, `Date`, etc.) but do not parse their value.  
 - `field_eager.rs` - Parse the value of each known header field.  
 - `header_section.rs` - Aggregate the various fields in a single structure.  
 ## Testing strategy
--- a/ignore.test/enron.rs
+++ b/ignore.test/enron.rs
@ -0,0 +1,129 @@
 use imf_codec::fragments::section;
 use imf_codec::multipass;
 use std::collections::HashSet;
 use std::fs::File;
 use std::io::Read;
 use std::path::PathBuf;
 use walkdir::WalkDir;
 /// Runs the parsing passes over `input`, in order, and hands the aggregated
 /// header fields to `func`.
 ///
 /// Panics (through `unwrap`) when the segmentation pass or the field
 /// extraction pass reports an error.
 fn parser<'a, F>(input: &'a [u8], func: F) -> ()
 where
    F: FnOnce(&section::Section) -> (),
 {
    // Every pass result is bound to its own local, in pipeline order.
    let segmented = multipass::segment::new(input).unwrap();
    let decoded = segmented.charset();
    let extracted = decoded.fields().unwrap();
    let named = extracted.names();
    let parsed = named.body();
    let aggregated = parsed.section();

    func(&aggregated.fields);
 }
 /// Parses every file found under `resources/enron/maildir/` and checks that
 /// a date, at least one sender and no bad field were found, except for the
 /// messages listed as known failures.
 ///
 /// Marked `#[ignore]`, so it only runs when explicitly requested.
 #[test]
 #[ignore]
 fn test_enron500k() {
    let mut corpus_root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    corpus_root.push("resources/enron/maildir/");
    // Byte length of the corpus prefix; stripping it yields corpus-relative keys.
    let prefix_sz = corpus_root.as_path().to_str().unwrap().len();
    // Debug aid, restricts the run to one mailbox:
    //corpus_root.push("williams-w3/");

    // Messages for which a non-empty `bad_fields` is tolerated.
    let known_bad_fields = HashSet::from([
        "white-s/calendar/113.",       // To: east <7..>
        "skilling-j/inbox/223.",       // From: pep <performance.>
        "jones-t/all_documents/9806.", // To: <"tibor.vizkelety":@enron.com>
        "jones-t/notes_inbox/3303.",   // To: <"tibor.vizkelety":@enron.com>
        "lokey-t/calendar/33.",        // A second Date entry for the calendar containing
        // Date:       Monday, March 12
        "zipper-a/inbox/199.",                       // To: e-mail <mari.>
        "dasovich-j/deleted_items/128.",             // To: f62489 <g>
        "dasovich-j/all_documents/677.",             // To: w/assts <govt.>
        "dasovich-j/all_documents/8984.",            // To: <"ft.com.users":@enron.com>
        "dasovich-j/all_documents/3514.",            // To: <"ft.com.users":@enron.com>
        "dasovich-j/all_documents/4467.",            // To: <"ft.com.users":@enron.com>
        "dasovich-j/all_documents/578.",             // To: w/assts <govt.>
        "dasovich-j/all_documents/3148.",            // To: <"economist.com.readers":@enron.com>
        "dasovich-j/all_documents/9953.",            // To: <"economist.com.reader":@enron.com>
        "dasovich-j/risk_analytics/3.",              // To: w/assts <govt.>
        "dasovich-j/notes_inbox/5391.",              // To: <"ft.com.users":@enron.com>
        "dasovich-j/notes_inbox/4952.",              // To: <"economist.com.reader":@enron.com>
        "dasovich-j/notes_inbox/2386.",              // To: <"ft.com.users":@enron.com>
        "dasovich-j/notes_inbox/1706.",              // To: <"ft.com.users":@enron.com>
        "dasovich-j/notes_inbox/1489.",              // To: <"economist.com.readers":@enron.com>
        "dasovich-j/notes_inbox/5.",                 // To: w/assts <govt.>
        "kaminski-v/sites/19.",                      // To: <"the.desk":@enron.com>
        "kaminski-v/sites/1.",                       // To: <"the.desk":@enron.com>
        "kaminski-v/discussion_threads/5082.",       // To: <"ft.com.users":@enron.com>
        "kaminski-v/discussion_threads/4046.",       // To: <"the.desk":@enron.com>
        "kaminski-v/discussion_threads/4187.",       // To: <"the.desk":@enron.com>
        "kaminski-v/discussion_threads/8068.", // To: cats <breaktkhrough.>, risk <breakthrough.>, leaders <breaktkhrough.>
        "kaminski-v/discussion_threads/7980.", // To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
        "kaminski-v/all_documents/5970.", //To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
        "kaminski-v/all_documents/5838.", // To + Cc: dogs <breakthrough.>, breakthrough.adm@enron.com, breakthrough.adm@enron.com,\r\n\tbreakthrough.adm@enron.com
        "kaminski-v/all_documents/10070.", // To: <"ft.com.users":@enron.com>
        "kaminski-v/all_documents/92.",   // To: <"the.desk":@enron.com>
        "kaminski-v/all_documents/276.",  // To: <"the.desk":@enron.com>
        "kaminski-v/technical/1.",        // To: <"the.desk":@enron.com>
        "kaminski-v/technical/7.",        // To: <"the.desk":@enron.com>
        "kaminski-v/notes_inbox/140.", // To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
        "kaminski-v/notes_inbox/95.", // To + CC failed: cats <breaktkhrough.>, risk <breakthrough.>, leaders <breaktkhrough.>
        "kean-s/archiving/untitled/1232.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
        "kean-s/archiving/untitled/1688.", // To: w/assts <govt.>
        "kean-s/sent/198.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
        "kean-s/reg_risk/9.", // To: w/assts <govt.>
        "kean-s/discussion_threads/950.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
        "kean-s/discussion_threads/577.", // To: w/assts <govt.>
        "kean-s/calendar/untitled/1096.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
        "kean-s/calendar/untitled/640.",  // To: w/assts <govt.>
        "kean-s/all_documents/640.",      // To: w/assts <govt.>
        "kean-s/all_documents/1095.",     // To: w/assts <govt.>
        "kean-s/attachments/2030.",       // To: w/assts <govt.>
        "williams-w3/operations_committee_isas/10.", // To: z34655 <m>
    ]);

    // Messages for which an empty `from` is tolerated.
    let known_bad_from = HashSet::from([
        "skilling-j/inbox/223.", // From: pep <performance.>
    ]);

    let mut analyzed = 0;
    let walked = WalkDir::new(corpus_root.as_path())
        .into_iter()
        .filter_map(|file| file.ok());
    for entry in walked {
        // Only regular files are treated as messages.
        if !entry.metadata().unwrap().is_file() {
            continue;
        }

        let mail_path = entry.path();
        let suffix = &mail_path.to_str().unwrap()[prefix_sz..];

        // Load the whole message in memory.
        let mut raw = Vec::new();
        let mut handle = File::open(mail_path).unwrap();
        handle.read_to_end(&mut raw).unwrap();

        // Parse it and check the aggregated header section.
        parser(&raw, |hdrs| {
            let ok_date = hdrs.date.is_some();
            let ok_from = !hdrs.from.is_empty();
            let ok_fields = hdrs.bad_fields.is_empty();

            // Name the offending message before any assertion aborts the run.
            if !(ok_date && ok_from && ok_fields) {
                println!("Issue with: {}", suffix);
            }

            assert!(ok_date);
            if !known_bad_from.contains(suffix) {
                assert!(ok_from);
            }
            if !known_bad_fields.contains(suffix) {
                assert!(ok_fields);
            }

            // Progress report every 1000 messages.
            analyzed += 1;
            if analyzed % 1000 == 0 {
                println!("Analyzed emails: {}", analyzed);
            }
        });
    }
 }
--- a/ignore.test/known.rs
+++ b/ignore.test/known.rs
@ -0,0 +1,340 @@
 use chrono::{FixedOffset, TimeZone};
 use imf_codec::fragments::{misc_token, model, section, part, trace};
 use imf_codec::multipass;
 use std::collections::HashMap;
 /// Runs the parsing passes over `input`, in order, and hands the aggregated
 /// header fields to `func`.
 ///
 /// Panics (through `unwrap`) when the segmentation pass or the field
 /// extraction pass reports an error.
 fn parser<'a, F>(input: &'a [u8], func: F) -> ()
 where
    F: FnOnce(&section::Section) -> (),
 {
    // Every pass result is bound to its own local, in pipeline order.
    let segmented = multipass::segment::new(input).unwrap();
    let decoded = segmented.charset();
    let extracted = decoded.fields().unwrap();
    let named = extracted.names();
    let parsed = named.body();
    let aggregated = parsed.section();

    func(&aggregated.fields);
 }
 /// Parses a complete header section and compares every aggregated field
 /// with the value it is expected to hold.
 #[test]
 fn test_headers() {
    // Fixture mixing well-formed fields with lines that are not valid fields.
    let fullmail: &[u8] = r#"Return-Path: <gitlab@example.com>
 Delivered-To: quentin@example.com
 Received: from smtp.example.com ([10.83.2.2])
 	by doradille with LMTP
 	id xyzabcd
 	(envelope-from <gitlab@example.com>)
 	for <quentin@example.com>; Tue, 13 Jun 2023 19:01:08 +0000
 Date: Tue, 13 Jun 2023 10:01:10 +0200
 From: Mary Smith
 <mary@example.net>, "A\lan" <alan@example>
 Sender: imf@example.com
 Reply-To: "Mary Smith: Personal Account" <smith@home.example>
 To: John Doe <jdoe@machine.example>
 Cc: imf2@example.com
 Bcc: (hidden)
 Subject: Re: Saying Hello
 Comments: A simple message
 Comments: Not that complicated
 comments : not valid header name but should be accepted
    by the parser.
 Keywords: hello, world
 Héron: Raté
 Raté raté
 Keywords: salut, le, monde
 Not a real header but should still recover
 Message-ID: <3456@example.net>
 In-Reply-To: <1234@local.machine.example>
 References: <1234@local.machine.example>
 Unknown: unknown
 This is a reply to your hello.
 "#
    .as_bytes();
    parser(fullmail, |parsed_section| {
        assert_eq!(
            parsed_section,
            &section::Section {
                date: Some(
                    &FixedOffset::east_opt(2 * 3600)
                        .unwrap()
                        .with_ymd_and_hms(2023, 06, 13, 10, 01, 10)
                        .unwrap()
                ),
                // Two mailboxes are expected from the `From` field; the
                // quoted `"A\lan"` is expected to come out as `Alan`.
                from: vec![
                    &model::MailboxRef {
                        name: Some("Mary Smith".into()),
                        addrspec: model::AddrSpec {
                            local_part: "mary".into(),
                            domain: "example.net".into(),
                        }
                    },
                    &model::MailboxRef {
                        name: Some("Alan".into()),
                        addrspec: model::AddrSpec {
                            local_part: "alan".into(),
                            domain: "example".into(),
                        }
                    }
                ],
                sender: Some(&model::MailboxRef {
                    name: None,
                    addrspec: model::AddrSpec {
                        local_part: "imf".into(),
                        domain: "example.com".into(),
                    }
                }),
                reply_to: vec![&model::AddressRef::Single(model::MailboxRef {
                    name: Some("Mary Smith: Personal Account".into()),
                    addrspec: model::AddrSpec {
                        local_part: "smith".into(),
                        domain: "home.example".into(),
                    }
                })],
                to: vec![&model::AddressRef::Single(model::MailboxRef {
                    name: Some("John Doe".into()),
                    addrspec: model::AddrSpec {
                        local_part: "jdoe".into(),
                        domain: "machine.example".into(),
                    }
                })],
                cc: vec![&model::AddressRef::Single(model::MailboxRef {
                    name: None,
                    addrspec: model::AddrSpec {
                        local_part: "imf2".into(),
                        domain: "example.com".into(),
                    }
                })],
                // `Bcc: (hidden)` is expected to yield no address.
                bcc: vec![],
                msg_id: Some(&model::MessageId {
                    left: "3456",
                    right: "example.net"
                }),
                in_reply_to: vec![&model::MessageId {
                    left: "1234",
                    right: "local.machine.example"
                }],
                references: vec![&model::MessageId {
                    left: "1234",
                    right: "local.machine.example"
                }],
                subject: Some(&misc_token::Unstructured("Re: Saying Hello".into())),
                // The third entry comes from the `comments :` line of the fixture.
                comments: vec![
                    &misc_token::Unstructured("A simple message".into()),
                    &misc_token::Unstructured("Not that complicated".into()),
                    &misc_token::Unstructured(
                        "not valid header name but should be accepted by the parser.".into()
                    ),
                ],
                // One list per `Keywords` field of the fixture.
                keywords: vec![
                    &misc_token::PhraseList(vec!["hello".into(), "world".into(),]),
                    &misc_token::PhraseList(vec!["salut".into(), "le".into(), "monde".into(),]),
                ],
                // The expected log stops before the `;` and the date that follows it.
                received: vec![&trace::ReceivedLog(
                    r#"from smtp.example.com ([10.83.2.2])
 	by doradille with LMTP
 	id xyzabcd
 	(envelope-from <gitlab@example.com>)
 	for <quentin@example.com>"#
                )],
                return_path: vec![&model::MailboxRef {
                    name: None,
                    addrspec: model::AddrSpec {
                        local_part: "gitlab".into(),
                        domain: "example.com".into(),
                    }
                }],
                // Fields whose name is not specifically handled, keyed by name.
                optional: HashMap::from([
                    (
                        "Delivered-To",
                        &misc_token::Unstructured("quentin@example.com".into())
                    ),
                    ("Unknown", &misc_token::Unstructured("unknown".into())),
                ]),
                bad_fields: vec![],
                // Lines that are not valid fields are expected verbatim here.
                unparsed: vec![
                    "Héron: Raté\n Raté raté\n",
                    "Not a real header but should still recover\n",
                ],
                ..section::Section::default()
            }
        )
    })
 }
 /// Parses a header section holding `=?charset?encoding?text?=` encoded words
 /// and MIME content fields, then compares the result with the expected
 /// decoded values.
 #[test]
 fn test_headers_mime() {
    use imf_codec::fragments::mime;
    let fullmail: &[u8] = r#"From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>
 To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>
 CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>
 Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
    =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
 MIME-Version: 1.0
 Content-Type: text/plain; charset=ISO-8859-1
 Content-Transfer-Encoding: quoted-printable
 Content-ID: <a@example.com>
 Content-Description: hello
 Now's the time =
 for all folk to come=
 to the aid of their country.
 "#
    .as_bytes();
   parser(fullmail, |parsed_section| {
        assert_eq!(
            parsed_section,
            &section::Section {
                // Display names are expected in their decoded form.
                from: vec![
                    &model::MailboxRef {
                        name: Some("Keith Moore".into()),
                        addrspec: model::AddrSpec {
                            local_part: "moore".into(),
                            domain: "cs.utk.edu".into(),
                        }
                    },
                ],
                to: vec![&model::AddressRef::Single(model::MailboxRef {
                    name: Some("Keld Jørn Simonsen".into()),
                    addrspec: model::AddrSpec {
                        local_part: "keld".into(),
                        domain: "dkuug.dk".into(),
                    }
                })],
                cc: vec![&model::AddressRef::Single(model::MailboxRef {
                    name: Some("André Pirard".into()),
                    addrspec: model::AddrSpec {
                        local_part: "PIRARD".into(),
                        domain: "vm1.ulg.ac.be".into(),
                    }
                })],
                // The two encoded words of the folded `Subject` are expected
                // to be joined into a single sentence.
                subject: Some(&misc_token::Unstructured("If you can read this you understand the example.".into())),
                mime_version: Some(&mime::Version{ major: 1, minor: 0 }),
                mime: section::MIMESection {
                    content_type: Some(&mime::Type::Text(mime::TextDesc { 
                        charset: Some(mime::EmailCharset::ISO_8859_1), 
                        subtype: mime::TextSubtype::Plain, 
                        unknown_parameters: vec![]
                    })),
                    content_transfer_encoding: Some(&mime::Mechanism::QuotedPrintable),
                    content_id: Some(&model::MessageId {
                        left: "a",
                        right: "example.com"
                    }),
                    content_description: Some(&misc_token::Unstructured("hello".into())),
                    ..section::MIMESection::default()
                },
                ..section::Section::default()
            }
        );
   })
 }
 /// Runs the parsing passes over `input`, including the body structure pass,
 /// and hands the resulting part tree to `func`.
 ///
 /// Panics (through `unwrap`) when the segmentation pass or the field
 /// extraction pass reports an error.
 fn parser_bodystruct<'a, F>(input: &'a [u8], func: F) -> ()
 where
    F: FnOnce(&part::PartNode) -> (),
 {
    // Every pass result is bound to its own local, in pipeline order.
    let segmented = multipass::segment::new(input).unwrap();
    let decoded = segmented.charset();
    let extracted = decoded.fields().unwrap();
    let named = extracted.names();
    let parsed = named.body();
    let aggregated = parsed.section();
    let structured = aggregated.body_structure();

    func(&structured.body);
 }
 /// Parses a `multipart/alternative` message and checks that the body is
 /// split into one composite node holding two discrete parts.
 #[test]
 fn test_multipart() {
    // Two parts separated by the boundary declared in `Content-Type`.
    let fullmail: &[u8] = r#"Date: Sat, 8 Jul 2023 07:14:29 +0200
 From: Grrrnd Zero <grrrndzero@example.org>
 To: John Doe <jdoe@machine.example>
 Subject: Re: Saying Hello
 Message-ID: <NTAxNzA2AC47634Y366BAMTY4ODc5MzQyODY0ODY5@www.grrrndzero.org>
 MIME-Version: 1.0
 Content-Type: multipart/alternative;
 boundary="b1_e376dc71bafc953c0b0fdeb9983a9956"
 Content-Transfer-Encoding: 7bit
 This is a multi-part message in MIME format.
 --b1_e376dc71bafc953c0b0fdeb9983a9956
 Content-Type: text/plain; charset=utf-8
 Content-Transfer-Encoding: quoted-printable
 GZ
 OoOoO
 oOoOoOoOo
 oOoOoOoOoOoOoOoOo
 oOoOoOoOoOoOoOoOoOoOoOo
 oOoOoOoOoOoOoOoOoOoOoOoOoOoOo
 OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
 --b1_e376dc71bafc953c0b0fdeb9983a9956
 Content-Type: text/html; charset=us-ascii
 <div style="text-align: center;"><strong>GZ</strong><br />
 OoOoO<br />
 oOoOoOoOo<br />
 oOoOoOoOoOoOoOoOo<br />
 oOoOoOoOoOoOoOoOoOoOoOo<br />
 oOoOoOoOoOoOoOoOoOoOoOoOoOoOo<br />
 OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
 --b1_e376dc71bafc953c0b0fdeb9983a9956--
 "#.as_bytes();
    parser_bodystruct(fullmail, |part| {
        // Every expected part header is the default one: only the tree shape
        // and the raw part bodies are pinned by this assertion.
        assert_eq!(part, &part::PartNode::Composite(
            part::PartHeader {
                ..part::PartHeader::default()
            },
            vec![
                // Body of the `text/plain` part.
                part::PartNode::Discrete(
                    part::PartHeader {
                        ..part::PartHeader::default()
                    },
                    r#"GZ
 OoOoO
 oOoOoOoOo
 oOoOoOoOoOoOoOoOo
 oOoOoOoOoOoOoOoOoOoOoOo
 oOoOoOoOoOoOoOoOoOoOoOoOoOoOo
 OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO"#.as_bytes()
                ),
                // Body of the `text/html` part.
                part::PartNode::Discrete(
                    part::PartHeader {
                        ..part::PartHeader::default()
                    },
                    r#"<div style="text-align: center;"><strong>GZ</strong><br />
 OoOoO<br />
 oOoOoOoOo<br />
 oOoOoOoOoOoOoOoOo<br />
 oOoOoOoOoOoOoOoOoOoOoOo<br />
 oOoOoOoOoOoOoOoOoOoOoOoOoOoOo<br />
 OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />"#.as_bytes()
                ),
            ]));
    });
 }
--- a/src/mime/content_fields.rs
+++ b/src/mime/content_fields.rs
@ -15,12 +15,6 @@ use crate::fragments::lazy;
 use crate::fragments::whitespace::cfws;
 use crate::fragments::quoted::quoted_string;
 /// Version number split into a major and a minor component.
 #[derive(Debug, PartialEq)]
 pub struct Version {
    /// Number found before the `.` separator.
    pub major: u32,
    /// Number found after the `.` separator.
    pub minor: u32,
 }
 #[derive(Debug, PartialEq)]
 pub enum Type<'a> {
    // Composite types
@ -278,20 +272,6 @@ impl<'a> From<&'a str> for TextSubtype<'a> {
    }
 }
 /// Parses a `major.minor` version number.
 ///
 /// Optional comments/folding white space (`cfws`) are accepted before and
 /// after each number, so around the `.` separator as well.
 pub fn version(input: &str) -> IResult<&str, Version> {
    let mut grammar = tuple((
        opt(cfws),
        character::u32, // .1: major
        opt(cfws),
        tag("."),
        opt(cfws),
        character::u32, // .5: minor
        opt(cfws),
    ));

    let (rest, matched) = grammar(input)?;
    let parsed = Version {
        major: matched.1,
        minor: matched.5,
    };
    Ok((rest, parsed))
 }
 pub fn parameter(input: &str) -> IResult<&str, Parameter> {
    let (rest, (pname, _, pvalue)) = tuple((
            token, 
--- a/src/mime/section.rs
+++ b/src/mime/section.rs
@ -0,0 +1,28 @@
 /// MIME header fields gathered from an iterator of `MIMEField` references.
 ///
 /// Every member borrows from the fields it was built from.
 #[derive(Debug, PartialEq, Default)]
 pub struct MIMESection<'a> {
    /// Value of the last `MIMEField::ContentType` seen, if any.
    pub content_type: Option<&'a Type<'a>>,
    /// Value of the last `MIMEField::ContentTransferEncoding` seen, if any.
    pub content_transfer_encoding: Option<&'a Mechanism<'a>>,
    /// Value of the last `MIMEField::ContentID` seen, if any.
    pub content_id: Option<&'a MessageId<'a>>,
    /// Value of the last `MIMEField::ContentDescription` seen, if any.
    pub content_description: Option<&'a Unstructured>,
    /// `MIMEField::Optional` entries, keyed by their first component.
    pub optional: HashMap<&'a str, &'a Unstructured>,
    /// Payloads of the `MIMEField::Rescue` entries, in iteration order.
    pub unparsed: Vec<&'a str>,
 }
 /// Aggregates MIME fields into a single `MIMESection`.
 ///
 /// Single-valued members keep the last field seen; optional fields are
 /// inserted by key and rescued lines are appended in iteration order.
 impl<'a> FromIterator<&'a MIMEField<'a>> for MIMESection<'a> {
    fn from_iter<I: IntoIterator<Item = &'a MIMEField<'a>>>(iter: I) -> Self {
        iter.into_iter()
            .fold(MIMESection::default(), |mut acc, field| {
                match field {
                    MIMEField::ContentType(v) => acc.content_type = Some(v),
                    MIMEField::ContentTransferEncoding(v) => {
                        acc.content_transfer_encoding = Some(v)
                    }
                    MIMEField::ContentID(v) => acc.content_id = Some(v),
                    MIMEField::ContentDescription(v) => acc.content_description = Some(v),
                    MIMEField::Optional(k, v) => {
                        acc.optional.insert(k, v);
                    }
                    MIMEField::Rescue(v) => acc.unparsed.push(v),
                };
                acc
            })
    }
 }
--- a/src/rfc5322/address.rs
+++ b/src/rfc5322/address.rs
@ -140,6 +140,13 @@ pub fn address_list_cfws(input: &[u8]) -> IResult<&[u8], Vec<AddressRef>> {
    Ok((input, vec![]))
 }
 pub fn nullable_address_list(input: &[u8]) -> IResult<&[u8], Vec<>> {
    map(
        opt(alt((address_list, address_list_cfws))), 
        |v| v.unwrap_or(vec![]),
    )(input)
 }
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/src/rfc5322/field.rs
+++ b/src/rfc5322/field.rs
@ -0,0 +1,88 @@
 use nom::{
    IResult,
 };
 use crate::rfc5322::address::{MailboxList, AddressList};
 use crate::rfc5322::mailbox::MailboxRef;
 use crate::rfc5322::identification::{MessageId, MessageIdList};
 use crate::rfc5322::trace::ReceivedLog;
 use crate::text::misc_token::{Unstructured, PhraseList};
 /// One parsed header field, with one variant per recognized field name.
 ///
 /// The section numbers in the comments below refer to the sections the
 /// variants are grouped under.
 ///
 /// NOTE(review): `DateTime`, `Mailbox`, `NullableAddressList`, `Identifier`,
 /// `IdentifierList`, `AddrSpec` and `Version` are not brought into scope by
 /// the `use` lines visible at the top of this file, which import
 /// `MailboxRef`, `MessageId` and `MessageIdList` instead — confirm the
 /// intended type names.
 #[derive(Debug, PartialEq)]
 pub enum Field<'a> {
    // 3.6.1.  The Origination Date Field
    Date(DateTime<'a>),
    // 3.6.2.  Originator Fields
    From(MailboxList<'a>),
    Sender(Mailbox<'a>),
    ReplyTo(AddressList<'a>),
    // 3.6.3.  Destination Address Fields
    To(AddressList<'a>),
    Cc(AddressList<'a>),
    Bcc(NullableAddressList<'a>),
    // 3.6.4.  Identification Fields
    MessageID(Identifier<'a>),
    InReplyTo(IdentifierList<'a>),
    References(IdentifierList<'a>),
    // 3.6.5.  Informational Fields
    Subject(Unstructured<'a>),
    Comments(Unstructured<'a>),
    Keywords(PhraseList<'a>),
    // 3.6.6   Resent Fields (not implemented)
    // 3.6.7   Trace Fields
    Received(ReceivedLog<'a>),
    ReturnPath(Option<AddrSpec<'a>>),
    // MIME
    MIMEVersion(Version<'a>),
    // Any other field: raw field name and its value.
    Optional(&'a [u8], Unstructured<'a>),
 }
 pub fn field(input: &[u8]) -> IResult<&[u8], Field<'a>> {
    let (name, rest) = field_name(input)?;
    match name.to_lowercase().as_ref() {
        "date" => datetime::section(rest).map(Field::Date),
        "from" => mailbox_list(rest).map(Field::From),
        "sender" => mailbox(rest).map(Field::Sender),
        "reply-to" => address_list(rest).map(Field::ReplyTo),
        "to" => address_list(rest).map(Field::To),
        "cc" => address_list(rest).map(Field::Cc),
        "bcc" => nullable_address_list(rest).map(Field::Bcc), 
        "message-id" => msg_id(rest).map(Field::MessageID),
        "in-reply-to" => msg_list(rest).map(Field::InReplyTo),
        "references" => msg_list(rest).map(Field::References),
        "subject" => unstructured(rest).map(Field::Subject),
        "comments" => unstructured(rest).map(Field::Comments),
        "keywords" => phrase_list(rest).map(Field::Keywords),
        "return-path" => return_path(rest).map(Field::ReturnPath), 
        "received" => received_log(rest).map(Field::ReceivedLog), 
        "mime-version" => version(rest).map(Field::MIMEVersion), 
         _ => unstructured(rest).map(|v| Field::Optional(name, v)),
    }
 }
 /// Field name of a header line
 ///
 /// Reads the name, then drops the `:` separator together with whatever
 /// `space0` matches on either side of it; only the name is returned.
 ///
 /// ```abnf
 /// field      =   field-name ":" unstructured CRLF
 /// field-name =   1*ftext
 /// ftext      =   %d33-57 /          ; Printable US-ASCII
 ///                %d59-126           ;  characters not including
 ///                                   ;  ":".
 /// ```
 fn field_name(input: &[u8]) -> IResult<&[u8], &[u8]> {
    // `is_ascii_graphic` accepts exactly %d33-126; the colon (%d58) is carved out.
    let ftext = take_while1(|c: u8| c.is_ascii_graphic() && c != b':');
    let separator = tuple((space0, tag(b":"), space0));

    terminated(ftext, separator)(input)
 }
--- a/src/rfc5322/identification.rs
+++ b/src/rfc5322/identification.rs
@ -18,27 +18,6 @@ pub struct MessageId<'a> {
 }
 pub type MessageIdList<'a> = Vec<MessageId<'a>>;
 /*
 impl<'a> TryFrom<&'a lazy::Identifier<'a>> for MessageId<'a> {
    type Error = IMFError<'a>;
    fn try_from(id: &'a lazy::Identifier<'a>) -> Result<Self, Self::Error> {
        msg_id(id.0)
            .map(|(_, i)| i)
            .map_err(|e| IMFError::MessageID(e))
    }
 }
 impl<'a> TryFrom<&'a lazy::IdentifierList<'a>> for MessageIdList<'a> {
    type Error = IMFError<'a>;
    fn try_from(id: &'a lazy::IdentifierList<'a>) -> Result<Self, Self::Error> {
        many1(msg_id)(id.0)
            .map(|(_, i)| i)
            .map_err(|e| IMFError::MessageIDList(e))
    }
 }*/
 /// Message identifier
 ///
 /// ```abnf
@ -53,6 +32,10 @@ pub fn msg_id(input: &[u8]) -> IResult<&[u8], MessageId> {
    Ok((input, MessageId { left, right }))
 }
 /// Parses one or more consecutive message identifiers.
 ///
 /// Fails when not even one identifier can be read with `msg_id`.
 pub fn msg_list(input: &[u8]) -> IResult<&[u8], MessageIdList> {
    let mut identifiers = many1(msg_id);
    identifiers(input)
 }
 // @FIXME Missing obsolete
 fn id_left(input: &[u8]) -> IResult<&[u8], &[u8]> {
    dot_atom_text(input)
--- a/src/rfc5322/message.rs
+++ b/src/rfc5322/message.rs
@ -9,57 +9,63 @@ use crate::fragments::trace::ReceivedLog;
 use chrono::{DateTime, FixedOffset};
 #[derive(Debug, PartialEq, Default)]
-pub struct Section<'a> {
+pub struct Message<'a> {
    // 3.6.1.  The Origination Date Field
-    pub date: Option<&'a DateTime<FixedOffset>>,
+    pub date: Option<DateTime<FixedOffset>>,
    // 3.6.2.  Originator Fields
-    pub from: Vec<&'a MailboxRef>,
+    pub from: Vec<MailboxRef<'a>>,
-    pub sender: Option<&'a MailboxRef>,
+    pub sender: Option<MailboxRef<'a>>,
-    pub reply_to: Vec<&'a AddressRef>,
+    pub reply_to: Vec<AddressRef<'a>>,
    // 3.6.3.  Destination Address Fields
-    pub to: Vec<&'a AddressRef>,
+    pub to: Vec<AddressRef<'a>>,
-    pub cc: Vec<&'a AddressRef>,
+    pub cc: Vec<AddressRef<'a>>,
-    pub bcc: Vec<&'a AddressRef>,
+    pub bcc: Vec<AddressRef<'a>>,
    // 3.6.4.  Identification Fields
-    pub msg_id: Option<&'a MessageId<'a>>,
+    pub msg_id: Option<MessageId<'a>>,
-    pub in_reply_to: Vec<&'a MessageId<'a>>,
+    pub in_reply_to: Vec<MessageId<'a>>,
-    pub references: Vec<&'a MessageId<'a>>,
+    pub references: Vec<MessageId<'a>>,
    // 3.6.5.  Informational Fields
-    pub subject: Option<&'a Unstructured>,
+    pub subject: Option<Unstructured<'a>>,
-    pub comments: Vec<&'a Unstructured>,
+    pub comments: Vec<Unstructured<'a>>,
-    pub keywords: Vec<&'a PhraseList>,
+    pub keywords: Vec<PhraseList<'a>>,
    // 3.6.6 Not implemented
    // 3.6.7 Trace Fields
-    pub return_path: Vec<&'a MailboxRef>,
+    pub return_path: Vec<MailboxRef<'a>>,
-    pub received: Vec<&'a ReceivedLog<'a>>,
+    pub received: Vec<ReceivedLog<'a>>,
    // 3.6.8.  Optional Fields
-    pub optional: HashMap<&'a str, &'a Unstructured>,
+    pub optional: HashMap<&'a [u8], Unstructured<'a>>,
    // MIME
    pub mime_version: Option<&'a Version>,
    pub mime: MIMESection<'a>,
    // Recovery
-    pub bad_fields: Vec<&'a lazy::Field<'a>>,
+    pub unparsed: Vec<&'a [u8]>,
    pub unparsed: Vec<&'a str>,
 }
-#[derive(Debug, PartialEq, Default)]
+impl<'a> FromIterator<&'a [u8]> for Message<'a> {
-pub struct MIMESection<'a> {
+    fn from_iter<I: IntoIterator<Item = &'a [u8]>>(iter: I) -> Self {
-    pub content_type: Option<&'a Type<'a>>,
+        iter.fold(
-    pub content_transfer_encoding: Option<&'a Mechanism<'a>>,
+            Message::default(),
-    pub content_id: Option<&'a MessageId<'a>>,
+            |mut msg, field| {
-    pub content_description: Option<&'a Unstructured>,
+                match field_name(field) {
-    pub optional: HashMap<&'a str, &'a Unstructured>,
+                    Ok((name, value)) => xx,
-    pub unparsed: Vec<&'a str>,
+
                }
                match field {
                }
                msg
            }
        )
    }
 }
 //@FIXME min and max limits are not enforced,
 // it may result in missing data or silently overriden data.
 impl<'a> FromIterator<&'a Field<'a>> for Section<'a> {
@ -104,19 +110,3 @@ impl<'a> FromIterator<&'a Field<'a>> for Section<'a> {
    }
 }
 /// Aggregates MIME fields into a single `MIMESection`.
 ///
 /// Single-valued members keep the last field seen; optional fields are
 /// inserted by key and rescued lines are appended in iteration order.
 impl<'a> FromIterator<&'a MIMEField<'a>> for MIMESection<'a> {
    fn from_iter<I: IntoIterator<Item = &'a MIMEField<'a>>>(iter: I) -> Self {
        iter.into_iter()
            .fold(MIMESection::default(), |mut acc, field| {
                match field {
                    MIMEField::ContentType(v) => acc.content_type = Some(v),
                    MIMEField::ContentTransferEncoding(v) => {
                        acc.content_transfer_encoding = Some(v)
                    }
                    MIMEField::ContentID(v) => acc.content_id = Some(v),
                    MIMEField::ContentDescription(v) => acc.content_description = Some(v),
                    MIMEField::Optional(k, v) => {
                        acc.optional.insert(k, v);
                    }
                    MIMEField::Rescue(v) => acc.unparsed.push(v),
                };
                acc
            })
    }
 }
--- a/src/rfc5322/mod.rs
+++ b/src/rfc5322/mod.rs
@ -4,3 +4,4 @@ pub mod datetime;
 pub mod trace;
 pub mod identification;
 pub mod mime;
 pub mod field;
--- a/src/rfc5322/trace.rs
+++ b/src/rfc5322/trace.rs
@ -35,7 +35,7 @@ impl<'a> TryFrom<&'a lazy::ReceivedLog<'a>> for ReceivedLog<'a> {
    }
 }*/
-pub fn received_body(input: &[u8]) -> IResult<&[u8], ReceivedLog> {
+pub fn received_log(input: &[u8]) -> IResult<&[u8], ReceivedLog> {
    map(
        tuple((
            many0(received_tokens),
@ -46,7 +46,7 @@ pub fn received_body(input: &[u8]) -> IResult<&[u8], ReceivedLog> {
    )(input)
 }
-pub fn return_path_body(input: &[u8]) -> IResult<&[u8], Option<mailbox::AddrSpec>> {
+pub fn return_path(input: &[u8]) -> IResult<&[u8], Option<mailbox::AddrSpec>> {
    alt((map(mailbox::angle_addr, |a| Some(a)), empty_path))(input)
 }
--- a/src/text/misc_token.rs
+++ b/src/text/misc_token.rs
@ -18,6 +18,9 @@ use crate::text::{
 /// List of phrases, each stored as an owned `String`.
 #[derive(Debug, PartialEq, Default)]
 pub struct PhraseList(pub Vec<String>);
 pub fn phrase_list(input: &'a [u8]) -> IResult<&[u8], PhraseList> {
    separated_list1(tag(","), phrase)(input)
 }
 /*
 impl<'a> TryFrom<&'a lazy::Unstructured<'a>> for Unstructured {