From a503eb1de6c5641a9304b39f71a4d8affce51eb4 Mon Sep 17 00:00:00 2001
From: Quentin Dufour <quentin@deuxfleurs.fr>
Date: Tue, 18 Jul 2023 23:25:10 +0200
Subject: [PATCH] wip refactor

---
 src/fragments/misc_token.rs                   | 129 -------
 src/fragments/mod.rs                          |  23 --
 src/fragments/model.rs                        | 146 --------
 src/fragments/words.rs                        | 116 ------
 src/headers.rs                                |  27 ++
 src/lib.rs                                    |   6 +-
 .../mime.rs => mime/content_fields.rs}        |  12 -
 src/{fragments => mime}/part.rs               |   0
 .../body_structure.rs                         |   0
 .../extract_fields.rs                         |   0
 .../field_eager.rs                            |   0
 .../field_lazy.rs                             |   0
 .../guess_charset.rs                          |   0
 .../header_section.rs                         |   0
 src/{multipass => old.multipass}/mod.rs       |   0
 src/{multipass => old.multipass}/segment.rs   |   0
 src/parse.rs                                  |  10 +-
 src/{fragments => rfc5322}/address.rs         |  25 +-
 src/{fragments => rfc5322}/datetime.rs        |   0
 src/{fragments => rfc5322}/eager.rs           |   0
 src/{fragments => rfc5322}/identification.rs  |   8 +
 src/{fragments => rfc5322}/lazy.rs            |   0
 src/{fragments => rfc5322}/mailbox.rs         |  28 +-
 .../section.rs => rfc5322/message.rs}         |   0
 src/{fragments => rfc5322}/trace.rs           |   0
 src/text/ascii.rs                             | 142 ++++++++
 src/text/buffer.rs                            |  43 +++
 src/{fragments => text}/encoding.rs           | 129 ++++---
 src/text/misc_token.rs                        | 166 +++++++++
 src/text/mod.rs                               |   7 +
 src/{fragments => text}/quoted.rs             |  49 ++-
 src/{fragments => text}/whitespace.rs         | 116 +++---
 src/text/words.rs                             | 133 +++++++
 tests/enron.rs                                | 129 -------
 tests/known.rs                                | 340 ------------------
 35 files changed, 746 insertions(+), 1038 deletions(-)
 delete mode 100644 src/fragments/misc_token.rs
 delete mode 100644 src/fragments/mod.rs
 delete mode 100644 src/fragments/model.rs
 delete mode 100644 src/fragments/words.rs
 create mode 100644 src/headers.rs
 rename src/{fragments/mime.rs => mime/content_fields.rs} (97%)
 rename src/{fragments => mime}/part.rs (100%)
 rename src/{multipass => old.multipass}/body_structure.rs (100%)
 rename src/{multipass => old.multipass}/extract_fields.rs (100%)
 rename src/{multipass => old.multipass}/field_eager.rs (100%)
 rename src/{multipass => old.multipass}/field_lazy.rs (100%)
 rename src/{multipass => old.multipass}/guess_charset.rs (100%)
 rename src/{multipass => old.multipass}/header_section.rs (100%)
 rename src/{multipass => old.multipass}/mod.rs (100%)
 rename src/{multipass => old.multipass}/segment.rs (100%)
 rename src/{fragments => rfc5322}/address.rs (90%)
 rename src/{fragments => rfc5322}/datetime.rs (100%)
 rename src/{fragments => rfc5322}/eager.rs (100%)
 rename src/{fragments => rfc5322}/identification.rs (93%)
 rename src/{fragments => rfc5322}/lazy.rs (100%)
 rename src/{fragments => rfc5322}/mailbox.rs (95%)
 rename src/{fragments/section.rs => rfc5322/message.rs} (100%)
 rename src/{fragments => rfc5322}/trace.rs (100%)
 create mode 100644 src/text/ascii.rs
 create mode 100644 src/text/buffer.rs
 rename src/{fragments => text}/encoding.rs (50%)
 create mode 100644 src/text/misc_token.rs
 create mode 100644 src/text/mod.rs
 rename src/{fragments => text}/quoted.rs (55%)
 rename src/{fragments => text}/whitespace.rs (67%)
 create mode 100644 src/text/words.rs
 delete mode 100644 tests/enron.rs
 delete mode 100644 tests/known.rs

diff --git a/src/fragments/misc_token.rs b/src/fragments/misc_token.rs
deleted file mode 100644
index 11e25af..0000000
--- a/src/fragments/misc_token.rs
+++ /dev/null
@@ -1,129 +0,0 @@
-use nom::{
-    branch::alt,
-    bytes::complete::{tag, take_while1},
-    character::complete::space0,
-    combinator::{into, map, opt},
-    multi::{many0, many1, separated_list1},
-    sequence::tuple,
-    IResult,
-};
-use std::borrow::Cow;
-
-use crate::error::IMFError;
-use crate::fragments::lazy;
-use crate::fragments::quoted::quoted_string;
-use crate::fragments::whitespace::{fws, is_obs_no_ws_ctl};
-use crate::fragments::words::{atom, is_vchar};
-use crate::fragments::encoding::encoded_word;
-
-#[derive(Debug, PartialEq, Default)]
-pub struct Unstructured(pub String);
-
-#[derive(Debug, PartialEq, Default)]
-pub struct PhraseList(pub Vec<String>);
-
-impl<'a> TryFrom<&'a lazy::Unstructured<'a>> for Unstructured {
-    type Error = IMFError<'a>;
-
-    fn try_from(input: &'a lazy::Unstructured<'a>) -> Result<Self, Self::Error> {
-        unstructured(input.0)
-            .map(|(_, v)| Unstructured(v))
-            .map_err(|e| IMFError::Unstructured(e))
-    }
-}
-
-impl<'a> TryFrom<&'a lazy::PhraseList<'a>> for PhraseList {
-    type Error = IMFError<'a>;
-
-    fn try_from(p: &'a lazy::PhraseList<'a>) -> Result<Self, Self::Error> {
-        separated_list1(tag(","), phrase)(p.0)
-            .map(|(_, q)| PhraseList(q))
-            .map_err(|e| IMFError::PhraseList(e))
-    }
-}
-
-/// Word
-///
-/// ```abnf
-///    word            =   atom / quoted-string
-/// ```
-pub fn word(input: &str) -> IResult<&str, Cow<str>> {
-    alt((into(quoted_string), into(encoded_word), into(atom)))(input)
-}
-
-/// Phrase
-///
-/// ```abnf
-///    phrase          =   1*word / obs-phrase
-/// ```
-pub fn phrase(input: &str) -> IResult<&str, String> {
-    let (input, words) = many1(word)(input)?;
-    let phrase = words.join(" ");
-    Ok((input, phrase))
-}
-
-/// Compatible unstructured input
-///
-/// ```abnf
-/// obs-utext       =   %d0 / obs-NO-WS-CTL / VCHAR
-/// ```
-fn is_unstructured(c: char) -> bool {
-    is_vchar(c) || is_obs_no_ws_ctl(c) || c == '\x00'
-}
-
-enum UnstrToken {
-    Init,
-    Encoded,
-    Plain,
-}
-
-/// Unstructured header field body
-///
-/// ```abnf
-/// unstructured    =   (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct
-/// ```
-pub fn unstructured(input: &str) -> IResult<&str, String> {
-    let (input, r) = many0(tuple((opt(fws), alt((
-                        map(encoded_word, |v| (Cow::Owned(v), UnstrToken::Encoded)), 
-                        map(take_while1(is_unstructured), |v| (Cow::Borrowed(v), UnstrToken::Plain)),
-                    )))))(input)?;
-
-    let (input, _) = space0(input)?;
-
-    // Try to optimize for the most common cases
-    let body = match r.as_slice() {
-        // Optimization when there is only one line
-        [(None, (content, _))] | [(_, (content, UnstrToken::Encoded))] => content.to_string(),
-        [(Some(_), (content, _))] => " ".to_string() + content,
-        // Generic case, with multiple lines
-        lines => lines.iter().fold(
-            (&UnstrToken::Init, String::with_capacity(255)), 
-            |(prev_token, result), (may_ws, (content, current_token))| {
-            let new_res = match (may_ws, prev_token, current_token) {
-                (_, UnstrToken::Encoded, UnstrToken::Encoded) | (None, _, _) => result + content, 
-                _ => result + " " + content,
-            };
-            (current_token, new_res)
-        }).1,
-    };
-
-    Ok((input, body))
-}
-
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    #[test]
-    fn test_phrase() {
-        assert_eq!(phrase("hello world"), Ok(("", "hello world".into())));
-        assert_eq!(
-            phrase("salut \"le\" monde"),
-            Ok(("", "salut le monde".into()))
-        );
-        assert_eq!(
-            phrase("fin\r\n du\r\nmonde"),
-            Ok(("\r\nmonde", "fin du".into()))
-        );
-    }
-}
diff --git a/src/fragments/mod.rs b/src/fragments/mod.rs
deleted file mode 100644
index 5ac9bf3..0000000
--- a/src/fragments/mod.rs
+++ /dev/null
@@ -1,23 +0,0 @@
-// Model
-pub mod model;
-
-// Generic
-pub mod misc_token;
-mod quoted;
-pub mod whitespace;
-mod words;
-
-// Header specific
-mod address;
-mod datetime;
-pub mod eager;
-mod identification;
-pub mod lazy;
-mod mailbox;
-pub mod section;
-pub mod trace;
-
-// MIME related
-pub mod mime;
-pub mod encoding;
-pub mod part;
diff --git a/src/fragments/model.rs b/src/fragments/model.rs
deleted file mode 100644
index fb0fa30..0000000
--- a/src/fragments/model.rs
+++ /dev/null
@@ -1,146 +0,0 @@
-use chrono::{DateTime, FixedOffset};
-use std::collections::HashMap;
-
-#[derive(Debug, PartialEq)]
-pub struct AddrSpec {
-    pub local_part: String,
-    pub domain: String,
-}
-impl AddrSpec {
-    pub fn fully_qualified(&self) -> String {
-        format!("{}@{}", self.local_part, self.domain)
-    }
-}
-
-#[derive(Debug, PartialEq)]
-pub struct MailboxRef {
-    // The actual "email address" like hello@example.com
-    pub addrspec: AddrSpec,
-    pub name: Option<String>,
-}
-impl From<AddrSpec> for MailboxRef {
-    fn from(addr: AddrSpec) -> Self {
-        MailboxRef {
-            name: None,
-            addrspec: addr,
-        }
-    }
-}
-pub type MailboxList = Vec<MailboxRef>;
-
-#[derive(Debug, PartialEq)]
-pub struct GroupRef {
-    pub name: String,
-    pub participants: Vec<MailboxRef>,
-}
-
-#[derive(Debug, PartialEq)]
-pub enum AddressRef {
-    Single(MailboxRef),
-    Many(GroupRef),
-}
-impl From<MailboxRef> for AddressRef {
-    fn from(mx: MailboxRef) -> Self {
-        AddressRef::Single(mx)
-    }
-}
-impl From<GroupRef> for AddressRef {
-    fn from(grp: GroupRef) -> Self {
-        AddressRef::Many(grp)
-    }
-}
-pub type AddressList = Vec<AddressRef>;
-
-#[derive(Debug, PartialEq)]
-pub struct MessageId<'a> {
-    pub left: &'a str,
-    pub right: &'a str,
-}
-pub type MessageIdList<'a> = Vec<MessageId<'a>>;
-
-#[derive(Debug, PartialEq)]
-pub enum FieldBody<'a, T> {
-    Correct(T),
-    Failed(&'a str),
-}
-
-#[derive(Debug, PartialEq)]
-pub enum Field<'a> {
-    // 3.6.1.  The Origination Date Field
-    Date(FieldBody<'a, Option<DateTime<FixedOffset>>>),
-
-    // 3.6.2.  Originator Fields
-    From(FieldBody<'a, Vec<MailboxRef>>),
-    Sender(FieldBody<'a, MailboxRef>),
-    ReplyTo(FieldBody<'a, Vec<AddressRef>>),
-
-    // 3.6.3.  Destination Address Fields
-    To(FieldBody<'a, Vec<AddressRef>>),
-    Cc(FieldBody<'a, Vec<AddressRef>>),
-    Bcc(FieldBody<'a, Vec<AddressRef>>),
-
-    // 3.6.4.  Identification Fields
-    MessageID(FieldBody<'a, MessageId<'a>>),
-    InReplyTo(FieldBody<'a, Vec<MessageId<'a>>>),
-    References(FieldBody<'a, Vec<MessageId<'a>>>),
-
-    // 3.6.5.  Informational Fields
-    Subject(FieldBody<'a, String>),
-    Comments(FieldBody<'a, String>),
-    Keywords(FieldBody<'a, Vec<String>>),
-
-    // 3.6.6   Resent Fields (not implemented)
-    // 3.6.7   Trace Fields
-    Received(FieldBody<'a, &'a str>),
-    ReturnPath(FieldBody<'a, Option<MailboxRef>>),
-
-    // 3.6.8.  Optional Fields
-    Optional(&'a str, String),
-
-    // None
-    Rescue(&'a str),
-}
-
-/// Permissive Header Section
-///
-/// This is a structure intended for parsing/decoding,
-/// hence it's support cases where the email is considered
-/// as invalid according to RFC5322 but for which we can
-/// still extract some data.
-#[derive(Debug, PartialEq, Default)]
-pub struct HeaderSection<'a> {
-    // 3.6.1.  The Origination Date Field
-    pub date: Option<DateTime<FixedOffset>>,
-
-    // 3.6.2.  Originator Fields
-    pub from: Vec<MailboxRef>,
-    pub sender: Option<MailboxRef>,
-    pub reply_to: Vec<AddressRef>,
-
-    // 3.6.3.  Destination Address Fields
-    pub to: Vec<AddressRef>,
-    pub cc: Vec<AddressRef>,
-    pub bcc: Vec<AddressRef>,
-
-    // 3.6.4.  Identification Fields
-    pub msg_id: Option<MessageId<'a>>,
-    pub in_reply_to: Vec<MessageId<'a>>,
-    pub references: Vec<MessageId<'a>>,
-
-    // 3.6.5.  Informational Fields
-    pub subject: Option<String>,
-    pub comments: Vec<String>,
-    pub keywords: Vec<String>,
-
-    // 3.6.6 Not implemented
-    // 3.6.7 Trace Fields
-    pub return_path: Vec<MailboxRef>,
-    pub received: Vec<&'a str>,
-
-    // 3.6.8.  Optional Fields
-    pub optional: HashMap<&'a str, String>,
-
-    // Recovery
-    pub bad_fields: Vec<Field<'a>>,
-    pub unparsed: Vec<&'a str>,
-}
diff --git a/src/fragments/words.rs b/src/fragments/words.rs
deleted file mode 100644
index acc5584..0000000
--- a/src/fragments/words.rs
+++ /dev/null
@@ -1,116 +0,0 @@
-use crate::fragments::whitespace::cfws;
-use nom::{
-    bytes::complete::{tag, take_while1},
-    combinator::{opt, recognize},
-    multi::many0,
-    sequence::{delimited, pair},
-    IResult,
-};
-
-/// VCHAR definition
-pub fn is_vchar(c: char) -> bool {
-    (c >= '\x21' && c <= '\x7E') || !c.is_ascii()
-}
-
-/// Sequence of visible chars with the UTF-8 extension
-///
-/// ```abnf
-/// VCHAR   =  %x21-7E
-///            ; visible (printing) characters
-/// VCHAR   =/  UTF8-non-ascii
-/// SEQ     = 1*VCHAR
-///```
-#[allow(dead_code)]
-pub fn vchar_seq(input: &str) -> IResult<&str, &str> {
-    take_while1(is_vchar)(input)
-}
-
-/// Atom allowed characters
-fn is_atext(c: char) -> bool {
-    c.is_ascii_alphanumeric() || "!#$%&'*+-/=?^_`{|}~".contains(c) || !c.is_ascii()
-}
-
-/// Atom
-///
-/// `[CFWS] 1*atext [CFWS]`
-pub fn atom(input: &str) -> IResult<&str, &str> {
-    delimited(opt(cfws), take_while1(is_atext), opt(cfws))(input)
-}
-
-/// dot-atom-text
-///
-/// `1*atext *("." 1*atext)`
-pub fn dot_atom_text(input: &str) -> IResult<&str, &str> {
-    recognize(pair(
-        take_while1(is_atext),
-        many0(pair(tag("."), take_while1(is_atext))),
-    ))(input)
-}
-
-/// dot-atom
-///
-/// `[CFWS] dot-atom-text [CFWS]`
-pub fn dot_atom(input: &str) -> IResult<&str, &str> {
-    delimited(opt(cfws), dot_atom_text, opt(cfws))(input)
-}
-
-#[allow(dead_code)]
-pub fn is_special(c: char) -> bool {
-    c == '('
-        || c == ')'
-        || c == '<'
-        || c == '>'
-        || c == '['
-        || c == ']'
-        || c == ':'
-        || c == ';'
-        || c == '@'
-        || c == '\\'
-        || c == ','
-        || c == '.'
-        || c == '"'
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_vchar_seq() {
-        assert_eq!(vchar_seq("hello world"), Ok((" world", "hello")));
-        assert_eq!(vchar_seq("hello👋 world"), Ok((" world", "hello👋")));
-    }
-
-    #[test]
-    fn test_atext() {
-        assert!(is_atext('='));
-        assert!(is_atext('5'));
-        assert!(is_atext('Q'));
-        assert!(!is_atext(' '));
-        assert!(is_atext('É')); // support utf8
-    }
-
-    #[test]
-    fn test_atom() {
-        assert_eq!(
-            atom("(skip)  imf_codec (hidden) aerogramme"),
-            Ok(("aerogramme", "imf_codec"))
-        );
-    }
-
-    #[test]
-    fn test_dot_atom_text() {
-        assert_eq!(
-            dot_atom_text("quentin.dufour.io abcdef"),
-            Ok((" abcdef", "quentin.dufour.io"))
-        );
-    }
-
-    #[test]
-    fn test_dot_atom() {
-        assert_eq!(
-            dot_atom("   (skip) quentin.dufour.io abcdef"),
-            Ok(("abcdef", "quentin.dufour.io"))
-        );
-    }
-}
diff --git a/src/headers.rs b/src/headers.rs
new file mode 100644
index 0000000..5bf0661
--- /dev/null
+++ b/src/headers.rs
@@ -0,0 +1,27 @@
+use nom::{
+    self,
+    combinator::{all_consuming, recognize},
+    multi::many0,
+    sequence::terminated,
+    IResult,
+};
+
+use crate::text::whitespace::{foldable_line, line, obs_crlf};
+
+/// Runs `segment` on `input`, then `fields` on the recognized span; yields (remaining input, fields).
+pub fn headers(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
+    let (body, raw_section) = segment(input)?;
+    fields(raw_section).map(|(_, parsed)| (body, parsed))
+}
+
+// -- part 1, segment: recognize zero or more `line`s, then require (and drop) a closing `obs_crlf`
+fn segment(input: &[u8]) -> IResult<&[u8], &[u8]> {
+    terminated(recognize(many0(line)), obs_crlf)(input)
+}
+
+// -- part 2, isolate fields: apply `foldable_line` repeatedly over the whole of `input`
+fn fields(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
+    // `all_consuming` turns any unparsed remainder into an error.
+    all_consuming(many0(foldable_line))(input)
+}
+
diff --git a/src/lib.rs b/src/lib.rs
index 257344f..974cd54 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,5 @@
 pub mod error;
-pub mod fragments;
-pub mod multipass;
+//pub mod mime;
+//pub mod message;
+pub mod headers;
+pub mod text;
diff --git a/src/fragments/mime.rs b/src/mime/content_fields.rs
similarity index 97%
rename from src/fragments/mime.rs
rename to src/mime/content_fields.rs
index 14ece11..31ac49c 100644
--- a/src/fragments/mime.rs
+++ b/src/mime/content_fields.rs
@@ -292,18 +292,6 @@ pub fn version(input: &str) -> IResult<&str, Version> {
     Ok((rest, Version { major, minor }))
 }
 
-/// Token allowed characters
-fn is_token_text(c: char) -> bool {
-    c.is_ascii() && !c.is_ascii_control() && !c.is_ascii_whitespace() && !"()<>@,;:\\\"/[]?=".contains(c)
-}
-
-/// Token
-///
-/// `[CFWS] 1*token_text [CFWS]`
-pub fn token(input: &str) -> IResult<&str, &str> {
-    delimited(opt(cfws), take_while1(is_token_text), opt(cfws))(input)
-}
-
 pub fn parameter(input: &str) -> IResult<&str, Parameter> {
     let (rest, (pname, _, pvalue)) = tuple((
             token, 
diff --git a/src/fragments/part.rs b/src/mime/part.rs
similarity index 100%
rename from src/fragments/part.rs
rename to src/mime/part.rs
diff --git a/src/multipass/body_structure.rs b/src/old.multipass/body_structure.rs
similarity index 100%
rename from src/multipass/body_structure.rs
rename to src/old.multipass/body_structure.rs
diff --git a/src/multipass/extract_fields.rs b/src/old.multipass/extract_fields.rs
similarity index 100%
rename from src/multipass/extract_fields.rs
rename to src/old.multipass/extract_fields.rs
diff --git a/src/multipass/field_eager.rs b/src/old.multipass/field_eager.rs
similarity index 100%
rename from src/multipass/field_eager.rs
rename to src/old.multipass/field_eager.rs
diff --git a/src/multipass/field_lazy.rs b/src/old.multipass/field_lazy.rs
similarity index 100%
rename from src/multipass/field_lazy.rs
rename to src/old.multipass/field_lazy.rs
diff --git a/src/multipass/guess_charset.rs b/src/old.multipass/guess_charset.rs
similarity index 100%
rename from src/multipass/guess_charset.rs
rename to src/old.multipass/guess_charset.rs
diff --git a/src/multipass/header_section.rs b/src/old.multipass/header_section.rs
similarity index 100%
rename from src/multipass/header_section.rs
rename to src/old.multipass/header_section.rs
diff --git a/src/multipass/mod.rs b/src/old.multipass/mod.rs
similarity index 100%
rename from src/multipass/mod.rs
rename to src/old.multipass/mod.rs
diff --git a/src/multipass/segment.rs b/src/old.multipass/segment.rs
similarity index 100%
rename from src/multipass/segment.rs
rename to src/old.multipass/segment.rs
diff --git a/src/parse.rs b/src/parse.rs
index 60306c1..9f5407b 100644
--- a/src/parse.rs
+++ b/src/parse.rs
@@ -1,8 +1,9 @@
-use imf_codec::fragments::section::Section;
-use imf_codec::multipass::segment;
+//use imf_codec::fragments::section::Section;
+//use imf_codec::multipass::segment;
 use std::io;
 use std::io::Read;
 
+/*
 fn parser<'a, F>(input: &'a [u8], func: F) -> ()
 where
     F: FnOnce(&Section) -> (),
@@ -15,9 +16,10 @@ where
     let section = field_body.section();
 
     func(&section.fields);
-}
+}*/
 
 fn main() {
+    /*
     // Read full mail in memory
     let mut rawmail = Vec::new();
     io::stdin().lock().read_to_end(&mut rawmail).unwrap();
@@ -30,4 +32,6 @@ fn main() {
         assert!(section.from.len() > 0);
         assert!(section.bad_fields.len() == 0);
     });
+    */
+    println!("hello world");
 }
diff --git a/src/fragments/address.rs b/src/rfc5322/address.rs
similarity index 90%
rename from src/fragments/address.rs
rename to src/rfc5322/address.rs
index 7bf2a43..c829ac3 100644
--- a/src/fragments/address.rs
+++ b/src/rfc5322/address.rs
@@ -11,9 +11,32 @@ use crate::error::IMFError;
 use crate::fragments::lazy;
 use crate::fragments::mailbox::mailbox;
 use crate::fragments::misc_token::phrase;
-use crate::fragments::model::{AddressList, AddressRef, GroupRef, MailboxList, MailboxRef};
+//use crate::fragments::model::{AddressList, AddressRef, GroupRef, MailboxList, MailboxRef};
 use crate::fragments::whitespace::cfws;
 
+#[derive(Debug, PartialEq)]
+pub struct GroupRef {
+    pub name: String,
+    pub participants: Vec<MailboxRef>,
+}
+
+#[derive(Debug, PartialEq)]
+pub enum AddressRef {
+    Single(MailboxRef),
+    Many(GroupRef),
+}
+impl From<MailboxRef> for AddressRef {
+    fn from(mx: MailboxRef) -> Self {
+        AddressRef::Single(mx)
+    }
+}
+impl From<GroupRef> for AddressRef {
+    fn from(grp: GroupRef) -> Self {
+        AddressRef::Many(grp)
+    }
+}
+pub type AddressList = Vec<AddressRef>;
+
 impl<'a> TryFrom<&'a lazy::Mailbox<'a>> for MailboxRef {
     type Error = IMFError<'a>;
 
diff --git a/src/fragments/datetime.rs b/src/rfc5322/datetime.rs
similarity index 100%
rename from src/fragments/datetime.rs
rename to src/rfc5322/datetime.rs
diff --git a/src/fragments/eager.rs b/src/rfc5322/eager.rs
similarity index 100%
rename from src/fragments/eager.rs
rename to src/rfc5322/eager.rs
diff --git a/src/fragments/identification.rs b/src/rfc5322/identification.rs
similarity index 93%
rename from src/fragments/identification.rs
rename to src/rfc5322/identification.rs
index 8ba4b89..179505e 100644
--- a/src/fragments/identification.rs
+++ b/src/rfc5322/identification.rs
@@ -14,6 +14,14 @@ use crate::fragments::model::{MessageId, MessageIdList};
 use crate::fragments::whitespace::cfws;
 use crate::fragments::words::dot_atom_text;
 
+
+#[derive(Debug, PartialEq)]
+pub struct MessageId<'a> {
+    pub left: &'a str,
+    pub right: &'a str,
+}
+pub type MessageIdList<'a> = Vec<MessageId<'a>>;
+
 impl<'a> TryFrom<&'a lazy::Identifier<'a>> for MessageId<'a> {
     type Error = IMFError<'a>;
 
diff --git a/src/fragments/lazy.rs b/src/rfc5322/lazy.rs
similarity index 100%
rename from src/fragments/lazy.rs
rename to src/rfc5322/lazy.rs
diff --git a/src/fragments/mailbox.rs b/src/rfc5322/mailbox.rs
similarity index 95%
rename from src/fragments/mailbox.rs
rename to src/rfc5322/mailbox.rs
index 6860c7c..fd87309 100644
--- a/src/fragments/mailbox.rs
+++ b/src/rfc5322/mailbox.rs
@@ -10,11 +10,37 @@ use nom::{
 use std::borrow::Cow;
 
 use crate::fragments::misc_token::{phrase, word};
-use crate::fragments::model::{AddrSpec, MailboxRef};
 use crate::fragments::quoted::quoted_string;
 use crate::fragments::whitespace::{cfws, fws, is_obs_no_ws_ctl};
 use crate::fragments::words::{atom, dot_atom};
 
+#[derive(Debug, PartialEq)]
+pub struct AddrSpec {
+    pub local_part: String,
+    pub domain: String,
+}
+impl AddrSpec {
+    pub fn fully_qualified(&self) -> String {
+        format!("{}@{}", self.local_part, self.domain)
+    }
+}
+
+#[derive(Debug, PartialEq)]
+pub struct MailboxRef {
+    // The actual "email address" like hello@example.com
+    pub addrspec: AddrSpec,
+    pub name: Option<String>,
+}
+impl From<AddrSpec> for MailboxRef {
+    fn from(addr: AddrSpec) -> Self {
+        MailboxRef {
+            name: None,
+            addrspec: addr,
+        }
+    }
+}
+pub type MailboxList = Vec<MailboxRef>;
+
 /// Mailbox
 ///
 /// ```abnf
diff --git a/src/fragments/section.rs b/src/rfc5322/message.rs
similarity index 100%
rename from src/fragments/section.rs
rename to src/rfc5322/message.rs
diff --git a/src/fragments/trace.rs b/src/rfc5322/trace.rs
similarity index 100%
rename from src/fragments/trace.rs
rename to src/rfc5322/trace.rs
diff --git a/src/text/ascii.rs b/src/text/ascii.rs
new file mode 100644
index 0000000..bb5c9b4
--- /dev/null
+++ b/src/text/ascii.rs
@@ -0,0 +1,142 @@
+// ASCII
+// -- CONTROL CHARACTERS
+pub const NULL: u8 = 0x00; // NULL
+pub const SOH: u8 = 0x01; // START OF HEADING
+pub const STX: u8 = 0x02; // START OF TEXT
+pub const ETX: u8 = 0x03; // END OF TEXT
+pub const EOT: u8 = 0x04; // END OF TRANSMISSION
+pub const ANQ: u8 = 0x05; // ENQUIRY (conventionally abbreviated ENQ)
+pub const ACK: u8 = 0x06; // ACKNOWLEDGE
+pub const BEL: u8 = 0x07; // BELL
+pub const BS: u8 = 0x08; // BACKSPACE
+pub const HT: u8 = 0x09; // HORIZONTAL TAB
+pub const LF: u8 = 0x0A; // LINE FEED
+pub const VT: u8 = 0x0B; // VERTICAL TAB
+pub const FF: u8 = 0x0C; // FORM FEED
+pub const CR: u8 = 0x0D; // CARRIAGE RETURN
+pub const SO: u8 = 0x0E; // SHIFT OUT
+pub const SI: u8 = 0x0F; // SHIFT IN
+pub const DLE: u8 = 0x10; // DATA LINK ESCAPE
+pub const DC1: u8 = 0x11; // DEVICE CONTROL 1
+pub const DC2: u8 = 0x12; // DEVICE CONTROL 2
+pub const DC3: u8 = 0x13; // DEVICE CONTROL 3
+pub const DC4 : u8 = 0x14; // DEVICE CONTROL 4
+pub const NAK: u8 = 0x15; // NEGATIVE ACKNOWLEDGE
+pub const SYN: u8 = 0x16; // SYNCHRONOUS IDLE
+pub const ETB: u8 = 0x17; // END OF TRANSMISSION BLOCK
+pub const CAN: u8 = 0x18; // CANCEL
+pub const EM: u8 = 0x19; // END OF MEDIUM
+pub const SUB: u8 = 0x1A; // SUBSTITUTE
+pub const ESC: u8 = 0x1B; // ESCAPE
+pub const FS: u8 = 0x1C; // FILE SEPARATOR
+pub const GS: u8 = 0x1D; // GROUP SEPARATOR
+pub const RS: u8 = 0x1E; // RECORD SEPARATOR
+pub const US: u8 = 0x1F; // UNIT SEPARATOR
+pub const DEL: u8 = 0x7F; // DELETE
+
+// --  GRAPHIC CHARACTERS
+pub const SP: u8 = 0x20; // space
+pub const EXCLAMATION: u8 = 0x21; // ! 
+pub const DQUOTE: u8 = 0x22; // "
+pub const NUM: u8 = 0x23; // #
+pub const DOLLAR: u8 = 0x24; // $
+pub const PERCENT: u8 = 0x25; // %
+pub const AMPERSAND: u8 = 0x26; // &
+pub const SQUOTE: u8 = 0x27; // '
+pub const LEFT_PAR: u8 = 0x28; // (
+pub const RIGHT_PAR: u8 = 0x29; // )
+pub const ASTERISK: u8 = 0x2A; // *
+pub const PLUS: u8 = 0x2B; // +
+pub const COMMA: u8 = 0x2C; // ,
+pub const MINUS: u8 = 0x2D; // -
+pub const PERIOD: u8 = 0x2E; // .
+pub const SLASH: u8 = 0x2F; // /
+pub const N0: u8 = 0x30; // 0
+pub const N1: u8 = 0x31; // 1
+pub const N2: u8 = 0x32; // 2
+pub const N3: u8 = 0x33; // 3
+pub const N4: u8 = 0x34; // 4
+pub const N5: u8 = 0x35; // 5
+pub const N6: u8 = 0x36; // 6
+pub const N7: u8 = 0x37; // 7
+pub const N8: u8 = 0x38; // 8
+pub const N9: u8 = 0x39; // 9
+pub const COL: u8 = 0x3A; // :
+pub const SEM_COL: u8 = 0x3B; // ;
+pub const LT: u8 = 0x3C; // <
+pub const EQ: u8 = 0x3D; // =
+pub const GT: u8 = 0x3E; // >
+pub const QUESTION: u8 = 0x3F; // ?
+pub const AT: u8 = 0x40; // @
+pub const LCA: u8 = 0x41; // A  (NOTE: LC* constants are upper-case letters, LS* are lower-case)
+pub const LCB: u8 = 0x42; // B
+pub const LCC: u8 = 0x43; // C
+pub const LCD: u8 = 0x44; // D
+pub const LCE: u8 = 0x45; // E
+pub const LCF: u8 = 0x46; // F
+pub const LCG: u8 = 0x47; // G
+pub const LCH: u8 = 0x48; // H
+pub const LCI: u8 = 0x49; // I
+pub const LCJ: u8 = 0x4A; // J
+pub const LCK: u8 = 0x4B; // K
+pub const LCL: u8 = 0x4C; // L
+pub const LCM: u8 = 0x4D; // M
+pub const LCN: u8 = 0x4E; // N
+pub const LCO: u8 = 0x4F; // O
+pub const LCP: u8 = 0x50; // P
+pub const LCQ: u8 = 0x51; // Q
+pub const LCR: u8 = 0x52; // R
+pub const LCS: u8 = 0x53; // S
+pub const LCT: u8 = 0x54; // T
+pub const LCU: u8 = 0x55; // U
+pub const LCV: u8 = 0x56; // V
+pub const LCW: u8 = 0x57; // W
+pub const LCX: u8 = 0x58; // X
+pub const LCY: u8 = 0x59; // Y
+pub const LCZ: u8 = 0x5A; // Z
+pub const LEFT_BRACKET: u8 = 0x5B; // [
+pub const BACKSLASH: u8 = 0x5C; // \
+pub const RIGHT_BRACKET: u8 = 0x5D; // ]
+pub const CARRET: u8 = 0x5E; // ^
+pub const UNDERSCORE: u8 = 0x5F; // _
+pub const GRAVE: u8 = 0x60; // `
+pub const LSA: u8 = 0x61; // a
+pub const LSB: u8 = 0x62; // b
+pub const LSC: u8 = 0x63; // c
+pub const LSD: u8 = 0x64; // d
+pub const LSE: u8 = 0x65; // e
+pub const LSF: u8 = 0x66; // f
+pub const LSG: u8 = 0x67; // g
+pub const LSH: u8 = 0x68; // h
+pub const LSI: u8 = 0x69; // i
+pub const LSJ: u8 = 0x6A; // j
+pub const LSK: u8 = 0x6B; // k
+pub const LSL: u8 = 0x6C; // l
+pub const LSM: u8 = 0x6D; // m
+pub const LSN: u8 = 0x6E; // n
+pub const LSO: u8 = 0x6F; // o
+pub const LSP: u8 = 0x70; // p
+pub const LSQ: u8 = 0x71; // q
+pub const LSR: u8 = 0x72; // r
+pub const LSS: u8 = 0x73; // s
+pub const LST: u8 = 0x74; // t
+pub const LSU: u8 = 0x75; // u
+pub const LSV: u8 = 0x76; // v
+pub const LSW: u8 = 0x77; // w
+pub const LSX: u8 = 0x78; // x
+pub const LSY: u8 = 0x79; // y
+pub const LSZ: u8 = 0x7A; // z
+pub const LEFT_CURLY: u8 = 0x7B; // {
+pub const PIPE: u8 = 0x7C; // |
+pub const RIGHT_CURLY: u8 = 0x7D; // }
+pub const TILDE: u8 = 0x7E; // ~
+
+// GROUP OF CHARACTERS
+// -- CRLF
+pub const CRLF: &[u8] = &[CR, LF];
+
+// -- WHITESPACE
+pub const WS: &[u8] = &[HT, SP];
+
+pub const GRAPHIC_BEGIN: u8 = SP;
+pub const GRAPHIC_END: u8 = TILDE;
diff --git a/src/text/buffer.rs b/src/text/buffer.rs
new file mode 100644
index 0000000..bd9cbb8
--- /dev/null
+++ b/src/text/buffer.rs
@@ -0,0 +1,43 @@
+use encoding_rs::Encoding;
+
+#[derive(Debug, PartialEq, Default)]
+pub struct Text<'a> {
+    parts: Vec<&'a [u8]>, // borrowed byte chunks, kept in insertion order (nothing is copied)
+}
+
+impl<'a> Text<'a> {
+    /// Appends a borrowed chunk; `e` must live for `'a` because only the
+    /// reference is stored, not a copy of the bytes.
+    pub fn push(&mut self, e: &'a [u8]) {
+        self.parts.push(e)
+    }
+
+    /// Decodes every chunk as UTF-8, in order, into one `String` (decoder error flag ignored).
+    pub fn to_string(&self) -> String {
+        let enc = encoding_rs::UTF_8;
+        let size: usize = self.parts.iter().map(|v| v.len()).sum();
+        let mut acc = String::with_capacity(size);
+        for v in &self.parts {
+            let (content, _) = enc.decode_without_bom_handling(v);
+            acc.push_str(content.as_ref());
+        }
+        acc
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    // `use super::*` does not provide `ascii`: the parent module never imports it.
+    use crate::text::ascii;
+
+    /// Chunks pushed separately must come back concatenated in push order.
+    #[test]
+    fn test_text() {
+        let mut text = Text::default();
+        text.push(b"hello");
+        text.push(&[ascii::SP]);
+        text.push(b"world");
+        assert_eq!(text.to_string(), "hello world".to_string());
+    }
+}
diff --git a/src/fragments/encoding.rs b/src/text/encoding.rs
similarity index 50%
rename from src/fragments/encoding.rs
rename to src/text/encoding.rs
index 5ad0dc5..84a2c0c 100644
--- a/src/fragments/encoding.rs
+++ b/src/text/encoding.rs
@@ -1,5 +1,3 @@
-use std::borrow::Cow;
-use chardetng::EncodingDetector;
 use encoding_rs::Encoding;
 
 use nom::{
@@ -7,92 +5,107 @@ use nom::{
     branch::alt,
     bytes::complete::{tag, take, take_while1, take_while},
     character::complete::{one_of},
+    character::is_alphanumeric,
     combinator::map,
     sequence::{preceded, terminated, tuple},
     multi::many0,
 };
-use encoding_rs::Encoding;
 use base64::{Engine as _, engine::general_purpose};
 
-use crate::fragments::mime;
+use crate::text::words;
+use crate::text::ascii;
 
-const IS_LAST_BUFFER: bool = true;
-const ALLOW_UTF8: bool = true;
-const NO_TLD: Option<&[u8]> = None;
-
-pub fn header_decode(input: &[u8]) -> Cow<str> {
-    // Create detector
-    let mut detector = EncodingDetector::new();
-    detector.feed(input, IS_LAST_BUFFER);
-
-    // Get encoding
-    let enc: &Encoding = detector.guess(NO_TLD, ALLOW_UTF8);
-    let (header, _, _) = enc.decode(input);
-    header
-}
-
-pub fn encoded_word(input: &str) -> IResult<&str, String> {
+pub fn encoded_word(input: &[u8]) -> IResult<&[u8], EncodedWord> {
     alt((encoded_word_quoted, encoded_word_base64))(input)
 }
 
-pub fn encoded_word_quoted(input: &str) -> IResult<&str, String> {
+pub fn encoded_word_quoted(input: &[u8]) -> IResult<&[u8], EncodedWord> {
     let (rest, (_, charset, _, _, _, txt, _)) = tuple((
-        tag("=?"), mime::token, 
+        tag("=?"), words::mime_token, 
         tag("?"), one_of("Qq"),
         tag("?"), ptext,
         tag("?=")))(input)?;
 
-    let renc = Encoding::for_label(charset.as_bytes()).unwrap_or(encoding_rs::WINDOWS_1252);
-    let parsed = decode_quoted_encoding(renc, txt.iter());
+    let renc = Encoding::for_label(charset).unwrap_or(encoding_rs::WINDOWS_1252);
+    let parsed = EncodedWord::Quoted(QuotedWord { enc: renc, chunks: txt });
     Ok((rest, parsed))
 }
 
-pub fn encoded_word_base64(input: &str) -> IResult<&str, String> {
+pub fn encoded_word_base64(input: &[u8]) -> IResult<&[u8], EncodedWord> {
     let (rest, (_, charset, _, _, _, txt, _)) = tuple((
-        tag("=?"), mime::token, 
+        tag("=?"), words::mime_token, 
         tag("?"), one_of("Bb"),
         tag("?"), btext,
         tag("?=")))(input)?;
 
-    let renc = Encoding::for_label(charset.as_bytes()).unwrap_or(encoding_rs::WINDOWS_1252);
-    let parsed = general_purpose::STANDARD_NO_PAD.decode(txt).map(|d| renc.decode(d.as_slice()).0.to_string()).unwrap_or("".into());
-
+    let renc = Encoding::for_label(charset).unwrap_or(encoding_rs::WINDOWS_1252);
+    let parsed = EncodedWord::Base64(Base64Word { enc: renc, content: txt });
     Ok((rest, parsed))
 }
 
-fn decode_quoted_encoding<'a>(enc: &'static Encoding, q: impl Iterator<Item = &'a QuotedChunk<'a>>) -> String {
-    q.fold(
-        String::new(), 
-        |mut acc, c| {
-            let dec = match c {
-                QuotedChunk::Safe(v) => Cow::Borrowed(*v),
-                QuotedChunk::Space => Cow::Borrowed(" "),
-                QuotedChunk::Encoded(v) => {
-                    let w = &[*v];
-                    let (d, _, _) = enc.decode(w);
-                    Cow::Owned(d.into_owned())
-                },
-            };
-            acc.push_str(dec.as_ref());
-            acc
-        })
+#[derive(PartialEq,Debug)]
+pub enum EncodedWord<'a> {
+    Quoted(QuotedWord<'a>),
+    Base64(Base64Word<'a>),
 }
 
+#[derive(PartialEq,Debug)]
+pub struct Base64Word<'a> {
+    pub enc: &'static Encoding,
+    pub content: &'a [u8],
+}
+
+impl<'a> Base64Word<'a> {
+    pub fn to_string(&self) -> String {
+        general_purpose::STANDARD_NO_PAD
+            .decode(self.content)
+            .map(|d| self.enc.decode(d.as_slice()).0.to_string())
+            .unwrap_or("".into())
+    }
+}
+
+#[derive(PartialEq,Debug)]
+pub struct QuotedWord<'a> {
+    pub enc: &'static Encoding,
+    pub chunks: Vec<QuotedChunk<'a>>,
+}
+
+impl<'a> QuotedWord<'a> {
+    /// Decodes the word to a `String` using the charset declared in its header.
+    ///
+    /// The raw octets of every chunk are gathered first and decoded in one pass:
+    /// decoding each `=XX` octet on its own would mangle multi-byte characters
+    /// (e.g. `=C3=A9` in UTF-8), since one octet alone is not a valid sequence.
+    pub fn to_string(&self) -> String {
+        let mut raw = Vec::new();
+        for c in &self.chunks {
+            match c {
+                QuotedChunk::Safe(v) => raw.extend_from_slice(v),
+                // `_` always stands for an encoded space (0x20), whatever the charset.
+                QuotedChunk::Space => raw.push(ascii::SP),
+                QuotedChunk::Encoded(v) => raw.push(*v),
+            }
+        }
+
+        let (content, _) = self.enc.decode_without_bom_handling(&raw);
+        content.into_owned()
+    }
+}
 
 #[derive(PartialEq,Debug)]
 pub enum QuotedChunk<'a> {
-    Safe(&'a str),
+    Safe(&'a [u8]),
     Encoded(u8),
     Space,
 }
 
 //quoted_printable
-pub fn ptext(input: &str) -> IResult<&str, Vec<QuotedChunk>> {
+pub fn ptext(input: &[u8]) -> IResult<&[u8], Vec<QuotedChunk>> {
     many0(alt((safe_char2, encoded_space, hex_octet)))(input)
 }
 
 
-fn safe_char2(input: &str) -> IResult<&str, QuotedChunk> {
+fn safe_char2(input: &[u8]) -> IResult<&[u8], QuotedChunk> {
   map(take_while1(is_safe_char2), |v| QuotedChunk::Safe(v))(input)  
 }
 
@@ -101,8 +114,8 @@ fn safe_char2(input: &str) -> IResult<&str, QuotedChunk> {
 /// 8-bit values which correspond to printable ASCII characters other
 /// than "=", "?", and "_" (underscore), MAY be represented as those
 /// characters.
-fn is_safe_char2(c: char) -> bool {
-    c.is_ascii() && !c.is_ascii_control() && c != '_' && c != '?' && c != '='
+fn is_safe_char2(c: u8) -> bool { // printable ASCII only (SP..=~), as before the switch to bytes
+    c >= ascii::SP && c <= ascii::TILDE && c != ascii::UNDERSCORE && c != ascii::QUESTION && c != ascii::EQ
 }
 
 /*
@@ -111,28 +124,30 @@ fn is_safe_char(c: char) -> bool {
         (c >= '\x3e' && c <= '\x7e')
 }*/
 
-fn encoded_space(input: &str) -> IResult<&str, QuotedChunk> {
+fn encoded_space(input: &[u8]) -> IResult<&[u8], QuotedChunk> {
     map(tag("_"), |_| QuotedChunk::Space)(input)
 }
 
-fn hex_octet(input: &str) -> IResult<&str, QuotedChunk> {
+fn hex_octet(input: &[u8]) -> IResult<&[u8], QuotedChunk> {
     use nom::error::*;
 
-    let (rest, hstr) = preceded(tag("="), take(2usize))(input)?;
+    let (rest, hbytes) = preceded(tag("="), take(2usize))(input)?;
 
-    let parsed = u8::from_str_radix(hstr, 16)
+    let (hstr, _) = encoding_rs::UTF_8.decode_without_bom_handling(hbytes);
+
+    let parsed = u8::from_str_radix(hstr.as_ref(), 16)
         .map_err(|_| nom::Err::Error(Error::new(input, ErrorKind::Verify)))?;
 
     Ok((rest, QuotedChunk::Encoded(parsed)))
 }
 
 //base64 (maybe use a crate)
-pub fn btext(input: &str) -> IResult<&str, &str> {
+pub fn btext(input: &[u8]) -> IResult<&[u8], &[u8]> {
     terminated(take_while(is_bchar), many0(tag("=")))(input)
 }
 
-fn is_bchar(c: char) -> bool {
-    c.is_ascii_alphanumeric() || c == '+' || c == '/'
+fn is_bchar(c: u8) -> bool {
+    is_alphanumeric(c) || c == ascii::PLUS || c == ascii::SLASH
 }
 
 #[cfg(test)]
diff --git a/src/text/misc_token.rs b/src/text/misc_token.rs
new file mode 100644
index 0000000..35869fe
--- /dev/null
+++ b/src/text/misc_token.rs
@@ -0,0 +1,166 @@
+use nom::{
+    branch::alt,
+    bytes::complete::take_while1,
+    character::complete::space0,
+    combinator::{into, map, opt},
+    multi::{many0, many1},
+    sequence::{preceded, tuple},
+    IResult,
+};
+use std::borrow::Cow;
+
+use crate::text::{
+    quoted::quoted_string,
+    whitespace::{fws, is_obs_no_ws_ctl},
+    words::{atom, is_vchar},
+    encoding::{self, encoded_word},
+    buffer,
+    ascii,
+};
+
+#[derive(Debug, PartialEq, Default)]
+pub struct PhraseList(pub Vec<String>);
+
+/*
+impl<'a> TryFrom<&'a lazy::Unstructured<'a>> for Unstructured {
+    type Error = IMFError<'a>;
+
+    fn try_from(input: &'a lazy::Unstructured<'a>) -> Result<Self, Self::Error> {
+        unstructured(input.0)
+            .map(|(_, v)| Unstructured(v))
+            .map_err(|e| IMFError::Unstructured(e))
+    }
+}
+
+impl<'a> TryFrom<&'a lazy::PhraseList<'a>> for PhraseList {
+    type Error = IMFError<'a>;
+
+    fn try_from(p: &'a lazy::PhraseList<'a>) -> Result<Self, Self::Error> {
+        separated_list1(tag(","), phrase)(p.0)
+            .map(|(_, q)| PhraseList(q))
+            .map_err(|e| IMFError::PhraseList(e))
+    }
+}*/
+
+pub enum Word<'a> {
+    Quoted(buffer::Text<'a>),
+    Encoded(encoding::EncodedWord<'a>),
+    Atom(&'a [u8]), // raw bytes of the atom, borrowed from the parsed input
+}
+impl<'a> Word<'a> {
+    pub fn to_string(&self) -> String {
+        match self {
+            Word::Quoted(v) => v.to_string(),
+            Word::Encoded(v) => v.to_string(),
+            Word::Atom(v) => encoding_rs::UTF_8.decode_without_bom_handling(v).0.into_owned(),
+        }
+    }
+}
+
+/// Parses one word, trying in order: quoted string, encoded word, atom.
+/// (The encoded-word alternative is an addition to the ABNF rule below.)
+/// ```abnf
+///    word            =   atom / quoted-string
+/// ```
+pub fn word(input: &[u8]) -> IResult<&[u8], Word> {
+    alt((
+        map(quoted_string, |v| Word::Quoted(v)), 
+        map(encoded_word, |v| Word::Encoded(v)),
+        map(atom, |v| Word::Atom(v))
+    ))(input)
+}
+
+pub struct Phrase<'a>(pub Vec<Word<'a>>);
+impl<'a> Phrase<'a> {
+    pub fn to_string(&self) -> String { // words rendered individually, joined by one space
+        self.0.iter().map(|w| w.to_string()).collect::<Vec<_>>().join(" ")
+    }
+}
+
+/// Parses a phrase: one or more consecutive words.
+///
+/// ```abnf
+///    phrase          =   1*word / obs-phrase
+/// ```
+pub fn phrase(input: &[u8]) -> IResult<&[u8], Phrase> {
+    // Only the `1*word` alternative is handled here.
+    map(many1(word), Phrase)(input)
+}
+
+/// Tells whether a byte may appear in (obsolete-compatible) unstructured text.
+///
+/// ```abnf
+/// obs-utext       =   %d0 / obs-NO-WS-CTL / VCHAR
+/// ```
+fn is_unstructured(c: u8) -> bool {
+    c == ascii::NULL || is_obs_no_ws_ctl(c) || is_vchar(c)
+}
+
+pub enum UnstrToken<'a> {
+    Init, // sentinel: "no previous token" when rendering an `Unstructured`
+    Encoded(encoding::EncodedWord<'a>),
+    Plain(&'a [u8]),
+}
+impl<'a> UnstrToken<'a> {
+    pub fn to_string(&self) -> String {
+        match self {
+            UnstrToken::Init => "".into(),
+            UnstrToken::Encoded(e) => e.to_string(),
+            UnstrToken::Plain(e) => encoding_rs::UTF_8.decode_without_bom_handling(e).0.into_owned(),
+        }
+    }
+}
+
+pub struct Unstructured<'a>(pub Vec<UnstrToken<'a>>);
+impl<'a> Unstructured<'a> {
+    /// Renders the field body: tokens are separated by a single space, except
+    /// adjacent encoded words which are concatenated (RFC 2047 section 6.2).
+    pub fn to_string(&self) -> String {
+        let mut result = String::new();
+        let mut prev_token = &UnstrToken::Init;
+        for current_token in &self.0 {
+            match (prev_token, current_token) {
+                // Nothing emitted yet, or two encoded words in a row: no separator.
+                (UnstrToken::Init, _) | (UnstrToken::Encoded(_), UnstrToken::Encoded(_)) => (),
+                _ => result.push(' '),
+            }
+            result.push_str(current_token.to_string().as_ref());
+            prev_token = current_token;
+        }
+
+        result
+    }
+}
+
+/// Unstructured header field body
+///
+/// ```abnf
+/// unstructured    =   (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct
+/// ```
+pub fn unstructured(input: &[u8]) -> IResult<&[u8], Unstructured> {
+    let (input, r) = many0(preceded(opt(fws), alt((
+                        map(encoded_word, |v| UnstrToken::Encoded(v)), 
+                        map(take_while1(is_unstructured), |v| UnstrToken::Plain(v)),
+                    ))))(input)?;
+
+    let (input, _) = space0(input)?;
+    Ok((input, Unstructured(r)))
+}
+
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    #[test]
+    fn test_phrase() {
+        // `Phrase` has no `PartialEq`/`Debug`: compare the rest and the rendered string.
+        let (rest, p) = phrase(b"hello world").unwrap();
+        assert_eq!((rest, p.to_string()), (&b""[..], "hello world".to_string()));
+
+        let (rest, p) = phrase(b"salut \"le\" monde").unwrap();
+        assert_eq!((rest, p.to_string()), (&b""[..], "salut le monde".to_string()));
+
+        let (rest, p) = phrase(b"fin\r\n du\r\nmonde").unwrap();
+        assert_eq!((rest, p.to_string()), (&b"\r\nmonde"[..], "fin du".to_string()));
+    }
+}
diff --git a/src/text/mod.rs b/src/text/mod.rs
new file mode 100644
index 0000000..6baecdb
--- /dev/null
+++ b/src/text/mod.rs
@@ -0,0 +1,7 @@
+pub mod ascii;
+pub mod encoding;
+pub mod misc_token;
+pub mod quoted;
+pub mod whitespace;
+pub mod words;
+pub mod buffer;
diff --git a/src/fragments/quoted.rs b/src/text/quoted.rs
similarity index 55%
rename from src/fragments/quoted.rs
rename to src/text/quoted.rs
index 261f499..78ef7a3 100644
--- a/src/fragments/quoted.rs
+++ b/src/text/quoted.rs
@@ -1,14 +1,16 @@
 use nom::{
     branch::alt,
-    bytes::complete::tag,
-    character::complete::{anychar, satisfy},
-    combinator::opt,
+    bytes::complete::{take_while1, tag},
+    character::complete::anychar,
+    combinator::{recognize, opt},
     multi::many0,
     sequence::{pair, preceded},
     IResult,
 };
 
-use crate::fragments::whitespace::{cfws, fws, is_obs_no_ws_ctl};
+use crate::text::whitespace::{cfws, fws, is_obs_no_ws_ctl};
+use crate::text::ascii;
+use crate::text::buffer;
 
 /// Quoted pair
 ///
@@ -16,8 +18,8 @@ use crate::fragments::whitespace::{cfws, fws, is_obs_no_ws_ctl};
 ///    quoted-pair     =   ("\" (VCHAR / WSP)) / obs-qp
 ///    obs-qp          =   "\" (%d0 / obs-NO-WS-CTL / LF / CR)
 /// ```
-pub fn quoted_pair(input: &str) -> IResult<&str, char> {
-    preceded(tag("\\"), anychar)(input)
+pub fn quoted_pair(input: &[u8]) -> IResult<&[u8], u8> {
+    preceded(tag("\\"), nom::number::complete::be_u8)(input) // backslash, then any one byte
 }
 
 /// Allowed characters in quote
@@ -28,11 +30,11 @@ pub fn quoted_pair(input: &str) -> IResult<&str, char> {
 ///                       %d93-126 /         ;  "\" or the quote character
 ///                       obs-qtext
 /// ```
-fn is_restr_qtext(c: char) -> bool {
-    c == '\x21' || (c >= '\x23' && c <= '\x5B') || (c >= '\x5D' && c <= '\x7E')
+fn is_restr_qtext(c: u8) -> bool {
+    c == ascii::EXCLAMATION || (c >= ascii::NUM && c <= ascii::LEFT_BRACKET) || (c >= ascii::RIGHT_BRACKET && c <= ascii::TILDE)
 }
 
-fn is_qtext(c: char) -> bool {
+fn is_qtext(c: u8) -> bool {
     is_restr_qtext(c) || is_obs_no_ws_ctl(c)
 }
 
@@ -41,8 +43,8 @@ fn is_qtext(c: char) -> bool {
 /// ```abnf
 ///   qcontent        =   qtext / quoted-pair
 /// ```
-fn qcontent(input: &str) -> IResult<&str, char> {
-    alt((satisfy(is_qtext), quoted_pair))(input)
+fn qcontent(input: &[u8]) -> IResult<&[u8], &[u8]> { // a quoted-pair yields only its escaped byte
+    alt((take_while1(is_qtext), preceded(tag("\\"), nom::bytes::complete::take(1usize))))(input)
 }
 
 /// Quoted string
@@ -52,7 +54,7 @@ fn qcontent(input: &str) -> IResult<&str, char> {
 ///                     DQUOTE *([FWS] qcontent) [FWS] DQUOTE
 ///                     [CFWS]
 /// ```
-pub fn quoted_string(input: &str) -> IResult<&str, String> {
+pub fn quoted_string(input: &[u8]) -> IResult<&[u8], buffer::Text> {
     let (input, _) = opt(cfws)(input)?;
     let (input, _) = tag("\"")(input)?;
     let (input, content) = many0(pair(opt(fws), qcontent))(input)?;
@@ -60,11 +62,11 @@ pub fn quoted_string(input: &str) -> IResult<&str, String> {
     // Rebuild string
     let mut qstring = content
         .iter()
-        .fold(String::with_capacity(16), |mut acc, (maybe_wsp, c)| {
+        .fold(buffer::Text::default(), |mut acc, (maybe_wsp, c)| {
             if let Some(wsp) = maybe_wsp {
-                acc.push(*wsp);
+                acc.push(&[ascii::SP]);
             }
-            acc.push(*c);
+            acc.push(c);
             acc
         });
 
@@ -84,13 +86,22 @@ mod tests {
 
     #[test]
     fn test_quoted_string() {
+        let mut text = buffer::Text::default();
+        text.push(b"hello");
+        text.push(&[ascii::DQUOTE]);
+        text.push(b"world");
         assert_eq!(
-            quoted_string(" \"hello\\\"world\" "),
-            Ok(("", "hello\"world".to_string()))
+            quoted_string(b" \"hello\\\"world\" "),
+            Ok((&b""[..], text))
         );
+
+        let mut text = buffer::Text::default();
+        text.push(b"hello");
+        text.push(&[ascii::SP]);
+        text.push(b"world");
         assert_eq!(
-            quoted_string("\"hello\r\n world\""),
-            Ok(("", "hello world".to_string()))
+            quoted_string(b"\"hello\r\n world\""),
+            Ok((&b""[..], text))
         );
     }
 }
diff --git a/src/fragments/whitespace.rs b/src/text/whitespace.rs
similarity index 67%
rename from src/fragments/whitespace.rs
rename to src/text/whitespace.rs
index 08b8a2d..28050b2 100644
--- a/src/fragments/whitespace.rs
+++ b/src/text/whitespace.rs
@@ -1,71 +1,68 @@
-use crate::fragments::quoted::quoted_pair;
 use nom::{
     branch::alt,
-    bytes::complete::{is_not, tag},
-    character::complete::{crlf, satisfy, space0, space1},
+    bytes::complete::{is_not, tag, take_while1},
+    character::complete::{space0, space1},
     combinator::{opt, recognize},
     multi::{many0, many1},
-    sequence::{pair, terminated, tuple},
+    sequence::{pair, tuple},
     IResult,
 };
-use crate::fragments::encoding::encoded_word;
+use crate::text::encoding::encoded_word;
+use crate::text::quoted::quoted_pair;
+use crate::text::ascii;
 
 /// Whitespace (space, new line, tab) content and 
 /// delimited content (eg. comment, line, sections, etc.)
 
-// Bytes CRLF
-const CR: u8 = 0x0D;
-const LF: u8 = 0x0A;
-pub const CRLF: &[u8] = &[CR, LF];
+/// Obsolete/Compatible CRLF
+///
+/// Theoretically, all lines must end with \r\n
+/// but some mail servers like Dovecot support malformed emails,
+/// for example with only \n eol. It works because
+/// \r or \n is allowed nowhere else, so we also add this support.
 
-pub fn headers(input: &[u8]) -> IResult<&[u8], &[u8]> {
-    terminated(recognize(many0(line)), obs_crlf)(input)
+pub fn obs_crlf(input: &[u8]) -> IResult<&[u8], &[u8]> {
+    alt((tag(ascii::CRLF), tag(&[ascii::CR]), tag(&[ascii::LF])))(input)
 }
-
-pub fn fields(input: &str) -> IResult<&str, Vec<&str>> {
-    all_consuming(many0(foldable_line))(input)
-} 
-
 pub fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
     // is_not(CRLF) is a hack, it means "is not CR or LF"
     // and not "is not CRLF". In other words, it continues while
     // it does not encounter 0x0D or 0x0A.
-    pair(is_not(CRLF), obs_crlf)(input)
+    pair(is_not(ascii::CRLF), obs_crlf)(input)
 }
 
-pub fn obs_crlf(input: &[u8]) -> IResult<&[u8], &[u8]> {
-    alt((tag(CRLF), tag(&[CR]), tag(&[LF])))(input)
+/// ```abnf
+/// fold_line = any *(1*(crlf WS) any) crlf
+/// ```
+pub fn foldable_line(input: &[u8]) -> IResult<&[u8], &[u8]> {
+    recognize(tuple((
+        is_not(ascii::CRLF),
+        many0(pair(
+            many1(pair(obs_crlf, space1)),
+            is_not(ascii::CRLF),
+        )),
+        obs_crlf,
+    )))(input)
 }
 
-
 // --- whitespaces and comments
 
 // Note: WSP = SP / HTAB = %x20 / %x09
 // nom::*::space0 = *WSP
 // nom::*::space1 = 1*WSP
 
-/// Permissive CRLF
-///
-/// Theoretically, all lines must end with \r\n
-/// but some mail servers like Dovecot support malformated emails,
-/// for example with only \n eol. It works because
-/// \r or \n is allowed nowhere else, so we also add this support.
-pub fn perm_crlf(input: &str) -> IResult<&str, &str> {
-    alt((crlf, tag("\r"), tag("\n")))(input)
-}
-
 /// Permissive foldable white space
 ///
 /// Folding white space are used for long headers splitted on multiple lines.
 /// The obsolete syntax allowes multiple lines without content; implemented for compatibility
 /// reasons
-pub fn fws(input: &str) -> IResult<&str, char> {
+pub fn fws(input: &[u8]) -> IResult<&[u8], u8> {
     let (input, _) = alt((recognize(many1(fold_marker)), space1))(input)?;
-    Ok((input, ' '))
+    Ok((input, ascii::SP))
 }
-fn fold_marker(input: &str) -> IResult<&str, &str> {
+fn fold_marker(input: &[u8]) -> IResult<&[u8], &[u8]> {
     let (input, _) = space0(input)?;
-    let (input, _) = perm_crlf(input)?;
+    let (input, _) = obs_crlf(input)?;
     space1(input)
 }
 
@@ -85,17 +82,17 @@ fn fold_marker(input: &str) -> IResult<&str, &str> {
 ///
 ///   CFWS            =   (1*([FWS] comment) [FWS]) / FWS
 /// ```
-pub fn cfws(input: &str) -> IResult<&str, &str> {
+pub fn cfws(input: &[u8]) -> IResult<&[u8], &[u8]> {
     alt((recognize(comments), recognize(fws)))(input)
 }
 
-pub fn comments(input: &str) -> IResult<&str, ()> {
+pub fn comments(input: &[u8]) -> IResult<&[u8], ()> {
     let (input, _) = many1(tuple((opt(fws), comment)))(input)?;
     let (input, _) = opt(fws)(input)?;
     Ok((input, ()))
 }
 
-pub fn comment(input: &str) -> IResult<&str, ()> {
+pub fn comment(input: &[u8]) -> IResult<&[u8], ()> {
     let (input, _) = tag("(")(input)?;
     let (input, _) = many0(tuple((opt(fws), ccontent)))(input)?;
     let (input, _) = opt(fws)(input)?;
@@ -103,12 +100,16 @@ pub fn comment(input: &str) -> IResult<&str, ()> {
     Ok((input, ()))
 }
 
-pub fn ccontent(input: &str) -> IResult<&str, &str> {
-    alt((recognize(ctext), recognize(quoted_pair), recognize(encoded_word), recognize(comment)))(input)
+pub fn ccontent(input: &[u8]) -> IResult<&[u8], &[u8]> {
+    alt((ctext, recognize(quoted_pair), recognize(encoded_word), recognize(comment)))(input)
 }
 
-pub fn ctext(input: &str) -> IResult<&str, char> {
-    satisfy(is_ctext)(input)
+pub fn ctext(input: &[u8]) -> IResult<&[u8], &[u8]> {
+    take_while1(is_ctext)(input)
+}
+
+pub fn is_ctext(c: u8) -> bool {
+    is_restr_ctext(c) || is_obs_no_ws_ctl(c)
 }
 
 /// Check if it's a comment text character
@@ -119,15 +120,10 @@ pub fn ctext(input: &str) -> IResult<&str, char> {
 ///                       %d93-126 /         ;  "(", ")", or "\"
 ///                       obs-ctext
 ///```
-pub fn is_restr_ctext(c: char) -> bool {
-    (c >= '\x21' && c <= '\x27')
-        || (c >= '\x2A' && c <= '\x5B')
-        || (c >= '\x5D' && c <= '\x7E')
-        || !c.is_ascii()
-}
-
-pub fn is_ctext(c: char) -> bool {
-    is_restr_ctext(c) || is_obs_no_ws_ctl(c)
+pub fn is_restr_ctext(c: u8) -> bool {
+    (c >= ascii::EXCLAMATION && c <= ascii::SQUOTE)
+        || (c >= ascii::ASTERISK && c <= ascii::LEFT_BRACKET)
+        || (c >= ascii::RIGHT_BRACKET && c <= ascii::TILDE)
 }
 
 /// US ASCII control characters without effect
@@ -139,12 +135,12 @@ pub fn is_ctext(c: char) -> bool {
 ///                       %d14-31 /          ;  return, line feed, and
 ///                       %d127              ;  white space characters
 /// ```
-pub fn is_obs_no_ws_ctl(c: char) -> bool {
-    (c >= '\x01' && c <= '\x08')
-        || c == '\x0b'
-        || c == '\x0b'
-        || (c >= '\x0e' && c <= '\x1f')
-        || c == '\x7F'
+pub fn is_obs_no_ws_ctl(c: u8) -> bool {
+    (c >= ascii::SOH && c <= ascii::BS)
+        || c == ascii::VT 
+        || c == ascii::FF
+        || (c >= ascii::SO && c <= ascii::US)
+        || c == ascii::DEL
 }
 
 #[cfg(test)]
@@ -152,10 +148,10 @@ mod tests {
     use super::*;
 
     #[test]
-    fn test_perm_crlf() {
-        assert_eq!(perm_crlf("\rworld"), Ok(("world", "\r")));
-        assert_eq!(perm_crlf("\r\nworld"), Ok(("world", "\r\n")));
-        assert_eq!(perm_crlf("\nworld"), Ok(("world", "\n")));
+    fn test_obs_crlf() {
+        assert_eq!(obs_crlf(b"\rworld"), Ok((&b"world"[..], &b"\r"[..])));
+        assert_eq!(obs_crlf(b"\r\nworld"), Ok((&b"world"[..], &b"\r\n"[..])));
+        assert_eq!(obs_crlf(b"\nworld"), Ok((&b"world"[..], &b"\n"[..])));
     }
 
     #[test]
diff --git a/src/text/words.rs b/src/text/words.rs
new file mode 100644
index 0000000..6a50d7a
--- /dev/null
+++ b/src/text/words.rs
@@ -0,0 +1,133 @@
+use crate::text::whitespace::cfws;
+use crate::text::ascii;
+use nom::{
+    bytes::complete::{tag, take_while1},
+    character::is_alphanumeric,
+    combinator::{opt, recognize},
+    multi::many0,
+    sequence::{delimited, pair},
+    IResult,
+};
+
+pub fn is_vchar(c: u8) -> bool {
+    (ascii::EXCLAMATION..=ascii::TILDE).contains(&c) // VCHAR: visible ASCII, '!' through '~'
+}
+
+/// MIME Token allowed characters
+///
+/// forbidden: ()<>@,;:\"/[]?=
+fn is_mime_token_text(c: u8) -> bool {
+    is_alphanumeric(c) 
+    || c == ascii::EXCLAMATION
+    || c == ascii::NUM
+    || c == ascii::DOLLAR
+    || c == ascii::PERCENT
+    || c == ascii::AMPERSAND
+    || c == ascii::SQUOTE
+    || c == ascii::ASTERISK
+    || c == ascii::PLUS
+    || c == ascii::MINUS
+    || c == ascii::PERIOD
+    || c == ascii::CARRET
+    || c == ascii::UNDERSCORE
+    || c == ascii::GRAVE
+    || c == ascii::LEFT_CURLY
+    || c == ascii::PIPE
+    || c == ascii::RIGHT_CURLY
+    || c == ascii::TILDE
+}
+
+/// MIME Token
+///
+/// `[CFWS] 1*token_text [CFWS]`
+pub fn mime_token(input: &[u8]) -> IResult<&[u8], &[u8]> {
+    delimited(opt(cfws), take_while1(is_mime_token_text), opt(cfws))(input)
+}
+
+/// Atom allowed characters
+///
+/// authorized: !#$%&'*+-/=?^_`{|}~
+fn is_atext(c: u8) -> bool {
+    is_alphanumeric(c)
+    || c == ascii::EXCLAMATION
+    || c == ascii::NUM
+    || c == ascii::DOLLAR
+    || c == ascii::PERCENT
+    || c == ascii::AMPERSAND
+    || c == ascii::SQUOTE
+    || c == ascii::ASTERISK
+    || c == ascii::PLUS
+    || c == ascii::MINUS
+    || c == ascii::SLASH
+    || c == ascii::EQ
+    || c == ascii::QUESTION
+    || c == ascii::CARRET
+    || c == ascii::UNDERSCORE
+    || c == ascii::GRAVE
+    || c == ascii::LEFT_CURLY
+    || c == ascii::PIPE
+    || c == ascii::RIGHT_CURLY
+    || c == ascii::TILDE
+}
+
+/// Atom
+///
+/// `[CFWS] 1*atext [CFWS]`
+pub fn atom(input: &[u8]) -> IResult<&[u8], &[u8]> {
+    delimited(opt(cfws), take_while1(is_atext), opt(cfws))(input)
+}
+
+/// dot-atom-text
+///
+/// `1*atext *("." 1*atext)`
+pub fn dot_atom_text(input: &[u8]) -> IResult<&[u8], &[u8]> {
+    recognize(pair(
+        take_while1(is_atext),
+        many0(pair(tag("."), take_while1(is_atext))),
+    ))(input)
+}
+
+/// dot-atom
+///
+/// `[CFWS] dot-atom-text [CFWS]`
+pub fn dot_atom(input: &[u8]) -> IResult<&[u8], &[u8]> {
+    delimited(opt(cfws), dot_atom_text, opt(cfws))(input)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_atext() {
+        assert!(is_atext(b'='));
+        assert!(is_atext(b'5'));
+        assert!(is_atext(b'Q'));
+        assert!(!is_atext(b' '));
+        // TODO: non-ASCII bytes (e.g. UTF-8 'É') are not accepted as atext yet.
+    }
+
+    #[test]
+    fn test_atom() {
+        assert_eq!(
+            atom(b"(skip)  imf_codec (hidden) aerogramme"),
+            Ok((&b"aerogramme"[..], &b"imf_codec"[..]))
+        );
+    }
+
+    #[test]
+    fn test_dot_atom_text() {
+        assert_eq!(
+            dot_atom_text(b"quentin.dufour.io abcdef"),
+            Ok((&b" abcdef"[..], &b"quentin.dufour.io"[..]))
+        );
+    }
+
+    #[test]
+    fn test_dot_atom() {
+        assert_eq!(
+            dot_atom(b"   (skip) quentin.dufour.io abcdef"),
+            Ok((&b"abcdef"[..], &b"quentin.dufour.io"[..]))
+        );
+    }
+}
diff --git a/tests/enron.rs b/tests/enron.rs
deleted file mode 100644
index 8020bd9..0000000
--- a/tests/enron.rs
+++ /dev/null
@@ -1,129 +0,0 @@
-use imf_codec::fragments::section;
-use imf_codec::multipass;
-use std::collections::HashSet;
-use std::fs::File;
-use std::io::Read;
-use std::path::PathBuf;
-use walkdir::WalkDir;
-
-fn parser<'a, F>(input: &'a [u8], func: F) -> ()
-where
-    F: FnOnce(&section::Section) -> (),
-{
-    let seg = multipass::segment::new(input).unwrap();
-    let charset = seg.charset();
-    let fields = charset.fields().unwrap();
-    let field_names = fields.names();
-    let field_body = field_names.body();
-    let section = field_body.section();
-
-    func(&section.fields);
-}
-
-#[test]
-#[ignore]
-fn test_enron500k() {
-    let mut d = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
-    d.push("resources/enron/maildir/");
-    let prefix_sz = d.as_path().to_str().unwrap().len();
-    //d.push("williams-w3/");
-
-    let known_bad_fields = HashSet::from([
-        "white-s/calendar/113.",       // To: east <7..>
-        "skilling-j/inbox/223.",       // From: pep <performance.>
-        "jones-t/all_documents/9806.", // To: <"tibor.vizkelety":@enron.com>
-        "jones-t/notes_inbox/3303.",   // To: <"tibor.vizkelety":@enron.com>
-        "lokey-t/calendar/33.",        // A second Date entry for the calendar containing
-        // Date:       Monday, March 12
-        "zipper-a/inbox/199.",                       // To: e-mail <mari.>
-        "dasovich-j/deleted_items/128.",             // To: f62489 <g>
-        "dasovich-j/all_documents/677.",             // To: w/assts <govt.>
-        "dasovich-j/all_documents/8984.",            // To: <"ft.com.users":@enron.com>
-        "dasovich-j/all_documents/3514.",            // To: <"ft.com.users":@enron.com>
-        "dasovich-j/all_documents/4467.",            // To: <"ft.com.users":@enron.com>
-        "dasovich-j/all_documents/578.",             // To: w/assts <govt.>
-        "dasovich-j/all_documents/3148.",            // To: <"economist.com.readers":@enron.com>
-        "dasovich-j/all_documents/9953.",            // To: <"economist.com.reader":@enron.com>
-        "dasovich-j/risk_analytics/3.",              // To: w/assts <govt.>
-        "dasovich-j/notes_inbox/5391.",              // To: <"ft.com.users":@enron.com>
-        "dasovich-j/notes_inbox/4952.",              // To: <"economist.com.reader":@enron.com>
-        "dasovich-j/notes_inbox/2386.",              // To: <"ft.com.users":@enron.com>
-        "dasovich-j/notes_inbox/1706.",              // To: <"ft.com.users":@enron.com>
-        "dasovich-j/notes_inbox/1489.",              // To: <"economist.com.readers":@enron.com>
-        "dasovich-j/notes_inbox/5.",                 // To: w/assts <govt.>
-        "kaminski-v/sites/19.",                      // To: <"the.desk":@enron.com>
-        "kaminski-v/sites/1.",                       // To: <"the.desk":@enron.com>
-        "kaminski-v/discussion_threads/5082.",       // To: <"ft.com.users":@enron.com>
-        "kaminski-v/discussion_threads/4046.",       // To: <"the.desk":@enron.com>
-        "kaminski-v/discussion_threads/4187.",       // To: <"the.desk":@enron.com>
-        "kaminski-v/discussion_threads/8068.", // To: cats <breaktkhrough.>, risk <breakthrough.>, leaders <breaktkhrough.>
-        "kaminski-v/discussion_threads/7980.", // To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
-        "kaminski-v/all_documents/5970.", //To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
-        "kaminski-v/all_documents/5838.", // To + Cc: dogs <breakthrough.>, breakthrough.adm@enron.com, breakthrough.adm@enron.com,\r\n\tbreakthrough.adm@enron.com
-        "kaminski-v/all_documents/10070.", // To: <"ft.com.users":@enron.com>
-        "kaminski-v/all_documents/92.",   // To: <"the.desk":@enron.com>
-        "kaminski-v/all_documents/276.",  // To: <"the.desk":@enron.com>
-        "kaminski-v/technical/1.",        // To: <"the.desk":@enron.com>
-        "kaminski-v/technical/7.",        // To: <"the.desk":@enron.com>
-        "kaminski-v/notes_inbox/140.", // To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
-        "kaminski-v/notes_inbox/95.", // To + CC failed: cats <breaktkhrough.>, risk <breakthrough.>, leaders <breaktkhrough.>
-        "kean-s/archiving/untitled/1232.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
-        "kean-s/archiving/untitled/1688.", // To: w/assts <govt.>
-        "kean-s/sent/198.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
-        "kean-s/reg_risk/9.", // To: w/assts <govt.>
-        "kean-s/discussion_threads/950.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
-        "kean-s/discussion_threads/577.", // To: w/assts <govt.>
-        "kean-s/calendar/untitled/1096.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
-        "kean-s/calendar/untitled/640.",  // To: w/assts <govt.>
-        "kean-s/all_documents/640.",      // To: w/assts <govt.>
-        "kean-s/all_documents/1095.",     // To: w/assts <govt.>
-        "kean-s/attachments/2030.",       // To: w/assts <govt.>
-        "williams-w3/operations_committee_isas/10.", // To: z34655 <m>
-    ]);
-
-    let known_bad_from = HashSet::from([
-        "skilling-j/inbox/223.", // From: pep <performance.>
-    ]);
-
-    let mut i = 0;
-    for entry in WalkDir::new(d.as_path())
-        .into_iter()
-        .filter_map(|file| file.ok())
-    {
-        if entry.metadata().unwrap().is_file() {
-            let mail_path = entry.path();
-            let suffix = &mail_path.to_str().unwrap()[prefix_sz..];
-
-            // read file
-            let mut raw = Vec::new();
-            let mut f = File::open(mail_path).unwrap();
-            f.read_to_end(&mut raw).unwrap();
-
-            // parse
-            parser(&raw, |hdrs| {
-                let ok_date = hdrs.date.is_some();
-                let ok_from = hdrs.from.len() > 0;
-                let ok_fields = hdrs.bad_fields.len() == 0;
-
-                if !ok_date || !ok_from || !ok_fields {
-                    println!("Issue with: {}", suffix);
-                }
-
-                assert!(ok_date);
-
-                if !known_bad_from.contains(suffix) {
-                    assert!(ok_from);
-                }
-
-                if !known_bad_fields.contains(suffix) {
-                    assert!(ok_fields);
-                }
-
-                i += 1;
-                if i % 1000 == 0 {
-                    println!("Analyzed emails: {}", i);
-                }
-            })
-        }
-    }
-}
diff --git a/tests/known.rs b/tests/known.rs
deleted file mode 100644
index 3cd756d..0000000
--- a/tests/known.rs
+++ /dev/null
@@ -1,340 +0,0 @@
-use chrono::{FixedOffset, TimeZone};
-use imf_codec::fragments::{misc_token, model, section, part, trace};
-use imf_codec::multipass;
-use std::collections::HashMap;
-
-fn parser<'a, F>(input: &'a [u8], func: F) -> ()
-where
-    F: FnOnce(&section::Section) -> (),
-{
-    let seg = multipass::segment::new(input).unwrap();
-    let charset = seg.charset();
-    let fields = charset.fields().unwrap();
-    let field_names = fields.names();
-    let field_body = field_names.body();
-    let section = field_body.section();
-
-    func(&section.fields);
-}
-
-#[test]
-fn test_headers() {
-    let fullmail: &[u8] = r#"Return-Path: <gitlab@example.com>
-Delivered-To: quentin@example.com
-Received: from smtp.example.com ([10.83.2.2])
-	by doradille with LMTP
-	id xyzabcd
-	(envelope-from <gitlab@example.com>)
-	for <quentin@example.com>; Tue, 13 Jun 2023 19:01:08 +0000
-Date: Tue, 13 Jun 2023 10:01:10 +0200
-From: Mary Smith
- <mary@example.net>, "A\lan" <alan@example>
-Sender: imf@example.com
-Reply-To: "Mary Smith: Personal Account" <smith@home.example>
-To: John Doe <jdoe@machine.example>
-Cc: imf2@example.com
-Bcc: (hidden)
-Subject: Re: Saying Hello
-Comments: A simple message
-Comments: Not that complicated
-comments : not valid header name but should be accepted
-    by the parser.
-Keywords: hello, world
-Héron: Raté
- Raté raté
-Keywords: salut, le, monde
-Not a real header but should still recover
-Message-ID: <3456@example.net>
-In-Reply-To: <1234@local.machine.example>
-References: <1234@local.machine.example>
-Unknown: unknown
-
-This is a reply to your hello.
-"#
-    .as_bytes();
-    parser(fullmail, |parsed_section| {
-        assert_eq!(
-            parsed_section,
-            &section::Section {
-                date: Some(
-                    &FixedOffset::east_opt(2 * 3600)
-                        .unwrap()
-                        .with_ymd_and_hms(2023, 06, 13, 10, 01, 10)
-                        .unwrap()
-                ),
-
-                from: vec![
-                    &model::MailboxRef {
-                        name: Some("Mary Smith".into()),
-                        addrspec: model::AddrSpec {
-                            local_part: "mary".into(),
-                            domain: "example.net".into(),
-                        }
-                    },
-                    &model::MailboxRef {
-                        name: Some("Alan".into()),
-                        addrspec: model::AddrSpec {
-                            local_part: "alan".into(),
-                            domain: "example".into(),
-                        }
-                    }
-                ],
-
-                sender: Some(&model::MailboxRef {
-                    name: None,
-                    addrspec: model::AddrSpec {
-                        local_part: "imf".into(),
-                        domain: "example.com".into(),
-                    }
-                }),
-
-                reply_to: vec![&model::AddressRef::Single(model::MailboxRef {
-                    name: Some("Mary Smith: Personal Account".into()),
-                    addrspec: model::AddrSpec {
-                        local_part: "smith".into(),
-                        domain: "home.example".into(),
-                    }
-                })],
-
-                to: vec![&model::AddressRef::Single(model::MailboxRef {
-                    name: Some("John Doe".into()),
-                    addrspec: model::AddrSpec {
-                        local_part: "jdoe".into(),
-                        domain: "machine.example".into(),
-                    }
-                })],
-
-                cc: vec![&model::AddressRef::Single(model::MailboxRef {
-                    name: None,
-                    addrspec: model::AddrSpec {
-                        local_part: "imf2".into(),
-                        domain: "example.com".into(),
-                    }
-                })],
-
-                bcc: vec![],
-
-                msg_id: Some(&model::MessageId {
-                    left: "3456",
-                    right: "example.net"
-                }),
-                in_reply_to: vec![&model::MessageId {
-                    left: "1234",
-                    right: "local.machine.example"
-                }],
-                references: vec![&model::MessageId {
-                    left: "1234",
-                    right: "local.machine.example"
-                }],
-
-                subject: Some(&misc_token::Unstructured("Re: Saying Hello".into())),
-
-                comments: vec![
-                    &misc_token::Unstructured("A simple message".into()),
-                    &misc_token::Unstructured("Not that complicated".into()),
-                    &misc_token::Unstructured(
-                        "not valid header name but should be accepted by the parser.".into()
-                    ),
-                ],
-
-                keywords: vec![
-                    &misc_token::PhraseList(vec!["hello".into(), "world".into(),]),
-                    &misc_token::PhraseList(vec!["salut".into(), "le".into(), "monde".into(),]),
-                ],
-
-                received: vec![&trace::ReceivedLog(
-                    r#"from smtp.example.com ([10.83.2.2])
-	by doradille with LMTP
-	id xyzabcd
-	(envelope-from <gitlab@example.com>)
-	for <quentin@example.com>"#
-                )],
-
-                return_path: vec![&model::MailboxRef {
-                    name: None,
-                    addrspec: model::AddrSpec {
-                        local_part: "gitlab".into(),
-                        domain: "example.com".into(),
-                    }
-                }],
-
-                optional: HashMap::from([
-                    (
-                        "Delivered-To",
-                        &misc_token::Unstructured("quentin@example.com".into())
-                    ),
-                    ("Unknown", &misc_token::Unstructured("unknown".into())),
-                ]),
-
-                bad_fields: vec![],
-
-                unparsed: vec![
-                    "Héron: Raté\n Raté raté\n",
-                    "Not a real header but should still recover\n",
-                ],
-                ..section::Section::default()
-            }
-        )
-    })
-}
-
-#[test]
-fn test_headers_mime() {
-    use imf_codec::fragments::mime;
-    let fullmail: &[u8] = r#"From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>
-To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>
-CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>
-Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
-    =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
-MIME-Version: 1.0
-Content-Type: text/plain; charset=ISO-8859-1
-Content-Transfer-Encoding: quoted-printable
-Content-ID: <a@example.com>
-Content-Description: hello
-
-Now's the time =
-for all folk to come=
- to the aid of their country.
-"#
-    .as_bytes();
-
-   parser(fullmail, |parsed_section| {
-        assert_eq!(
-            parsed_section,
-            &section::Section {
-                from: vec![
-                    &model::MailboxRef {
-                        name: Some("Keith Moore".into()),
-                        addrspec: model::AddrSpec {
-                            local_part: "moore".into(),
-                            domain: "cs.utk.edu".into(),
-                        }
-                    },
-                ],
-
-                to: vec![&model::AddressRef::Single(model::MailboxRef {
-                    name: Some("Keld Jørn Simonsen".into()),
-                    addrspec: model::AddrSpec {
-                        local_part: "keld".into(),
-                        domain: "dkuug.dk".into(),
-                    }
-                })],
-
-                cc: vec![&model::AddressRef::Single(model::MailboxRef {
-                    name: Some("André Pirard".into()),
-                    addrspec: model::AddrSpec {
-                        local_part: "PIRARD".into(),
-                        domain: "vm1.ulg.ac.be".into(),
-                    }
-                })],
-
-                subject: Some(&misc_token::Unstructured("If you can read this you understand the example.".into())),
-                mime_version: Some(&mime::Version{ major: 1, minor: 0 }),
-                mime: section::MIMESection {
-                    content_type: Some(&mime::Type::Text(mime::TextDesc { 
-                        charset: Some(mime::EmailCharset::ISO_8859_1), 
-                        subtype: mime::TextSubtype::Plain, 
-                        unknown_parameters: vec![]
-                    })),
-                    content_transfer_encoding: Some(&mime::Mechanism::QuotedPrintable),
-                    content_id: Some(&model::MessageId {
-                        left: "a",
-                        right: "example.com"
-                    }),
-                    content_description: Some(&misc_token::Unstructured("hello".into())),
-                    ..section::MIMESection::default()
-                },
-                ..section::Section::default()
-            }
-        );
-   })
-}
-
-fn parser_bodystruct<'a, F>(input: &'a [u8], func: F) -> ()
-where
-    F: FnOnce(&part::PartNode) -> (),
-{
-    let seg = multipass::segment::new(input).unwrap();
-    let charset = seg.charset();
-    let fields = charset.fields().unwrap();
-    let field_names = fields.names();
-    let field_body = field_names.body();
-    let section = field_body.section();
-    let bodystruct = section.body_structure();
-
-    func(&bodystruct.body);
-}
-
-#[test]
-fn test_multipart() {
-    let fullmail: &[u8] = r#"Date: Sat, 8 Jul 2023 07:14:29 +0200
-From: Grrrnd Zero <grrrndzero@example.org>
-To: John Doe <jdoe@machine.example>
-Subject: Re: Saying Hello
-Message-ID: <NTAxNzA2AC47634Y366BAMTY4ODc5MzQyODY0ODY5@www.grrrndzero.org>
-MIME-Version: 1.0
-Content-Type: multipart/alternative;
- boundary="b1_e376dc71bafc953c0b0fdeb9983a9956"
-Content-Transfer-Encoding: 7bit
-
-This is a multi-part message in MIME format.
-
---b1_e376dc71bafc953c0b0fdeb9983a9956
-Content-Type: text/plain; charset=utf-8
-Content-Transfer-Encoding: quoted-printable
-
-GZ
-OoOoO
-oOoOoOoOo
-oOoOoOoOoOoOoOoOo
-oOoOoOoOoOoOoOoOoOoOoOo
-oOoOoOoOoOoOoOoOoOoOoOoOoOoOo
-OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
-
---b1_e376dc71bafc953c0b0fdeb9983a9956
-Content-Type: text/html; charset=us-ascii
-
-<div style="text-align: center;"><strong>GZ</strong><br />
-OoOoO<br />
-oOoOoOoOo<br />
-oOoOoOoOoOoOoOoOo<br />
-oOoOoOoOoOoOoOoOoOoOoOo<br />
-oOoOoOoOoOoOoOoOoOoOoOoOoOoOo<br />
-OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
-
---b1_e376dc71bafc953c0b0fdeb9983a9956--
-"#.as_bytes();
-    
-    parser_bodystruct(fullmail, |part| {
-        assert_eq!(part, &part::PartNode::Composite(
-            part::PartHeader {
-                ..part::PartHeader::default()
-            },
-            vec![
-                part::PartNode::Discrete(
-                    part::PartHeader {
-                        ..part::PartHeader::default()
-                    },
-                    r#"GZ
-OoOoO
-oOoOoOoOo
-oOoOoOoOoOoOoOoOo
-oOoOoOoOoOoOoOoOoOoOoOo
-oOoOoOoOoOoOoOoOoOoOoOoOoOoOo
-OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO"#.as_bytes()
-                ),
-                part::PartNode::Discrete(
-                    part::PartHeader {
-                        ..part::PartHeader::default()
-                    },
-                    r#"<div style="text-align: center;"><strong>GZ</strong><br />
-OoOoO<br />
-oOoOoOoOo<br />
-oOoOoOoOoOoOoOoOo<br />
-oOoOoOoOoOoOoOoOoOoOoOo<br />
-oOoOoOoOoOoOoOoOoOoOoOoOoOoOo<br />
-OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />"#.as_bytes()
-                ),
-            ]));
-    });
-}