From 6b3343f1370a56c7a4af9748a5082f73f0ee3fcf Mon Sep 17 00:00:00 2001
From: Quentin Dufour <quentin@deuxfleurs.fr>
Date: Fri, 14 Jul 2023 10:43:31 +0200
Subject: [PATCH] implement mime headers

---
 src/fragments/mime.rs       | 20 +++++------
 src/fragments/misc_token.rs | 40 ++++++++++++++-------
 src/fragments/section.rs    | 14 +++++++-
 src/fragments/whitespace.rs | 11 +++++-
 tests/known.rs              | 70 +++++++++++++++++++++++++++++++++++++
 5 files changed, 131 insertions(+), 24 deletions(-)
diff --git a/src/fragments/mime.rs b/src/fragments/mime.rs
index 0004e4b..14ece11 100644
--- a/src/fragments/mime.rs
+++ b/src/fragments/mime.rs
@@ -17,8 +17,8 @@ use crate::fragments::quoted::quoted_string;
 
 #[derive(Debug, PartialEq)]
 pub struct Version {
-    major: u32,
-    minor: u32,
+    pub major: u32, // the `1` of `MIME-Version: 1.0`; public so code outside this module can build and compare a `Version`
+    pub minor: u32, // the `0` of `MIME-Version: 1.0`
 }
 
 #[derive(Debug, PartialEq)]
@@ -40,9 +40,9 @@ pub enum Type<'a> {
 
 #[derive(Debug, PartialEq)]
 pub struct MultipartDesc<'a> {
-    boundary: String,
-    subtype: MultipartSubtype<'a>,
-    unknown_parameters: Vec<Parameter<'a>>,
+    pub boundary: String, // owned value, unlike the two `'a`-parameterized fields below
+    pub subtype: MultipartSubtype<'a>, // now readable and constructible from outside this module
+    pub unknown_parameters: Vec<Parameter<'a>>, // presumably the parameters that have no dedicated field - TODO confirm against the parser
 }
 
 #[derive(Debug, PartialEq)]
@@ -57,8 +57,8 @@ pub enum MultipartSubtype<'a> {
 
 #[derive(Debug, PartialEq)]
 pub struct MessageDesc<'a> {
-    subtype: MessageSubtype<'a>,
-    unknown_parameters: Vec<Parameter<'a>>,
+    pub subtype: MessageSubtype<'a>, // now readable and constructible from outside this module
+    pub unknown_parameters: Vec<Parameter<'a>>, // presumably the parameters that have no dedicated field - TODO confirm against the parser
 }
 
 #[derive(Debug, PartialEq)]
@@ -71,9 +71,9 @@ pub enum MessageSubtype<'a> {
 
 #[derive(Debug, PartialEq)]
 pub struct TextDesc<'a> {
-    charset: Option<EmailCharset<'a>>,
-    subtype: TextSubtype<'a>,
-    unknown_parameters: Vec<Parameter<'a>>,
+    pub charset: Option<EmailCharset<'a>>, // e.g. `charset=ISO-8859-1` is expected to give `Some(EmailCharset::ISO_8859_1)`
+    pub subtype: TextSubtype<'a>, // e.g. `text/plain` is expected to give `TextSubtype::Plain`
+    pub unknown_parameters: Vec<Parameter<'a>>, // expected to be empty when `charset` is the only parameter present
 }
 
 #[derive(Debug, PartialEq)]
diff --git a/src/fragments/misc_token.rs b/src/fragments/misc_token.rs
index 3f18213..11e25af 100644
--- a/src/fragments/misc_token.rs
+++ b/src/fragments/misc_token.rs
@@ -2,7 +2,7 @@ use nom::{
     branch::alt,
     bytes::complete::{tag, take_while1},
     character::complete::space0,
-    combinator::{into, opt},
+    combinator::{into, map, opt},
     multi::{many0, many1, separated_list1},
     sequence::tuple,
     IResult,
@@ -14,6 +14,7 @@ use crate::fragments::lazy;
 use crate::fragments::quoted::quoted_string;
 use crate::fragments::whitespace::{fws, is_obs_no_ws_ctl};
 use crate::fragments::words::{atom, is_vchar};
+use crate::fragments::encoding::encoded_word;
 
 #[derive(Debug, PartialEq, Default)]
 pub struct Unstructured(pub String);
@@ -47,7 +48,7 @@ impl<'a> TryFrom<&'a lazy::PhraseList<'a>> for PhraseList {
 ///    word            =   atom / quoted-string
 /// ```
 pub fn word(input: &str) -> IResult<&str, Cow<str>> {
-    alt((into(quoted_string), into(atom)))(input)
+    alt((into(quoted_string), into(encoded_word), into(atom)))(input) // alt keeps the first parser that succeeds, so encoded_word runs before atom; presumably atom would otherwise accept the raw `=?...?=` text - TODO confirm
 }
 
 /// Phrase
@@ -70,31 +71,46 @@ fn is_unstructured(c: char) -> bool {
     is_vchar(c) || is_obs_no_ws_ctl(c) || c == '\x00'
 }
 
+enum UnstrToken { // tags each token matched by `unstructured` so the join step knows what kind of token came before
+    Init, // used only as the fold seed: no token has been seen yet
+    Encoded, // token produced by `encoded_word`
+    Plain, // raw run of `is_unstructured` characters
+}
+
 /// Unstructured header field body
 ///
 /// ```abnf
 /// unstructured    =   (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct
 /// ```
 pub fn unstructured(input: &str) -> IResult<&str, String> {
-    let (input, r) = many0(tuple((opt(fws), take_while1(is_unstructured))))(input)?;
+    let (input, r) = many0(tuple((opt(fws), alt(( // each item: optional folding whitespace, then one token
+                        map(encoded_word, |v| (Cow::Owned(v), UnstrToken::Encoded)), // tried first; its output is kept as an owned value
+                        map(take_while1(is_unstructured), |v| (Cow::Borrowed(v), UnstrToken::Plain)), // fallback: raw run borrowed from the input
+                    )))))(input)?;
+
     let (input, _) = space0(input)?;
 
     // Try to optimize for the most common cases
     let body = match r.as_slice() {
-        [(None, content)] => content.to_string(),
-        [(Some(_), content)] => " ".to_string() + content,
-        lines => lines.iter().fold(String::with_capacity(255), |acc, item| {
-            let (may_ws, content) = item;
-            match may_ws {
-                Some(_) => acc + " " + content,
-                None => acc + content,
-            }
-        }),
+        // Fast path: exactly one (whitespace, token) pair; a lone encoded word drops its leading whitespace
+        [(None, (content, _))] | [(_, (content, UnstrToken::Encoded))] => content.to_string(),
+        [(Some(_), (content, _))] => " ".to_string() + content, // leading folding whitespace collapses to one space
+        // Generic case, with several tokens: fold left to right, remembering the kind of the previous token
+        lines => lines.iter().fold(
+            (&UnstrToken::Init, String::with_capacity(255)), // seed: no previous token, empty output buffer
+            |(prev_token, result), (may_ws, (content, current_token))| {
+            let new_res = match (may_ws, prev_token, current_token) {
+                (_, UnstrToken::Encoded, UnstrToken::Encoded) | (None, _, _) => result + content, // whitespace between two encoded words is dropped (cf. RFC 2047 section 6.2); no whitespace means no separator
+                _ => result + " " + content, // NOTE(review): also reached by a leading encoded word after whitespace (Init is not Encoded), unlike the fast path above
+            };
+            (current_token, new_res)
+        }).1,
     };
 
     Ok((input, body))
 }
 
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/src/fragments/section.rs b/src/fragments/section.rs
index efdd19f..4d66867 100644
--- a/src/fragments/section.rs
+++ b/src/fragments/section.rs
@@ -3,6 +3,7 @@ use std::collections::HashMap;
 use crate::fragments::eager::Field;
 use crate::fragments::lazy;
 use crate::fragments::misc_token::{PhraseList, Unstructured};
+use crate::fragments::mime::{Version,Type,Mechanism};
 use crate::fragments::model::{AddressRef, MailboxRef, MessageId};
 use crate::fragments::trace::ReceivedLog;
 use chrono::{DateTime, FixedOffset};
@@ -40,6 +41,13 @@ pub struct Section<'a> {
     // 3.6.8.  Optional Fields
     pub optional: HashMap<&'a str, &'a Unstructured>,
 
+    // MIME
+    pub mime_version: Option<&'a Version>,
+    pub content_type: Option<&'a Type<'a>>,
+    pub content_transfer_encoding: Option<&'a Mechanism<'a>>,
+    pub content_id: Option<&'a MessageId<'a>>,
+    pub content_description: Option<&'a Unstructured>,
+
     // Recovery
     pub bad_fields: Vec<&'a lazy::Field<'a>>,
     pub unparsed: Vec<&'a str>,
@@ -71,7 +79,11 @@ impl<'a> FromIterator<&'a Field<'a>> for Section<'a> {
                     section.optional.insert(k, v);
                 }
                 Field::Rescue(v) => section.unparsed.push(v),
-                _ => todo!(),
+                Field::MIMEVersion(v) => section.mime_version = Some(v),
+                Field::ContentType(v) => section.content_type = Some(v),
+                Field::ContentTransferEncoding(v) => section.content_transfer_encoding = Some(v),
+                Field::ContentID(v) => section.content_id = Some(v),
+                Field::ContentDescription(v) => section.content_description = Some(v),
             }
         }
         section
diff --git a/src/fragments/whitespace.rs b/src/fragments/whitespace.rs
index 4acb8e8..57aec12 100644
--- a/src/fragments/whitespace.rs
+++ b/src/fragments/whitespace.rs
@@ -8,6 +8,7 @@ use nom::{
     sequence::tuple,
     IResult,
 };
+use crate::fragments::encoding::encoded_word;
 
 // --- whitespaces and comments
 
@@ -75,7 +76,7 @@ pub fn comment(input: &str) -> IResult<&str, ()> {
 }
 
 pub fn ccontent(input: &str) -> IResult<&str, &str> {
-    alt((recognize(ctext), recognize(quoted_pair), recognize(comment)))(input)
+    alt((recognize(ctext), recognize(quoted_pair), recognize(encoded_word), recognize(comment)))(input) // every branch is wrapped in recognize, so only the matched input slice is returned; NOTE(review): alt tries ctext first - if ctext accepts '=', the encoded_word branch is never reached - confirm
 }
 
 pub fn ctext(input: &str) -> IResult<&str, char> {
@@ -155,4 +156,12 @@ mod tests {
             Ok(("wouch", "(double (comment) is fun) "))
         );
     }
+
+    #[test]
+    fn test_cfws_encoded_word() { // a comment wrapping an encoded word must be consumed whole and returned as raw, undecoded text
+       assert_eq!(
+            cfws("(=?US-ASCII?Q?Keith_Moore?=)"),
+            Ok(("", "(=?US-ASCII?Q?Keith_Moore?=)")),
+        );
+    }
 }
diff --git a/tests/known.rs b/tests/known.rs
index 9eac7c8..03ef6a8 100644
--- a/tests/known.rs
+++ b/tests/known.rs
@@ -172,7 +172,77 @@ This is a reply to your hello.
                     "Héron: Raté\n Raté raté\n",
                     "Not a real header but should still recover\n",
                 ],
+                ..section::Section::default()
             }
         )
     })
 }
+
+#[test]
+fn test_headers_mime() { // end-to-end check: encoded words in address and subject fields, plus the five MIME header fields
+    use imf_codec::fragments::mime;
+    let fullmail: &[u8] = r#"From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>
+To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>
+CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>
+Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
+    =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
+MIME-Version: 1.0
+Content-Type: text/plain; charset=ISO-8859-1
+Content-Transfer-Encoding: quoted-printable
+Content-ID: <a@example.com>
+Content-Description: hello
+
+Now's the time =
+for all folk to come=
+ to the aid of their country.
+"#
+    .as_bytes();
+
+   parser(fullmail, |parsed_section| {
+        assert_eq!(
+            parsed_section,
+            &section::Section {
+                from: vec![
+                    &model::MailboxRef {
+                        name: Some("Keith Moore".into()), // from `=?US-ASCII?Q?Keith_Moore?=`: the `_` comes out as a space
+                        addrspec: model::AddrSpec {
+                            local_part: "moore".into(),
+                            domain: "cs.utk.edu".into(),
+                        }
+                    },
+                ],
+
+                to: vec![&model::AddressRef::Single(model::MailboxRef {
+                    name: Some("Keld Jørn Simonsen".into()), // `=F8` in ISO-8859-1 comes out as `ø`
+                    addrspec: model::AddrSpec {
+                        local_part: "keld".into(),
+                        domain: "dkuug.dk".into(),
+                    }
+                })],
+
+                cc: vec![&model::AddressRef::Single(model::MailboxRef {
+                    name: Some("André Pirard".into()), // an encoded word followed by a plain word, joined with a space
+                    addrspec: model::AddrSpec {
+                        local_part: "PIRARD".into(),
+                        domain: "vm1.ulg.ac.be".into(),
+                    }
+                })],
+                // The two encoded words of the folded Subject are joined with nothing in between ("...yo" + "u ...").
+                subject: Some(&misc_token::Unstructured("If you can read this you understand the example.".into())),
+                mime_version: Some(&mime::Version{ major: 1, minor: 0 }),
+                content_type: Some(&mime::Type::Text(mime::TextDesc { 
+                    charset: Some(mime::EmailCharset::ISO_8859_1), 
+                    subtype: mime::TextSubtype::Plain, 
+                    unknown_parameters: vec![]
+                })),
+                content_transfer_encoding: Some(&mime::Mechanism::QuotedPrintable),
+                content_id: Some(&model::MessageId {
+                    left: "a",
+                    right: "example.com"
+                }),
+                content_description: Some(&misc_token::Unstructured("hello".into())),
+                ..section::Section::default() // every field not listed above is expected to keep its default value
+            }
+        );
+   })
+}