From 1e6b18de5aee9f1660a34054bf778cd6dc4350b2 Mon Sep 17 00:00:00 2001
From: Quentin Dufour <quentin@deuxfleurs.fr>
Date: Mon, 12 Jun 2023 17:20:24 +0200
Subject: [PATCH] wip atom

---
 src/headers.rs | 61 +++++++++++++++++++++++++++++++++++---------------
 src/tokens.rs  | 57 +++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 97 insertions(+), 21 deletions(-)

diff --git a/src/headers.rs b/src/headers.rs
index bdcad2f..35f82be 100644
--- a/src/headers.rs
+++ b/src/headers.rs
@@ -46,6 +46,7 @@ pub fn header_section(input: &str) -> IResult<&str, PermissiveHeaderSection> {
     Ok((input, headers))
 }
 
+#[derive(Debug)]
 enum HeaderField<'a> {
     // 3.6.1.  The Origination Date Field
     Date(HeaderDate),
@@ -86,16 +87,17 @@ enum HeaderField<'a> {
     Optional(&'a str, String)
 }
 
-/// Extract one header field
+/// Parse one header field
 ///
-/// Derived grammar inspired by RFC5322 optional-field:
+/// The RFC 5322 `optional-field` rule is the most general form of a header field,
+/// so we use its grammar to extract the name of every header:
 /// 
 /// ```abnf
 /// field      =   field-name ":" unstructured CRLF
 /// field-name =   1*ftext
 /// ftext      =   %d33-57 /          ; Printable US-ASCII
 ///                %d59-126           ;  characters not including
-///                                        ;  ":".
+///                                   ;  ":".
 /// ```
 fn header_field(input: &str) -> IResult<&str, HeaderField> {
     // Extract field name
@@ -104,21 +106,8 @@ fn header_field(input: &str) -> IResult<&str, HeaderField> {
 
     // Extract field body
     let (input, hfield) = match field_name {
-        "Date" => {
-            // @FIXME want to extract datetime our way in the future
-            // to better handle obsolete/bad cases instead of crashing.
-            let (input, raw_date) = unstructured(input)?;
-            let date = match DateTime::parse_from_rfc2822(&raw_date) {
-                Ok(chronodt) => HeaderDate::Parsed(chronodt),
-                Err(e) => HeaderDate::Unknown(raw_date, e),
-            };
-            (input, HeaderField::Date(date))
-        },
-        "From" => {
-           let (input, mbx) = mailbox(input)?;
-           //many0(
-           unimplemented!()
-        },
+        "Date" => datetime(input)?,
+        "From" => from(input)?,
         "Sender" => unimplemented!(),
         "Subject" => {
             let (input, body) = unstructured(input)?;
@@ -130,6 +119,7 @@ fn header_field(input: &str) -> IResult<&str, HeaderField> {
         }
     };
 
+    // Drop EOL
     let (input, _) = crlf(input)?;
     return Ok((input, hfield));
 }
@@ -159,6 +149,41 @@ fn unstructured(input: &str) -> IResult<&str, String> {
     Ok((input, body))
 }
 
+fn datetime(input: &str) -> IResult<&str, HeaderField> {
+    // Read the unstructured body and try `DateTime::parse_from_rfc2822` on it; on failure,
+    // keep the raw text and the error. @FIXME: parse dates ourselves (obsolete/bad forms).
+    let (input, raw_date) = unstructured(input)?;
+    let date = match DateTime::parse_from_rfc2822(&raw_date) {
+        Ok(chronodt) => HeaderDate::Parsed(chronodt),
+        Err(e) => HeaderDate::Unknown(raw_date, e),
+    };
+    Ok((input, HeaderField::Date(date)))
+}
+
+fn from(input: &str) -> IResult<&str, HeaderField> {
+    // TODO: parse the mailbox list with `many0(mailbox)(input)?` and return it wrapped
+    // in `HeaderField::From`; until then, reaching this function panics.
+    unimplemented!();
+}
+
 fn mailbox(input: &str) -> IResult<&str, MailboxRef> {
     unimplemented!();
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_datetime() {
+        let datefield = "Thu,\r\n  13\r\n  Feb\r\n    1969\r\n 23:32\r\n   -0330 (Newfoundland Time)";
+        let (input, v) = datetime(datefield).unwrap();
+        assert_eq!(input, "");
+        match v {
+            HeaderField::Date(HeaderDate::Parsed(_)) => (),
+            _ => panic!("Date has not been parsed"),
+        };
+    }
+}
+
+
diff --git a/src/tokens.rs b/src/tokens.rs
index de76c99..bd03f64 100644
--- a/src/tokens.rs
+++ b/src/tokens.rs
@@ -5,7 +5,7 @@ use nom::{
     character::complete::{crlf, satisfy, space0, space1},
     combinator::{recognize, opt},
     multi::{many0, many1},
-    sequence::{preceded, terminated, tuple},
+    sequence::{delimited, pair, preceded, terminated, tuple},
 };
 
 /// Lexical tokens
@@ -68,7 +68,7 @@ fn fold_marker(input: &str) -> IResult<&str, &str> {
 ///   CFWS            =   (1*([FWS] comment) [FWS]) / FWS
 /// ```
 pub fn cfws(input: &str) -> IResult<&str, &str> {
-    alt((perm_fws, recognize(comments)))(input)
+    alt((recognize(comments), perm_fws))(input)
 }
 
 pub fn comments(input: &str) -> IResult<&str, ()> {
@@ -122,10 +122,35 @@ pub fn vchar_seq(input: &str) -> IResult<&str, &str> {
    take_while1(is_vchar)(input)
 }
 
+fn is_atext(c: char) -> bool {
+    c.is_ascii_alphanumeric() || "!#$%&'*+-/=?^_`{|}~".contains(c)
+}
+
+/// Parse an RFC 5322 `atom`, returning only the `atext` run (surrounding CFWS is dropped).
+///
+/// Grammar: `[CFWS] 1*atext [CFWS]`
+fn atom(input: &str) -> IResult<&str, &str> {
+    delimited(opt(cfws), take_while1(is_atext), opt(cfws))(input)
+}
+
+/// Parse an RFC 5322 `dot-atom-text`, returning the whole matched slice, dots included.
+///
+/// Grammar: `1*atext *("." 1*atext)`
+fn dot_atom_text(input: &str) -> IResult<&str, &str> {
+    recognize(pair(take_while1(is_atext), many0(pair(tag("."), take_while1(is_atext)))))(input)
+}
+
+/// Parse an RFC 5322 `dot-atom`, returning the `dot-atom-text` without surrounding CFWS.
+///
+/// Grammar: `[CFWS] dot-atom-text [CFWS]`
+fn dot_atom(input: &str) -> IResult<&str, &str> {
+    delimited(opt(cfws), dot_atom_text, opt(cfws))(input)
+}
+
+
 #[cfg(test)]
 mod tests {
     use super::*;
-    use nom;
 
     #[test]
     fn test_vchar_seq() {
@@ -151,5 +176,31 @@ mod tests {
     #[test]
     fn test_cfws() {
         assert_eq!(cfws("(A nice \\) chap) <pete(his account)@silly.test(his host)>"), Ok(("<pete(his account)@silly.test(his host)>", "(A nice \\) chap) ")));
+        assert_eq!(cfws("(Chris's host.)public.example>,"), Ok(("public.example>,", "(Chris's host.)")));
+        assert_eq!(cfws("(double (comment) is fun) wouch"), Ok(("wouch", "(double (comment) is fun) ")));
+    }
+
+    #[test]
+    fn test_atext() {
+        assert!(is_atext('='));
+        assert!(is_atext('5'));
+        assert!(is_atext('Q'));
+        assert!(!is_atext(' '));
+        assert!(!is_atext('É'));
+    }
+
+    #[test]
+    fn test_atom() {
+        assert_eq!(atom("(skip)  imf_codec (hidden) aerogramme"), Ok(("aerogramme", "imf_codec")));
+    }
+
+    #[test]
+    fn test_dot_atom_text() {
+        assert_eq!(dot_atom_text("quentin.dufour.io abcdef"), Ok((" abcdef", "quentin.dufour.io")));
+    }
+
+    #[test]
+    fn test_dot_atom() {
+        assert_eq!(dot_atom("   (skip) quentin.dufour.io abcdef"), Ok(("abcdef", "quentin.dufour.io")));
     }
 }