refactor parser

2023-06-12 22:08:34 +02:00 · 2023-06-12 22:08:34 +02:00 · 37705f2894
commit 37705f2894
parent db2fea9015
8 changed files with 373 additions and 329 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1 +1,2 @@
 /target
+.*sw*
--- a/src/headers.rs
+++ b/src/headers.rs
@ -11,7 +11,9 @@ use nom::{
    sequence::tuple,
 };

-use crate::tokens::{fws, vchar_seq, perm_crlf, unstructured};
+use crate::whitespace::{fws, perm_crlf};
+use crate::words::vchar_seq;
+use crate::misc_token::unstructured;
 use crate::model::{PermissiveHeaderSection, HeaderDate, MailboxRef};

 /// HEADERS
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,3 +1,6 @@
 pub mod headers;
 pub mod model;
-mod tokens;
+mod whitespace;
+mod words;
+mod quoted;
+mod misc_token;
--- a/src/misc_token.rs
+++ b/src/misc_token.rs
@ -0,0 +1,69 @@
+use std::borrow::Cow;
+use nom::{
+    IResult,
+    branch::alt,
+    character::complete::space0,
+    combinator::{into, opt},
+    multi::{many0, many1},
+    sequence::{pair, tuple},
+};
+
+use crate::quoted::quoted_string;
+use crate::whitespace::fws;
+use crate::words::{atom, vchar_seq};
+
+/// Word
+///
+/// ```abnf
+///    word            =   atom / quoted-string
+/// ```
+pub fn word(input: &str) -> IResult<&str, Cow<str>> {
+    alt((into(quoted_string), into(atom)))(input)
+}
+
+/// Phrase
+///
+/// ```abnf
+///    phrase          =   1*word / obs-phrase
+/// ```
+pub fn phrase(input: &str) -> IResult<&str, String> {
+    let (input, words) = many1(word)(input)?;
+    let phrase = words.join(" ");
+    Ok((input, phrase))
+}
+
+/// Unstructured header field body
+///
+/// ```abnf
+/// unstructured    =   (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct
+/// ```
+pub fn unstructured(input: &str) -> IResult<&str, String> {
+    let (input, r) = many0(tuple((opt(fws), vchar_seq)))(input)?;
+    let (input, _) = space0(input)?;
+
+    // Try to optimize for the most common cases
+    let body = match r.as_slice() {
+        [(None, content)] => content.to_string(),
+        [(Some(_), content)] => " ".to_string() + content,
+        lines => lines.iter().fold(String::with_capacity(255), |acc, item| {
+            let (may_ws, content) = item;
+            match may_ws {
+                Some(_) => acc + " " + content,
+                None => acc + content,
+            }
+        }),
+    };
+
+    Ok((input, body))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    #[test]
+    fn test_phrase() {
+        assert_eq!(phrase("hello world"), Ok(("", "hello world".into())));
+        assert_eq!(phrase("salut \"le\" monde"), Ok(("", "salut le monde".into())));
+        assert_eq!(phrase("fin\r\n du\r\nmonde"), Ok(("\r\nmonde", "fin du".into())));
+    }
+}
--- a/src/quoted.rs
+++ b/src/quoted.rs
@ -0,0 +1,86 @@
+use nom::{
+    IResult,
+    branch::alt,
+    bytes::complete::tag,
+    character::complete::satisfy,
+    combinator::opt,
+    multi::many0,
+    sequence::{pair, preceded},
+};
+
+use crate::words::is_vchar;
+use crate::whitespace::{fws, cfws};
+
+/// Quoted pair
+///
+/// ```abnf
+///    quoted-pair     =   ("\" (VCHAR / WSP)) / obs-qp
+/// ```
+pub fn quoted_pair(input: &str) -> IResult<&str, char> {
+    preceded(tag("\\"), satisfy(|c| is_vchar(c) || c == '\t' || c == ' '))(input)
+}
+
+/// Allowed characters in quote
+///
+/// ```abnf
+///   qtext           =   %d33 /             ; Printable US-ASCII
+///                       %d35-91 /          ;  characters not including
+///                       %d93-126 /         ;  "\" or the quote character
+///                       obs-qtext
+/// ```
+fn is_qtext(c: char) -> bool {
+    c == '\x21' || (c >= '\x23' && c <= '\x5B') || (c >= '\x5D' && c <= '\x7E')
+}
+
+/// Quoted pair content
+///
+/// ```abnf
+///   qcontent        =   qtext / quoted-pair
+/// ```
+fn qcontent(input: &str) -> IResult<&str, char> {
+    alt((satisfy(is_qtext), quoted_pair))(input)
+}
+
+/// Quoted string
+///
+/// ```abnf
+/// quoted-string   =   [CFWS]
+///                     DQUOTE *([FWS] qcontent) [FWS] DQUOTE
+///                     [CFWS]
+/// ```
+pub fn quoted_string(input: &str) -> IResult<&str, String> {
+  let (input, _) = opt(cfws)(input)?;
+  let (input, _) = tag("\"")(input)?;
+  let (input, content) = many0(pair(opt(fws), qcontent))(input)?;
+
+  // Rebuild string
+  let mut qstring = content.iter().fold(
+    String::with_capacity(16), 
+    |mut acc, (maybe_wsp, c)| {
+      if let Some(wsp) = maybe_wsp {
+        acc.push(*wsp);
+      }
+      acc.push(*c);
+      acc
+    });
+
+  let (input, maybe_wsp) = opt(fws)(input)?;
+  if let Some(wsp) = maybe_wsp {
+    qstring.push(wsp);
+  }
+
+  let (input, _) = tag("\"")(input)?;
+  let (input, _) = opt(cfws)(input)?;
+  Ok((input, qstring))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_quoted_string() {
+        assert_eq!(quoted_string(" \"hello\\\"world\" "), Ok(("", "hello\"world".to_string())));
+        assert_eq!(quoted_string("\"hello\r\n world\""), Ok(("", "hello world".to_string())));
+    }
+}
--- a/src/tokens.rs
+++ b/src/tokens.rs
@ -1,327 +0,0 @@
-use std::borrow::Cow;
-use nom::{
-    IResult,
-    branch::alt,
-    bytes::complete::{tag, take_while1},
-    character::complete::{crlf, satisfy, space0, space1},
-    combinator::{into, recognize, opt},
-    multi::{many0, many1},
-    sequence::{delimited, pair, preceded, terminated, tuple},
-};
-
-/// Lexical tokens
-///
-/// Approx. maps to section 3.2 of the RFC 
-/// https://www.rfc-editor.org/rfc/rfc5322#section-3.2
-/// Also https://datatracker.ietf.org/doc/html/rfc6532
-
-// quoted characters and strings
-
-/// Quoted pair
-///
-/// ```abnf
-///    quoted-pair     =   ("\" (VCHAR / WSP)) / obs-qp
-/// ```
-pub fn quoted_pair(input: &str) -> IResult<&str, char> {
-    preceded(tag("\\"), satisfy(|c| is_vchar(c) || c == '\t' || c == ' '))(input)
-}
-
-/// Allowed characters in quote
-///
-/// ```abnf
-///   qtext           =   %d33 /             ; Printable US-ASCII
-///                       %d35-91 /          ;  characters not including
-///                       %d93-126 /         ;  "\" or the quote character
-///                       obs-qtext
-/// ```
-fn is_qtext(c: char) -> bool {
-    c == '\x21' || (c >= '\x23' && c <= '\x5B') || (c >= '\x5D' && c <= '\x7E')
-}
-
-/// Quoted pair content
-///
-/// ```abnf
-///   qcontent        =   qtext / quoted-pair
-/// ```
-fn qcontent(input: &str) -> IResult<&str, char> {
-    alt((satisfy(is_qtext), quoted_pair))(input)
-}
-
-/// Quoted string
-///
-/// ```abnf
-/// quoted-string   =   [CFWS]
-///                     DQUOTE *([FWS] qcontent) [FWS] DQUOTE
-///                     [CFWS]
-/// ```
-pub fn quoted_string(input: &str) -> IResult<&str, String> {
-  let (input, _) = opt(cfws)(input)?;
-  let (input, _) = tag("\"")(input)?;
-  let (input, content) = many0(pair(opt(fws), qcontent))(input)?;
-
-  // Rebuild string
-  let mut qstring = content.iter().fold(
-    String::with_capacity(16), 
-    |mut acc, (maybe_wsp, c)| {
-      if let Some(wsp) = maybe_wsp {
-        acc.push(*wsp);
-      }
-      acc.push(*c);
-      acc
-    });
-
-  let (input, maybe_wsp) = opt(fws)(input)?;
-  if let Some(wsp) = maybe_wsp {
-    qstring.push(wsp);
-  }
-
-  let (input, _) = tag("\"")(input)?;
-  let (input, _) = opt(cfws)(input)?;
-  Ok((input, qstring))
-}
-
-/// Word
-///
-/// ```abnf
-///    word            =   atom / quoted-string
-/// ```
-pub fn word(input: &str) -> IResult<&str, Cow<str>> {
-    alt((into(quoted_string), into(atom)))(input)
-}
-
-/// Phrase
-///
-/// ```abnf
-///    phrase          =   1*word / obs-phrase
-/// ```
-pub fn phrase(input: &str) -> IResult<&str, String> {
-    let (input, words) = many1(word)(input)?;
-    let phrase = words.join(" ");
-    Ok((input, phrase))
-}
-
-/// Unstructured header field body
-///
-/// ```abnf
-/// unstructured    =   (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct
-/// ```
-pub fn unstructured(input: &str) -> IResult<&str, String> {
-    let (input, r) = many0(tuple((opt(fws), vchar_seq)))(input)?;
-    let (input, _) = space0(input)?;
-
-    // Try to optimize for the most common cases
-    let body = match r.as_slice() {
-        [(None, content)] => content.to_string(),
-        [(Some(_), content)] => " ".to_string() + content,
-        lines => lines.iter().fold(String::with_capacity(255), |acc, item| {
-            let (may_ws, content) = item;
-            match may_ws {
-                Some(_) => acc + " " + content,
-                None => acc + content,
-            }
-        }),
-    };
-
-    Ok((input, body))
-}
-
-// --- whitespaces and comments
-
-// Note: WSP = SP / HTAB = %x20 / %x09
-// nom::*::space0 = *WSP
-// nom::*::space1 = 1*WSP
-
-/// Permissive CRLF
-///
-/// Theoretically, all lines must end with \r\n
-/// but some mail servers like Dovecot support malformated emails,
-/// for example with only \n eol. It works because
-/// \r or \n is allowed nowhere else, so we also add this support.
-pub fn perm_crlf(input: &str) -> IResult<&str, &str> {
-    alt((crlf, tag("\r"), tag("\n")))(input)
-}
-
-/// Permissive foldable white space
-///
-/// Folding white space are used for long headers splitted on multiple lines.
-/// The obsolete syntax allowes multiple lines without content; implemented for compatibility
-/// reasons
-pub fn fws(input: &str) -> IResult<&str, char> {
-    let (input, _) = alt((recognize(many1(fold_marker)), space1))(input)?;
-    Ok((input, ' '))
-}
-fn fold_marker(input: &str) -> IResult<&str, &str> {
-   let (input, _) = space0(input)?;
-   let (input, _) = perm_crlf(input)?;
-   space1(input)
-}
-
-
-/// Folding White Space with Comment
-///
-/// Note: we drop the comments for now...  
-///
-///   ctext           =   %d33-39 /          ; Printable US-ASCII
-///                       %d42-91 /          ;  characters not including
-///                       %d93-126 /         ;  "(", ")", or "\"
-///                       obs-ctext
-///
-///   ccontent        =   ctext / quoted-pair / comment
-///
-///   comment         =   "(" *([FWS] ccontent) [FWS] ")"
-///
-///   CFWS            =   (1*([FWS] comment) [FWS]) / FWS
-/// ```
-pub fn cfws(input: &str) -> IResult<&str, &str> {
-    alt((recognize(comments), recognize(fws)))(input)
-}
-
-pub fn comments(input: &str) -> IResult<&str, ()> {
-    let (input, _) = many1(tuple((opt(fws), comment)))(input)?;
-    let (input, _) = opt(fws)(input)?;
-    Ok((input, ()))
-}
-
-pub fn comment(input: &str) -> IResult<&str, ()> {
-    let (input, _) = tag("(")(input)?;
-    let (input, _) = many0(tuple((opt(fws), ccontent)))(input)?;
-    let (input, _) = opt(fws)(input)?;
-    let (input, _) = tag(")")(input)?;
-    Ok((input, ()))
-}
-
-pub fn ccontent(input: &str) -> IResult<&str, &str> {
-   alt((recognize(ctext), recognize(quoted_pair), recognize(comment)))(input) 
-}
-
-pub fn ctext(input: &str) -> IResult<&str, char> {
-    satisfy(is_ctext)(input)
-}
-
-/// Check if it's a comment text character
-///
-/// ```abnf
-///   ctext           =   %d33-39 /          ; Printable US-ASCII
-///                       %d42-91 /          ;  characters not including
-///                       %d93-126 /         ;  "(", ")", or "\"
-///                       obs-ctext
-///```
-pub fn is_ctext(c: char) -> bool {
-    (c >= '\x21' && c <= '\x27') || (c >= '\x2A' && c <= '\x5B') || (c >= '\x5D' && c <= '\x7E') || !c.is_ascii()
-}
-
-// atoms, words, phrases, vchar
-
-/// VCHAR definition
-pub fn is_vchar(c: char) -> bool {
-  (c >= '\x21' && c <= '\x7E') || !c.is_ascii()
-}
-
-/// Sequence of visible chars with the UTF-8 extension
-///
-/// ```abnf
-/// VCHAR   =  %x21-7E
-///            ; visible (printing) characters
-/// VCHAR   =/  UTF8-non-ascii
-/// SEQ     = 1*VCHAR
-///```
-pub fn vchar_seq(input: &str) -> IResult<&str, &str> {
-   take_while1(is_vchar)(input)
-}
-
-/// Atom allowed characters
-fn is_atext(c: char) -> bool {
-    c.is_ascii_alphanumeric() || "!#$%&'*+-/=?^_`{|}~".contains(c)
-}
-
-/// Atom 
-///
-/// `[CFWS] 1*atext [CFWS]`
-fn atom(input: &str) -> IResult<&str, &str> {
-    delimited(opt(cfws), take_while1(is_atext), opt(cfws))(input)
-}
-
-/// dot-atom-text
-///
-/// `1*atext *("." 1*atext)`
-fn dot_atom_text(input: &str) -> IResult<&str, &str> {
-    recognize(pair(take_while1(is_atext), many0(pair(tag("."), take_while1(is_atext)))))(input)
-}
-
-/// dot-atom
-///
-/// `[CFWS] dot-atom-text [CFWS]`
-fn dot_atom(input: &str) -> IResult<&str, &str> {
-    delimited(opt(cfws), dot_atom_text, opt(cfws))(input)
-}
-
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_vchar_seq() {
-        assert_eq!(vchar_seq("hello world"), Ok((" world", "hello")));
-        assert_eq!(vchar_seq("hello👋 world"), Ok((" world", "hello👋")));
-    }
-
-    #[test]
-    fn test_perm_crlf() {
-        assert_eq!(perm_crlf("\rworld"), Ok(("world", "\r")));
-        assert_eq!(perm_crlf("\r\nworld"), Ok(("world", "\r\n")));
-        assert_eq!(perm_crlf("\nworld"), Ok(("world", "\n")));
-    }
-
-    #[test]
-    fn test_fws() {
-        assert_eq!(fws("\r\n world"), Ok(("world", ' ')));
-        assert_eq!(fws(" \r\n \r\n world"), Ok(("world", ' ')));
-        assert_eq!(fws(" world"), Ok(("world", ' ')));
-        assert!(fws("\r\nFrom: test").is_err());
-    }
-
-    #[test]
-    fn test_cfws() {
-        assert_eq!(cfws("(A nice \\) chap) <pete(his account)@silly.test(his host)>"), Ok(("<pete(his account)@silly.test(his host)>", "(A nice \\) chap) ")));
-        assert_eq!(cfws("(Chris's host.)public.example>,"), Ok(("public.example>,", "(Chris's host.)")));
-        assert_eq!(cfws("(double (comment) is fun) wouch"), Ok(("wouch", "(double (comment) is fun) ")));
-    }
-
-    #[test]
-    fn test_atext() {
-        assert!(is_atext('='));
-        assert!(is_atext('5'));
-        assert!(is_atext('Q'));
-        assert!(!is_atext(' '));
-        assert!(!is_atext('É'));
-    }
-
-    #[test]
-    fn test_atom() {
-        assert_eq!(atom("(skip)  imf_codec (hidden) aerogramme"), Ok(("aerogramme", "imf_codec")));
-    }
-
-    #[test]
-    fn test_dot_atom_text() {
-        assert_eq!(dot_atom_text("quentin.dufour.io abcdef"), Ok((" abcdef", "quentin.dufour.io")));
-    }
-
-    #[test]
-    fn test_dot_atom() {
-        assert_eq!(dot_atom("   (skip) quentin.dufour.io abcdef"), Ok(("abcdef", "quentin.dufour.io")));
-    }
-
-    #[test]
-    fn test_quoted_string() {
-        assert_eq!(quoted_string(" \"hello\\\"world\" "), Ok(("", "hello\"world".to_string())));
-        assert_eq!(quoted_string("\"hello\r\n world\""), Ok(("", "hello world".to_string())));
-    }
-
-    #[test]
-    fn test_phrase() {
-        assert_eq!(phrase("hello world"), Ok(("", "hello world".into())));
-        assert_eq!(phrase("salut \"le\" monde"), Ok(("", "salut le monde".into())));
-        assert_eq!(phrase("fin\r\n du\r\nmonde"), Ok(("\r\nmonde", "fin du".into())));
-    }
-}
--- a/src/whitespace.rs
+++ b/src/whitespace.rs
@ -0,0 +1,122 @@
+use nom::{
+    IResult,
+    branch::alt,
+    bytes::complete::tag,
+    character::complete::{crlf, satisfy, space0, space1},
+    combinator::{recognize, opt},
+    multi::{many0, many1},
+    sequence::{pair, tuple},
+};
+use crate::quoted::quoted_pair;
+
+// --- whitespaces and comments
+
+// Note: WSP = SP / HTAB = %x20 / %x09
+// nom::*::space0 = *WSP
+// nom::*::space1 = 1*WSP
+
+/// Permissive CRLF
+///
+/// Theoretically, all lines must end with \r\n
+/// but some mail servers like Dovecot support malformated emails,
+/// for example with only \n eol. It works because
+/// \r or \n is allowed nowhere else, so we also add this support.
+pub fn perm_crlf(input: &str) -> IResult<&str, &str> {
+    alt((crlf, tag("\r"), tag("\n")))(input)
+}
+
+/// Permissive foldable white space
+///
+/// Folding white space are used for long headers splitted on multiple lines.
+/// The obsolete syntax allowes multiple lines without content; implemented for compatibility
+/// reasons
+pub fn fws(input: &str) -> IResult<&str, char> {
+    let (input, _) = alt((recognize(many1(fold_marker)), space1))(input)?;
+    Ok((input, ' '))
+}
+fn fold_marker(input: &str) -> IResult<&str, &str> {
+   let (input, _) = space0(input)?;
+   let (input, _) = perm_crlf(input)?;
+   space1(input)
+}
+
+
+/// Folding White Space with Comment
+///
+/// Note: we drop the comments for now...  
+///
+///   ctext           =   %d33-39 /          ; Printable US-ASCII
+///                       %d42-91 /          ;  characters not including
+///                       %d93-126 /         ;  "(", ")", or "\"
+///                       obs-ctext
+///
+///   ccontent        =   ctext / quoted-pair / comment
+///
+///   comment         =   "(" *([FWS] ccontent) [FWS] ")"
+///
+///   CFWS            =   (1*([FWS] comment) [FWS]) / FWS
+/// ```
+pub fn cfws(input: &str) -> IResult<&str, &str> {
+    alt((recognize(comments), recognize(fws)))(input)
+}
+
+pub fn comments(input: &str) -> IResult<&str, ()> {
+    let (input, _) = many1(tuple((opt(fws), comment)))(input)?;
+    let (input, _) = opt(fws)(input)?;
+    Ok((input, ()))
+}
+
+pub fn comment(input: &str) -> IResult<&str, ()> {
+    let (input, _) = tag("(")(input)?;
+    let (input, _) = many0(tuple((opt(fws), ccontent)))(input)?;
+    let (input, _) = opt(fws)(input)?;
+    let (input, _) = tag(")")(input)?;
+    Ok((input, ()))
+}
+
+pub fn ccontent(input: &str) -> IResult<&str, &str> {
+   alt((recognize(ctext), recognize(quoted_pair), recognize(comment)))(input) 
+}
+
+pub fn ctext(input: &str) -> IResult<&str, char> {
+    satisfy(is_ctext)(input)
+}
+
+/// Check if it's a comment text character
+///
+/// ```abnf
+///   ctext           =   %d33-39 /          ; Printable US-ASCII
+///                       %d42-91 /          ;  characters not including
+///                       %d93-126 /         ;  "(", ")", or "\"
+///                       obs-ctext
+///```
+pub fn is_ctext(c: char) -> bool {
+    (c >= '\x21' && c <= '\x27') || (c >= '\x2A' && c <= '\x5B') || (c >= '\x5D' && c <= '\x7E') || !c.is_ascii()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_perm_crlf() {
+        assert_eq!(perm_crlf("\rworld"), Ok(("world", "\r")));
+        assert_eq!(perm_crlf("\r\nworld"), Ok(("world", "\r\n")));
+        assert_eq!(perm_crlf("\nworld"), Ok(("world", "\n")));
+    }
+
+    #[test]
+    fn test_fws() {
+        assert_eq!(fws("\r\n world"), Ok(("world", ' ')));
+        assert_eq!(fws(" \r\n \r\n world"), Ok(("world", ' ')));
+        assert_eq!(fws(" world"), Ok(("world", ' ')));
+        assert!(fws("\r\nFrom: test").is_err());
+    }
+
+    #[test]
+    fn test_cfws() {
+        assert_eq!(cfws("(A nice \\) chap) <pete(his account)@silly.test(his host)>"), Ok(("<pete(his account)@silly.test(his host)>", "(A nice \\) chap) ")));
+        assert_eq!(cfws("(Chris's host.)public.example>,"), Ok(("public.example>,", "(Chris's host.)")));
+        assert_eq!(cfws("(double (comment) is fun) wouch"), Ok(("wouch", "(double (comment) is fun) ")));
+    }
+}
--- a/src/words.rs
+++ b/src/words.rs
@ -0,0 +1,88 @@
+use nom::{
+    IResult,
+    bytes::complete::{tag, take_while1},
+    combinator::{recognize, opt},
+    multi::many0,
+    sequence::{delimited, pair},
+};
+use crate::whitespace::cfws;
+
+
+/// VCHAR definition
+pub fn is_vchar(c: char) -> bool {
+  (c >= '\x21' && c <= '\x7E') || !c.is_ascii()
+}
+
+/// Sequence of visible chars with the UTF-8 extension
+///
+/// ```abnf
+/// VCHAR   =  %x21-7E
+///            ; visible (printing) characters
+/// VCHAR   =/  UTF8-non-ascii
+/// SEQ     = 1*VCHAR
+///```
+pub fn vchar_seq(input: &str) -> IResult<&str, &str> {
+   take_while1(is_vchar)(input)
+}
+
+/// Atom allowed characters
+fn is_atext(c: char) -> bool {
+    c.is_ascii_alphanumeric() || "!#$%&'*+-/=?^_`{|}~".contains(c)
+}
+
+/// Atom 
+///
+/// `[CFWS] 1*atext [CFWS]`
+pub fn atom(input: &str) -> IResult<&str, &str> {
+    delimited(opt(cfws), take_while1(is_atext), opt(cfws))(input)
+}
+
+/// dot-atom-text
+///
+/// `1*atext *("." 1*atext)`
+fn dot_atom_text(input: &str) -> IResult<&str, &str> {
+    recognize(pair(take_while1(is_atext), many0(pair(tag("."), take_while1(is_atext)))))(input)
+}
+
+/// dot-atom
+///
+/// `[CFWS] dot-atom-text [CFWS]`
+fn dot_atom(input: &str) -> IResult<&str, &str> {
+    delimited(opt(cfws), dot_atom_text, opt(cfws))(input)
+}
+
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_vchar_seq() {
+        assert_eq!(vchar_seq("hello world"), Ok((" world", "hello")));
+        assert_eq!(vchar_seq("hello👋 world"), Ok((" world", "hello👋")));
+    }
+
+    #[test]
+    fn test_atext() {
+        assert!(is_atext('='));
+        assert!(is_atext('5'));
+        assert!(is_atext('Q'));
+        assert!(!is_atext(' '));
+        assert!(!is_atext('É'));
+    }
+
+    #[test]
+    fn test_atom() {
+        assert_eq!(atom("(skip)  imf_codec (hidden) aerogramme"), Ok(("aerogramme", "imf_codec")));
+    }
+
+    #[test]
+    fn test_dot_atom_text() {
+        assert_eq!(dot_atom_text("quentin.dufour.io abcdef"), Ok((" abcdef", "quentin.dufour.io")));
+    }
+
+    #[test]
+    fn test_dot_atom() {
+        assert_eq!(dot_atom("   (skip) quentin.dufour.io abcdef"), Ok(("abcdef", "quentin.dufour.io")));
+    }
+}