refactor by splitting into files

2023-06-08 21:43:32 +02:00 · 2023-06-08 21:43:32 +02:00 · 93576dfeff
commit 93576dfeff
parent a443a90738
4 changed files with 200 additions and 190 deletions
--- a/src/abnf.rs
+++ b/src/abnf.rs
@ -0,0 +1,50 @@
 use nom::{
    IResult,
    branch::alt,
    bytes::complete::{tag, take_while1},
    character::complete::{crlf, space0, space1},
    combinator::opt,
    sequence::terminated,
 };
 /// ABNF rfc5234
 /// Permissive CRLF
 ///
 /// Theoretically, all lines must end with \r\n
 /// but mail servers support malformated emails,
 /// for example with only \n eol. It works because
 /// \r\n is allowed nowhere else, so we also add this support.
 pub fn perm_crlf(input: &str) -> IResult<&str, &str> {
    alt((crlf, tag("\r"), tag("\n")))(input)
 }
 // Note: WSP = SP / HTAB = %x20 / %x09
 // nom::*::space0 = *WSP
 // nom::*::space1 = 1*WSP
 /// Parse a folding white space
 ///
 /// Folding white space are used for long headers splitted on multiple lines
 ///
 /// ```abnf
 /// FWS             =   ([*WSP CRLF] 1*WSP) /  obs-FWS
 /// obs-FWS         =   1*WSP *(CRLF 1*WSP)
 /// ```
 pub fn fws(input: &str) -> IResult<&str, &str> {
    let (input, _) = opt(terminated(space0, perm_crlf))(input)?;
    // @FIXME: not implemented obs-FWS
    space1(input)
 }
 /// Sequence of visible chars with the UTF-8 extension
 ///
 /// ```abnf
 /// VCHAR   =  %x21-7E
 ///            ; visible (printing) characters
 /// VCHAR   =/  UTF8-non-ascii
 /// SEQ     = 1*VCHAR
 ///```
 pub fn vchar_seq(input: &str) -> IResult<&str, &str> {
   take_while1(|c: char| (c >= '\x21' && c <= '\x7E') || !c.is_ascii())(input)
 }
--- a/src/headers.rs
+++ b/src/headers.rs
@ -0,0 +1,138 @@
 use nom::{
    IResult,
    bytes::complete::take_while1,
    bytes::complete::tag,
    character::complete::space0,
    character::complete::crlf,
    combinator::opt,
    multi::fold_many0,
    multi::many0,
    sequence::tuple,
 };
 use crate::abnf::{fws, vchar_seq};
 use crate::model::HeaderSection;
 /// HEADERS
 /// Header section
 ///
 /// See: https://www.rfc-editor.org/rfc/rfc5322.html#section-2.2
 pub fn header_section(input: &str) -> IResult<&str, HeaderSection> {
    fold_many0(
        header_field,
        HeaderSection::default,
        |mut section, head| {
            match head {
                HeaderField::Subject(title) => {
                    section.subject = Some(title);
                }
                HeaderField::Optional(name, body) => {
                    section.optional.insert(name, body);
                }
                _ => unimplemented!(),
            };
            section
        }
    )(input)
 }
 enum HeaderField<'a> {
    // 3.6.1.  The Origination Date Field
    Date,
    // 3.6.2.  Originator Fields
    From,
    Sender,
    ReplyTo,
    // 3.6.3.  Destination Address Fields
    To,
    Cc,
    Bcc,
    // 3.6.4.  Identification Fields
    MessageID,
    InReplyTo,
    References,
    // 3.6.5.  Informational Fields
    Subject(String),
    Comments(String),
    Keywords(Vec<&'a str>),
    // 3.6.6.  Resent Fields
    ResentDate,
    ResentFrom,
    ResentSender,
    ResentTo,
    ResentCc,
    ResentBcc,
    ResentMessageID,
    // 3.6.7.  Trace Fields
    Trace,
    // 3.6.8.  Optional Fields
    Optional(&'a str, String)
 }
 /// Extract one header field
 ///
 /// Derived grammar inspired by RFC5322 optional-field:
 /// 
 /// ```abnf
 /// field      =   field-name ":" unstructured CRLF
 /// field-name =   1*ftext
 /// ftext      =   %d33-57 /          ; Printable US-ASCII
 ///                %d59-126           ;  characters not including
 ///                                        ;  ":".
 /// ```
 fn header_field(input: &str) -> IResult<&str, HeaderField> {
    // Extract field name
    let (input, field_name) = take_while1(|c| c >= '\x21' && c <= '\x7E' && c != '\x3A')(input)?;
    let (input, _) = tuple((tag(":"), space0))(input)?;
    // Extract field body
    match field_name {
        "Date" => unimplemented!(),
        //"From" => unimplemented!(),
        "Sender" => unimplemented!(),
        "Subject" => {
            let (input, body) = unstructured(input)?;
            let (input, _) = crlf(input)?;
            Ok((input, HeaderField::Subject(body)))
        },
        _ => {
            let (input, body) = unstructured(input)?;
            let (input, _) = crlf(input)?;
            Ok((input, HeaderField::Optional(field_name, body)))
        }
    }
 }
 /// Unstructured header field body
 ///
 /// ```abnf
 /// unstructured    =   (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct
 /// ```
 fn unstructured(input: &str) -> IResult<&str, String> {
    let (input, r) = many0(tuple((opt(fws), vchar_seq)))(input)?;
    let (input, _) = space0(input)?;
    // Try to optimize for the most common cases
    let body = match r.as_slice() {
        [(None, content)] => content.to_string(),
        [(Some(ws), content)] => ws.to_string() + content,
        lines => lines.iter().fold(String::with_capacity(255), |mut acc, item| {
            let (may_ws, content) = item;
            match may_ws {
                Some(ws) => acc + ws + content,
                None => acc + content,
            }
        }),
    };
    Ok((input, body))
 }
--- a/src/main.rs
+++ b/src/main.rs
@ -1,194 +1,8 @@
-use std::collections::HashMap;
+mod headers;
-use std::default::Default;
+mod model;
-
+mod abnf;
 use nom::{
    IResult, 
    branch::alt,
    combinator::opt,
    character::complete::alphanumeric1,
    character::complete::crlf,
    character::complete::space0,
    character::complete::space1,
    bytes::complete::tag,
    bytes::complete::take_while1,
    multi::many0,
    multi::fold_many0,
    sequence::terminated,
    sequence::tuple,
 };
 //-------------- ABNF rfc5234
 /// Permissive CRLF
 ///
 /// Theoretically, all lines must end with \r\n
 /// but mail servers support malformated emails,
 /// for example with only \n eol. It works because
 /// \r\n is allowed nowhere else, so we also add this support.
 fn perm_crlf(input: &str) -> IResult<&str, &str> {
    alt((crlf, tag("\r"), tag("\n")))(input)
 }
 // Note: WSP = SP / HTAB = %x20 / %x09
 // nom::*::space0 = *WSP
 // nom::*::space1 = 1*WSP
 /// Parse a folding white space
 ///
 /// Folding white space are used for long headers splitted on multiple lines
 ///
 /// ```abnf
 /// FWS             =   ([*WSP CRLF] 1*WSP) /  obs-FWS
 /// obs-FWS         =   1*WSP *(CRLF 1*WSP)
 /// ```
 fn fws(input: &str) -> IResult<&str, &str> {
    let (input, _) = opt(terminated(space0, perm_crlf))(input)?;
    // @FIXME: not implemented obs-FWS
    space1(input)
 }
 /// Sequence of visible chars with the UTF-8 extension
 ///
 /// ```abnf
 /// VCHAR   =  %x21-7E
 ///            ; visible (printing) characters
 /// VCHAR   =/  UTF8-non-ascii
 /// SEQ     = 1*VCHAR
 ///```
 fn vchar_seq(input: &str) -> IResult<&str, &str> {
   take_while1(|c: char| (c >= '\x21' && c <= '\x7E') || !c.is_ascii())(input)
 }
 //--------------- HEADERS
 /// Header section
 ///
 /// See: https://www.rfc-editor.org/rfc/rfc5322.html#section-2.2
 #[derive(Debug, Default)]
 pub struct HeaderSection<'a> {
    pub subject: Option<String>,
    pub from: Vec<String>,
    pub optional: HashMap<&'a str, String>,
 }
 fn header_section(input: &str) -> IResult<&str, HeaderSection> {
    fold_many0(
        header_field,
        HeaderSection::default,
        |mut section, head| {
            match head {
                HeaderField::Subject(title) => {
                    section.subject = Some(title);
                }
                HeaderField::Optional(name, body) => {
                    section.optional.insert(name, body);
                }
                _ => unimplemented!(),
            };
            section
        }
    )(input)
 }
 enum HeaderField<'a> {
    // 3.6.1.  The Origination Date Field
    Date,
    // 3.6.2.  Originator Fields
    From,
    Sender,
    ReplyTo,
    // 3.6.3.  Destination Address Fields
    To,
    Cc,
    Bcc,
    // 3.6.4.  Identification Fields
    MessageID,
    InReplyTo,
    References,
    // 3.6.5.  Informational Fields
    Subject(String),
    Comments(String),
    Keywords(Vec<&'a str>),
    // 3.6.6.  Resent Fields
    ResentDate,
    ResentFrom,
    ResentSender,
    ResentTo,
    ResentCc,
    ResentBcc,
    ResentMessageID,
    // 3.6.7.  Trace Fields
    Trace,
    // 3.6.8.  Optional Fields
    Optional(&'a str, String)
 }
 /// Extract one header field
 ///
 /// Derived grammar inspired by RFC5322 optional-field:
 /// 
 /// ```abnf
 /// field      =   field-name ":" unstructured CRLF
 /// field-name =   1*ftext
 /// ftext      =   %d33-57 /          ; Printable US-ASCII
 ///                %d59-126           ;  characters not including
 ///                                        ;  ":".
 /// ```
 fn header_field(input: &str) -> IResult<&str, HeaderField> {
    // Extract field name
    let (input, field_name) = take_while1(|c| c >= '\x21' && c <= '\x7E' && c != '\x3A')(input)?;
    let (input, _) = tuple((tag(":"), space0))(input)?;
    // Extract field body
    match field_name {
        "Date" => unimplemented!(),
        //"From" => unimplemented!(),
        "Sender" => unimplemented!(),
        "Subject" => {
            let (input, body) = unstructured(input)?;
            let (input, _) = crlf(input)?;
            Ok((input, HeaderField::Subject(body)))
        },
        _ => {
            let (input, body) = unstructured(input)?;
            let (input, _) = crlf(input)?;
            Ok((input, HeaderField::Optional(field_name, body)))
        }
    }
 }
 /// Unstructured header field body
 ///
 /// ```abnf
 /// unstructured    =   (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct
 /// ```
 fn unstructured(input: &str) -> IResult<&str, String> {
    let (input, r) = many0(tuple((opt(fws), vchar_seq)))(input)?;
    let (input, _) = space0(input)?;
    // Try to optimize for the most common cases
    let body = match r.as_slice() {
        [(None, content)] => content.to_string(),
        [(Some(ws), content)] => ws.to_string() + content,
        lines => lines.iter().fold(String::with_capacity(255), |mut acc, item| {
            let (may_ws, content) = item;
            match may_ws {
                Some(ws) => acc + ws + content,
                None => acc + content,
            }
        }),
    };
    Ok((input, body))
 }
 fn main() {
    let header = "Subject: Hello\r\n World\r\nFrom: <quentin@deuxfleurs.fr>\r\n\r\n";
-    println!("{:?}", header_section(header));
+    println!("{:?}", headers::header_section(header));
 }
--- a/src/model.rs
+++ b/src/model.rs
@ -0,0 +1,8 @@
 use std::collections::HashMap;
 #[derive(Debug, Default)]
 pub struct HeaderSection<'a> {
    pub subject: Option<String>,
    pub from: Vec<String>,
    pub optional: HashMap<&'a str, String>,
 }