From ec937bf11d1e975401995c83a6d3c6318b4b6983 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Thu, 20 Jul 2023 16:26:59 +0200 Subject: [PATCH] extract common field logic to header --- src/header.rs | 55 +++++++++++++ src/headers.rs | 27 ------ src/lib.rs | 4 +- src/mime/field.rs | 23 ++++++ src/mime/mod.rs | 1 + src/rfc5322/eager.rs | 103 ----------------------- src/rfc5322/field.rs | 52 +----------- src/rfc5322/lazy.rs | 190 ------------------------------------------- 8 files changed, 84 insertions(+), 371 deletions(-) create mode 100644 src/header.rs delete mode 100644 src/headers.rs create mode 100644 src/mime/field.rs create mode 100644 src/mime/mod.rs delete mode 100644 src/rfc5322/eager.rs delete mode 100644 src/rfc5322/lazy.rs diff --git a/src/header.rs b/src/header.rs new file mode 100644 index 0000000..7ae4841 --- /dev/null +++ b/src/header.rs @@ -0,0 +1,55 @@ +use nom::{ + IResult, + bytes::complete::{tag_no_case, tag, take_while1}, + character::complete::space0, + sequence::{pair, terminated, tuple}, +}; +use crate::text::misc_token::{Unstructured, unstructured}; + +#[derive(Debug, PartialEq)] +pub enum CompField<'a, T> { + Known(T), + Unknown(&'a [u8], Unstructured<'a>), + Bad(&'a [u8]), +} + +#[derive(Debug, PartialEq)] +pub struct CompFieldList<'a, T>(pub Vec>); +impl<'a, T> CompFieldList<'a, T> { + pub fn known(self) -> Vec { + self.0.into_iter().map(|v| match v { + CompField::Known(f) => Some(f), + _ => None, + }).flatten().collect() + } +} + +pub fn field_name<'a>(name: &'static [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> { + move |input| { + terminated( + tag_no_case(name), + tuple((space0, tag(b":"), space0)), + )(input) + } +} + +/// Optional field +/// +/// ```abnf +/// field = field-name ":" unstructured CRLF +/// field-name = 1*ftext +/// ftext = %d33-57 / ; Printable US-ASCII +/// %d59-126 ; characters not including +/// ; ":". +/// ``` +pub fn opt_field(input: &[u8]) -> IResult<&[u8], (&[u8], Unstructured)> { + pair( + terminated( + take_while1(|c| c >= 0x21 && c <= 0x7E && c != 0x3A), + tuple((space0, tag(b":"), space0)), + ), + unstructured, + )(input) +} + + diff --git a/src/headers.rs b/src/headers.rs deleted file mode 100644 index 5bf0661..0000000 --- a/src/headers.rs +++ /dev/null @@ -1,27 +0,0 @@ -use nom::{ - self, - combinator::{all_consuming, recognize}, - multi::many0, - sequence::terminated, - IResult, -}; - -use crate::text::whitespace::{foldable_line, line, obs_crlf}; - -pub fn headers(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { - let (body, hdrs) = segment(input)?; - let (_, fields) = fields(hdrs)?; - Ok((body, fields)) -} - -// -- part 1, segment -fn segment(input: &[u8]) -> IResult<&[u8], &[u8]> { - terminated(recognize(many0(line)), obs_crlf)(input) -} - -// -- part 2, isolate fields -fn fields(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { - let (rest, parsed) = all_consuming(many0(foldable_line))(input)?; - Ok((rest, parsed)) -} - diff --git a/src/lib.rs b/src/lib.rs index be418c4..9bf85f7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,5 @@ pub mod error; -//pub mod mime; -pub mod headers; pub mod text; +pub mod header; pub mod rfc5322; +//pub mod mime; diff --git a/src/mime/field.rs b/src/mime/field.rs new file mode 100644 index 0000000..fad8b88 --- /dev/null +++ b/src/mime/field.rs @@ -0,0 +1,23 @@ +#[derive(Debug, PartialEq)] +pub enum Field<'a> { + ContentType(Type<'a>), + ContentTransferEncoding(Mechanism<'a>), + ContentID(MessageId<'a>), + ContentDescription(Unstructured), +} + +fn correct_mime_field(input: &str) -> IResult<&str, MIMEField> { + use MIMEField::*; + field_name(input).map(|(rest, name)| { + ( + "", + match name.to_lowercase().as_ref() { + "content-type" => ContentType(Type(rest)), + "content-transfer-encoding" => ContentTransferEncoding(Mechanism(rest)), + "content-id" => ContentID(Identifier(rest)), + "content-description" => ContentDescription(Unstructured(rest)), + _ => Optional(name, Unstructured(rest)), + } + ) + }) +} diff --git a/src/mime/mod.rs b/src/mime/mod.rs new file mode 100644 index 0000000..5b8f2df --- /dev/null +++ b/src/mime/mod.rs @@ -0,0 +1 @@ +pub mod field; diff --git a/src/rfc5322/eager.rs b/src/rfc5322/eager.rs deleted file mode 100644 index 5e0ec8f..0000000 --- a/src/rfc5322/eager.rs +++ /dev/null @@ -1,103 +0,0 @@ -use crate::error::IMFError; -use crate::fragments::lazy::{Field as Lazy, MIMEField as LazyMIME}; -use crate::fragments::mime::{Mechanism, Type, Version}; -use crate::fragments::misc_token::{PhraseList, Unstructured}; -use crate::fragments::model::{AddressList, MailboxList, MailboxRef, MessageId, MessageIdList}; -use crate::fragments::trace::ReceivedLog; -use chrono::{DateTime, FixedOffset}; - -#[derive(Debug, PartialEq)] -pub enum Field<'a> { - // 3.6.1. The Origination Date Field - Date(DateTime), - - // 3.6.2. Originator Fields - From(MailboxList), - Sender(MailboxRef), - ReplyTo(AddressList), - - // 3.6.3. Destination Address Fields - To(AddressList), - Cc(AddressList), - Bcc(AddressList), - - // 3.6.4. Identification Fields - MessageID(MessageId<'a>), - InReplyTo(MessageIdList<'a>), - References(MessageIdList<'a>), - - // 3.6.5. Informational Fields - Subject(Unstructured), - Comments(Unstructured), - Keywords(PhraseList), - - // 3.6.6 Resent Fields (not implemented) - // 3.6.7 Trace Fields - Received(ReceivedLog<'a>), - ReturnPath(MailboxRef), - - // MIME RFC2045 - MIMEVersion(Version), - MIME(MIMEField<'a>), - - // 3.6.8. Optional Fields - Optional(&'a str, Unstructured), - - // None - Rescue(&'a str), -} - -#[derive(Debug, PartialEq)] -pub enum MIMEField<'a> { - ContentType(Type<'a>), - ContentTransferEncoding(Mechanism<'a>), - ContentID(MessageId<'a>), - ContentDescription(Unstructured), - Optional(&'a str, Unstructured), - Rescue(&'a str), -} - -impl<'a> TryFrom<&'a Lazy<'a>> for Field<'a> { - type Error = IMFError<'a>; - - fn try_from(l: &'a Lazy<'a>) -> Result { - use Field::*; - match l { - Lazy::Date(v) => v.try_into().map(|v| Date(v)), - Lazy::From(v) => v.try_into().map(|v| From(v)), - Lazy::Sender(v) => v.try_into().map(|v| Sender(v)), - Lazy::ReplyTo(v) => v.try_into().map(|v| ReplyTo(v)), - Lazy::To(v) => v.try_into().map(|v| To(v)), - Lazy::Cc(v) => v.try_into().map(|v| Cc(v)), - Lazy::Bcc(v) => v.try_into().map(|v| Bcc(v)), - Lazy::MessageID(v) => v.try_into().map(|v| MessageID(v)), - Lazy::InReplyTo(v) => v.try_into().map(|v| InReplyTo(v)), - Lazy::References(v) => v.try_into().map(|v| References(v)), - Lazy::Subject(v) => v.try_into().map(|v| Subject(v)), - Lazy::Comments(v) => v.try_into().map(|v| Comments(v)), - Lazy::Keywords(v) => v.try_into().map(|v| Keywords(v)), - Lazy::Received(v) => v.try_into().map(|v| Received(v)), - Lazy::ReturnPath(v) => v.try_into().map(|v| ReturnPath(v)), - Lazy::MIMEVersion(v) => v.try_into().map(|v| MIMEVersion(v)), - Lazy::MIME(v) => v.try_into().map(|v| MIME(v)), - Lazy::Optional(k, v) => v.try_into().map(|v| Optional(k, v)), - Lazy::Rescue(v) => Ok(Rescue(v)), - } - } -} - -impl<'a> TryFrom<&'a LazyMIME<'a>> for MIMEField<'a> { - type Error = IMFError<'a>; - - fn try_from(l: &'a LazyMIME<'a>) -> Result { - use MIMEField::*; - match l { - LazyMIME::ContentType(v) => v.try_into().map(|v| ContentType(v)), - LazyMIME::ContentTransferEncoding(v) => v.try_into().map(|v| ContentTransferEncoding(v)), - LazyMIME::ContentID(v) => v.try_into().map(|v| ContentID(v)), - LazyMIME::ContentDescription(v) => v.try_into().map(|v| ContentDescription(v)), - LazyMIME::Optional(k, v) => v.try_into().map(|v| Optional(k, v)), - LazyMIME::Rescue(v) => Ok(Rescue(v)), - } - } -} diff --git a/src/rfc5322/field.rs b/src/rfc5322/field.rs index 1826d1a..5573491 100644 --- a/src/rfc5322/field.rs +++ b/src/rfc5322/field.rs @@ -17,6 +17,7 @@ use crate::rfc5322::identification::{MessageID, MessageIDList, msg_id, msg_list} use crate::rfc5322::trace::{ReceivedLog, return_path, received_log}; use crate::rfc5322::mime::{Version, version}; use crate::rfc5322::message::Message; +use crate::header::*; use crate::text::misc_token::{Unstructured, PhraseList, unstructured, phrase_list}; #[derive(Debug, PartialEq)] @@ -61,24 +62,6 @@ impl<'a> FieldList<'a> { } } -#[derive(Debug, PartialEq)] -pub enum CompField<'a> { - Known(Field<'a>), - Unknown(&'a [u8], Unstructured<'a>), - Bad(&'a [u8]), -} - -#[derive(Debug, PartialEq)] -pub struct CompFieldList<'a>(pub Vec>); -impl<'a> CompFieldList<'a> { - pub fn message(self) -> Message<'a> { - Message::from_iter(self.0.into_iter().map(|v| match v { - CompField::Known(f) => Some(f), - _ => None, - }).flatten()) - } -} - pub fn field(input: &[u8]) -> IResult<&[u8], Field> { terminated(alt(( preceded(field_name(b"date"), map(date, Field::Date)), @@ -106,36 +89,7 @@ pub fn field(input: &[u8]) -> IResult<&[u8], Field> { )), obs_crlf)(input) } - -fn field_name<'a>(name: &'static [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> { - move |input| { - terminated( - tag_no_case(name), - tuple((space0, tag(b":"), space0)), - )(input) - } -} - -/// Optional field -/// -/// ```abnf -/// field = field-name ":" unstructured CRLF -/// field-name = 1*ftext -/// ftext = %d33-57 / ; Printable US-ASCII -/// %d59-126 ; characters not including -/// ; ":". -/// ``` -fn opt_field(input: &[u8]) -> IResult<&[u8], (&[u8], Unstructured)> { - pair( - terminated( - take_while1(|c| c >= 0x21 && c <= 0x7E && c != 0x3A), - tuple((space0, tag(b":"), space0)), - ), - unstructured, - )(input) -} - -pub fn header(input: &[u8]) -> IResult<&[u8], CompFieldList> { +pub fn header(input: &[u8]) -> IResult<&[u8], CompFieldList> { map(terminated(many0(alt(( map(field, CompField::Known), map(opt_field, |(k,v)| CompField::Unknown(k,v)), @@ -163,7 +117,7 @@ This is the plain text body of the message. Note the blank line between the header information and the body of the message."; assert_eq!( - map(header, |v| v.message())(fullmail), + map(header, |v| FieldList(v.known()).message())(fullmail), Ok(( &b"This is the plain text body of the message. Note the blank line\nbetween the header information and the body of the message."[..], Message { diff --git a/src/rfc5322/lazy.rs b/src/rfc5322/lazy.rs deleted file mode 100644 index 071b8eb..0000000 --- a/src/rfc5322/lazy.rs +++ /dev/null @@ -1,190 +0,0 @@ -use std::convert::From; - -use nom::{ - bytes::complete::{tag, take_while1}, - character::complete::space0, - sequence::{terminated, tuple}, - IResult, -}; - -#[derive(Debug, PartialEq)] -pub struct DateTime<'a>(pub &'a str); - -#[derive(Debug, PartialEq)] -pub struct MailboxList<'a>(pub &'a str); - -#[derive(Debug, PartialEq)] -pub struct Mailbox<'a>(pub &'a str); - -#[derive(Debug, PartialEq)] -pub struct AddressList<'a>(pub &'a str); - -#[derive(Debug, PartialEq)] -pub struct NullableAddressList<'a>(pub &'a str); - -#[derive(Debug, PartialEq)] -pub struct Identifier<'a>(pub &'a str); - -#[derive(Debug, PartialEq)] -pub struct IdentifierList<'a>(pub &'a str); - -#[derive(Debug, PartialEq)] -pub struct Unstructured<'a>(pub &'a str); - -#[derive(Debug, PartialEq)] -pub struct PhraseList<'a>(pub &'a str); - -#[derive(Debug, PartialEq)] -pub struct ReceivedLog<'a>(pub &'a str); - -#[derive(Debug, PartialEq)] -pub struct Path<'a>(pub &'a str); - -#[derive(Debug, PartialEq)] -pub struct Version<'a>(pub &'a str); - -#[derive(Debug, PartialEq)] -pub struct Type<'a>(pub &'a str); - -#[derive(Debug, PartialEq)] -pub struct Mechanism<'a>(pub &'a str); - -#[derive(Debug, PartialEq)] -pub enum Field<'a> { - // 3.6.1. The Origination Date Field - Date(DateTime<'a>), - - // 3.6.2. Originator Fields - From(MailboxList<'a>), - Sender(Mailbox<'a>), - ReplyTo(AddressList<'a>), - - // 3.6.3. Destination Address Fields - To(AddressList<'a>), - Cc(AddressList<'a>), - Bcc(NullableAddressList<'a>), - - // 3.6.4. Identification Fields - MessageID(Identifier<'a>), - InReplyTo(IdentifierList<'a>), - References(IdentifierList<'a>), - - // 3.6.5. Informational Fields - Subject(Unstructured<'a>), - Comments(Unstructured<'a>), - Keywords(PhraseList<'a>), - - // 3.6.6 Resent Fields (not implemented) - // 3.6.7 Trace Fields - Received(ReceivedLog<'a>), - ReturnPath(Mailbox<'a>), - - // MIME RFC 2045 - MIMEVersion(Version<'a>), - MIME(MIMEField<'a>), - - // 3.6.8. Optional Fields - Optional(&'a str, Unstructured<'a>), - - // None - Rescue(&'a str), -} - -impl<'a> From<&'a str> for Field<'a> { - fn from(input: &'a str) -> Self { - match correct_field(input) { - Ok((_, field)) => field, - Err(_) => Field::Rescue(input), - } - } -} - -#[derive(Debug, PartialEq)] -pub enum MIMEField<'a> { - ContentType(Type<'a>), - ContentTransferEncoding(Mechanism<'a>), - ContentID(Identifier<'a>), - ContentDescription(Unstructured<'a>), - - Optional(&'a str, Unstructured<'a>), - Rescue(&'a str), -} -impl<'a> From<&'a str> for MIMEField<'a> { - fn from(input: &'a str) -> Self { - match correct_mime_field(input) { - Ok((_, field)) => field, - Err(_) => MIMEField::Rescue(input), - } - } -} - -/// Optional field -/// -/// ```abnf -/// field = field-name ":" unstructured CRLF -/// field-name = 1*ftext -/// ftext = %d33-57 / ; Printable US-ASCII -/// %d59-126 ; characters not including -/// ; ":". -/// ``` -fn field_name(input: &str) -> IResult<&str, &str> { - terminated( - take_while1(|c| c >= '\x21' && c <= '\x7E' && c != '\x3A'), - tuple((space0, tag(":"), space0)), - )(input) -} - -fn correct_field(input: &str) -> IResult<&str, Field> { - use Field::*; - field_name(input).map(|(rest, name)| { - ( - "", - match name.to_lowercase().as_ref() { - "date" => Date(DateTime(rest)), - - "from" => From(MailboxList(rest)), - "sender" => Sender(Mailbox(rest)), - "reply-to" => ReplyTo(AddressList(rest)), - - "to" => To(AddressList(rest)), - "cc" => Cc(AddressList(rest)), - "bcc" => Bcc(NullableAddressList(rest)), - - "message-id" => MessageID(Identifier(rest)), - "in-reply-to" => InReplyTo(IdentifierList(rest)), - "references" => References(IdentifierList(rest)), - - "subject" => Subject(Unstructured(rest)), - "comments" => Comments(Unstructured(rest)), - "keywords" => Keywords(PhraseList(rest)), - - "return-path" => ReturnPath(Mailbox(rest)), - "received" => Received(ReceivedLog(rest)), - - "content-type" => MIME(MIMEField::ContentType(Type(rest))), - "content-transfer-encoding" => MIME(MIMEField::ContentTransferEncoding(Mechanism(rest))), - "content-id" => MIME(MIMEField::ContentID(Identifier(rest))), - "content-description" => MIME(MIMEField::ContentDescription(Unstructured(rest))), - - "mime-version" => MIMEVersion(Version(rest)), - _ => Optional(name, Unstructured(rest)), - }, - ) - }) -} - -fn correct_mime_field(input: &str) -> IResult<&str, MIMEField> { - use MIMEField::*; - field_name(input).map(|(rest, name)| { - ( - "", - match name.to_lowercase().as_ref() { - "content-type" => ContentType(Type(rest)), - "content-transfer-encoding" => ContentTransferEncoding(Mechanism(rest)), - "content-id" => ContentID(Identifier(rest)), - "content-description" => ContentDescription(Unstructured(rest)), - _ => Optional(name, Unstructured(rest)), - } - ) - }) -}