From b7ad47b41e91ced222e58be675f741c8db1f3f67 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Thu, 20 Jul 2023 09:41:10 +0200 Subject: [PATCH] working field parsing --- src/rfc5322/address.rs | 4 +- src/rfc5322/field.rs | 96 +++++++++++++++++++++-------------- src/rfc5322/identification.rs | 13 ++--- src/rfc5322/trace.rs | 2 +- src/text/misc_token.rs | 31 ++--------- 5 files changed, 72 insertions(+), 74 deletions(-) diff --git a/src/rfc5322/address.rs b/src/rfc5322/address.rs index cd18fae..3ca7863 100644 --- a/src/rfc5322/address.rs +++ b/src/rfc5322/address.rs @@ -1,7 +1,7 @@ use nom::{ branch::alt, bytes::complete::tag, - combinator::{into, opt}, + combinator::{into, map, opt}, multi::separated_list1, sequence::tuple, IResult, @@ -140,7 +140,7 @@ pub fn address_list_cfws(input: &[u8]) -> IResult<&[u8], Vec> { Ok((input, vec![])) } -pub fn nullable_address_list(input: &[u8]) -> IResult<&[u8], Vec<>> { +pub fn nullable_address_list(input: &[u8]) -> IResult<&[u8], Vec> { map( opt(alt((address_list, address_list_cfws))), |v| v.unwrap_or(vec![]), diff --git a/src/rfc5322/field.rs b/src/rfc5322/field.rs index e3c974c..5603c1c 100644 --- a/src/rfc5322/field.rs +++ b/src/rfc5322/field.rs @@ -1,32 +1,40 @@ +use chrono::{DateTime, FixedOffset}; use nom::{ IResult, + branch::alt, + bytes::complete::{tag, tag_no_case, take_while1}, + character::complete::space0, + combinator::map, + sequence::{pair, preceded, terminated, tuple}, }; -use crate::rfc5322::address::{MailboxList, AddressList}; -use crate::rfc5322::mailbox::MailboxRef; -use crate::rfc5322::identification::{MessageId, MessageIdList}; -use crate::rfc5322::trace::ReceivedLog; -use crate::text::misc_token::{Unstructured, PhraseList}; +use crate::rfc5322::address::{AddressList, address_list, nullable_address_list, mailbox_list}; +use crate::rfc5322::datetime::section as date; +use crate::rfc5322::mailbox::{MailboxRef, MailboxList, AddrSpec, mailbox}; +use crate::rfc5322::identification::{MessageID, MessageIDList, msg_id, msg_list}; +use crate::rfc5322::trace::{ReceivedLog, return_path, received_log}; +use crate::rfc5322::mime::{Version, version}; +use crate::text::misc_token::{Unstructured, PhraseList, unstructured, phrase_list}; #[derive(Debug, PartialEq)] pub enum Field<'a> { // 3.6.1. The Origination Date Field - Date(DateTime<'a>), + Date(Option>), // 3.6.2. Originator Fields From(MailboxList<'a>), - Sender(Mailbox<'a>), + Sender(MailboxRef<'a>), ReplyTo(AddressList<'a>), // 3.6.3. Destination Address Fields To(AddressList<'a>), Cc(AddressList<'a>), - Bcc(NullableAddressList<'a>), + Bcc(AddressList<'a>), // 3.6.4. Identification Fields - MessageID(Identifier<'a>), - InReplyTo(IdentifierList<'a>), - References(IdentifierList<'a>), + MessageID(MessageID<'a>), + InReplyTo(MessageIDList<'a>), + References(MessageIDList<'a>), // 3.6.5. Informational Fields Subject(Unstructured<'a>), @@ -38,39 +46,46 @@ pub enum Field<'a> { Received(ReceivedLog<'a>), ReturnPath(Option>), - MIMEVersion(Version<'a>), - Optional(&'a [u8], Unstructured<'a>), + MIMEVersion(Version), } -pub fn field(input: &[u8]) -> IResult<&[u8], Field<'a>> { - let (name, rest) = field_name(input)?; - match name.to_lowercase().as_ref() { - "date" => datetime::section(rest).map(Field::Date), - "from" => mailbox_list(rest).map(Field::From), - "sender" => mailbox(rest).map(Field::Sender), - "reply-to" => address_list(rest).map(Field::ReplyTo), +pub fn field(input: &[u8]) -> IResult<&[u8], Field> { + alt(( + preceded(field_name(b"date"), map(date, Field::Date)), - "to" => address_list(rest).map(Field::To), - "cc" => address_list(rest).map(Field::Cc), - "bcc" => nullable_address_list(rest).map(Field::Bcc), + preceded(field_name(b"from"), map(mailbox_list, Field::From)), + preceded(field_name(b"sender"), map(mailbox, Field::Sender)), + preceded(field_name(b"reply-to"), map(address_list, Field::ReplyTo)), - "message-id" => msg_id(rest).map(Field::MessageID), - "in-reply-to" => msg_list(rest).map(Field::InReplyTo), - "references" => msg_list(rest).map(Field::References), + preceded(field_name(b"to"), map(address_list, Field::To)), + preceded(field_name(b"cc"), map(address_list, Field::Cc)), + preceded(field_name(b"bcc"), map(nullable_address_list, Field::Bcc)), - "subject" => unstructured(rest).map(Field::Subject), - "comments" => unstructured(rest).map(Field::Comments), - "keywords" => phrase_list(rest).map(Field::Keywords), + preceded(field_name(b"message-id"), map(msg_id, Field::MessageID)), + preceded(field_name(b"in-reply-to"), map(msg_list, Field::InReplyTo)), + preceded(field_name(b"references"), map(msg_list, Field::References)), - "return-path" => return_path(rest).map(Field::ReturnPath), - "received" => received_log(rest).map(Field::ReceivedLog), + preceded(field_name(b"subject"), map(unstructured, Field::Subject)), + preceded(field_name(b"comments"), map(unstructured, Field::Comments)), + preceded(field_name(b"keywords"), map(phrase_list, Field::Keywords)), - "mime-version" => version(rest).map(Field::MIMEVersion), - _ => unstructured(rest).map(|v| Field::Optional(name, v)), + preceded(field_name(b"return-path"), map(return_path, Field::ReturnPath)), + preceded(field_name(b"received"), map(received_log, Field::Received)), + + preceded(field_name(b"mime-version"), map(version, Field::MIMEVersion)), + ))(input) +} + + +fn field_name<'a>(name: &'static [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> { + move |input| { + terminated( + tag_no_case(name), + tuple((space0, tag(b":"), space0)), + )(input) } } - /// Optional field /// /// ```abnf @@ -80,9 +95,12 @@ pub fn field(input: &[u8]) -> IResult<&[u8], Field<'a>> { /// %d59-126 ; characters not including /// ; ":". /// ``` -fn field_name(input: &[u8]) -> IResult<&[u8], &[u8]> { - terminated( - take_while1(|c| c >= 0x21 && c <= 0x7E && c != 0x3A), - tuple((space0, tag(b":"), space0)), +fn opt_field(input: &[u8]) -> IResult<&[u8], (&[u8], Unstructured)> { + pair( + terminated( + take_while1(|c| c >= 0x21 && c <= 0x7E && c != 0x3A), + tuple((space0, tag(b":"), space0)), + ), + unstructured, )(input) -} +} diff --git a/src/rfc5322/identification.rs b/src/rfc5322/identification.rs index 6164e9e..3d96b69 100644 --- a/src/rfc5322/identification.rs +++ b/src/rfc5322/identification.rs @@ -2,6 +2,7 @@ use nom::{ branch::alt, bytes::complete::{tag, take_while}, combinator::opt, + multi::many1, sequence::{delimited, pair, tuple}, IResult, }; @@ -12,27 +13,27 @@ use crate::text::words::dot_atom_text; #[derive(Debug, PartialEq)] -pub struct MessageId<'a> { +pub struct MessageID<'a> { pub left: &'a [u8], pub right: &'a [u8], } -pub type MessageIdList<'a> = Vec>; +pub type MessageIDList<'a> = Vec>; /// Message identifier /// /// ```abnf /// msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS] /// ``` -pub fn msg_id(input: &[u8]) -> IResult<&[u8], MessageId> { +pub fn msg_id(input: &[u8]) -> IResult<&[u8], MessageID> { let (input, (left, _, right)) = delimited( pair(opt(cfws), tag("<")), tuple((id_left, tag("@"), id_right)), pair(tag(">"), opt(cfws)), )(input)?; - Ok((input, MessageId { left, right })) + Ok((input, MessageID { left, right })) } -pub fn msg_list(input: &[u8]) -> IResult<&[u8], MessageIdList> { +pub fn msg_list(input: &[u8]) -> IResult<&[u8], MessageIDList> { many1(msg_id)(input) } @@ -60,7 +61,7 @@ mod tests { msg_id(b"<5678.21-Nov-1997@example.com>"), Ok(( &b""[..], - MessageId { + MessageID { left: &b"5678.21-Nov-1997"[..], right: &b"example.com"[..], } diff --git a/src/rfc5322/trace.rs b/src/rfc5322/trace.rs index 6465cbc..55947be 100644 --- a/src/rfc5322/trace.rs +++ b/src/rfc5322/trace.rs @@ -85,7 +85,7 @@ mod tests { for ; Tue, 13 Jun 2023 19:01:08 +0000"#.as_bytes(); assert_eq!( - received_body(hdrs), + received_log(hdrs), Ok(( &b""[..], ReceivedLog { diff --git a/src/text/misc_token.rs b/src/text/misc_token.rs index c8a4291..f6983d8 100644 --- a/src/text/misc_token.rs +++ b/src/text/misc_token.rs @@ -1,9 +1,9 @@ use nom::{ branch::alt, - bytes::complete::take_while1, + bytes::complete::{tag, take_while1}, character::complete::space0, combinator::{map, opt}, - multi::{many0, many1}, + multi::{many0, many1, separated_list1}, sequence::{preceded}, IResult, }; @@ -17,32 +17,11 @@ use crate::text::{ }; #[derive(Debug, PartialEq, Default)] -pub struct PhraseList(pub Vec); -pub fn phrase_list(input: &'a [u8]) -> IResult<&[u8], PhraseList> { - separated_list1(tag(","), phrase)(input) +pub struct PhraseList<'a>(pub Vec>); +pub fn phrase_list(input: &[u8]) -> IResult<&[u8], PhraseList> { + map(separated_list1(tag(","), phrase), PhraseList)(input) } -/* -impl<'a> TryFrom<&'a lazy::Unstructured<'a>> for Unstructured { - type Error = IMFError<'a>; - - fn try_from(input: &'a lazy::Unstructured<'a>) -> Result { - unstructured(input.0) - .map(|(_, v)| Unstructured(v)) - .map_err(|e| IMFError::Unstructured(e)) - } -} - -impl<'a> TryFrom<&'a lazy::PhraseList<'a>> for PhraseList { - type Error = IMFError<'a>; - - fn try_from(p: &'a lazy::PhraseList<'a>) -> Result { - separated_list1(tag(","), phrase)(p.0) - .map(|(_, q)| PhraseList(q)) - .map_err(|e| IMFError::PhraseList(e)) - } -}*/ - #[derive(Debug, PartialEq)] pub enum Word<'a> { Quoted(QuotedString<'a>),