diff --git a/src/header.rs b/src/header.rs index 084491b..88bbde8 100644 --- a/src/header.rs +++ b/src/header.rs @@ -1,38 +1,31 @@ - -use crate::text::misc_token::{unstructured, Unstructured}; -use crate::text::whitespace::{foldable_line, obs_crlf}; use nom::{ branch::alt, - bytes::complete::{tag, tag_no_case, take_while1}, + bytes::complete::{tag, take_while1}, character::complete::space0, - combinator::{into, map}, - multi::{fold_many0, many0}, + combinator::{into, recognize}, + multi::many0, sequence::{pair, terminated, tuple}, IResult, }; -#[derive(Debug, PartialEq)] -pub enum CompField<'a, T> { - Known(T), - Unknown(Kv<'a>), - Bad(&'a [u8]), -} +use crate::text::whitespace::{foldable_line, obs_crlf}; +use crate::text::misc_token::unstructured; #[derive(Debug, PartialEq, Clone)] -pub struct Kv<'a>(pub &'a [u8], pub Unstructured<'a>); -impl<'a> From<(&'a [u8], Unstructured<'a>)> for Kv<'a> { - fn from(pair: (&'a [u8], Unstructured<'a>)) -> Self { +pub struct Kv2<'a>(pub &'a [u8], pub &'a [u8]); +impl<'a> From<(&'a [u8], &'a [u8])> for Kv2<'a> { + fn from(pair: (&'a [u8], &'a [u8])) -> Self { Self(pair.0, pair.1) } } #[derive(Debug, PartialEq, Clone)] pub enum Field<'a> { - Good(Kv<'a>), + Good(Kv2<'a>), Bad(&'a [u8]), } -impl<'a> From> for Field<'a> { - fn from(kv: Kv<'a>) -> Self { +impl<'a> From> for Field<'a> { + fn from(kv: Kv2<'a>) -> Self { Self::Good(kv) } } @@ -47,7 +40,7 @@ pub fn header_kv(input: &[u8]) -> IResult<&[u8], Vec> { terminated( many0( alt(( - into(opt_field), + into(correct_field), into(foldable_line), )) ), @@ -55,37 +48,6 @@ pub fn header_kv(input: &[u8]) -> IResult<&[u8], Vec> { )(input) } - -pub fn header<'a, T>( - fx: impl Fn(&'a [u8]) -> IResult<&'a [u8], T> + Copy, -) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], (Vec::, Vec::, Vec<&'a [u8]>)> { - move |input| { - terminated( - fold_many0( - alt(( - map(fx, CompField::Known), - map(opt_field, CompField::Unknown), - map(foldable_line, CompField::Bad), - )), - || (Vec::::new(), Vec::::new(), Vec::<&'a [u8]>::new()), - |(mut known, mut unknown, mut bad), item| { - match item { - CompField::Known(v) => known.push(v), - CompField::Unknown(v) => unknown.push(v), - CompField::Bad(v) => bad.push(v), - }; - (known, unknown, bad) - } - ), - obs_crlf, - )(input) - } -} - -pub fn field_name<'a>(name: &'static [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> { - move |input| terminated(tag_no_case(name), tuple((space0, tag(b":"), space0)))(input) -} - pub fn field_any(input: &[u8]) -> IResult<&[u8], &[u8]> { terminated( take_while1(|c| (0x21..=0x7E).contains(&c) && c != 0x3A), @@ -102,11 +64,11 @@ pub fn field_any(input: &[u8]) -> IResult<&[u8], &[u8]> { /// %d59-126 ; characters not including /// ; ":". /// ``` -pub fn opt_field(input: &[u8]) -> IResult<&[u8], Kv> { +pub fn correct_field(input: &[u8]) -> IResult<&[u8], Kv2> { terminated( into(pair( field_any, - unstructured, + recognize(unstructured), )), obs_crlf, )(input) diff --git a/src/imf/field.rs b/src/imf/field.rs index 976bf4f..17a48f9 100644 --- a/src/imf/field.rs +++ b/src/imf/field.rs @@ -1,12 +1,7 @@ use chrono::{DateTime, FixedOffset}; -use nom::{ - branch::alt, - combinator::map, - sequence::{preceded, terminated}, - IResult, -}; +use nom::combinator::map; -use crate::header::{field_name}; +use crate::header; use crate::imf::address::{address_list, mailbox_list, nullable_address_list, AddressList}; use crate::imf::datetime::section as date; use crate::imf::identification::{msg_id, msg_list, MessageID, MessageIDList}; @@ -14,7 +9,6 @@ use crate::imf::mailbox::{mailbox, AddrSpec, MailboxList, MailboxRef}; use crate::imf::mime::{version, Version}; use crate::imf::trace::{received_log, return_path, ReceivedLog}; use crate::text::misc_token::{phrase_list, unstructured, PhraseList, Unstructured}; -use crate::text::whitespace::obs_crlf; #[derive(Debug, PartialEq)] pub enum Field<'a> { @@ -48,36 +42,32 @@ pub enum Field<'a> { MIMEVersion(Version), } -/*impl<'a> From> for Field<'a> { - fn from(raw: header::Field -}*/ +impl<'a> TryFrom<&header::Field<'a>> for Field<'a> { + type Error = (); + fn try_from(f: &header::Field<'a>) -> Result { + let content = match f { + header::Field::Good(header::Kv2(key, value)) => match key.to_ascii_lowercase().as_slice() { + b"date" => map(date, Field::Date)(value), + b"from" => map(mailbox_list, Field::From)(value), + b"sender" => map(mailbox, Field::Sender)(value), + b"reply-to" => map(address_list, Field::ReplyTo)(value), + b"to" => map(address_list, Field::To)(value), + b"cc" => map(address_list, Field::Cc)(value), + b"bcc" => map(nullable_address_list, Field::Bcc)(value), + b"message-id" => map(msg_id, Field::MessageID)(value), + b"in-reply-to" => map(msg_list, Field::InReplyTo)(value), + b"references" => map(msg_list, Field::References)(value), + b"subject" => map(unstructured, Field::Subject)(value), + b"comments" => map(unstructured, Field::Comments)(value), + b"keywords" => map(phrase_list, Field::Keywords)(value), + b"return-path" => map(return_path, Field::ReturnPath)(value), + b"received" => map(received_log, Field::Received)(value), + b"mime-version" => map(version, Field::MIMEVersion)(value), + _ => return Err(()), + }, + _ => return Err(()), + }; -pub fn field(input: &[u8]) -> IResult<&[u8], Field> { - terminated( - alt(( - preceded(field_name(b"date"), map(date, Field::Date)), - preceded(field_name(b"from"), map(mailbox_list, Field::From)), - preceded(field_name(b"sender"), map(mailbox, Field::Sender)), - preceded(field_name(b"reply-to"), map(address_list, Field::ReplyTo)), - preceded(field_name(b"to"), map(address_list, Field::To)), - preceded(field_name(b"cc"), map(address_list, Field::Cc)), - preceded(field_name(b"bcc"), map(nullable_address_list, Field::Bcc)), - preceded(field_name(b"message-id"), map(msg_id, Field::MessageID)), - preceded(field_name(b"in-reply-to"), map(msg_list, Field::InReplyTo)), - preceded(field_name(b"references"), map(msg_list, Field::References)), - preceded(field_name(b"subject"), map(unstructured, Field::Subject)), - preceded(field_name(b"comments"), map(unstructured, Field::Comments)), - preceded(field_name(b"keywords"), map(phrase_list, Field::Keywords)), - preceded( - field_name(b"return-path"), - map(return_path, Field::ReturnPath), - ), - preceded(field_name(b"received"), map(received_log, Field::Received)), - preceded( - field_name(b"mime-version"), - map(version, Field::MIMEVersion), - ), - )), - obs_crlf, - )(input) + content.map(|(_, content)| content).or(Err(())) + } } diff --git a/src/imf/mod.rs b/src/imf/mod.rs index 51f1268..bd7e24a 100644 --- a/src/imf/mod.rs +++ b/src/imf/mod.rs @@ -13,14 +13,13 @@ use nom::{ IResult, }; -use crate::header::header; +use crate::header; use crate::imf::address::AddressRef; -use crate::imf::field::{field, Field}; +use crate::imf::field::Field; use crate::imf::identification::MessageID; use crate::imf::mailbox::{AddrSpec, MailboxRef}; use crate::imf::mime::Version; use crate::imf::trace::ReceivedLog; -use crate::header; use crate::text::misc_token::{PhraseList, Unstructured}; use chrono::{DateTime, FixedOffset}; @@ -56,19 +55,6 @@ pub struct Imf<'a> { // MIME pub mime_version: Option, - - // Junk - pub header_ext: Vec>, - pub header_bad: Vec<&'a [u8]>, -} - -impl<'a> Imf<'a> { - pub fn with_opt(mut self, opt: Vec>) -> Self { - self.header_ext = opt; self - } - pub fn with_bad(mut self, bad: Vec<&'a [u8]>) -> Self { - self.header_bad = bad; self - } } //@FIXME min and max limits are not enforced, @@ -100,11 +86,8 @@ impl<'a> FromIterator> for Imf<'a> { } pub fn imf(input: &[u8]) -> IResult<&[u8], Imf> { - map(header(field), |(known, unknown, bad)| { - let mut imf = Imf::from_iter(known); - imf.header_ext = unknown; - imf.header_bad = bad; - imf + map(header::header_kv, |fields| { + fields.iter().flat_map(Field::try_from).into_iter().collect::() })(input) } diff --git a/src/mime/field.rs b/src/mime/field.rs index b11210d..2489516 100644 --- a/src/mime/field.rs +++ b/src/mime/field.rs @@ -1,16 +1,10 @@ -use nom::{ - branch::alt, - combinator::map, - sequence::{preceded, terminated}, - IResult, -}; +use nom::combinator::map; -use crate::header::{field_name}; +use crate::header; use crate::imf::identification::{msg_id, MessageID}; use crate::mime::mechanism::{mechanism, Mechanism}; use crate::mime::r#type::{naive_type, NaiveType}; use crate::text::misc_token::{unstructured, Unstructured}; -use crate::text::whitespace::obs_crlf; #[derive(Debug, PartialEq)] pub enum Content<'a> { @@ -47,38 +41,35 @@ impl<'a> Content<'a> { } } -/* -pub fn to_mime<'a, T: WithDefaultType>(list: Vec>) -> AnyMIMEWithDefault<'a, T> { - list.into_iter().collect::>() -}*/ +impl<'a> TryFrom<&header::Field<'a>> for Content<'a> { + type Error = (); + fn try_from(f: &header::Field<'a>) -> Result { + let content = match f { + header::Field::Good(header::Kv2(key, value)) => match key.to_ascii_lowercase().as_slice() { + b"content-type" => map(naive_type, Content::Type)(value), + b"content-transfer-encoding" => map(mechanism, Content::TransferEncoding)(value), + b"content-id" => map(msg_id, Content::ID)(value), + b"content-description" => map(unstructured, Content::Description)(value), + _ => return Err(()), + }, + _ => return Err(()), + }; -pub fn content(input: &[u8]) -> IResult<&[u8], Content> { - terminated( - alt(( - preceded(field_name(b"content-type"), map(naive_type, Content::Type)), - preceded( - field_name(b"content-transfer-encoding"), - map(mechanism, Content::TransferEncoding), - ), - preceded(field_name(b"content-id"), map(msg_id, Content::ID)), - preceded( - field_name(b"content-description"), - map(unstructured, Content::Description), - ), - )), - obs_crlf, - )(input) + //@TODO check that the full value is parsed, otherwise maybe log an error ?! + content.map(|(_, content)| content).or(Err(())) + } } #[cfg(test)] mod tests { use super::*; - use crate::header::{header}; - use crate::mime::charset::EmailCharset; + use crate::header; + //use crate::mime::charset::EmailCharset; use crate::mime::r#type::*; use crate::text::misc_token::MIMEWord; use crate::text::quoted::QuotedString; + /* #[test] fn test_content_type() { let (rest, content) = @@ -96,7 +87,7 @@ mod tests { } else { panic!("Expected Content::Type, got {:?}", content); } - } + }*/ #[test] fn test_header() { @@ -116,7 +107,7 @@ This is a multipart message. .as_bytes(); assert_eq!( - map(header(content), |(k, _, _)| k)(fullmail), + map(header::header_kv, |k| k.iter().flat_map(Content::try_from).collect())(fullmail), Ok(( &b"This is a multipart message.\n\n"[..], vec![ diff --git a/src/mime/mod.rs b/src/mime/mod.rs index 2dd0c21..b7de69d 100644 --- a/src/mime/mod.rs +++ b/src/mime/mod.rs @@ -61,8 +61,7 @@ pub struct NaiveMIME<'a> { pub transfer_encoding: Mechanism<'a>, pub id: Option>, pub description: Option>, - pub header_ext: Vec>, - pub header_bad: Vec<&'a [u8]>, + pub fields: Vec>, pub raw: &'a [u8], } @@ -84,11 +83,8 @@ impl<'a> FromIterator> for NaiveMIME<'a> { } impl<'a> NaiveMIME<'a> { - pub fn with_opt(mut self, opt: Vec>) -> Self { - self.header_ext = opt; self - } - pub fn with_bad(mut self, bad: Vec<&'a [u8]>) -> Self { - self.header_bad = bad; self + pub fn with_fields(mut self, fields: Vec>) -> Self { + self.fields = fields; self } pub fn with_raw(mut self, raw: &'a [u8]) -> Self { self.raw = raw; self diff --git a/src/part/composite.rs b/src/part/composite.rs index 373ffd8..bd7231b 100644 --- a/src/part/composite.rs +++ b/src/part/composite.rs @@ -1,9 +1,9 @@ use nom::IResult; -use crate::header::{header, self}; +use crate::header; use crate::imf; use crate::mime; -use crate::part::{self, AnyPart, field::MixedField}; +use crate::part::{self, AnyPart}; use crate::text::boundary::{boundary, Delimiter}; use crate::pointers; @@ -73,15 +73,19 @@ pub fn multipart<'a>( }; // parse mime headers, otherwise pick default mime - let (input, naive_mime) = match header(mime::field::content)(input) { - Ok((input_eom, (known, unknown, bad))) => { + let (input, naive_mime) = match header::header_kv(input) { + Ok((input_eom, fields)) => { let raw_hdrs = pointers::parsed(input, input_eom); - let mime = known + let mime = fields + .iter() + .flat_map(mime::field::Content::try_from) .into_iter() - .collect::() - .with_opt(unknown) - .with_bad(bad) + .collect::(); + + let mime = mime + .with_fields(fields) .with_raw(raw_hdrs); + (input_eom, mime) }, Err(_) => (input, mime::NaiveMIME::default()), @@ -127,23 +131,21 @@ pub fn message<'a>( let orig = input; // parse header fields - let (input, (known, unknown, bad)): (_, (Vec::, Vec, Vec<&[u8]>)) = - header(part::field::mixed_field)(input)?; + let (input, headers) = header::header_kv(input)?; // extract raw parts 1/2 let raw_headers = pointers::parsed(orig, input); let body_orig = input; + //--------------- // aggregate header fields - let (naive_mime, imf) = part::field::sections(known); - - // attach bad headers to imf - let imf = imf.with_opt(unknown).with_bad(bad); + let (naive_mime, imf) = part::field::split_and_build(&headers); // interpret headers to choose a mime type - let in_mime = naive_mime.with_raw(raw_headers).to_interpreted::().into(); + let in_mime = naive_mime.with_fields(headers).with_raw(raw_headers).to_interpreted::().into(); + //--------------- - // parse this mimetype + // parse a part following this mime specification let (input, part) = part::anypart(in_mime)(input)?; // extract raw parts 2/2 @@ -459,15 +461,6 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
right: &b"www.grrrndzero.org"[..], }), mime_version: Some(imf::mime::Version { major: 1, minor: 0}), - header_ext: vec![ - header::Kv(&b"X-Unknown"[..], Unstructured(vec![ - UnstrToken::Plain(&b"something"[..]), - UnstrToken::Plain(&b"something"[..]), - ])) - ], - header_bad: vec![ - &b"Bad entry\n on multiple lines\n"[..], - ], ..imf::Imf::default() }, child: Box::new(AnyPart::Mult(Multipart { diff --git a/src/part/field.rs b/src/part/field.rs index 3f4a6dc..2717efd 100644 --- a/src/part/field.rs +++ b/src/part/field.rs @@ -1,50 +1,21 @@ -use nom::{branch::alt, combinator::map, IResult}; - +use crate::header; use crate::imf; use crate::mime; -pub enum MixedField<'a> { - MIME(mime::field::Content<'a>), - IMF(imf::field::Field<'a>), -} -#[allow(dead_code)] -impl<'a> MixedField<'a> { - pub fn mime(&self) -> Option<&mime::field::Content<'a>> { - match self { - Self::MIME(v) => Some(v), - _ => None, +pub fn split_and_build<'a>(v: &Vec>) -> (mime::NaiveMIME<'a>, imf::Imf<'a>) { + let (mimev, imfv) = v.iter().fold( + (Vec::::new(), Vec::::new()), + |(mut mime, mut imf), f| { + if let Ok(m) = mime::field::Content::try_from(f) { + mime.push(m); + } else if let Ok(i) = imf::field::Field::try_from(f) { + imf.push(i); + } + (mime, imf) } - } - pub fn to_mime(self) -> Option> { - match self { - Self::MIME(v) => Some(v), - _ => None, - } - } - pub fn imf(&self) -> Option<&imf::field::Field<'a>> { - match self { - Self::IMF(v) => Some(v), - _ => None, - } - } - pub fn to_imf(self) -> Option> { - match self { - Self::IMF(v) => Some(v), - _ => None, - } - } -} - -pub fn sections<'a>(list: Vec>) -> (mime::NaiveMIME<'a>, imf::Imf<'a>) { - let (v1, v2): (Vec, Vec<_>) = list.into_iter().partition(|v| v.mime().is_some()); - let mime = v1.into_iter().flat_map(MixedField::to_mime).collect::(); - let imf = v2.into_iter().flat_map(MixedField::to_imf).collect::(); - (mime, imf) -} - -pub fn mixed_field(input: &[u8]) -> IResult<&[u8], MixedField> { - alt(( - map(mime::field::content, MixedField::MIME), - map(imf::field::field, MixedField::IMF), - ))(input) + ); + + let fmime = mimev.into_iter().collect::(); + let fimf = imfv.into_iter().collect::(); + (fmime, fimf) }