diff --git a/src/header.rs b/src/header.rs index bc9e300..ccb0b2f 100644 --- a/src/header.rs +++ b/src/header.rs @@ -28,8 +28,8 @@ impl<'a, T> CompFieldList<'a, T> { } } -pub fn header<'a, T>(fx: impl Fn(&'a [u8]) -> IResult<&[u8], T> + Copy) - -> impl Fn(&'a [u8]) -> IResult<&[u8], CompFieldList> +pub fn header<'a, T>(fx: impl Fn(&'a [u8]) -> IResult<&'a [u8], T> + Copy) + -> impl Fn(&'a [u8]) -> IResult<&'a [u8], CompFieldList> { move |input| map(terminated(many0(alt(( map(fx, CompField::Known), diff --git a/src/lib.rs b/src/lib.rs index 4740fc6..c74f438 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,3 +3,4 @@ pub mod text; pub mod header; pub mod rfc5322; pub mod mime; +pub mod part; diff --git a/src/mime/charset.rs b/src/mime/charset.rs index bb49765..02ab8e1 100644 --- a/src/mime/charset.rs +++ b/src/mime/charset.rs @@ -7,7 +7,7 @@ use encoding_rs::Encoding; /// using encoding_rs datastructures directly would lead to a loss of information. /// https://www.iana.org/assignments/character-sets/character-sets.xhtml #[allow(non_camel_case_types)] -#[derive(Debug, PartialEq, Default)] +#[derive(Debug, PartialEq, Default, Clone)] pub enum EmailCharset { #[default] US_ASCII, diff --git a/src/mime/mime.rs b/src/mime/mime.rs index bbeea0b..d8db0f6 100644 --- a/src/mime/mime.rs +++ b/src/mime/mime.rs @@ -3,12 +3,20 @@ use crate::rfc5322::identification::MessageID; use crate::text::misc_token::Unstructured; use crate::mime::field::Content; use crate::mime::r#type::{AnyType, self as ctype}; //Multipart, Message, Text, Binary}; - // + +#[derive(Debug, PartialEq, Clone)] pub struct Multipart<'a>(pub ctype::Multipart, Generic<'a>); + +#[derive(Debug, PartialEq, Clone)] pub struct Message<'a>(pub ctype::Message, Generic<'a>); + +#[derive(Debug, PartialEq, Clone, Default)] pub struct Text<'a>(pub ctype::Text, Generic<'a>); + +#[derive(Debug, PartialEq, Clone)] pub struct Binary<'a>(pub ctype::Binary, Generic<'a>); +#[derive(Debug, PartialEq, Clone)] pub enum AnyMIME<'a> { Mult(Multipart<'a>), Msg(Message<'a>), @@ -47,7 +55,7 @@ impl<'a> FromIterator> for AnyMIME<'a> { } } -#[derive(Debug, PartialEq, Default)] +#[derive(Debug, PartialEq, Default, Clone)] pub struct Generic<'a> { pub transfer_encoding: Mechanism<'a>, pub id: Option>, diff --git a/src/mime/type.rs b/src/mime/type.rs index fbb622a..e038a17 100644 --- a/src/mime/type.rs +++ b/src/mime/type.rs @@ -68,7 +68,7 @@ impl<'a> From<&'a NaiveType<'a>> for AnyType { } } -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] pub struct Multipart { pub subtype: MultipartSubtype, pub boundary: String, @@ -87,7 +87,7 @@ impl<'a> TryFrom<&'a NaiveType<'a>> for Multipart { } } -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] pub enum MultipartSubtype { Alternative, Mixed, @@ -109,7 +109,7 @@ impl<'a> From<&NaiveType<'a>> for MultipartSubtype { } } -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] pub enum Message { RFC822, Partial, @@ -127,7 +127,7 @@ impl<'a> From<&NaiveType<'a>> for Message { } } -#[derive(Debug, PartialEq, Default)] +#[derive(Debug, PartialEq, Default, Clone)] pub struct Text { pub subtype: TextSubtype, pub charset: EmailCharset, @@ -144,7 +144,7 @@ impl<'a> From<&NaiveType<'a>> for Text { } } -#[derive(Debug, PartialEq, Default)] +#[derive(Debug, PartialEq, Default, Clone)] pub enum TextSubtype { #[default] Plain, @@ -161,7 +161,7 @@ impl<'a> From<&NaiveType<'a>> for TextSubtype { } } -#[derive(Debug, PartialEq, Default)] +#[derive(Debug, PartialEq, Default, Clone)] pub struct Binary {} #[cfg(test)] diff --git a/src/part/part.rs b/src/part/part.rs index f72003b..ffc4a05 100644 --- a/src/part/part.rs +++ b/src/part/part.rs @@ -1,22 +1,26 @@ use nom::{ IResult, branch::alt, - bytes::complete::{is_not, tag}, + bytes::complete::{is_not}, multi::many0, - sequence::{pair, preceded, tuple}, - combinator::{not, opt, recognize}, + sequence::{pair}, + combinator::{map, not, recognize}, }; -use crate::mime::r#type as ctype; -use crate::mime::mime; +use crate::mime; +use crate::mime::mime::{AnyMIME}; use crate::rfc5322::{self as imf}; +use crate::text::boundary::{Delimiter, boundary}; +use crate::text::whitespace::obs_crlf; +use crate::text::ascii::CRLF; +use crate::header::{header, CompFieldList}; -pub struct Multipart(pub mime::Multipart, pub Vec>); -pub struct Message(pub mime::Message, pub imf::message::Message, pub Part<'a>); -pub struct Text(pub mime::Text, pub &'a [u8]); -pub struct Binary(pub mime::Binary, pub &'a [u8]); +pub struct Multipart<'a>(pub mime::mime::Multipart<'a>, pub Vec>); +pub struct Message<'a>(pub mime::mime::Message<'a>, pub imf::message::Message<'a>, pub Box>); +pub struct Text<'a>(pub mime::mime::Text<'a>, pub &'a [u8]); +pub struct Binary<'a>(pub mime::mime::Binary<'a>, pub &'a [u8]); -pub struct AnyPart<'a> { +pub enum AnyPart<'a> { Mult(Multipart<'a>), Msg(Message<'a>), Txt(Text<'a>), @@ -24,89 +28,107 @@ pub struct AnyPart<'a> { } pub enum MixedField<'a> { - MIME(mime::fields::Content<'a>), - IMF(rfc5322::fields::Field<'a>), + MIME(mime::field::Content<'a>), + IMF(imf::field::Field<'a>), } impl<'a> MixedField<'a> { - pub fn mime(&self) -> Option<&mime::fields::Content<'a>> { + pub fn mime(&self) -> Option<&mime::field::Content<'a>> { match self { - MIME(v) => Some(v), + Self::MIME(v) => Some(v), _ => None, } } - pub fn imf(&self) -> Option<&rfc5322::fields::Field<'a>> { + pub fn to_mime(self) -> Option> { match self { - IMF(v) => Some(v), + Self::MIME(v) => Some(v), + _ => None, + } + } + pub fn imf(&self) -> Option<&imf::field::Field<'a>> { + match self { + Self::IMF(v) => Some(v), + _ => None, + } + } + pub fn to_imf(self) -> Option> { + match self { + Self::IMF(v) => Some(v), _ => None, } } } -impl<'a, MixedField> CompFieldList<'a, MixedField> { +impl<'a> CompFieldList<'a, MixedField<'a>> { pub fn sections(self) -> (mime::mime::AnyMIME<'a>, imf::message::Message<'a>) { let k = self.known(); - let mime = k.iter().map(MixedField::mime).flatten().collect::(); - let imf = k.iter().map(MixedField::imf).flatten().collect::(); + let (v1, v2): (Vec, Vec) = k.into_iter().partition(|v| v.mime().is_some()); + let mime = v1.into_iter().map(|v| v.to_mime()).flatten().collect::(); + let imf = v2.into_iter().map(|v| v.to_imf()).flatten().collect::(); (mime, imf) } } pub fn mixed_field(input: &[u8]) -> IResult<&[u8], MixedField> { alt(( - map(mime::fields::content, MixedField::MIME), - map(rfc5322::fields::field, MixedField::IMF), + map(mime::field::content, MixedField::MIME), + map(imf::field::field, MixedField::IMF), ))(input) } -pub fn message<'a>(m: mime::Message<'a>) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Message<'a>> { +pub fn message<'a>(m: mime::mime::Message<'a>) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Message<'a>> { move |input: &[u8]| { let (input, fields) = header(mixed_field)(input)?; let (in_mime, imf) = fields.sections(); let part = to_anypart(in_mime, input); - Ok((&b[], Message(m, imf, part))) + Ok((&[], Message(m.clone(), imf, Box::new(part)))) } } -pub fn multipart<'a>(m: mime::Multipart<'a>) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Multipart<'a>> { - move |input: &[u8]| { - let (mut input_loop, _) = part_raw(m.ctype.boundary)(input)?; +pub fn multipart<'a>(m: mime::mime::Multipart<'a>) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Multipart<'a>> { + let m = m.clone(); + + move |input| { + let bound = m.0.boundary.as_bytes(); + let (mut input_loop, _) = part_raw(bound)(input)?; let mut mparts: Vec = vec![]; loop { - let input = match boundary(m.ctype.boundary)(input_loop) { - Err(_) => return Ok((input_loop, Multipart(m, mparts))), - Ok((inp, Delimiter::Last)) => return Ok((inp, Multipart(m, mparts))), + let input = match boundary(bound)(input_loop) { + Err(_) => return Ok((input_loop, Multipart(m.clone(), mparts))), + Ok((inp, Delimiter::Last)) => return Ok((inp, Multipart(m.clone(), mparts))), Ok((inp, Delimiter::Next)) => inp, }; // parse mime headers - let (input, fields) = header(content)(input)?; + let (input, fields) = header(mime::field::content)(input)?; let mime = fields.to_mime(); // parse raw part - let (input, rpart) = part_raw(ctype.boundary.as_bytes())(input)?; + let (input, rpart) = part_raw(bound)(input)?; // parse mime body - mparts.push(to_anypart(mime, rpart); + mparts.push(to_anypart(mime, rpart)); input_loop = input; } } } -pub fn to_anypart(m: AnyMIME<'a>, rpart: &[u8]) -> AnyPart<'a> { - match mime { +pub fn to_anypart<'a>(m: AnyMIME<'a>, rpart: &'a [u8]) -> AnyPart<'a> { + match m { AnyMIME::Mult(a) => map(multipart(a), AnyPart::Mult)(rpart) - .unwrap_or(AnyPart::Text(Text::default(), rpart)), + .map(|v| v.1) + .unwrap_or(AnyPart::Txt(Text(mime::mime::Text::default(), rpart))), AnyMIME::Msg(a) => map(message(a), AnyPart::Msg)(rpart) - .unwrap_or(AnyPart::Text(Text::default(), rpart)), + .map(|v| v.1) + .unwrap_or(AnyPart::Txt(Text(mime::mime::Text::default(), rpart))), AnyMIME::Txt(a) => AnyPart::Txt(Text(a, rpart)), AnyMIME::Bin(a) => AnyPart::Bin(Binary(a, rpart)), } } -pub fn part_raw<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> { - move |input: &[u8]| { +pub fn part_raw<'a>(bound: &[u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> + '_ { + move |input| { recognize(many0(pair( not(boundary(bound)), alt((is_not(CRLF), obs_crlf)), diff --git a/src/rfc5322/identification.rs b/src/rfc5322/identification.rs index 3d96b69..a0f7efb 100644 --- a/src/rfc5322/identification.rs +++ b/src/rfc5322/identification.rs @@ -12,7 +12,7 @@ use crate::text::whitespace::cfws; use crate::text::words::dot_atom_text; -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] pub struct MessageID<'a> { pub left: &'a [u8], pub right: &'a [u8], diff --git a/src/text/boundary.rs b/src/text/boundary.rs index eb66b58..98dba05 100644 --- a/src/text/boundary.rs +++ b/src/text/boundary.rs @@ -13,7 +13,7 @@ pub enum Delimiter { Last } -pub fn boundary<'a>(boundary: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Delimiter> { +pub fn boundary<'a>(boundary: &[u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Delimiter> + '_ { move |input: &[u8]| { let (rest, (_, _, _, last, _)) = tuple((opt(obs_crlf), tag(b"--"), tag(boundary), opt(tag(b"--")), opt(obs_crlf)))(input)?; match last { diff --git a/src/text/encoding.rs b/src/text/encoding.rs index 711f1b8..d5aca55 100644 --- a/src/text/encoding.rs +++ b/src/text/encoding.rs @@ -43,7 +43,7 @@ pub fn encoded_word_base64(input: &[u8]) -> IResult<&[u8], EncodedWord> { Ok((rest, parsed)) } -#[derive(PartialEq,Debug)] +#[derive(PartialEq,Debug, Clone)] pub enum EncodedWord<'a> { Quoted(QuotedWord<'a>), Base64(Base64Word<'a>), @@ -57,7 +57,7 @@ impl<'a> EncodedWord<'a> { } } -#[derive(PartialEq,Debug)] +#[derive(PartialEq,Debug,Clone)] pub struct Base64Word<'a> { pub enc: &'static Encoding, pub content: &'a [u8], @@ -72,7 +72,7 @@ impl<'a> Base64Word<'a> { } } -#[derive(PartialEq,Debug)] +#[derive(PartialEq,Debug,Clone)] pub struct QuotedWord<'a> { pub enc: &'static Encoding, pub chunks: Vec>, @@ -100,7 +100,7 @@ impl<'a> QuotedWord<'a> { } } -#[derive(PartialEq,Debug)] +#[derive(PartialEq,Debug,Clone)] pub enum QuotedChunk<'a> { Safe(&'a [u8]), Encoded(u8), diff --git a/src/text/misc_token.rs b/src/text/misc_token.rs index b3e0708..55bac5b 100644 --- a/src/text/misc_token.rs +++ b/src/text/misc_token.rs @@ -105,7 +105,7 @@ fn is_unstructured(c: u8) -> bool { is_vchar(c) || is_obs_no_ws_ctl(c) || c == ascii::NULL } -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] pub enum UnstrToken<'a> { Init, Encoded(encoding::EncodedWord<'a>), @@ -122,7 +122,7 @@ impl<'a> UnstrToken<'a> { } } -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] pub struct Unstructured<'a>(pub Vec>); impl<'a> Unstructured<'a> {