From 6e3b12c11a8477bb84bb04fadad4c9cbece5a77a Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Tue, 25 Jul 2023 14:00:01 +0200 Subject: [PATCH] add info about deductible fields --- README.md | 2 +- examples/simple.rs | 4 +-- src/lib.rs | 12 +++---- src/mime/field.rs | 6 ++-- src/mime/mod.rs | 24 ++++++------- src/mime/type.rs | 82 +++++++++++++++++++++++++++++++------------ src/parse.rs | 2 +- src/part/composite.rs | 72 ++++++++++++++++++------------------- src/part/discrete.rs | 8 ++--- src/part/mod.rs | 8 ++--- 10 files changed, 129 insertions(+), 91 deletions(-) diff --git a/README.md b/README.md index 0c68e2d..7443fc4 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ Content-Type: text/plain; charset=us-ascii This is the plain text body of the message. Note the blank line between the header information and the body of the message."#; -let (_, email) = eml_codec::email(input).unwrap(); +let (_, email) = eml_codec::parse_message(input).unwrap(); println!( "{} just sent you an email with subject \"{}\"", email.imf.from[0].to_string(), diff --git a/examples/simple.rs b/examples/simple.rs index 1918a21..c7ffb61 100644 --- a/examples/simple.rs +++ b/examples/simple.rs @@ -10,7 +10,7 @@ This is the plain text body of the message. Note the blank line between the header information and the body of the message."#; // if you are only interested in email metadata/headers - let (_, imf) = eml_codec::imf(input).unwrap(); + let (_, imf) = eml_codec::parse_imf(input).unwrap(); println!( "{} just sent you an email with subject \"{}\"", imf.from[0].to_string(), @@ -18,7 +18,7 @@ between the header information and the body of the message."#; ); // if you like to also parse the body/content - let (_, email) = eml_codec::email(input).unwrap(); + let (_, email) = eml_codec::parse_message(input).unwrap(); println!( "{} raw message is:\n{}", email.imf.from[0].to_string(), diff --git a/src/lib.rs b/src/lib.rs index ce05109..fa5440a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,7 +15,7 @@ pub mod header; /// Low-level email-specific text-based representation for data pub mod text; -use nom::IResult; +use nom::{IResult, combinator::into}; /// Parse a whole email including its (MIME) body /// @@ -46,15 +46,15 @@ use nom::IResult; /// This is the plain text body of the message. Note the blank line /// between the header information and the body of the message."#; /// -/// let (_, email) = eml_codec::email(input).unwrap(); +/// let (_, email) = eml_codec::parse_message(input).unwrap(); /// println!( /// "{} raw message is:\n{}", /// email.imf.from[0].to_string(), /// String::from_utf8_lossy(email.child.as_text().unwrap().body), /// ); /// ``` -pub fn email(input: &[u8]) -> IResult<&[u8], part::composite::Message> { - part::composite::message(mime::MIME::::default())(input) +pub fn parse_message(input: &[u8]) -> IResult<&[u8], part::composite::Message> { + into(part::composite::message(mime::MIME::::default()))(input) } /// Only extract the headers of the email that are part of the Internet Message Format spec @@ -87,13 +87,13 @@ pub fn email(input: &[u8]) -> IResult<&[u8], part::composite::Message> { /// This is the plain text body of the message. Note the blank line /// between the header information and the body of the message."#; /// -/// let (_, imf) = eml_codec::imf(input).unwrap(); +/// let (_, imf) = eml_codec::parse_imf(input).unwrap(); /// println!( /// "{} just sent you an email with subject \"{}\"", /// imf.from[0].to_string(), /// imf.subject.unwrap().to_string(), /// ); /// ``` -pub fn imf(input: &[u8]) -> IResult<&[u8], imf::Imf> { +pub fn parse_imf(input: &[u8]) -> IResult<&[u8], imf::Imf> { imf::field::imf(input) } diff --git a/src/mime/field.rs b/src/mime/field.rs index fa8b23c..b11210d 100644 --- a/src/mime/field.rs +++ b/src/mime/field.rs @@ -88,10 +88,10 @@ mod tests { if let Content::Type(nt) = content { assert_eq!( nt.to_type(), - AnyType::Text(Text { - charset: EmailCharset::UTF_8, + AnyType::Text(Deductible::Explicit(Text { + charset: Deductible::Explicit(EmailCharset::UTF_8), subtype: TextSubtype::Plain, - }), + })), ); } else { panic!("Expected Content::Type, got {:?}", content); diff --git a/src/mime/mod.rs b/src/mime/mod.rs index 3b549d0..5d049c4 100644 --- a/src/mime/mod.rs +++ b/src/mime/mod.rs @@ -21,22 +21,22 @@ use crate::text::misc_token::Unstructured; //Multipart, Message, Text, Binary}; #[derive(Debug, PartialEq, Clone)] pub struct MIME<'a, T> { - pub interpreted: T, - pub parsed: NaiveMIME<'a> + pub interpreted_type: T, + pub fields: NaiveMIME<'a> } -impl<'a> Default for MIME<'a, r#type::Text> { +impl<'a> Default for MIME<'a, r#type::DeductibleText> { fn default() -> Self { Self { - interpreted: r#type::Text::default(), - parsed: NaiveMIME::default(), + interpreted_type: r#type::DeductibleText::default(), + fields: NaiveMIME::default(), } } } -impl<'a> Default for MIME<'a, r#type::Message> { +impl<'a> Default for MIME<'a, r#type::DeductibleMessage> { fn default() -> Self { Self { - interpreted: r#type::Message::default(), - parsed: NaiveMIME::default(), + interpreted_type: r#type::DeductibleMessage::default(), + fields: NaiveMIME::default(), } } } @@ -44,8 +44,8 @@ impl<'a> Default for MIME<'a, r#type::Message> { #[derive(Debug, PartialEq, Clone)] pub enum AnyMIME<'a> { Mult(MIME<'a, r#type::Multipart>), - Msg(MIME<'a, r#type::Message>), - Txt(MIME<'a, r#type::Text>), + Msg(MIME<'a, r#type::DeductibleMessage>), + Txt(MIME<'a, r#type::DeductibleText>), Bin(MIME<'a, r#type::Binary>), } @@ -103,13 +103,13 @@ pub trait WithDefaultType { pub struct WithGenericDefault {} impl WithDefaultType for WithGenericDefault { fn default_type() -> AnyType { - AnyType::Text(r#type::Text::default()) + AnyType::Text(r#type::DeductibleText::default()) } } pub struct WithDigestDefault {} impl WithDefaultType for WithDigestDefault { fn default_type() -> AnyType { - AnyType::Message(r#type::Message::default()) + AnyType::Message(r#type::DeductibleMessage::default()) } } diff --git a/src/mime/type.rs b/src/mime/type.rs index 5eaf837..082679f 100644 --- a/src/mime/type.rs +++ b/src/mime/type.rs @@ -51,10 +51,10 @@ pub fn parameter_list(input: &[u8]) -> IResult<&[u8], Vec> { pub enum AnyType { // Composite types Multipart(Multipart), - Message(Message), + Message(Deductible), // Discrete types - Text(Text), + Text(Deductible), Binary(Binary), } @@ -63,25 +63,39 @@ impl<'a> From<&'a NaiveType<'a>> for AnyType { match nt.main.to_ascii_lowercase().as_slice() { b"multipart" => Multipart::try_from(nt) .map(Self::Multipart) - .unwrap_or(Self::Text(Text::default())), - b"message" => Self::Message(Message::from(nt)), - b"text" => Self::Text(Text::from(nt)), + .unwrap_or(Self::Text(DeductibleText::default())), + b"message" => Self::Message(DeductibleMessage::Explicit(Message::from(nt))), + b"text" => Self::Text(DeductibleText::Explicit(Text::from(nt))), _ => Self::Binary(Binary::default()), } } } impl<'a> AnyType { - pub fn to_mime(self, parsed: NaiveMIME<'a>) -> AnyMIME<'a> { + pub fn to_mime(self, fields: NaiveMIME<'a>) -> AnyMIME<'a> { match self { - Self::Multipart(interpreted) => AnyMIME::Mult(MIME:: { interpreted, parsed }), - Self::Message(interpreted) => AnyMIME::Msg(MIME:: { interpreted, parsed }), - Self::Text(interpreted) => AnyMIME::Txt(MIME:: { interpreted, parsed }), - Self::Binary(interpreted) => AnyMIME::Bin(MIME:: { interpreted, parsed }), + Self::Multipart(interpreted_type) => AnyMIME::Mult(MIME:: { interpreted_type, fields }), + Self::Message(interpreted_type) => AnyMIME::Msg(MIME:: { interpreted_type, fields }), + Self::Text(interpreted_type) => AnyMIME::Txt(MIME:: { interpreted_type, fields }), + Self::Binary(interpreted_type) => AnyMIME::Bin(MIME:: { interpreted_type, fields }), } } } + +#[derive(Debug, PartialEq, Clone)] +pub enum Deductible { + Inferred(T), + Explicit(T), +} +impl Default for Deductible { + fn default() -> Self { + Self::Inferred(T::default()) + } +} + +// REAL PARTS + #[derive(Debug, PartialEq, Clone)] pub struct Multipart { pub subtype: MultipartSubtype, @@ -124,29 +138,45 @@ impl<'a> From<&NaiveType<'a>> for MultipartSubtype { } } + + #[derive(Debug, PartialEq, Default, Clone)] -pub enum Message { +pub enum MessageSubtype { #[default] RFC822, Partial, External, Unknown, } + +pub type DeductibleMessage = Deductible; +#[derive(Debug, PartialEq, Default, Clone)] +pub struct Message { + pub subtype: MessageSubtype, +} impl<'a> From<&NaiveType<'a>> for Message { fn from(nt: &NaiveType<'a>) -> Self { match nt.sub.to_ascii_lowercase().as_slice() { - b"rfc822" => Self::RFC822, - b"partial" => Self::Partial, - b"external" => Self::External, - _ => Self::Unknown, + b"rfc822" => Self { subtype: MessageSubtype::RFC822 }, + b"partial" => Self { subtype: MessageSubtype::Partial }, + b"external" => Self { subtype: MessageSubtype::External }, + _ => Self { subtype: MessageSubtype::Unknown }, + } + } +} +impl From> for Message { + fn from(d: Deductible) -> Self { + match d { + Deductible::Inferred(t) | Deductible::Explicit(t) => t } } } +pub type DeductibleText = Deductible; #[derive(Debug, PartialEq, Default, Clone)] pub struct Text { pub subtype: TextSubtype, - pub charset: EmailCharset, + pub charset: Deductible, } impl<'a> From<&NaiveType<'a>> for Text { fn from(nt: &NaiveType<'a>) -> Self { @@ -156,8 +186,15 @@ impl<'a> From<&NaiveType<'a>> for Text { .params .iter() .find(|x| x.name.to_ascii_lowercase().as_slice() == b"charset") - .map(|x| EmailCharset::from(x.value.to_string().as_bytes())) - .unwrap_or(EmailCharset::US_ASCII), + .map(|x| Deductible::Explicit(EmailCharset::from(x.value.to_string().as_bytes()))) + .unwrap_or(Deductible::Inferred(EmailCharset::US_ASCII)), + } + } +} +impl From> for Text { + fn from(d: Deductible) -> Self { + match d { + Deductible::Inferred(t) | Deductible::Explicit(t) => t } } } @@ -187,6 +224,7 @@ mod tests { use super::*; use crate::mime::charset::EmailCharset; use crate::text::quoted::QuotedString; + use crate::mime::r#type::Deductible; #[test] fn test_parameter() { @@ -219,10 +257,10 @@ mod tests { assert_eq!( nt.to_type(), - AnyType::Text(Text { - charset: EmailCharset::UTF_8, + AnyType::Text(Deductible::Explicit(Text { + charset: Deductible::Explicit(EmailCharset::UTF_8), subtype: TextSubtype::Plain, - }) + })) ); } @@ -244,7 +282,7 @@ mod tests { let (rest, nt) = naive_type(b"message/rfc822").unwrap(); assert_eq!(rest, &[]); - assert_eq!(nt.to_type(), AnyType::Message(Message::RFC822),); + assert_eq!(nt.to_type(), AnyType::Message(Deductible::Explicit(Message { subtype: MessageSubtype::RFC822 }))); } #[test] diff --git a/src/parse.rs b/src/parse.rs index 78756f6..379682d 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -8,7 +8,7 @@ fn main() { let mut rawmail = Vec::new(); io::stdin().lock().read_to_end(&mut rawmail).unwrap(); - let (_, eml) = eml_codec::email(&rawmail).unwrap(); + let (_, eml) = eml_codec::parse_message(&rawmail).unwrap(); println!("{:#?}", eml); assert!(eml.imf.date.is_some()); assert!(!eml.imf.from.is_empty()); diff --git a/src/part/composite.rs b/src/part/composite.rs index 75a3c90..c12cfa7 100644 --- a/src/part/composite.rs +++ b/src/part/composite.rs @@ -9,7 +9,7 @@ use crate::text::boundary::{boundary, Delimiter}; //--- Multipart #[derive(Debug, PartialEq)] pub struct Multipart<'a> { - pub interpreted: mime::MIME<'a, mime::r#type::Multipart>, + pub mime: mime::MIME<'a, mime::r#type::Multipart>, pub children: Vec>, pub preamble: &'a [u8], pub epilogue: &'a [u8], @@ -27,7 +27,7 @@ pub fn multipart<'a>( let m = m.clone(); move |input| { - let bound = m.interpreted.boundary.as_bytes(); + let bound = m.interpreted_type.boundary.as_bytes(); let (mut input_loop, preamble) = part::part_raw(bound)(input)?; let mut mparts: Vec = vec![]; loop { @@ -36,7 +36,7 @@ pub fn multipart<'a>( return Ok(( input_loop, Multipart { - interpreted: m.clone(), + mime: m.clone(), children: mparts, preamble, epilogue: &[], @@ -47,7 +47,7 @@ pub fn multipart<'a>( return Ok(( inp, Multipart { - interpreted: m.clone(), + mime: m.clone(), children: mparts, preamble, epilogue: &[], @@ -64,7 +64,7 @@ pub fn multipart<'a>( }; // interpret mime according to context - let mime = match m.interpreted.subtype { + let mime = match m.interpreted_type.subtype { mime::r#type::MultipartSubtype::Digest => naive_mime.to_interpreted::().into(), _ => naive_mime.to_interpreted::().into(), }; @@ -85,7 +85,7 @@ pub fn multipart<'a>( #[derive(Debug, PartialEq)] pub struct Message<'a> { - pub interpreted: mime::MIME<'a, mime::r#type::Message>, + pub mime: mime::MIME<'a, mime::r#type::DeductibleMessage>, pub imf: imf::Imf<'a>, pub child: Box>, pub epilogue: &'a [u8], @@ -98,7 +98,7 @@ impl<'a> Message<'a> { } pub fn message<'a>( - m: mime::MIME<'a, mime::r#type::Message>, + m: mime::MIME<'a, mime::r#type::DeductibleMessage>, ) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Message<'a>> { move |input: &[u8]| { // parse header fields @@ -120,7 +120,7 @@ pub fn message<'a>( Ok(( &[], Message { - interpreted: m.clone(), + mime: m.clone(), imf, child: Box::new(part), epilogue: &[], @@ -142,11 +142,11 @@ mod tests { #[test] fn test_multipart() { let base_mime = mime::MIME { - interpreted: mime::r#type::Multipart { + interpreted_type: mime::r#type::Multipart { subtype: mime::r#type::MultipartSubtype::Alternative, boundary: "simple boundary".to_string(), }, - parsed: mime::NaiveMIME::default(), + fields: mime::NaiveMIME::default(), }; assert_eq!( @@ -170,27 +170,27 @@ This is the epilogue. It is also to be ignored. "), Ok((&b"\nThis is the epilogue. It is also to be ignored.\n"[..], Multipart { - interpreted: base_mime, + mime: base_mime, preamble: &b"This is the preamble. It is to be ignored, though it\nis a handy place for composition agents to include an\nexplanatory note to non-MIME conformant readers.\n"[..], epilogue: &b""[..], children: vec![ AnyPart::Txt(Text { - interpreted: mime::MIME { - interpreted: mime::r#type::Text { + mime: mime::MIME { + interpreted_type: mime::r#type::Deductible::Inferred(mime::r#type::Text { subtype: mime::r#type::TextSubtype::Plain, - charset: mime::charset::EmailCharset::US_ASCII, - }, - parsed: mime::NaiveMIME::default(), + charset: mime::r#type::Deductible::Inferred(mime::charset::EmailCharset::US_ASCII), + }), + fields: mime::NaiveMIME::default(), }, body: &b"This is implicitly typed plain US-ASCII text.\nIt does NOT end with a linebreak."[..], }), AnyPart::Txt(Text { - interpreted: mime::MIME { - interpreted: mime::r#type::Text { + mime: mime::MIME { + interpreted_type: mime::r#type::Deductible::Explicit(mime::r#type::Text { subtype: mime::r#type::TextSubtype::Plain, - charset: mime::charset::EmailCharset::US_ASCII, - }, - parsed: mime::NaiveMIME { + charset: mime::r#type::Deductible::Explicit(mime::charset::EmailCharset::US_ASCII), + }), + fields: mime::NaiveMIME { ctype: Some(mime::r#type::NaiveType { main: &b"text"[..], sub: &b"plain"[..], @@ -259,13 +259,13 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
"# .as_bytes(); - let base_mime = mime::MIME::::default(); + let base_mime = mime::MIME::::default(); assert_eq!( message(base_mime.clone())(fullmail), Ok(( &[][..], Message { - interpreted: base_mime, + mime: base_mime, epilogue: &b""[..], imf: imf::Imf { date: Some(FixedOffset::east_opt(2 * 3600) @@ -342,12 +342,12 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
..imf::Imf::default() }, child: Box::new(AnyPart::Mult(Multipart { - interpreted: mime::MIME { - interpreted: mime::r#type::Multipart { + mime: mime::MIME { + interpreted_type: mime::r#type::Multipart { subtype: mime::r#type::MultipartSubtype::Alternative, boundary: "b1_e376dc71bafc953c0b0fdeb9983a9956".to_string(), }, - parsed: mime::NaiveMIME { + fields: mime::NaiveMIME { ctype: Some(mime::r#type::NaiveType { main: &b"multipart"[..], sub: &b"alternative"[..], @@ -365,12 +365,12 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
epilogue: &b""[..], children: vec![ AnyPart::Txt(Text { - interpreted: mime::MIME { - interpreted: mime::r#type::Text { + mime: mime::MIME { + interpreted_type: mime::r#type::Deductible::Explicit(mime::r#type::Text { subtype: mime::r#type::TextSubtype::Plain, - charset: mime::charset::EmailCharset::UTF_8, - }, - parsed: mime::NaiveMIME { + charset: mime::r#type::Deductible::Explicit(mime::charset::EmailCharset::UTF_8), + }), + fields: mime::NaiveMIME { ctype: Some(mime::r#type::NaiveType { main: &b"text"[..], sub: &b"plain"[..], @@ -388,13 +388,13 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
body: &b"GZ\nOoOoO\noOoOoOoOo\noOoOoOoOoOoOoOoOo\noOoOoOoOoOoOoOoOoOoOoOo\noOoOoOoOoOoOoOoOoOoOoOoOoOoOo\nOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO\n"[..], }), AnyPart::Txt(Text { - interpreted: mime::MIME { - interpreted: mime::r#type::Text { + mime: mime::MIME { + interpreted_type: mime::r#type::Deductible::Explicit(mime::r#type::Text { subtype: mime::r#type::TextSubtype::Html, - charset: mime::charset::EmailCharset::US_ASCII, - }, + charset: mime::r#type::Deductible::Explicit(mime::charset::EmailCharset::US_ASCII), + }), - parsed: mime::NaiveMIME { + fields: mime::NaiveMIME { ctype: Some(mime::r#type::NaiveType { main: &b"text"[..], sub: &b"html"[..], diff --git a/src/part/discrete.rs b/src/part/discrete.rs index cdaa5f9..f92e032 100644 --- a/src/part/discrete.rs +++ b/src/part/discrete.rs @@ -4,14 +4,14 @@ use crate::mime; #[derive(PartialEq)] pub struct Text<'a> { - pub interpreted: mime::MIME<'a, mime::r#type::Text>, + pub mime: mime::MIME<'a, mime::r#type::DeductibleText>, pub body: &'a [u8], } impl<'a> fmt::Debug for Text<'a> { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fmt.debug_struct("part::Text") - .field("mime", &self.interpreted) + .field("mime", &self.mime) .field( "body", &format_args!("\"{}\"", String::from_utf8_lossy(self.body)), @@ -22,14 +22,14 @@ impl<'a> fmt::Debug for Text<'a> { #[derive(PartialEq)] pub struct Binary<'a> { - pub interpreted: mime::MIME<'a, mime::r#type::Binary>, + pub mime: mime::MIME<'a, mime::r#type::Binary>, pub body: &'a [u8], } impl<'a> fmt::Debug for Binary<'a> { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fmt.debug_struct("part::Binary") - .field("mime", &self.interpreted) + .field("mime", &self.mime) .field( "body", &format_args!("\"{}\"", String::from_utf8_lossy(self.body)), diff --git a/src/part/mod.rs b/src/part/mod.rs index f692b81..543cf2d 100644 --- a/src/part/mod.rs +++ b/src/part/mod.rs @@ -65,21 +65,21 @@ pub fn to_anypart<'a>(m: AnyMIME<'a>, rpart: &'a [u8]) -> AnyPart<'a> { AnyMIME::Mult(a) => multipart(a)(rpart) .map(|(rest, multi)| AnyPart::Mult(multi.with_epilogue(rest))) .unwrap_or(AnyPart::Txt(Text { - interpreted: mime::MIME::::default(), + mime: mime::MIME::::default(), body: rpart, })), AnyMIME::Msg(a) => message(a)(rpart) .map(|(rest, msg)| AnyPart::Msg(msg.with_epilogue(rest))) .unwrap_or(AnyPart::Txt(Text { - interpreted: mime::MIME::::default(), + mime: mime::MIME::::default(), body: rpart, })), AnyMIME::Txt(a) => AnyPart::Txt(Text { - interpreted: a, + mime: a, body: rpart, }), AnyMIME::Bin(a) => AnyPart::Bin(Binary { - interpreted: a, + mime: a, body: rpart, }), }