add info about deductible fields

This commit is contained in:
Quentin 2023-07-25 14:00:01 +02:00
parent 7b7d9de92d
commit 6e3b12c11a
Signed by: quentin
GPG key ID: E9602264D639FF68
10 changed files with 129 additions and 91 deletions

View file

@ -18,7 +18,7 @@ Content-Type: text/plain; charset=us-ascii
This is the plain text body of the message. Note the blank line
between the header information and the body of the message."#;
let (_, email) = eml_codec::email(input).unwrap();
let (_, email) = eml_codec::parse_message(input).unwrap();
println!(
"{} just sent you an email with subject \"{}\"",
email.imf.from[0].to_string(),

View file

@ -10,7 +10,7 @@ This is the plain text body of the message. Note the blank line
between the header information and the body of the message."#;
// if you are only interested in email metadata/headers
let (_, imf) = eml_codec::imf(input).unwrap();
let (_, imf) = eml_codec::parse_imf(input).unwrap();
println!(
"{} just sent you an email with subject \"{}\"",
imf.from[0].to_string(),
@ -18,7 +18,7 @@ between the header information and the body of the message."#;
);
// if you would also like to parse the body/content
let (_, email) = eml_codec::email(input).unwrap();
let (_, email) = eml_codec::parse_message(input).unwrap();
println!(
"{} raw message is:\n{}",
email.imf.from[0].to_string(),

View file

@ -15,7 +15,7 @@ pub mod header;
/// Low-level email-specific text-based representation for data
pub mod text;
use nom::IResult;
use nom::{IResult, combinator::into};
/// Parse a whole email including its (MIME) body
///
@ -46,15 +46,15 @@ use nom::IResult;
/// This is the plain text body of the message. Note the blank line
/// between the header information and the body of the message."#;
///
/// let (_, email) = eml_codec::email(input).unwrap();
/// let (_, email) = eml_codec::parse_message(input).unwrap();
/// println!(
/// "{} raw message is:\n{}",
/// email.imf.from[0].to_string(),
/// String::from_utf8_lossy(email.child.as_text().unwrap().body),
/// );
/// ```
pub fn email(input: &[u8]) -> IResult<&[u8], part::composite::Message> {
part::composite::message(mime::MIME::<mime::r#type::Message>::default())(input)
pub fn parse_message(input: &[u8]) -> IResult<&[u8], part::composite::Message> {
into(part::composite::message(mime::MIME::<mime::r#type::DeductibleMessage>::default()))(input)
}
/// Only extract the headers of the email that are part of the Internet Message Format spec
@ -87,13 +87,13 @@ pub fn email(input: &[u8]) -> IResult<&[u8], part::composite::Message> {
/// This is the plain text body of the message. Note the blank line
/// between the header information and the body of the message."#;
///
/// let (_, imf) = eml_codec::imf(input).unwrap();
/// let (_, imf) = eml_codec::parse_imf(input).unwrap();
/// println!(
/// "{} just sent you an email with subject \"{}\"",
/// imf.from[0].to_string(),
/// imf.subject.unwrap().to_string(),
/// );
/// ```
pub fn imf(input: &[u8]) -> IResult<&[u8], imf::Imf> {
pub fn parse_imf(input: &[u8]) -> IResult<&[u8], imf::Imf> {
imf::field::imf(input)
}

View file

@ -88,10 +88,10 @@ mod tests {
if let Content::Type(nt) = content {
assert_eq!(
nt.to_type(),
AnyType::Text(Text {
charset: EmailCharset::UTF_8,
AnyType::Text(Deductible::Explicit(Text {
charset: Deductible::Explicit(EmailCharset::UTF_8),
subtype: TextSubtype::Plain,
}),
})),
);
} else {
panic!("Expected Content::Type, got {:?}", content);

View file

@ -21,22 +21,22 @@ use crate::text::misc_token::Unstructured; //Multipart, Message, Text, Binary};
#[derive(Debug, PartialEq, Clone)]
pub struct MIME<'a, T> {
pub interpreted: T,
pub parsed: NaiveMIME<'a>
pub interpreted_type: T,
pub fields: NaiveMIME<'a>
}
impl<'a> Default for MIME<'a, r#type::Text> {
impl<'a> Default for MIME<'a, r#type::DeductibleText> {
fn default() -> Self {
Self {
interpreted: r#type::Text::default(),
parsed: NaiveMIME::default(),
interpreted_type: r#type::DeductibleText::default(),
fields: NaiveMIME::default(),
}
}
}
impl<'a> Default for MIME<'a, r#type::Message> {
impl<'a> Default for MIME<'a, r#type::DeductibleMessage> {
fn default() -> Self {
Self {
interpreted: r#type::Message::default(),
parsed: NaiveMIME::default(),
interpreted_type: r#type::DeductibleMessage::default(),
fields: NaiveMIME::default(),
}
}
}
@ -44,8 +44,8 @@ impl<'a> Default for MIME<'a, r#type::Message> {
#[derive(Debug, PartialEq, Clone)]
pub enum AnyMIME<'a> {
Mult(MIME<'a, r#type::Multipart>),
Msg(MIME<'a, r#type::Message>),
Txt(MIME<'a, r#type::Text>),
Msg(MIME<'a, r#type::DeductibleMessage>),
Txt(MIME<'a, r#type::DeductibleText>),
Bin(MIME<'a, r#type::Binary>),
}
@ -103,13 +103,13 @@ pub trait WithDefaultType {
pub struct WithGenericDefault {}
impl WithDefaultType for WithGenericDefault {
fn default_type() -> AnyType {
AnyType::Text(r#type::Text::default())
AnyType::Text(r#type::DeductibleText::default())
}
}
pub struct WithDigestDefault {}
impl WithDefaultType for WithDigestDefault {
fn default_type() -> AnyType {
AnyType::Message(r#type::Message::default())
AnyType::Message(r#type::DeductibleMessage::default())
}
}

View file

@ -51,10 +51,10 @@ pub fn parameter_list(input: &[u8]) -> IResult<&[u8], Vec<Parameter>> {
pub enum AnyType {
// Composite types
Multipart(Multipart),
Message(Message),
Message(Deductible<Message>),
// Discrete types
Text(Text),
Text(Deductible<Text>),
Binary(Binary),
}
@ -63,25 +63,39 @@ impl<'a> From<&'a NaiveType<'a>> for AnyType {
match nt.main.to_ascii_lowercase().as_slice() {
b"multipart" => Multipart::try_from(nt)
.map(Self::Multipart)
.unwrap_or(Self::Text(Text::default())),
b"message" => Self::Message(Message::from(nt)),
b"text" => Self::Text(Text::from(nt)),
.unwrap_or(Self::Text(DeductibleText::default())),
b"message" => Self::Message(DeductibleMessage::Explicit(Message::from(nt))),
b"text" => Self::Text(DeductibleText::Explicit(Text::from(nt))),
_ => Self::Binary(Binary::default()),
}
}
}
impl<'a> AnyType {
pub fn to_mime(self, parsed: NaiveMIME<'a>) -> AnyMIME<'a> {
pub fn to_mime(self, fields: NaiveMIME<'a>) -> AnyMIME<'a> {
match self {
Self::Multipart(interpreted) => AnyMIME::Mult(MIME::<Multipart> { interpreted, parsed }),
Self::Message(interpreted) => AnyMIME::Msg(MIME::<Message> { interpreted, parsed }),
Self::Text(interpreted) => AnyMIME::Txt(MIME::<Text> { interpreted, parsed }),
Self::Binary(interpreted) => AnyMIME::Bin(MIME::<Binary> { interpreted, parsed }),
Self::Multipart(interpreted_type) => AnyMIME::Mult(MIME::<Multipart> { interpreted_type, fields }),
Self::Message(interpreted_type) => AnyMIME::Msg(MIME::<DeductibleMessage> { interpreted_type, fields }),
Self::Text(interpreted_type) => AnyMIME::Txt(MIME::<DeductibleText> { interpreted_type, fields }),
Self::Binary(interpreted_type) => AnyMIME::Bin(MIME::<Binary> { interpreted_type, fields }),
}
}
}
/// A value tagged with how it was obtained.
///
/// * [`Deductible::Inferred`] — the value was not stated and a fallback was
///   substituted (this is what [`Default::default`] produces).
/// * [`Deductible::Explicit`] — the value was actually present in the input.
///
/// The type parameter is intentionally unbounded: only the [`Default`]
/// implementation below needs `T: Default`, so the bound lives there and not
/// on the type itself.
#[derive(Debug, PartialEq, Clone)]
pub enum Deductible<T> {
    /// The wrapped value is a fallback that was deduced, not read.
    Inferred(T),
    /// The wrapped value was explicitly provided.
    Explicit(T),
}

/// The default of a deductible value is the payload's own default, marked as
/// inferred since nothing was explicitly provided.
impl<T: Default> Default for Deductible<T> {
    fn default() -> Self {
        Self::Inferred(T::default())
    }
}
// REAL PARTS
#[derive(Debug, PartialEq, Clone)]
pub struct Multipart {
pub subtype: MultipartSubtype,
@ -124,29 +138,45 @@ impl<'a> From<&NaiveType<'a>> for MultipartSubtype {
}
}
#[derive(Debug, PartialEq, Default, Clone)]
pub enum Message {
pub enum MessageSubtype {
#[default]
RFC822,
Partial,
External,
Unknown,
}
/// A [`Message`] content type tagged as either inferred or explicitly declared.
pub type DeductibleMessage = Deductible<Message>;
/// Interpreted `message/*` content type, carrying its subtype.
#[derive(Debug, PartialEq, Default, Clone)]
pub struct Message {
/// Which `message` subtype this is.
pub subtype: MessageSubtype,
}
impl<'a> From<&NaiveType<'a>> for Message {
fn from(nt: &NaiveType<'a>) -> Self {
match nt.sub.to_ascii_lowercase().as_slice() {
b"rfc822" => Self::RFC822,
b"partial" => Self::Partial,
b"external" => Self::External,
_ => Self::Unknown,
b"rfc822" => Self { subtype: MessageSubtype::RFC822 },
b"partial" => Self { subtype: MessageSubtype::Partial },
b"external" => Self { subtype: MessageSubtype::External },
_ => Self { subtype: MessageSubtype::Unknown },
}
}
}
/// Unwraps a [`Deductible<Message>`], discarding the inferred/explicit marker
/// and keeping only the inner [`Message`].
impl From<Deductible<Message>> for Message {
    fn from(d: Deductible<Message>) -> Self {
        // Both variants carry the same payload, so this pattern is irrefutable.
        let (Deductible::Inferred(inner) | Deductible::Explicit(inner)) = d;
        inner
    }
}
/// A [`Text`] content type tagged as either inferred or explicitly declared.
pub type DeductibleText = Deductible<Text>;
#[derive(Debug, PartialEq, Default, Clone)]
pub struct Text {
pub subtype: TextSubtype,
pub charset: EmailCharset,
pub charset: Deductible<EmailCharset>,
}
impl<'a> From<&NaiveType<'a>> for Text {
fn from(nt: &NaiveType<'a>) -> Self {
@ -156,8 +186,15 @@ impl<'a> From<&NaiveType<'a>> for Text {
.params
.iter()
.find(|x| x.name.to_ascii_lowercase().as_slice() == b"charset")
.map(|x| EmailCharset::from(x.value.to_string().as_bytes()))
.unwrap_or(EmailCharset::US_ASCII),
.map(|x| Deductible::Explicit(EmailCharset::from(x.value.to_string().as_bytes())))
.unwrap_or(Deductible::Inferred(EmailCharset::US_ASCII)),
}
}
}
/// Unwraps a [`Deductible<Text>`], discarding the inferred/explicit marker
/// and keeping only the inner [`Text`].
impl From<Deductible<Text>> for Text {
    fn from(d: Deductible<Text>) -> Self {
        // Both variants carry the same payload, so this pattern is irrefutable.
        let (Deductible::Inferred(inner) | Deductible::Explicit(inner)) = d;
        inner
    }
}
@ -187,6 +224,7 @@ mod tests {
use super::*;
use crate::mime::charset::EmailCharset;
use crate::text::quoted::QuotedString;
use crate::mime::r#type::Deductible;
#[test]
fn test_parameter() {
@ -219,10 +257,10 @@ mod tests {
assert_eq!(
nt.to_type(),
AnyType::Text(Text {
charset: EmailCharset::UTF_8,
AnyType::Text(Deductible::Explicit(Text {
charset: Deductible::Explicit(EmailCharset::UTF_8),
subtype: TextSubtype::Plain,
})
}))
);
}
@ -244,7 +282,7 @@ mod tests {
let (rest, nt) = naive_type(b"message/rfc822").unwrap();
assert_eq!(rest, &[]);
assert_eq!(nt.to_type(), AnyType::Message(Message::RFC822),);
assert_eq!(nt.to_type(), AnyType::Message(Deductible::Explicit(Message { subtype: MessageSubtype::RFC822 })));
}
#[test]

View file

@ -8,7 +8,7 @@ fn main() {
let mut rawmail = Vec::new();
io::stdin().lock().read_to_end(&mut rawmail).unwrap();
let (_, eml) = eml_codec::email(&rawmail).unwrap();
let (_, eml) = eml_codec::parse_message(&rawmail).unwrap();
println!("{:#?}", eml);
assert!(eml.imf.date.is_some());
assert!(!eml.imf.from.is_empty());

View file

@ -9,7 +9,7 @@ use crate::text::boundary::{boundary, Delimiter};
//--- Multipart
#[derive(Debug, PartialEq)]
pub struct Multipart<'a> {
pub interpreted: mime::MIME<'a, mime::r#type::Multipart>,
pub mime: mime::MIME<'a, mime::r#type::Multipart>,
pub children: Vec<AnyPart<'a>>,
pub preamble: &'a [u8],
pub epilogue: &'a [u8],
@ -27,7 +27,7 @@ pub fn multipart<'a>(
let m = m.clone();
move |input| {
let bound = m.interpreted.boundary.as_bytes();
let bound = m.interpreted_type.boundary.as_bytes();
let (mut input_loop, preamble) = part::part_raw(bound)(input)?;
let mut mparts: Vec<AnyPart> = vec![];
loop {
@ -36,7 +36,7 @@ pub fn multipart<'a>(
return Ok((
input_loop,
Multipart {
interpreted: m.clone(),
mime: m.clone(),
children: mparts,
preamble,
epilogue: &[],
@ -47,7 +47,7 @@ pub fn multipart<'a>(
return Ok((
inp,
Multipart {
interpreted: m.clone(),
mime: m.clone(),
children: mparts,
preamble,
epilogue: &[],
@ -64,7 +64,7 @@ pub fn multipart<'a>(
};
// interpret mime according to context
let mime = match m.interpreted.subtype {
let mime = match m.interpreted_type.subtype {
mime::r#type::MultipartSubtype::Digest => naive_mime.to_interpreted::<mime::WithDigestDefault>().into(),
_ => naive_mime.to_interpreted::<mime::WithGenericDefault>().into(),
};
@ -85,7 +85,7 @@ pub fn multipart<'a>(
#[derive(Debug, PartialEq)]
pub struct Message<'a> {
pub interpreted: mime::MIME<'a, mime::r#type::Message>,
pub mime: mime::MIME<'a, mime::r#type::DeductibleMessage>,
pub imf: imf::Imf<'a>,
pub child: Box<AnyPart<'a>>,
pub epilogue: &'a [u8],
@ -98,7 +98,7 @@ impl<'a> Message<'a> {
}
pub fn message<'a>(
m: mime::MIME<'a, mime::r#type::Message>,
m: mime::MIME<'a, mime::r#type::DeductibleMessage>,
) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Message<'a>> {
move |input: &[u8]| {
// parse header fields
@ -120,7 +120,7 @@ pub fn message<'a>(
Ok((
&[],
Message {
interpreted: m.clone(),
mime: m.clone(),
imf,
child: Box::new(part),
epilogue: &[],
@ -142,11 +142,11 @@ mod tests {
#[test]
fn test_multipart() {
let base_mime = mime::MIME {
interpreted: mime::r#type::Multipart {
interpreted_type: mime::r#type::Multipart {
subtype: mime::r#type::MultipartSubtype::Alternative,
boundary: "simple boundary".to_string(),
},
parsed: mime::NaiveMIME::default(),
fields: mime::NaiveMIME::default(),
};
assert_eq!(
@ -170,27 +170,27 @@ This is the epilogue. It is also to be ignored.
"),
Ok((&b"\nThis is the epilogue. It is also to be ignored.\n"[..],
Multipart {
interpreted: base_mime,
mime: base_mime,
preamble: &b"This is the preamble. It is to be ignored, though it\nis a handy place for composition agents to include an\nexplanatory note to non-MIME conformant readers.\n"[..],
epilogue: &b""[..],
children: vec![
AnyPart::Txt(Text {
interpreted: mime::MIME {
interpreted: mime::r#type::Text {
mime: mime::MIME {
interpreted_type: mime::r#type::Deductible::Inferred(mime::r#type::Text {
subtype: mime::r#type::TextSubtype::Plain,
charset: mime::charset::EmailCharset::US_ASCII,
},
parsed: mime::NaiveMIME::default(),
charset: mime::r#type::Deductible::Inferred(mime::charset::EmailCharset::US_ASCII),
}),
fields: mime::NaiveMIME::default(),
},
body: &b"This is implicitly typed plain US-ASCII text.\nIt does NOT end with a linebreak."[..],
}),
AnyPart::Txt(Text {
interpreted: mime::MIME {
interpreted: mime::r#type::Text {
mime: mime::MIME {
interpreted_type: mime::r#type::Deductible::Explicit(mime::r#type::Text {
subtype: mime::r#type::TextSubtype::Plain,
charset: mime::charset::EmailCharset::US_ASCII,
},
parsed: mime::NaiveMIME {
charset: mime::r#type::Deductible::Explicit(mime::charset::EmailCharset::US_ASCII),
}),
fields: mime::NaiveMIME {
ctype: Some(mime::r#type::NaiveType {
main: &b"text"[..],
sub: &b"plain"[..],
@ -259,13 +259,13 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
"#
.as_bytes();
let base_mime = mime::MIME::<mime::r#type::Message>::default();
let base_mime = mime::MIME::<mime::r#type::DeductibleMessage>::default();
assert_eq!(
message(base_mime.clone())(fullmail),
Ok((
&[][..],
Message {
interpreted: base_mime,
mime: base_mime,
epilogue: &b""[..],
imf: imf::Imf {
date: Some(FixedOffset::east_opt(2 * 3600)
@ -342,12 +342,12 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
..imf::Imf::default()
},
child: Box::new(AnyPart::Mult(Multipart {
interpreted: mime::MIME {
interpreted: mime::r#type::Multipart {
mime: mime::MIME {
interpreted_type: mime::r#type::Multipart {
subtype: mime::r#type::MultipartSubtype::Alternative,
boundary: "b1_e376dc71bafc953c0b0fdeb9983a9956".to_string(),
},
parsed: mime::NaiveMIME {
fields: mime::NaiveMIME {
ctype: Some(mime::r#type::NaiveType {
main: &b"multipart"[..],
sub: &b"alternative"[..],
@ -365,12 +365,12 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
epilogue: &b""[..],
children: vec![
AnyPart::Txt(Text {
interpreted: mime::MIME {
interpreted: mime::r#type::Text {
mime: mime::MIME {
interpreted_type: mime::r#type::Deductible::Explicit(mime::r#type::Text {
subtype: mime::r#type::TextSubtype::Plain,
charset: mime::charset::EmailCharset::UTF_8,
},
parsed: mime::NaiveMIME {
charset: mime::r#type::Deductible::Explicit(mime::charset::EmailCharset::UTF_8),
}),
fields: mime::NaiveMIME {
ctype: Some(mime::r#type::NaiveType {
main: &b"text"[..],
sub: &b"plain"[..],
@ -388,13 +388,13 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
body: &b"GZ\nOoOoO\noOoOoOoOo\noOoOoOoOoOoOoOoOo\noOoOoOoOoOoOoOoOoOoOoOo\noOoOoOoOoOoOoOoOoOoOoOoOoOoOo\nOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO\n"[..],
}),
AnyPart::Txt(Text {
interpreted: mime::MIME {
interpreted: mime::r#type::Text {
mime: mime::MIME {
interpreted_type: mime::r#type::Deductible::Explicit(mime::r#type::Text {
subtype: mime::r#type::TextSubtype::Html,
charset: mime::charset::EmailCharset::US_ASCII,
},
charset: mime::r#type::Deductible::Explicit(mime::charset::EmailCharset::US_ASCII),
}),
parsed: mime::NaiveMIME {
fields: mime::NaiveMIME {
ctype: Some(mime::r#type::NaiveType {
main: &b"text"[..],
sub: &b"html"[..],

View file

@ -4,14 +4,14 @@ use crate::mime;
#[derive(PartialEq)]
pub struct Text<'a> {
pub interpreted: mime::MIME<'a, mime::r#type::Text>,
pub mime: mime::MIME<'a, mime::r#type::DeductibleText>,
pub body: &'a [u8],
}
impl<'a> fmt::Debug for Text<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt.debug_struct("part::Text")
.field("mime", &self.interpreted)
.field("mime", &self.mime)
.field(
"body",
&format_args!("\"{}\"", String::from_utf8_lossy(self.body)),
@ -22,14 +22,14 @@ impl<'a> fmt::Debug for Text<'a> {
#[derive(PartialEq)]
pub struct Binary<'a> {
pub interpreted: mime::MIME<'a, mime::r#type::Binary>,
pub mime: mime::MIME<'a, mime::r#type::Binary>,
pub body: &'a [u8],
}
impl<'a> fmt::Debug for Binary<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt.debug_struct("part::Binary")
.field("mime", &self.interpreted)
.field("mime", &self.mime)
.field(
"body",
&format_args!("\"{}\"", String::from_utf8_lossy(self.body)),

View file

@ -65,21 +65,21 @@ pub fn to_anypart<'a>(m: AnyMIME<'a>, rpart: &'a [u8]) -> AnyPart<'a> {
AnyMIME::Mult(a) => multipart(a)(rpart)
.map(|(rest, multi)| AnyPart::Mult(multi.with_epilogue(rest)))
.unwrap_or(AnyPart::Txt(Text {
interpreted: mime::MIME::<mime::r#type::Text>::default(),
mime: mime::MIME::<mime::r#type::DeductibleText>::default(),
body: rpart,
})),
AnyMIME::Msg(a) => message(a)(rpart)
.map(|(rest, msg)| AnyPart::Msg(msg.with_epilogue(rest)))
.unwrap_or(AnyPart::Txt(Text {
interpreted: mime::MIME::<mime::r#type::Text>::default(),
mime: mime::MIME::<mime::r#type::DeductibleText>::default(),
body: rpart,
})),
AnyMIME::Txt(a) => AnyPart::Txt(Text {
interpreted: a,
mime: a,
body: rpart,
}),
AnyMIME::Bin(a) => AnyPart::Bin(Binary {
interpreted: a,
mime: a,
body: rpart,
}),
}