Compare commits

..

26 commits
0.1.0 ... main

Author SHA1 Message Date
a7bd3c475a
fix mime() method 2023-09-18 18:08:07 +02:00
df0c6fa34f Merge pull request 'expose naive fields' (#27) from fetch-mime-from-anypart into main
Reviewed-on: #27
2023-09-18 16:07:16 +00:00
7920955ee5 Merge branch 'main' into fetch-mime-from-anypart 2023-09-18 16:07:07 +00:00
0a7179a17c
expose naive fields 2023-09-18 18:06:37 +02:00
303fdc3e91 Merge pull request 'attach to child' (#26) from header2 into main
Reviewed-on: #26
2023-09-18 15:25:55 +00:00
f5f8b8e018
attach to child 2023-09-18 17:24:58 +02:00
8b17af73fa Merge pull request 'always attach headers to naivemime' (#25) from headers_kv2 into main
Reviewed-on: #25
2023-09-18 14:38:47 +00:00
9eb44b03f7
always attach headers to naivemime 2023-09-18 16:38:04 +02:00
5ccc212d15 Merge pull request 'Access headers as key/values' (#24) from headers_map into main
Reviewed-on: #24
2023-08-30 17:50:25 +00:00
d9285c9ddf
format code 2023-08-30 19:49:04 +02:00
2529b0145e
fixed tests! 2023-08-30 19:48:23 +02:00
9b828ad6ad
better debug 2023-08-30 19:30:10 +02:00
18bb04340a
refactor headers 2023-08-30 19:00:08 +02:00
d9cf6b225d
fix raw mime test 2/2 2023-08-30 13:31:24 +02:00
628fbc507d
fix raw mime test 1/2 2023-08-30 11:46:23 +02:00
dfb5b9fe0f
refactor imf parsing 2023-08-30 11:35:46 +02:00
ba59b037ef
add an header kv function 2023-08-30 11:35:29 +02:00
5cff5510ac Merge pull request 'add a raw field to mime' (#22) from better_access_to_bytes into main
Reviewed-on: #22
2023-08-16 14:18:51 +00:00
8aa23ac5f2
add a raw field to mime 2023-08-16 16:15:57 +02:00
32ca628358
prepare v0.1.1 release 2023-07-25 18:33:00 +02:00
b64c032bff
add compatibility for \r\r\n 2023-07-25 18:27:19 +02:00
987024430b
collect raw stuff 2023-07-25 16:20:36 +02:00
91fa0d38c3
implement to_string for some types 2023-07-25 14:39:30 +02:00
64407b6bee
Add a to_string for mechanism 2023-07-25 14:06:40 +02:00
6e3b12c11a
add info about deductible fields 2023-07-25 14:00:01 +02:00
7b7d9de92d
improve cargo.toml 2023-07-24 22:14:51 +02:00
21 changed files with 850 additions and 442 deletions

2
Cargo.lock generated
View file

@ -70,7 +70,7 @@ checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa"
[[package]] [[package]]
name = "eml-codec" name = "eml-codec"
version = "0.1.0" version = "0.1.2"
dependencies = [ dependencies = [
"base64", "base64",
"chrono", "chrono",

View file

@ -1,9 +1,16 @@
[package] [package]
name = "eml-codec" name = "eml-codec"
version = "0.1.0" version = "0.1.2"
edition = "2021" edition = "2021"
license = "GPL-3.0-or-later" license = "GPL-3.0-or-later"
repository = "https://git.deuxfleurs.fr/Deuxfleurs/eml-codec"
description = "Email enCOder DECoder in Rust. Support Internet Message Format and MIME (RFC 822, 5322, 2045, 2046, 2047, 2048, 2049)."
documentation = "https://docs.rs/eml-codec"
readme = "README.md"
exclude = [
"doc/",
"resources/",
]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib] [lib]
@ -23,3 +30,4 @@ encoding_rs = "0.8"
[dev-dependencies] [dev-dependencies]
walkdir = "2" walkdir = "2"

View file

@ -18,7 +18,7 @@ Content-Type: text/plain; charset=us-ascii
This is the plain text body of the message. Note the blank line This is the plain text body of the message. Note the blank line
between the header information and the body of the message."#; between the header information and the body of the message."#;
let (_, email) = eml_codec::email(input).unwrap(); let (_, email) = eml_codec::parse_message(input).unwrap();
println!( println!(
"{} just sent you an email with subject \"{}\"", "{} just sent you an email with subject \"{}\"",
email.imf.from[0].to_string(), email.imf.from[0].to_string(),

View file

@ -10,7 +10,7 @@ This is the plain text body of the message. Note the blank line
between the header information and the body of the message."#; between the header information and the body of the message."#;
// if you are only interested in email metadata/headers // if you are only interested in email metadata/headers
let (_, imf) = eml_codec::imf(input).unwrap(); let (_, imf) = eml_codec::parse_imf(input).unwrap();
println!( println!(
"{} just sent you an email with subject \"{}\"", "{} just sent you an email with subject \"{}\"",
imf.from[0].to_string(), imf.from[0].to_string(),
@ -18,7 +18,7 @@ between the header information and the body of the message."#;
); );
// if you like to also parse the body/content // if you like to also parse the body/content
let (_, email) = eml_codec::email(input).unwrap(); let (_, email) = eml_codec::parse_message(input).unwrap();
println!( println!(
"{} raw message is:\n{}", "{} raw message is:\n{}",
email.imf.from[0].to_string(), email.imf.from[0].to_string(),

View file

@ -1,55 +1,62 @@
use crate::text::misc_token::{unstructured, Unstructured};
use crate::text::whitespace::{foldable_line, obs_crlf};
use nom::{ use nom::{
branch::alt, branch::alt,
bytes::complete::{tag, tag_no_case, take_while1}, bytes::complete::{tag, take_while1},
character::complete::space0, character::complete::space0,
combinator::map, combinator::{into, recognize},
multi::{fold_many0}, multi::many0,
sequence::{pair, terminated, tuple}, sequence::{pair, terminated, tuple},
IResult, IResult,
}; };
use std::fmt;
#[derive(Debug, PartialEq)] use crate::text::misc_token::unstructured;
pub enum CompField<'a, T> { use crate::text::whitespace::{foldable_line, obs_crlf};
Known(T),
Unknown(Kv<'a>), #[derive(PartialEq, Clone)]
Bad(&'a [u8]), pub struct Kv2<'a>(pub &'a [u8], pub &'a [u8]);
impl<'a> From<(&'a [u8], &'a [u8])> for Kv2<'a> {
fn from(pair: (&'a [u8], &'a [u8])) -> Self {
Self(pair.0, pair.1)
}
} }
impl<'a> fmt::Debug for Kv2<'a> {
#[derive(Debug, PartialEq, Clone)] fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
pub struct Kv<'a>(pub &'a [u8], pub Unstructured<'a>); fmt.debug_tuple("header::Kv2")
.field(&String::from_utf8_lossy(self.0))
.field(&String::from_utf8_lossy(self.1))
pub fn header<'a, T>( .finish()
fx: impl Fn(&'a [u8]) -> IResult<&'a [u8], T> + Copy,
) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], (Vec::<T>, Vec::<Kv>, Vec<&'a [u8]>)> {
move |input| {
terminated(
fold_many0(
alt((
map(fx, CompField::Known),
map(opt_field, |(k, v)| CompField::Unknown(Kv(k, v))),
map(foldable_line, CompField::Bad),
)),
|| (Vec::<T>::new(), Vec::<Kv>::new(), Vec::<&'a [u8]>::new()),
|(mut known, mut unknown, mut bad), item| {
match item {
CompField::Known(v) => known.push(v),
CompField::Unknown(v) => unknown.push(v),
CompField::Bad(v) => bad.push(v),
};
(known, unknown, bad)
}
),
obs_crlf,
)(input)
} }
} }
pub fn field_name<'a>(name: &'static [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> { #[derive(Debug, PartialEq, Clone)]
move |input| terminated(tag_no_case(name), tuple((space0, tag(b":"), space0)))(input) pub enum Field<'a> {
Good(Kv2<'a>),
Bad(&'a [u8]),
}
impl<'a> From<Kv2<'a>> for Field<'a> {
fn from(kv: Kv2<'a>) -> Self {
Self::Good(kv)
}
}
impl<'a> From<&'a [u8]> for Field<'a> {
fn from(bad: &'a [u8]) -> Self {
Self::Bad(bad)
}
}
/// Parse headers as key/values
pub fn header_kv(input: &[u8]) -> IResult<&[u8], Vec<Field>> {
terminated(
many0(alt((into(correct_field), into(foldable_line)))),
obs_crlf,
)(input)
}
pub fn field_any(input: &[u8]) -> IResult<&[u8], &[u8]> {
terminated(
take_while1(|c| (0x21..=0x7E).contains(&c) && c != 0x3A),
tuple((space0, tag(b":"), space0)),
)(input)
} }
/// Optional field /// Optional field
@ -61,15 +68,6 @@ pub fn field_name<'a>(name: &'static [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [
/// %d59-126 ; characters not including /// %d59-126 ; characters not including
/// ; ":". /// ; ":".
/// ``` /// ```
pub fn opt_field(input: &[u8]) -> IResult<&[u8], (&[u8], Unstructured)> { pub fn correct_field(input: &[u8]) -> IResult<&[u8], Kv2> {
terminated( terminated(into(pair(field_any, recognize(unstructured))), obs_crlf)(input)
pair(
terminated(
take_while1(|c| (0x21..=0x7E).contains(&c) && c != 0x3A),
tuple((space0, tag(b":"), space0)),
),
unstructured,
),
obs_crlf,
)(input)
} }

View file

@ -1,21 +1,14 @@
use chrono::{DateTime, FixedOffset}; use chrono::{DateTime, FixedOffset};
use nom::{ use nom::combinator::map;
branch::alt,
combinator::map,
sequence::{preceded, terminated},
IResult,
};
use crate::header::{field_name, header}; use crate::header;
use crate::imf::address::{address_list, mailbox_list, nullable_address_list, AddressList}; use crate::imf::address::{address_list, mailbox_list, nullable_address_list, AddressList};
use crate::imf::datetime::section as date; use crate::imf::datetime::section as date;
use crate::imf::identification::{msg_id, msg_list, MessageID, MessageIDList}; use crate::imf::identification::{msg_id, msg_list, MessageID, MessageIDList};
use crate::imf::mailbox::{mailbox, AddrSpec, MailboxList, MailboxRef}; use crate::imf::mailbox::{mailbox, AddrSpec, MailboxList, MailboxRef};
use crate::imf::mime::{version, Version}; use crate::imf::mime::{version, Version};
use crate::imf::trace::{received_log, return_path, ReceivedLog}; use crate::imf::trace::{received_log, return_path, ReceivedLog};
use crate::imf::Imf;
use crate::text::misc_token::{phrase_list, unstructured, PhraseList, Unstructured}; use crate::text::misc_token::{phrase_list, unstructured, PhraseList, Unstructured};
use crate::text::whitespace::obs_crlf;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum Field<'a> { pub enum Field<'a> {
@ -49,94 +42,34 @@ pub enum Field<'a> {
MIMEVersion(Version), MIMEVersion(Version),
} }
impl<'a> TryFrom<&header::Field<'a>> for Field<'a> {
pub fn field(input: &[u8]) -> IResult<&[u8], Field> { type Error = ();
terminated( fn try_from(f: &header::Field<'a>) -> Result<Self, Self::Error> {
alt(( let content = match f {
preceded(field_name(b"date"), map(date, Field::Date)), header::Field::Good(header::Kv2(key, value)) => {
preceded(field_name(b"from"), map(mailbox_list, Field::From)), match key.to_ascii_lowercase().as_slice() {
preceded(field_name(b"sender"), map(mailbox, Field::Sender)), b"date" => map(date, Field::Date)(value),
preceded(field_name(b"reply-to"), map(address_list, Field::ReplyTo)), b"from" => map(mailbox_list, Field::From)(value),
preceded(field_name(b"to"), map(address_list, Field::To)), b"sender" => map(mailbox, Field::Sender)(value),
preceded(field_name(b"cc"), map(address_list, Field::Cc)), b"reply-to" => map(address_list, Field::ReplyTo)(value),
preceded(field_name(b"bcc"), map(nullable_address_list, Field::Bcc)), b"to" => map(address_list, Field::To)(value),
preceded(field_name(b"message-id"), map(msg_id, Field::MessageID)), b"cc" => map(address_list, Field::Cc)(value),
preceded(field_name(b"in-reply-to"), map(msg_list, Field::InReplyTo)), b"bcc" => map(nullable_address_list, Field::Bcc)(value),
preceded(field_name(b"references"), map(msg_list, Field::References)), b"message-id" => map(msg_id, Field::MessageID)(value),
preceded(field_name(b"subject"), map(unstructured, Field::Subject)), b"in-reply-to" => map(msg_list, Field::InReplyTo)(value),
preceded(field_name(b"comments"), map(unstructured, Field::Comments)), b"references" => map(msg_list, Field::References)(value),
preceded(field_name(b"keywords"), map(phrase_list, Field::Keywords)), b"subject" => map(unstructured, Field::Subject)(value),
preceded( b"comments" => map(unstructured, Field::Comments)(value),
field_name(b"return-path"), b"keywords" => map(phrase_list, Field::Keywords)(value),
map(return_path, Field::ReturnPath), b"return-path" => map(return_path, Field::ReturnPath)(value),
), b"received" => map(received_log, Field::Received)(value),
preceded(field_name(b"received"), map(received_log, Field::Received)), b"mime-version" => map(version, Field::MIMEVersion)(value),
preceded( _ => return Err(()),
field_name(b"mime-version"),
map(version, Field::MIMEVersion),
),
)),
obs_crlf,
)(input)
}
pub fn imf(input: &[u8]) -> IResult<&[u8], Imf> {
map(header(field), |(known, unknown, bad)| {
let mut imf = Imf::from_iter(known);
imf.header_ext = unknown;
imf.header_bad = bad;
imf
})(input)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::imf::address::*;
use crate::imf::mailbox::*;
use crate::text::misc_token::*;
use chrono::{FixedOffset, TimeZone};
#[test]
fn test_header() {
let fullmail = b"Date: 7 Mar 2023 08:00:00 +0200
From: someone@example.com
To: someone_else@example.com
Subject: An RFC 822 formatted message
This is the plain text body of the message. Note the blank line
between the header information and the body of the message.";
assert_eq!(
imf(fullmail),
Ok((
&b"This is the plain text body of the message. Note the blank line\nbetween the header information and the body of the message."[..],
Imf {
date: Some(FixedOffset::east_opt(2 * 3600).unwrap().with_ymd_and_hms(2023, 3, 7, 8, 0, 0).unwrap()),
from: vec![MailboxRef {
name: None,
addrspec: AddrSpec {
local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"someone"[..]))]),
domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]),
}
}],
to: vec![AddressRef::Single(MailboxRef {
name: None,
addrspec: AddrSpec {
local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"someone_else"[..]))]),
domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]),
}
})],
subject: Some(Unstructured(vec![
UnstrToken::Plain(&b"An"[..]),
UnstrToken::Plain(&b"RFC"[..]),
UnstrToken::Plain(&b"822"[..]),
UnstrToken::Plain(&b"formatted"[..]),
UnstrToken::Plain(&b"message"[..]),
])),
..Imf::default()
} }
)), }
) _ => return Err(()),
};
content.map(|(_, content)| content).or(Err(()))
} }
} }

View file

@ -1,5 +1,4 @@
/// Parse and represent IMF (Internet Message Format) headers (RFC822, RFC5322) /// Parse and represent IMF (Internet Message Format) headers (RFC822, RFC5322)
pub mod address; pub mod address;
pub mod datetime; pub mod datetime;
pub mod field; pub mod field;
@ -8,13 +7,15 @@ pub mod mailbox;
pub mod mime; pub mod mime;
pub mod trace; pub mod trace;
use nom::{combinator::map, IResult};
use crate::header;
use crate::imf::address::AddressRef; use crate::imf::address::AddressRef;
use crate::imf::field::Field; use crate::imf::field::Field;
use crate::imf::identification::MessageID; use crate::imf::identification::MessageID;
use crate::imf::mailbox::{AddrSpec, MailboxRef}; use crate::imf::mailbox::{AddrSpec, MailboxRef};
use crate::imf::mime::Version; use crate::imf::mime::Version;
use crate::imf::trace::ReceivedLog; use crate::imf::trace::ReceivedLog;
use crate::header;
use crate::text::misc_token::{PhraseList, Unstructured}; use crate::text::misc_token::{PhraseList, Unstructured};
use chrono::{DateTime, FixedOffset}; use chrono::{DateTime, FixedOffset};
@ -50,19 +51,6 @@ pub struct Imf<'a> {
// MIME // MIME
pub mime_version: Option<Version>, pub mime_version: Option<Version>,
// Junk
pub header_ext: Vec<header::Kv<'a>>,
pub header_bad: Vec<&'a [u8]>,
}
impl<'a> Imf<'a> {
pub fn with_opt(mut self, opt: Vec<header::Kv<'a>>) -> Self {
self.header_ext = opt; self
}
pub fn with_bad(mut self, bad: Vec<&'a [u8]>) -> Self {
self.header_bad = bad; self
}
} }
//@FIXME min and max limits are not enforced, //@FIXME min and max limits are not enforced,
@ -92,3 +80,65 @@ impl<'a> FromIterator<Field<'a>> for Imf<'a> {
}) })
} }
} }
pub fn imf(input: &[u8]) -> IResult<&[u8], Imf> {
map(header::header_kv, |fields| {
fields
.iter()
.flat_map(Field::try_from)
.into_iter()
.collect::<Imf>()
})(input)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::imf::address::*;
use crate::imf::mailbox::*;
use crate::text::misc_token::*;
use chrono::{FixedOffset, TimeZone};
#[test]
fn test_header() {
let fullmail = b"Date: 7 Mar 2023 08:00:00 +0200
From: someone@example.com
To: someone_else@example.com
Subject: An RFC 822 formatted message
This is the plain text body of the message. Note the blank line
between the header information and the body of the message.";
assert_eq!(
imf(fullmail),
Ok((
&b"This is the plain text body of the message. Note the blank line\nbetween the header information and the body of the message."[..],
Imf {
date: Some(FixedOffset::east_opt(2 * 3600).unwrap().with_ymd_and_hms(2023, 3, 7, 8, 0, 0).unwrap()),
from: vec![MailboxRef {
name: None,
addrspec: AddrSpec {
local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"someone"[..]))]),
domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]),
}
}],
to: vec![AddressRef::Single(MailboxRef {
name: None,
addrspec: AddrSpec {
local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"someone_else"[..]))]),
domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]),
}
})],
subject: Some(Unstructured(vec![
UnstrToken::Plain(&b"An"[..]),
UnstrToken::Plain(&b"RFC"[..]),
UnstrToken::Plain(&b"822"[..]),
UnstrToken::Plain(&b"formatted"[..]),
UnstrToken::Plain(&b"message"[..]),
])),
..Imf::default()
}
)),
)
}
}

View file

@ -15,7 +15,10 @@ pub mod header;
/// Low-level email-specific text-based representation for data /// Low-level email-specific text-based representation for data
pub mod text; pub mod text;
use nom::IResult; /// Manipulate buffer of bytes
mod pointers;
use nom::{combinator::into, IResult};
/// Parse a whole email including its (MIME) body /// Parse a whole email including its (MIME) body
/// ///
@ -46,15 +49,17 @@ use nom::IResult;
/// This is the plain text body of the message. Note the blank line /// This is the plain text body of the message. Note the blank line
/// between the header information and the body of the message."#; /// between the header information and the body of the message."#;
/// ///
/// let (_, email) = eml_codec::email(input).unwrap(); /// let (_, email) = eml_codec::parse_message(input).unwrap();
/// println!( /// println!(
/// "{} raw message is:\n{}", /// "{} raw message is:\n{}",
/// email.imf.from[0].to_string(), /// email.imf.from[0].to_string(),
/// String::from_utf8_lossy(email.child.as_text().unwrap().body), /// String::from_utf8_lossy(email.child.as_text().unwrap().body),
/// ); /// );
/// ``` /// ```
pub fn email(input: &[u8]) -> IResult<&[u8], part::composite::Message> { pub fn parse_message(input: &[u8]) -> IResult<&[u8], part::composite::Message> {
part::composite::message(mime::MIME::<mime::r#type::Message>::default())(input) into(part::composite::message(mime::MIME::<
mime::r#type::DeductibleMessage,
>::default()))(input)
} }
/// Only extract the headers of the email that are part of the Internet Message Format spec /// Only extract the headers of the email that are part of the Internet Message Format spec
@ -87,13 +92,13 @@ pub fn email(input: &[u8]) -> IResult<&[u8], part::composite::Message> {
/// This is the plain text body of the message. Note the blank line /// This is the plain text body of the message. Note the blank line
/// between the header information and the body of the message."#; /// between the header information and the body of the message."#;
/// ///
/// let (_, imf) = eml_codec::imf(input).unwrap(); /// let (_, imf) = eml_codec::parse_imf(input).unwrap();
/// println!( /// println!(
/// "{} just sent you an email with subject \"{}\"", /// "{} just sent you an email with subject \"{}\"",
/// imf.from[0].to_string(), /// imf.from[0].to_string(),
/// imf.subject.unwrap().to_string(), /// imf.subject.unwrap().to_string(),
/// ); /// );
/// ``` /// ```
pub fn imf(input: &[u8]) -> IResult<&[u8], imf::Imf> { pub fn parse_imf(input: &[u8]) -> IResult<&[u8], imf::Imf> {
imf::field::imf(input) imf::imf(input)
} }

View file

@ -77,6 +77,12 @@ impl<'a> From<&'a [u8]> for EmailCharset {
} }
} }
impl ToString for EmailCharset {
fn to_string(&self) -> String {
self.as_str().into()
}
}
impl EmailCharset { impl EmailCharset {
pub fn as_str(&self) -> &'static str { pub fn as_str(&self) -> &'static str {
use EmailCharset::*; use EmailCharset::*;

View file

@ -1,16 +1,10 @@
use nom::{ use nom::combinator::map;
branch::alt,
combinator::map,
sequence::{preceded, terminated},
IResult,
};
use crate::header::{field_name}; use crate::header;
use crate::imf::identification::{msg_id, MessageID}; use crate::imf::identification::{msg_id, MessageID};
use crate::mime::mechanism::{mechanism, Mechanism}; use crate::mime::mechanism::{mechanism, Mechanism};
use crate::mime::r#type::{naive_type, NaiveType}; use crate::mime::r#type::{naive_type, NaiveType};
use crate::text::misc_token::{unstructured, Unstructured}; use crate::text::misc_token::{unstructured, Unstructured};
use crate::text::whitespace::obs_crlf;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum Content<'a> { pub enum Content<'a> {
@ -47,38 +41,38 @@ impl<'a> Content<'a> {
} }
} }
/* impl<'a> TryFrom<&header::Field<'a>> for Content<'a> {
pub fn to_mime<'a, T: WithDefaultType>(list: Vec<Content<'a>>) -> AnyMIMEWithDefault<'a, T> { type Error = ();
list.into_iter().collect::<AnyMIMEWithDefault<T>>() fn try_from(f: &header::Field<'a>) -> Result<Self, Self::Error> {
}*/ let content = match f {
header::Field::Good(header::Kv2(key, value)) => match key
.to_ascii_lowercase()
.as_slice()
{
b"content-type" => map(naive_type, Content::Type)(value),
b"content-transfer-encoding" => map(mechanism, Content::TransferEncoding)(value),
b"content-id" => map(msg_id, Content::ID)(value),
b"content-description" => map(unstructured, Content::Description)(value),
_ => return Err(()),
},
_ => return Err(()),
};
pub fn content(input: &[u8]) -> IResult<&[u8], Content> { //@TODO check that the full value is parsed, otherwise maybe log an error ?!
terminated( content.map(|(_, content)| content).or(Err(()))
alt(( }
preceded(field_name(b"content-type"), map(naive_type, Content::Type)),
preceded(
field_name(b"content-transfer-encoding"),
map(mechanism, Content::TransferEncoding),
),
preceded(field_name(b"content-id"), map(msg_id, Content::ID)),
preceded(
field_name(b"content-description"),
map(unstructured, Content::Description),
),
)),
obs_crlf,
)(input)
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use crate::header::{header}; use crate::header;
use crate::mime::charset::EmailCharset; //use crate::mime::charset::EmailCharset;
use crate::mime::r#type::*; use crate::mime::r#type::*;
use crate::text::misc_token::MIMEWord; use crate::text::misc_token::MIMEWord;
use crate::text::quoted::QuotedString; use crate::text::quoted::QuotedString;
/*
#[test] #[test]
fn test_content_type() { fn test_content_type() {
let (rest, content) = let (rest, content) =
@ -88,15 +82,15 @@ mod tests {
if let Content::Type(nt) = content { if let Content::Type(nt) = content {
assert_eq!( assert_eq!(
nt.to_type(), nt.to_type(),
AnyType::Text(Text { AnyType::Text(Deductible::Explicit(Text {
charset: EmailCharset::UTF_8, charset: Deductible::Explicit(EmailCharset::UTF_8),
subtype: TextSubtype::Plain, subtype: TextSubtype::Plain,
}), })),
); );
} else { } else {
panic!("Expected Content::Type, got {:?}", content); panic!("Expected Content::Type, got {:?}", content);
} }
} }*/
#[test] #[test]
fn test_header() { fn test_header() {
@ -116,7 +110,10 @@ This is a multipart message.
.as_bytes(); .as_bytes();
assert_eq!( assert_eq!(
map(header(content), |(k, _, _)| k)(fullmail), map(header::header_kv, |k| k
.iter()
.flat_map(Content::try_from)
.collect())(fullmail),
Ok(( Ok((
&b"This is a multipart message.\n\n"[..], &b"This is a multipart message.\n\n"[..],
vec![ vec![

View file

@ -18,6 +18,20 @@ pub enum Mechanism<'a> {
Base64, Base64,
Other(&'a [u8]), Other(&'a [u8]),
} }
impl<'a> ToString for Mechanism<'a> {
fn to_string(&self) -> String {
use Mechanism::*;
let buf: &[u8] = match self {
_7Bit => b"7bit",
_8Bit => b"8bit",
Binary => b"binary",
QuotedPrintable => b"quoted-printable",
Base64 => b"base64",
Other(x) => x,
};
String::from_utf8_lossy(buf).to_string()
}
}
pub fn mechanism(input: &[u8]) -> IResult<&[u8], Mechanism> { pub fn mechanism(input: &[u8]) -> IResult<&[u8], Mechanism> {
use Mechanism::*; use Mechanism::*;

View file

@ -10,33 +10,34 @@ pub mod mechanism;
/// Content-Type representation /// Content-Type representation
pub mod r#type; pub mod r#type;
use std::fmt;
use std::marker::PhantomData; use std::marker::PhantomData;
use crate::header;
use crate::imf::identification::MessageID; use crate::imf::identification::MessageID;
use crate::mime::field::Content; use crate::mime::field::Content;
use crate::mime::mechanism::Mechanism; use crate::mime::mechanism::Mechanism;
use crate::mime::r#type::{AnyType, NaiveType}; use crate::mime::r#type::{AnyType, NaiveType};
use crate::header;
use crate::text::misc_token::Unstructured; //Multipart, Message, Text, Binary}; use crate::text::misc_token::Unstructured; //Multipart, Message, Text, Binary};
#[derive(Debug, PartialEq, Clone)] #[derive(Debug, PartialEq, Clone)]
pub struct MIME<'a, T> { pub struct MIME<'a, T> {
pub interpreted: T, pub interpreted_type: T,
pub parsed: NaiveMIME<'a> pub fields: NaiveMIME<'a>,
} }
impl<'a> Default for MIME<'a, r#type::Text> { impl<'a> Default for MIME<'a, r#type::DeductibleText> {
fn default() -> Self { fn default() -> Self {
Self { Self {
interpreted: r#type::Text::default(), interpreted_type: r#type::DeductibleText::default(),
parsed: NaiveMIME::default(), fields: NaiveMIME::default(),
} }
} }
} }
impl<'a> Default for MIME<'a, r#type::Message> { impl<'a> Default for MIME<'a, r#type::DeductibleMessage> {
fn default() -> Self { fn default() -> Self {
Self { Self {
interpreted: r#type::Message::default(), interpreted_type: r#type::DeductibleMessage::default(),
parsed: NaiveMIME::default(), fields: NaiveMIME::default(),
} }
} }
} }
@ -44,10 +45,20 @@ impl<'a> Default for MIME<'a, r#type::Message> {
#[derive(Debug, PartialEq, Clone)] #[derive(Debug, PartialEq, Clone)]
pub enum AnyMIME<'a> { pub enum AnyMIME<'a> {
Mult(MIME<'a, r#type::Multipart>), Mult(MIME<'a, r#type::Multipart>),
Msg(MIME<'a, r#type::Message>), Msg(MIME<'a, r#type::DeductibleMessage>),
Txt(MIME<'a, r#type::Text>), Txt(MIME<'a, r#type::DeductibleText>),
Bin(MIME<'a, r#type::Binary>), Bin(MIME<'a, r#type::Binary>),
} }
impl<'a> AnyMIME<'a> {
pub fn fields(&self) -> &NaiveMIME<'a> {
match self {
Self::Mult(v) => &v.fields,
Self::Msg(v) => &v.fields,
Self::Txt(v) => &v.fields,
Self::Bin(v) => &v.fields,
}
}
}
impl<'a, T: WithDefaultType> From<AnyMIMEWithDefault<'a, T>> for AnyMIME<'a> { impl<'a, T: WithDefaultType> From<AnyMIMEWithDefault<'a, T>> for AnyMIME<'a> {
fn from(a: AnyMIMEWithDefault<'a, T>) -> Self { fn from(a: AnyMIMEWithDefault<'a, T>) -> Self {
@ -55,21 +66,32 @@ impl<'a, T: WithDefaultType> From<AnyMIMEWithDefault<'a, T>> for AnyMIME<'a> {
} }
} }
#[derive(Debug, PartialEq, Default, Clone)] #[derive(PartialEq, Default, Clone)]
pub struct NaiveMIME<'a> { pub struct NaiveMIME<'a> {
pub ctype: Option<NaiveType<'a>>, pub ctype: Option<NaiveType<'a>>,
pub transfer_encoding: Mechanism<'a>, pub transfer_encoding: Mechanism<'a>,
pub id: Option<MessageID<'a>>, pub id: Option<MessageID<'a>>,
pub description: Option<Unstructured<'a>>, pub description: Option<Unstructured<'a>>,
pub header_ext: Vec<header::Kv<'a>>, pub kv: Vec<header::Field<'a>>,
pub header_bad: Vec<&'a [u8]>, pub raw: &'a [u8],
}
impl<'a> fmt::Debug for NaiveMIME<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt.debug_struct("NaiveMime")
.field("ctype", &self.ctype)
.field("transfer_encoding", &self.transfer_encoding)
.field("id", &self.id)
.field("description", &self.description)
.field("kv", &self.kv)
.field("raw", &String::from_utf8_lossy(self.raw))
.finish()
}
} }
impl<'a> FromIterator<Content<'a>> for NaiveMIME<'a> { impl<'a> FromIterator<Content<'a>> for NaiveMIME<'a> {
fn from_iter<I: IntoIterator<Item = Content<'a>>>(it: I) -> Self { fn from_iter<I: IntoIterator<Item = Content<'a>>>(it: I) -> Self {
it.into_iter().fold( it.into_iter()
NaiveMIME::default(), .fold(NaiveMIME::default(), |mut section, field| {
|mut section, field| {
match field { match field {
Content::Type(v) => section.ctype = Some(v), Content::Type(v) => section.ctype = Some(v),
Content::TransferEncoding(v) => section.transfer_encoding = v, Content::TransferEncoding(v) => section.transfer_encoding = v,
@ -77,25 +99,29 @@ impl<'a> FromIterator<Content<'a>> for NaiveMIME<'a> {
Content::Description(v) => section.description = Some(v), Content::Description(v) => section.description = Some(v),
}; };
section section
}, })
)
} }
} }
impl<'a> NaiveMIME<'a> { impl<'a> NaiveMIME<'a> {
pub fn with_opt(mut self, opt: Vec<header::Kv<'a>>) -> Self { pub fn with_kv(mut self, fields: Vec<header::Field<'a>>) -> Self {
self.header_ext = opt; self self.kv = fields;
self
} }
pub fn with_bad(mut self, bad: Vec<&'a [u8]>) -> Self { pub fn with_raw(mut self, raw: &'a [u8]) -> Self {
self.header_bad = bad; self self.raw = raw;
self
} }
pub fn to_interpreted<T: WithDefaultType>(self) -> AnyMIME<'a> { pub fn to_interpreted<T: WithDefaultType>(self) -> AnyMIME<'a> {
self.ctype.as_ref().map(|c| c.to_type()).unwrap_or(T::default_type()).to_mime(self).into() self.ctype
.as_ref()
.map(|c| c.to_type())
.unwrap_or(T::default_type())
.to_mime(self)
.into()
} }
} }
pub trait WithDefaultType { pub trait WithDefaultType {
fn default_type() -> AnyType; fn default_type() -> AnyType;
} }
@ -103,13 +129,13 @@ pub trait WithDefaultType {
pub struct WithGenericDefault {} pub struct WithGenericDefault {}
impl WithDefaultType for WithGenericDefault { impl WithDefaultType for WithGenericDefault {
fn default_type() -> AnyType { fn default_type() -> AnyType {
AnyType::Text(r#type::Text::default()) AnyType::Text(r#type::DeductibleText::default())
} }
} }
pub struct WithDigestDefault {} pub struct WithDigestDefault {}
impl WithDefaultType for WithDigestDefault { impl WithDefaultType for WithDigestDefault {
fn default_type() -> AnyType { fn default_type() -> AnyType {
AnyType::Message(r#type::Message::default()) AnyType::Message(r#type::DeductibleMessage::default())
} }
} }

View file

@ -5,19 +5,29 @@ use nom::{
sequence::{preceded, terminated, tuple}, sequence::{preceded, terminated, tuple},
IResult, IResult,
}; };
use std::fmt;
use crate::mime::charset::EmailCharset; use crate::mime::charset::EmailCharset;
use crate::mime::{AnyMIME, NaiveMIME, MIME};
use crate::text::misc_token::{mime_word, MIMEWord}; use crate::text::misc_token::{mime_word, MIMEWord};
use crate::text::words::mime_atom; use crate::text::words::mime_atom;
use crate::mime::{AnyMIME, MIME, NaiveMIME};
// --------- NAIVE TYPE // --------- NAIVE TYPE
#[derive(Debug, PartialEq, Clone)] #[derive(PartialEq, Clone)]
pub struct NaiveType<'a> { pub struct NaiveType<'a> {
pub main: &'a [u8], pub main: &'a [u8],
pub sub: &'a [u8], pub sub: &'a [u8],
pub params: Vec<Parameter<'a>>, pub params: Vec<Parameter<'a>>,
} }
impl<'a> fmt::Debug for NaiveType<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt.debug_struct("mime::NaiveType")
.field("main", &String::from_utf8_lossy(self.main))
.field("sub", &String::from_utf8_lossy(self.sub))
.field("params", &self.params)
.finish()
}
}
impl<'a> NaiveType<'a> { impl<'a> NaiveType<'a> {
pub fn to_type(&self) -> AnyType { pub fn to_type(&self) -> AnyType {
self.into() self.into()
@ -30,11 +40,20 @@ pub fn naive_type(input: &[u8]) -> IResult<&[u8], NaiveType> {
)(input) )(input)
} }
#[derive(Debug, PartialEq, Clone)] #[derive(PartialEq, Clone)]
pub struct Parameter<'a> { pub struct Parameter<'a> {
pub name: &'a [u8], pub name: &'a [u8],
pub value: MIMEWord<'a>, pub value: MIMEWord<'a>,
} }
impl<'a> fmt::Debug for Parameter<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt.debug_struct("mime::Parameter")
.field("name", &String::from_utf8_lossy(self.name))
.field("value", &self.value)
.finish()
}
}
pub fn parameter(input: &[u8]) -> IResult<&[u8], Parameter> { pub fn parameter(input: &[u8]) -> IResult<&[u8], Parameter> {
map( map(
tuple((mime_atom, tag(b"="), mime_word)), tuple((mime_atom, tag(b"="), mime_word)),
@ -51,10 +70,10 @@ pub fn parameter_list(input: &[u8]) -> IResult<&[u8], Vec<Parameter>> {
pub enum AnyType { pub enum AnyType {
// Composite types // Composite types
Multipart(Multipart), Multipart(Multipart),
Message(Message), Message(Deductible<Message>),
// Discrete types // Discrete types
Text(Text), Text(Deductible<Text>),
Binary(Binary), Binary(Binary),
} }
@ -63,30 +82,60 @@ impl<'a> From<&'a NaiveType<'a>> for AnyType {
match nt.main.to_ascii_lowercase().as_slice() { match nt.main.to_ascii_lowercase().as_slice() {
b"multipart" => Multipart::try_from(nt) b"multipart" => Multipart::try_from(nt)
.map(Self::Multipart) .map(Self::Multipart)
.unwrap_or(Self::Text(Text::default())), .unwrap_or(Self::Text(DeductibleText::default())),
b"message" => Self::Message(Message::from(nt)), b"message" => Self::Message(DeductibleMessage::Explicit(Message::from(nt))),
b"text" => Self::Text(Text::from(nt)), b"text" => Self::Text(DeductibleText::Explicit(Text::from(nt))),
_ => Self::Binary(Binary::default()), _ => Self::Binary(Binary::default()),
} }
} }
} }
impl<'a> AnyType { impl<'a> AnyType {
pub fn to_mime(self, parsed: NaiveMIME<'a>) -> AnyMIME<'a> { pub fn to_mime(self, fields: NaiveMIME<'a>) -> AnyMIME<'a> {
match self { match self {
Self::Multipart(interpreted) => AnyMIME::Mult(MIME::<Multipart> { interpreted, parsed }), Self::Multipart(interpreted_type) => AnyMIME::Mult(MIME::<Multipart> {
Self::Message(interpreted) => AnyMIME::Msg(MIME::<Message> { interpreted, parsed }), interpreted_type,
Self::Text(interpreted) => AnyMIME::Txt(MIME::<Text> { interpreted, parsed }), fields,
Self::Binary(interpreted) => AnyMIME::Bin(MIME::<Binary> { interpreted, parsed }), }),
Self::Message(interpreted_type) => AnyMIME::Msg(MIME::<DeductibleMessage> {
interpreted_type,
fields,
}),
Self::Text(interpreted_type) => AnyMIME::Txt(MIME::<DeductibleText> {
interpreted_type,
fields,
}),
Self::Binary(interpreted_type) => AnyMIME::Bin(MIME::<Binary> {
interpreted_type,
fields,
}),
} }
} }
} }
/// A value tagged with how it was obtained.
///
/// The type itself carries no `Default` bound on `T`: only the `Default`
/// implementation needs one, so the bound lives there and the container stays
/// usable (and derivable) for any payload type.
#[derive(Debug, PartialEq, Clone)]
pub enum Deductible<T> {
    /// The value was not stated and a fallback was chosen instead; this is
    /// the variant produced by `Deductible::default()`.
    Inferred(T),
    /// The value was stated explicitly.
    Explicit(T),
}
impl<T: Default> Default for Deductible<T> {
fn default() -> Self {
Self::Inferred(T::default())
}
}
// REAL PARTS
#[derive(Debug, PartialEq, Clone)] #[derive(Debug, PartialEq, Clone)]
pub struct Multipart { pub struct Multipart {
pub subtype: MultipartSubtype, pub subtype: MultipartSubtype,
pub boundary: String, pub boundary: String,
} }
impl Multipart {
    /// Returns the top-level media type name, which is always `"multipart"`.
    pub fn main_type(&self) -> String {
        String::from("multipart")
    }
}
impl<'a> TryFrom<&'a NaiveType<'a>> for Multipart { impl<'a> TryFrom<&'a NaiveType<'a>> for Multipart {
type Error = (); type Error = ();
@ -111,6 +160,19 @@ pub enum MultipartSubtype {
Report, Report,
Unknown, Unknown,
} }
impl ToString for MultipartSubtype {
fn to_string(&self) -> String {
match self {
Self::Alternative => "alternative",
Self::Mixed => "mixed",
Self::Digest => "digest",
Self::Parallel => "parallel",
Self::Report => "report",
Self::Unknown => "mixed",
}
.into()
}
}
impl<'a> From<&NaiveType<'a>> for MultipartSubtype { impl<'a> From<&NaiveType<'a>> for MultipartSubtype {
fn from(nt: &NaiveType<'a>) -> Self { fn from(nt: &NaiveType<'a>) -> Self {
match nt.sub.to_ascii_lowercase().as_slice() { match nt.sub.to_ascii_lowercase().as_slice() {
@ -125,28 +187,61 @@ impl<'a> From<&NaiveType<'a>> for MultipartSubtype {
} }
#[derive(Debug, PartialEq, Default, Clone)] #[derive(Debug, PartialEq, Default, Clone)]
pub enum Message { pub enum MessageSubtype {
#[default] #[default]
RFC822, RFC822,
Partial, Partial,
External, External,
Unknown, Unknown,
} }
impl ToString for MessageSubtype {
fn to_string(&self) -> String {
match self {
Self::RFC822 => "rfc822",
Self::Partial => "partial",
Self::External => "external",
Self::Unknown => "rfc822",
}
.into()
}
}
/// A [`Message`] type descriptor tagged as either explicit or inferred.
pub type DeductibleMessage = Deductible<Message>;
/// Interpreted content type of a `message/*` part.
#[derive(Debug, PartialEq, Default, Clone)]
pub struct Message {
/// Which `message` subtype this part uses.
pub subtype: MessageSubtype,
}
impl<'a> From<&NaiveType<'a>> for Message { impl<'a> From<&NaiveType<'a>> for Message {
fn from(nt: &NaiveType<'a>) -> Self { fn from(nt: &NaiveType<'a>) -> Self {
match nt.sub.to_ascii_lowercase().as_slice() { match nt.sub.to_ascii_lowercase().as_slice() {
b"rfc822" => Self::RFC822, b"rfc822" => Self {
b"partial" => Self::Partial, subtype: MessageSubtype::RFC822,
b"external" => Self::External, },
_ => Self::Unknown, b"partial" => Self {
subtype: MessageSubtype::Partial,
},
b"external" => Self {
subtype: MessageSubtype::External,
},
_ => Self {
subtype: MessageSubtype::Unknown,
},
}
}
}
impl From<Deductible<Message>> for Message {
fn from(d: Deductible<Message>) -> Self {
match d {
Deductible::Inferred(t) | Deductible::Explicit(t) => t,
} }
} }
} }
/// A [`Text`] type descriptor tagged as either explicit or inferred.
pub type DeductibleText = Deductible<Text>;
#[derive(Debug, PartialEq, Default, Clone)] #[derive(Debug, PartialEq, Default, Clone)]
pub struct Text { pub struct Text {
pub subtype: TextSubtype, pub subtype: TextSubtype,
pub charset: EmailCharset, pub charset: Deductible<EmailCharset>,
} }
impl<'a> From<&NaiveType<'a>> for Text { impl<'a> From<&NaiveType<'a>> for Text {
fn from(nt: &NaiveType<'a>) -> Self { fn from(nt: &NaiveType<'a>) -> Self {
@ -156,8 +251,15 @@ impl<'a> From<&NaiveType<'a>> for Text {
.params .params
.iter() .iter()
.find(|x| x.name.to_ascii_lowercase().as_slice() == b"charset") .find(|x| x.name.to_ascii_lowercase().as_slice() == b"charset")
.map(|x| EmailCharset::from(x.value.to_string().as_bytes())) .map(|x| Deductible::Explicit(EmailCharset::from(x.value.to_string().as_bytes())))
.unwrap_or(EmailCharset::US_ASCII), .unwrap_or(Deductible::Inferred(EmailCharset::US_ASCII)),
}
}
}
impl From<Deductible<Text>> for Text {
fn from(d: Deductible<Text>) -> Self {
match d {
Deductible::Inferred(t) | Deductible::Explicit(t) => t,
} }
} }
} }
@ -169,6 +271,15 @@ pub enum TextSubtype {
Html, Html,
Unknown, Unknown,
} }
impl ToString for TextSubtype {
fn to_string(&self) -> String {
match self {
Self::Plain | Self::Unknown => "plain",
Self::Html => "html",
}
.into()
}
}
impl<'a> From<&NaiveType<'a>> for TextSubtype { impl<'a> From<&NaiveType<'a>> for TextSubtype {
fn from(nt: &NaiveType<'a>) -> Self { fn from(nt: &NaiveType<'a>) -> Self {
match nt.sub.to_ascii_lowercase().as_slice() { match nt.sub.to_ascii_lowercase().as_slice() {
@ -186,6 +297,7 @@ pub struct Binary {}
mod tests { mod tests {
use super::*; use super::*;
use crate::mime::charset::EmailCharset; use crate::mime::charset::EmailCharset;
use crate::mime::r#type::Deductible;
use crate::text::quoted::QuotedString; use crate::text::quoted::QuotedString;
#[test] #[test]
@ -219,10 +331,10 @@ mod tests {
assert_eq!( assert_eq!(
nt.to_type(), nt.to_type(),
AnyType::Text(Text { AnyType::Text(Deductible::Explicit(Text {
charset: EmailCharset::UTF_8, charset: Deductible::Explicit(EmailCharset::UTF_8),
subtype: TextSubtype::Plain, subtype: TextSubtype::Plain,
}) }))
); );
} }
@ -244,7 +356,12 @@ mod tests {
let (rest, nt) = naive_type(b"message/rfc822").unwrap(); let (rest, nt) = naive_type(b"message/rfc822").unwrap();
assert_eq!(rest, &[]); assert_eq!(rest, &[]);
assert_eq!(nt.to_type(), AnyType::Message(Message::RFC822),); assert_eq!(
nt.to_type(),
AnyType::Message(Deductible::Explicit(Message {
subtype: MessageSubtype::RFC822
}))
);
} }
#[test] #[test]

View file

@ -8,7 +8,7 @@ fn main() {
let mut rawmail = Vec::new(); let mut rawmail = Vec::new();
io::stdin().lock().read_to_end(&mut rawmail).unwrap(); io::stdin().lock().read_to_end(&mut rawmail).unwrap();
let (_, eml) = eml_codec::email(&rawmail).unwrap(); let (_, eml) = eml_codec::parse_message(&rawmail).unwrap();
println!("{:#?}", eml); println!("{:#?}", eml);
assert!(eml.imf.date.is_some()); assert!(eml.imf.date.is_some());
assert!(!eml.imf.from.is_empty()); assert!(!eml.imf.from.is_empty());

View file

@ -1,23 +1,49 @@
use nom::IResult; use nom::IResult;
use std::fmt;
use crate::header::{header, self}; use crate::header;
use crate::imf; use crate::imf;
use crate::mime; use crate::mime;
use crate::part::{self, AnyPart, field::MixedField}; use crate::part::{self, AnyPart};
use crate::pointers;
use crate::text::boundary::{boundary, Delimiter}; use crate::text::boundary::{boundary, Delimiter};
//--- Multipart //--- Multipart
#[derive(Debug, PartialEq)] #[derive(PartialEq)]
pub struct Multipart<'a> { pub struct Multipart<'a> {
pub interpreted: mime::MIME<'a, mime::r#type::Multipart>, pub mime: mime::MIME<'a, mime::r#type::Multipart>,
pub children: Vec<AnyPart<'a>>, pub children: Vec<AnyPart<'a>>,
pub preamble: &'a [u8], pub raw_part_inner: &'a [u8],
pub epilogue: &'a [u8], pub raw_part_outer: &'a [u8],
}
impl<'a> fmt::Debug for Multipart<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt.debug_struct("part::Multipart")
.field("mime", &self.mime)
.field("children", &self.children)
.field(
"raw_part_inner",
&String::from_utf8_lossy(self.raw_part_inner),
)
.field(
"raw_part_outer",
&String::from_utf8_lossy(self.raw_part_outer),
)
.finish()
}
} }
impl<'a> Multipart<'a> { impl<'a> Multipart<'a> {
pub fn with_epilogue(mut self, e: &'a [u8]) -> Self { pub fn preamble(&self) -> &'a [u8] {
self.epilogue = e; pointers::parsed(self.raw_part_outer, self.raw_part_inner)
self }
pub fn epilogue(&self) -> &'a [u8] {
pointers::rest(self.raw_part_outer, self.raw_part_inner)
}
pub fn preamble_and_body(&self) -> &'a [u8] {
pointers::with_preamble(self.raw_part_outer, self.raw_part_inner)
}
pub fn body_and_epilogue(&self) -> &'a [u8] {
pointers::with_epilogue(self.raw_part_outer, self.raw_part_inner)
} }
} }
@ -27,19 +53,25 @@ pub fn multipart<'a>(
let m = m.clone(); let m = m.clone();
move |input| { move |input| {
let bound = m.interpreted.boundary.as_bytes(); // init
let (mut input_loop, preamble) = part::part_raw(bound)(input)?; let outer_orig = input;
let bound = m.interpreted_type.boundary.as_bytes();
let mut mparts: Vec<AnyPart> = vec![]; let mut mparts: Vec<AnyPart> = vec![];
// skip preamble
let (mut input_loop, _) = part::part_raw(bound)(input)?;
let inner_orig = input_loop;
loop { loop {
let input = match boundary(bound)(input_loop) { let input = match boundary(bound)(input_loop) {
Err(_) => { Err(_) => {
return Ok(( return Ok((
input_loop, input_loop,
Multipart { Multipart {
interpreted: m.clone(), mime: m.clone(),
children: mparts, children: mparts,
preamble, raw_part_inner: pointers::parsed(inner_orig, input_loop),
epilogue: &[], raw_part_outer: pointers::parsed(outer_orig, input_loop),
}, },
)) ))
} }
@ -47,10 +79,13 @@ pub fn multipart<'a>(
return Ok(( return Ok((
inp, inp,
Multipart { Multipart {
interpreted: m.clone(), mime: m.clone(),
children: mparts, children: mparts,
preamble, raw_part_inner: pointers::parsed(inner_orig, inp),
epilogue: &[], raw_part_outer: pointers::parsed(
outer_orig,
&outer_orig[outer_orig.len()..],
),
}, },
)) ))
} }
@ -58,23 +93,40 @@ pub fn multipart<'a>(
}; };
// parse mime headers, otherwise pick default mime // parse mime headers, otherwise pick default mime
let (input, naive_mime) = match header(mime::field::content)(input) { let (input, naive_mime) = match header::header_kv(input) {
Ok((input, (known, unknown, bad))) => (input, known.into_iter().collect::<mime::NaiveMIME>().with_opt(unknown).with_bad(bad)), Ok((input_eom, fields)) => {
let raw_hdrs = pointers::parsed(input, input_eom);
let mime = fields
.iter()
.flat_map(mime::field::Content::try_from)
.into_iter()
.collect::<mime::NaiveMIME>();
let mime = mime.with_kv(fields).with_raw(raw_hdrs);
(input_eom, mime)
}
Err(_) => (input, mime::NaiveMIME::default()), Err(_) => (input, mime::NaiveMIME::default()),
}; };
// interpret mime according to context // interpret mime according to context
let mime = match m.interpreted.subtype { let mime = match m.interpreted_type.subtype {
mime::r#type::MultipartSubtype::Digest => naive_mime.to_interpreted::<mime::WithDigestDefault>().into(), mime::r#type::MultipartSubtype::Digest => naive_mime
_ => naive_mime.to_interpreted::<mime::WithGenericDefault>().into(), .to_interpreted::<mime::WithDigestDefault>()
.into(),
_ => naive_mime
.to_interpreted::<mime::WithGenericDefault>()
.into(),
}; };
// parse raw part // parse raw part
let (input, rpart) = part::part_raw(bound)(input)?; let (input, rpart) = part::part_raw(bound)(input)?;
// parse mime body // parse mime body
mparts.push(part::to_anypart(mime, rpart)); // -- we do not keep the input as we are using the
// part_raw function as our cursor here.
let (_, part) = part::anypart(mime)(rpart)?;
mparts.push(part);
input_loop = input; input_loop = input;
} }
@ -83,47 +135,72 @@ pub fn multipart<'a>(
//--- Message //--- Message
#[derive(Debug, PartialEq)] #[derive(PartialEq)]
pub struct Message<'a> { pub struct Message<'a> {
pub interpreted: mime::MIME<'a, mime::r#type::Message>, pub mime: mime::MIME<'a, mime::r#type::DeductibleMessage>,
pub imf: imf::Imf<'a>, pub imf: imf::Imf<'a>,
pub child: Box<AnyPart<'a>>, pub child: Box<AnyPart<'a>>,
pub epilogue: &'a [u8],
pub raw_part: &'a [u8],
pub raw_headers: &'a [u8],
pub raw_body: &'a [u8],
} }
impl<'a> Message<'a> { impl<'a> fmt::Debug for Message<'a> {
pub fn with_epilogue(mut self, e: &'a [u8]) -> Self { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
self.epilogue = e; fmt.debug_struct("part::Message")
self .field("mime", &self.mime)
.field("imf", &self.imf)
.field("child", &self.child)
.field("raw_part", &String::from_utf8_lossy(self.raw_part))
.field("raw_headers", &String::from_utf8_lossy(self.raw_headers))
.field("raw_body", &String::from_utf8_lossy(self.raw_body))
.finish()
} }
} }
pub fn message<'a>( pub fn message<'a>(
m: mime::MIME<'a, mime::r#type::Message>, m: mime::MIME<'a, mime::r#type::DeductibleMessage>,
) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Message<'a>> { ) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Message<'a>> {
move |input: &[u8]| { move |input: &[u8]| {
let orig = input;
// parse header fields // parse header fields
let (input, (known, unknown, bad)): (_, (Vec::<MixedField>, Vec<header::Kv>, Vec<&[u8]>)) = let (input, headers) = header::header_kv(input)?;
header(part::field::mixed_field)(input)?;
// extract raw parts 1/2
let raw_headers = pointers::parsed(orig, input);
let body_orig = input;
//---------------
// aggregate header fields // aggregate header fields
let (naive_mime, imf) = part::field::sections(known); let (naive_mime, imf) = part::field::split_and_build(&headers);
// attach bad headers to imf // Bind headers to mime
let imf = imf.with_opt(unknown).with_bad(bad); let naive_mime = naive_mime.with_kv(headers);
// interpret headers to choose a mime type // interpret headers to choose the child mime type
let in_mime = naive_mime.to_interpreted::<mime::WithGenericDefault>().into(); let in_mime = naive_mime
.with_raw(raw_headers)
.to_interpreted::<mime::WithGenericDefault>()
.into();
//---------------
// parse this mimetype // parse a part following this mime specification
let part = part::to_anypart(in_mime, input); let (input, part) = part::anypart(in_mime)(input)?;
// extract raw parts 2/2
let raw_body = pointers::parsed(body_orig, input);
let raw_part = pointers::parsed(orig, input);
Ok(( Ok((
&[], input,
Message { Message {
interpreted: m.clone(), mime: m.clone(),
imf, imf,
raw_part,
raw_headers,
raw_body,
child: Box::new(part), child: Box::new(part),
epilogue: &[],
}, },
)) ))
} }
@ -135,22 +212,21 @@ mod tests {
use crate::part::discrete::Text; use crate::part::discrete::Text;
use crate::part::AnyPart; use crate::part::AnyPart;
use crate::text::encoding::{Base64Word, EncodedWord, QuotedChunk, QuotedWord}; use crate::text::encoding::{Base64Word, EncodedWord, QuotedChunk, QuotedWord};
use crate::text::misc_token::{Phrase, UnstrToken, Unstructured, Word, MIMEWord}; use crate::text::misc_token::{MIMEWord, Phrase, UnstrToken, Unstructured, Word};
use crate::text::quoted::QuotedString; use crate::text::quoted::QuotedString;
use chrono::{FixedOffset, TimeZone}; use chrono::{FixedOffset, TimeZone};
#[test] #[test]
fn test_multipart() { fn test_multipart() {
let base_mime = mime::MIME { let base_mime = mime::MIME {
interpreted: mime::r#type::Multipart { interpreted_type: mime::r#type::Multipart {
subtype: mime::r#type::MultipartSubtype::Alternative, subtype: mime::r#type::MultipartSubtype::Alternative,
boundary: "simple boundary".to_string(), boundary: "simple boundary".to_string(),
}, },
parsed: mime::NaiveMIME::default(), fields: mime::NaiveMIME::default(),
}; };
assert_eq!( let input = b"This is the preamble. It is to be ignored, though it
multipart(base_mime.clone())(b"This is the preamble. It is to be ignored, though it
is a handy place for composition agents to include an is a handy place for composition agents to include an
explanatory note to non-MIME conformant readers. explanatory note to non-MIME conformant readers.
@ -167,30 +243,50 @@ It DOES end with a linebreak.
--simple boundary-- --simple boundary--
This is the epilogue. It is also to be ignored. This is the epilogue. It is also to be ignored.
"), ";
let inner = b"
--simple boundary
This is implicitly typed plain US-ASCII text.
It does NOT end with a linebreak.
--simple boundary
Content-type: text/plain; charset=us-ascii
This is explicitly typed plain US-ASCII text.
It DOES end with a linebreak.
--simple boundary--
";
assert_eq!(
multipart(base_mime.clone())(input),
Ok((&b"\nThis is the epilogue. It is also to be ignored.\n"[..], Ok((&b"\nThis is the epilogue. It is also to be ignored.\n"[..],
Multipart { Multipart {
interpreted: base_mime, mime: base_mime,
preamble: &b"This is the preamble. It is to be ignored, though it\nis a handy place for composition agents to include an\nexplanatory note to non-MIME conformant readers.\n"[..], raw_part_outer: input,
epilogue: &b""[..], raw_part_inner: inner,
children: vec![ children: vec![
AnyPart::Txt(Text { AnyPart::Txt(Text {
interpreted: mime::MIME { mime: mime::MIME {
interpreted: mime::r#type::Text { interpreted_type: mime::r#type::Deductible::Inferred(mime::r#type::Text {
subtype: mime::r#type::TextSubtype::Plain, subtype: mime::r#type::TextSubtype::Plain,
charset: mime::charset::EmailCharset::US_ASCII, charset: mime::r#type::Deductible::Inferred(mime::charset::EmailCharset::US_ASCII),
}),
fields: mime::NaiveMIME {
raw: &b"\n"[..],
..mime::NaiveMIME::default()
}, },
parsed: mime::NaiveMIME::default(),
}, },
body: &b"This is implicitly typed plain US-ASCII text.\nIt does NOT end with a linebreak."[..], body: &b"This is implicitly typed plain US-ASCII text.\nIt does NOT end with a linebreak."[..],
}), }),
AnyPart::Txt(Text { AnyPart::Txt(Text {
interpreted: mime::MIME { mime: mime::MIME {
interpreted: mime::r#type::Text { interpreted_type: mime::r#type::Deductible::Explicit(mime::r#type::Text {
subtype: mime::r#type::TextSubtype::Plain, subtype: mime::r#type::TextSubtype::Plain,
charset: mime::charset::EmailCharset::US_ASCII, charset: mime::r#type::Deductible::Explicit(mime::charset::EmailCharset::US_ASCII),
}, }),
parsed: mime::NaiveMIME { fields: mime::NaiveMIME {
ctype: Some(mime::r#type::NaiveType { ctype: Some(mime::r#type::NaiveType {
main: &b"text"[..], main: &b"text"[..],
sub: &b"plain"[..], sub: &b"plain"[..],
@ -201,6 +297,10 @@ This is the epilogue. It is also to be ignored.
} }
] ]
}), }),
raw: &b"Content-type: text/plain; charset=us-ascii\n\n"[..],
kv: vec![
header::Field::Good(header::Kv2(&b"Content-type"[..], &b"text/plain; charset=us-ascii"[..]))
],
..mime::NaiveMIME::default() ..mime::NaiveMIME::default()
}, },
}, },
@ -259,14 +359,90 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
"# "#
.as_bytes(); .as_bytes();
let base_mime = mime::MIME::<mime::r#type::Message>::default(); let hdrs = br#"Date: Sat, 8 Jul 2023 07:14:29 +0200
From: Grrrnd Zero <grrrndzero@example.org>
To: John Doe <jdoe@machine.example>
CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>
Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
X-Unknown: something something
Bad entry
on multiple lines
Message-ID: <NTAxNzA2AC47634Y366BAMTY4ODc5MzQyODY0ODY5@www.grrrndzero.org>
MIME-Version: 1.0
Content-Type: multipart/alternative;
boundary="b1_e376dc71bafc953c0b0fdeb9983a9956"
Content-Transfer-Encoding: 7bit
"#;
let body = br#"This is a multi-part message in MIME format.
--b1_e376dc71bafc953c0b0fdeb9983a9956
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: quoted-printable
GZ
OoOoO
oOoOoOoOo
oOoOoOoOoOoOoOoOo
oOoOoOoOoOoOoOoOoOoOoOo
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
--b1_e376dc71bafc953c0b0fdeb9983a9956
Content-Type: text/html; charset=us-ascii
<div style="text-align: center;"><strong>GZ</strong><br />
OoOoO<br />
oOoOoOoOo<br />
oOoOoOoOoOoOoOoOo<br />
oOoOoOoOoOoOoOoOoOoOoOo<br />
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo<br />
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
</div>
--b1_e376dc71bafc953c0b0fdeb9983a9956--
"#;
let inner = br#"
--b1_e376dc71bafc953c0b0fdeb9983a9956
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: quoted-printable
GZ
OoOoO
oOoOoOoOo
oOoOoOoOoOoOoOoOo
oOoOoOoOoOoOoOoOoOoOoOo
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
--b1_e376dc71bafc953c0b0fdeb9983a9956
Content-Type: text/html; charset=us-ascii
<div style="text-align: center;"><strong>GZ</strong><br />
OoOoO<br />
oOoOoOoOo<br />
oOoOoOoOoOoOoOoOo<br />
oOoOoOoOoOoOoOoOoOoOoOo<br />
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo<br />
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
</div>
--b1_e376dc71bafc953c0b0fdeb9983a9956--
"#;
let base_mime = mime::MIME::<mime::r#type::DeductibleMessage>::default();
assert_eq!( assert_eq!(
message(base_mime.clone())(fullmail), message(base_mime.clone())(fullmail),
Ok(( Ok((
&[][..], &[][..],
Message { Message {
interpreted: base_mime, mime: base_mime,
epilogue: &b""[..], raw_part: fullmail,
raw_headers: hdrs,
raw_body: body,
imf: imf::Imf { imf: imf::Imf {
date: Some(FixedOffset::east_opt(2 * 3600) date: Some(FixedOffset::east_opt(2 * 3600)
.unwrap() .unwrap()
@ -330,24 +506,15 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
right: &b"www.grrrndzero.org"[..], right: &b"www.grrrndzero.org"[..],
}), }),
mime_version: Some(imf::mime::Version { major: 1, minor: 0}), mime_version: Some(imf::mime::Version { major: 1, minor: 0}),
header_ext: vec![
header::Kv(&b"X-Unknown"[..], Unstructured(vec![
UnstrToken::Plain(&b"something"[..]),
UnstrToken::Plain(&b"something"[..]),
]))
],
header_bad: vec![
&b"Bad entry\n on multiple lines\n"[..],
],
..imf::Imf::default() ..imf::Imf::default()
}, },
child: Box::new(AnyPart::Mult(Multipart { child: Box::new(AnyPart::Mult(Multipart {
interpreted: mime::MIME { mime: mime::MIME {
interpreted: mime::r#type::Multipart { interpreted_type: mime::r#type::Multipart {
subtype: mime::r#type::MultipartSubtype::Alternative, subtype: mime::r#type::MultipartSubtype::Alternative,
boundary: "b1_e376dc71bafc953c0b0fdeb9983a9956".to_string(), boundary: "b1_e376dc71bafc953c0b0fdeb9983a9956".to_string(),
}, },
parsed: mime::NaiveMIME { fields: mime::NaiveMIME {
ctype: Some(mime::r#type::NaiveType { ctype: Some(mime::r#type::NaiveType {
main: &b"multipart"[..], main: &b"multipart"[..],
sub: &b"alternative"[..], sub: &b"alternative"[..],
@ -358,19 +525,33 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
} }
] ]
}), }),
raw: hdrs,
kv: vec![
header::Field::Good(header::Kv2(&b"Date"[..], &b"Sat, 8 Jul 2023 07:14:29 +0200"[..])),
header::Field::Good(header::Kv2(&b"From"[..], &b"Grrrnd Zero <grrrndzero@example.org>"[..])),
header::Field::Good(header::Kv2(&b"To"[..], &b"John Doe <jdoe@machine.example>"[..])),
header::Field::Good(header::Kv2(&b"CC"[..], &b"=?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>"[..])),
header::Field::Good(header::Kv2(&b"Subject"[..], &b"=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\n =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?="[..])),
header::Field::Good(header::Kv2(&b"X-Unknown"[..], &b"something something"[..])),
header::Field::Bad(&b"Bad entry\n on multiple lines\n"[..]),
header::Field::Good(header::Kv2(&b"Message-ID"[..], &b"<NTAxNzA2AC47634Y366BAMTY4ODc5MzQyODY0ODY5@www.grrrndzero.org>"[..])),
header::Field::Good(header::Kv2(&b"MIME-Version"[..], &b"1.0"[..])),
header::Field::Good(header::Kv2(&b"Content-Type"[..], &b"multipart/alternative;\n boundary=\"b1_e376dc71bafc953c0b0fdeb9983a9956\""[..])),
header::Field::Good(header::Kv2(&b"Content-Transfer-Encoding"[..], &b"7bit"[..])),
],
..mime::NaiveMIME::default() ..mime::NaiveMIME::default()
}, },
}, },
preamble: &b"This is a multi-part message in MIME format.\n"[..], raw_part_inner: inner,
epilogue: &b""[..], raw_part_outer: body,
children: vec![ children: vec![
AnyPart::Txt(Text { AnyPart::Txt(Text {
interpreted: mime::MIME { mime: mime::MIME {
interpreted: mime::r#type::Text { interpreted_type: mime::r#type::Deductible::Explicit(mime::r#type::Text {
subtype: mime::r#type::TextSubtype::Plain, subtype: mime::r#type::TextSubtype::Plain,
charset: mime::charset::EmailCharset::UTF_8, charset: mime::r#type::Deductible::Explicit(mime::charset::EmailCharset::UTF_8),
}, }),
parsed: mime::NaiveMIME { fields: mime::NaiveMIME {
ctype: Some(mime::r#type::NaiveType { ctype: Some(mime::r#type::NaiveType {
main: &b"text"[..], main: &b"text"[..],
sub: &b"plain"[..], sub: &b"plain"[..],
@ -382,19 +563,24 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
] ]
}), }),
transfer_encoding: mime::mechanism::Mechanism::QuotedPrintable, transfer_encoding: mime::mechanism::Mechanism::QuotedPrintable,
kv: vec![
header::Field::Good(header::Kv2(&b"Content-Type"[..], &b"text/plain; charset=utf-8"[..])),
header::Field::Good(header::Kv2(&b"Content-Transfer-Encoding"[..], &b"quoted-printable"[..])),
],
raw: &b"Content-Type: text/plain; charset=utf-8\nContent-Transfer-Encoding: quoted-printable\n\n"[..],
..mime::NaiveMIME::default() ..mime::NaiveMIME::default()
} }
}, },
body: &b"GZ\nOoOoO\noOoOoOoOo\noOoOoOoOoOoOoOoOo\noOoOoOoOoOoOoOoOoOoOoOo\noOoOoOoOoOoOoOoOoOoOoOoOoOoOo\nOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO\n"[..], body: &b"GZ\nOoOoO\noOoOoOoOo\noOoOoOoOoOoOoOoOo\noOoOoOoOoOoOoOoOoOoOoOo\noOoOoOoOoOoOoOoOoOoOoOoOoOoOo\nOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO\n"[..],
}), }),
AnyPart::Txt(Text { AnyPart::Txt(Text {
interpreted: mime::MIME { mime: mime::MIME {
interpreted: mime::r#type::Text { interpreted_type: mime::r#type::Deductible::Explicit(mime::r#type::Text {
subtype: mime::r#type::TextSubtype::Html, subtype: mime::r#type::TextSubtype::Html,
charset: mime::charset::EmailCharset::US_ASCII, charset: mime::r#type::Deductible::Explicit(mime::charset::EmailCharset::US_ASCII),
}, }),
parsed: mime::NaiveMIME { fields: mime::NaiveMIME {
ctype: Some(mime::r#type::NaiveType { ctype: Some(mime::r#type::NaiveType {
main: &b"text"[..], main: &b"text"[..],
sub: &b"html"[..], sub: &b"html"[..],
@ -405,6 +591,10 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
} }
] ]
}), }),
kv: vec![
header::Field::Good(header::Kv2(&b"Content-Type"[..], &b"text/html; charset=us-ascii"[..])),
],
raw: &b"Content-Type: text/html; charset=us-ascii\n\n"[..],
..mime::NaiveMIME::default() ..mime::NaiveMIME::default()
}, },
}, },

View file

@ -4,36 +4,30 @@ use crate::mime;
#[derive(PartialEq)] #[derive(PartialEq)]
pub struct Text<'a> { pub struct Text<'a> {
pub interpreted: mime::MIME<'a, mime::r#type::Text>, pub mime: mime::MIME<'a, mime::r#type::DeductibleText>,
pub body: &'a [u8], pub body: &'a [u8],
} }
impl<'a> fmt::Debug for Text<'a> { impl<'a> fmt::Debug for Text<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt.debug_struct("part::Text") fmt.debug_struct("part::Text")
.field("mime", &self.interpreted) .field("mime", &self.mime)
.field( .field("body", &String::from_utf8_lossy(self.body))
"body",
&format_args!("\"{}\"", String::from_utf8_lossy(self.body)),
)
.finish() .finish()
} }
} }
#[derive(PartialEq)] #[derive(PartialEq)]
pub struct Binary<'a> { pub struct Binary<'a> {
pub interpreted: mime::MIME<'a, mime::r#type::Binary>, pub mime: mime::MIME<'a, mime::r#type::Binary>,
pub body: &'a [u8], pub body: &'a [u8],
} }
impl<'a> fmt::Debug for Binary<'a> { impl<'a> fmt::Debug for Binary<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt.debug_struct("part::Binary") fmt.debug_struct("part::Binary")
.field("mime", &self.interpreted) .field("mime", &self.mime)
.field( .field("body", &String::from_utf8_lossy(self.body))
"body",
&format_args!("\"{}\"", String::from_utf8_lossy(self.body)),
)
.finish() .finish()
} }
} }

View file

@ -1,50 +1,24 @@
use nom::{branch::alt, combinator::map, IResult}; use crate::header;
use crate::imf; use crate::imf;
use crate::mime; use crate::mime;
pub enum MixedField<'a> { pub fn split_and_build<'a>(v: &Vec<header::Field<'a>>) -> (mime::NaiveMIME<'a>, imf::Imf<'a>) {
MIME(mime::field::Content<'a>), let (mimev, imfv) = v.iter().fold(
IMF(imf::field::Field<'a>), (
} Vec::<mime::field::Content>::new(),
#[allow(dead_code)] Vec::<imf::field::Field>::new(),
impl<'a> MixedField<'a> { ),
pub fn mime(&self) -> Option<&mime::field::Content<'a>> { |(mut mime, mut imf), f| {
match self { if let Ok(m) = mime::field::Content::try_from(f) {
Self::MIME(v) => Some(v), mime.push(m);
_ => None, } else if let Ok(i) = imf::field::Field::try_from(f) {
} imf.push(i);
} }
pub fn to_mime(self) -> Option<mime::field::Content<'a>> { (mime, imf)
match self { },
Self::MIME(v) => Some(v), );
_ => None,
}
}
pub fn imf(&self) -> Option<&imf::field::Field<'a>> {
match self {
Self::IMF(v) => Some(v),
_ => None,
}
}
pub fn to_imf(self) -> Option<imf::field::Field<'a>> {
match self {
Self::IMF(v) => Some(v),
_ => None,
}
}
}
pub fn sections<'a>(list: Vec<MixedField<'a>>) -> (mime::NaiveMIME<'a>, imf::Imf<'a>) { let fmime = mimev.into_iter().collect::<mime::NaiveMIME>();
let (v1, v2): (Vec<MixedField>, Vec<_>) = list.into_iter().partition(|v| v.mime().is_some()); let fimf = imfv.into_iter().collect::<imf::Imf>();
let mime = v1.into_iter().flat_map(MixedField::to_mime).collect::<mime::NaiveMIME>(); (fmime, fimf)
let imf = v2.into_iter().flat_map(MixedField::to_imf).collect::<imf::Imf>();
(mime, imf)
}
pub fn mixed_field(input: &[u8]) -> IResult<&[u8], MixedField> {
alt((
map(mime::field::content, MixedField::MIME),
map(imf::field::field, MixedField::IMF),
))(input)
} }

View file

@ -17,7 +17,7 @@ use nom::{
}; };
use crate::mime; use crate::mime;
use crate::mime::AnyMIME; use crate::mime::{AnyMIME, NaiveMIME};
use crate::part::{ use crate::part::{
composite::{message, multipart, Message, Multipart}, composite::{message, multipart, Message, Multipart},
discrete::{Binary, Text}, discrete::{Binary, Text},
@ -58,30 +58,61 @@ impl<'a> AnyPart<'a> {
_ => None, _ => None,
} }
} }
pub fn mime(&self) -> &NaiveMIME<'a> {
match self {
Self::Mult(v) => &v.mime.fields,
Self::Msg(v) => &v.mime.fields,
Self::Txt(v) => &v.mime.fields,
Self::Bin(v) => &v.mime.fields,
}
}
}
impl<'a> From<Multipart<'a>> for AnyPart<'a> {
fn from(m: Multipart<'a>) -> Self {
Self::Mult(m)
}
}
impl<'a> From<Message<'a>> for AnyPart<'a> {
fn from(m: Message<'a>) -> Self {
Self::Msg(m)
}
} }
pub fn to_anypart<'a>(m: AnyMIME<'a>, rpart: &'a [u8]) -> AnyPart<'a> { /// Parse any type of part
match m { ///
AnyMIME::Mult(a) => multipart(a)(rpart) /// ## Note
.map(|(rest, multi)| AnyPart::Mult(multi.with_epilogue(rest))) ///
.unwrap_or(AnyPart::Txt(Text { /// Multiparts are a bit special as they have a clearly delimited beginning
interpreted: mime::MIME::<mime::r#type::Text>::default(), /// and end contrary to all the other parts that are going up to the end of the buffer
body: rpart, pub fn anypart<'a>(m: AnyMIME<'a>) -> impl FnOnce(&'a [u8]) -> IResult<&'a [u8], AnyPart<'a>> {
})), move |input| {
AnyMIME::Msg(a) => message(a)(rpart) let part = match m {
.map(|(rest, msg)| AnyPart::Msg(msg.with_epilogue(rest))) AnyMIME::Mult(a) => multipart(a)(input)
.unwrap_or(AnyPart::Txt(Text { .map(|(_, multi)| multi.into())
interpreted: mime::MIME::<mime::r#type::Text>::default(), .unwrap_or(AnyPart::Txt(Text {
body: rpart, mime: mime::MIME::<mime::r#type::DeductibleText>::default(),
})), body: input,
AnyMIME::Txt(a) => AnyPart::Txt(Text { })),
interpreted: a, AnyMIME::Msg(a) => {
body: rpart, message(a)(input)
}), .map(|(_, msg)| msg.into())
AnyMIME::Bin(a) => AnyPart::Bin(Binary { .unwrap_or(AnyPart::Txt(Text {
interpreted: a, mime: mime::MIME::<mime::r#type::DeductibleText>::default(),
body: rpart, body: input,
}), }))
}
AnyMIME::Txt(a) => AnyPart::Txt(Text {
mime: a,
body: input,
}),
AnyMIME::Bin(a) => AnyPart::Bin(Binary {
mime: a,
body: input,
}),
};
// This function always consumes the whole input
Ok((&input[input.len()..], part))
} }
} }

55
src/pointers.rs Normal file
View file

@ -0,0 +1,55 @@
/// Returns the part of `input` that comes before `rest`.
///
/// `rest` is located by address: the result is `input[..k]`, where `k` is the
/// byte offset of the first byte of `rest` relative to the first byte of
/// `input`.
///
/// # Panics
///
/// Panics if `rest` starts before `input` or past its end.
pub fn parsed<'a>(input: &'a [u8], rest: &'a [u8]) -> &'a [u8] {
    // Elements are single bytes, so the address difference is the index.
    // `checked_sub` turns a `rest` located before `input` into an explicit
    // panic instead of an arithmetic overflow (or a silent wrap-around when
    // overflow checks are disabled).
    let idx = (rest.as_ptr() as usize)
        .checked_sub(input.as_ptr() as usize)
        .expect("`rest` must not start before `input`");
    assert!(idx <= input.len(), "`rest` must start within `input`");
    &input[..idx]
}
/// Returns the part of `input` that comes after `parsed`.
///
/// The end of `parsed` is located by address: the result is `input[k..]`,
/// where `k` is the byte offset of the one-past-the-end position of `parsed`
/// relative to the first byte of `input`.
///
/// # Panics
///
/// Panics if `parsed` ends before `input` starts or past the end of `input`.
pub fn rest<'a>(input: &'a [u8], parsed: &'a [u8]) -> &'a [u8] {
    // The empty tail slice gives the one-past-the-end address of `parsed`.
    let end = parsed[parsed.len()..].as_ptr();
    // Elements are single bytes, so the address difference is the index.
    // `checked_sub` turns a `parsed` located before `input` into an explicit
    // panic instead of an arithmetic overflow (or a silent wrap-around when
    // overflow checks are disabled).
    let idx = (end as usize)
        .checked_sub(input.as_ptr() as usize)
        .expect("`parsed` must not end before `input` starts");
    assert!(idx <= input.len(), "`parsed` must end within `input`");
    &input[idx..]
}
/// Returns `input` from its beginning up to the end of `parsed`, i.e. `parsed`
/// together with everything that precedes it in `input`.
///
/// The end of `parsed` is located by address: the result is `input[..k]`,
/// where `k` is the byte offset of the one-past-the-end position of `parsed`
/// relative to the first byte of `input`.
///
/// # Panics
///
/// Panics if `parsed` ends before `input` starts or past the end of `input`.
pub fn with_preamble<'a>(input: &'a [u8], parsed: &'a [u8]) -> &'a [u8] {
    // The empty tail slice gives the one-past-the-end address of `parsed`.
    let end = parsed[parsed.len()..].as_ptr();
    // Elements are single bytes, so the address difference is the index.
    // `checked_sub` turns a `parsed` located before `input` into an explicit
    // panic instead of an arithmetic overflow (or a silent wrap-around when
    // overflow checks are disabled).
    let idx = (end as usize)
        .checked_sub(input.as_ptr() as usize)
        .expect("`parsed` must not end before `input` starts");
    assert!(idx <= input.len(), "`parsed` must end within `input`");
    &input[..idx]
}
/// Returns `input` from the start of `rest` up to its end, i.e. `rest`
/// together with everything that follows it in `input`.
///
/// `rest` is located by address: the result is `input[k..]`, where `k` is the
/// byte offset of the first byte of `rest` relative to the first byte of
/// `input`.
///
/// # Panics
///
/// Panics if `rest` starts before `input` or past its end.
pub fn with_epilogue<'a>(input: &'a [u8], rest: &'a [u8]) -> &'a [u8] {
    // Elements are single bytes, so the address difference is the index.
    // `checked_sub` turns a `rest` located before `input` into an explicit
    // panic instead of an arithmetic overflow (or a silent wrap-around when
    // overflow checks are disabled).
    let idx = (rest.as_ptr() as usize)
        .checked_sub(input.as_ptr() as usize)
        .expect("`rest` must not start before `input`");
    assert!(idx <= input.len(), "`rest` must start within `input`");
    &input[idx..]
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the four slicing helpers against one buffer and one subslice
    /// taken from its middle.
    #[test]
    fn test_all() {
        let buffer = b"aa bb cc";
        let middle = &buffer[3..5];
        assert_eq!(middle, b"bb");

        // What precedes / follows the subslice.
        assert_eq!(parsed(buffer, middle), b"aa ");
        assert_eq!(rest(buffer, middle), b" cc");

        // The subslice extended to the start / to the end of the buffer.
        assert_eq!(with_preamble(buffer, middle), b"aa bb");
        assert_eq!(with_epilogue(buffer, middle), b"bb cc");
    }
}

View file

@ -136,6 +136,11 @@ pub const TILDE: u8 = 0x7E; // ~
// GROUP OF CHARACTERS // GROUP OF CHARACTERS
// -- CRLF // -- CRLF
pub const CRLF: &[u8] = &[CR, LF]; pub const CRLF: &[u8] = &[CR, LF];
// -- CRCRLF
// Some portable libraries transparently replace
// "\n" with "\r\n" on Windows. When developers
// explicitly write "\r\n", the library then generates "\r\r\n".
pub const CRCRLF: &[u8] = &[CR, CR, LF];
// -- WHITESPACE // -- WHITESPACE
pub const WS: &[u8] = &[HT, SP]; pub const WS: &[u8] = &[HT, SP];

View file

@ -22,7 +22,12 @@ use nom::{
/// \r or \n is allowed nowhere else, so we also add this support. /// \r or \n is allowed nowhere else, so we also add this support.
pub fn obs_crlf(input: &[u8]) -> IResult<&[u8], &[u8]> { pub fn obs_crlf(input: &[u8]) -> IResult<&[u8], &[u8]> {
alt((tag(ascii::CRLF), tag(&[ascii::CR]), tag(&[ascii::LF])))(input) alt((
tag(ascii::CRLF),
tag(ascii::CRCRLF),
tag(&[ascii::CR]),
tag(&[ascii::LF]),
))(input)
} }
/// ```abnf /// ```abnf