Access headers as key/values #24

Merged
quentin merged 8 commits from headers_map into main 2023-08-30 17:50:26 +00:00
12 changed files with 167 additions and 121 deletions
Showing only changes of commit d9285c9ddf - Show all commits

View file

@ -1,4 +1,3 @@
use std::fmt;
use nom::{
branch::alt,
bytes::complete::{tag, take_while1},
@ -8,9 +7,10 @@ use nom::{
sequence::{pair, terminated, tuple},
IResult,
};
use std::fmt;
use crate::text::whitespace::{foldable_line, obs_crlf};
use crate::text::misc_token::unstructured;
use crate::text::whitespace::{foldable_line, obs_crlf};
#[derive(PartialEq, Clone)]
pub struct Kv2<'a>(pub &'a [u8], pub &'a [u8]);
@ -47,13 +47,8 @@ impl<'a> From<&'a [u8]> for Field<'a> {
/// Parse headers as key/values
pub fn header_kv(input: &[u8]) -> IResult<&[u8], Vec<Field>> {
terminated(
many0(
alt((
into(correct_field),
into(foldable_line),
))
),
obs_crlf
many0(alt((into(correct_field), into(foldable_line)))),
obs_crlf,
)(input)
}
@ -74,11 +69,5 @@ pub fn field_any(input: &[u8]) -> IResult<&[u8], &[u8]> {
/// ; ":".
/// ```
pub fn correct_field(input: &[u8]) -> IResult<&[u8], Kv2> {
terminated(
into(pair(
field_any,
recognize(unstructured),
)),
obs_crlf,
)(input)
terminated(into(pair(field_any, recognize(unstructured))), obs_crlf)(input)
}

View file

@ -46,25 +46,27 @@ impl<'a> TryFrom<&header::Field<'a>> for Field<'a> {
type Error = ();
fn try_from(f: &header::Field<'a>) -> Result<Self, Self::Error> {
let content = match f {
header::Field::Good(header::Kv2(key, value)) => match key.to_ascii_lowercase().as_slice() {
b"date" => map(date, Field::Date)(value),
b"from" => map(mailbox_list, Field::From)(value),
b"sender" => map(mailbox, Field::Sender)(value),
b"reply-to" => map(address_list, Field::ReplyTo)(value),
b"to" => map(address_list, Field::To)(value),
b"cc" => map(address_list, Field::Cc)(value),
b"bcc" => map(nullable_address_list, Field::Bcc)(value),
b"message-id" => map(msg_id, Field::MessageID)(value),
b"in-reply-to" => map(msg_list, Field::InReplyTo)(value),
b"references" => map(msg_list, Field::References)(value),
b"subject" => map(unstructured, Field::Subject)(value),
b"comments" => map(unstructured, Field::Comments)(value),
b"keywords" => map(phrase_list, Field::Keywords)(value),
b"return-path" => map(return_path, Field::ReturnPath)(value),
b"received" => map(received_log, Field::Received)(value),
b"mime-version" => map(version, Field::MIMEVersion)(value),
_ => return Err(()),
},
header::Field::Good(header::Kv2(key, value)) => {
match key.to_ascii_lowercase().as_slice() {
b"date" => map(date, Field::Date)(value),
b"from" => map(mailbox_list, Field::From)(value),
b"sender" => map(mailbox, Field::Sender)(value),
b"reply-to" => map(address_list, Field::ReplyTo)(value),
b"to" => map(address_list, Field::To)(value),
b"cc" => map(address_list, Field::Cc)(value),
b"bcc" => map(nullable_address_list, Field::Bcc)(value),
b"message-id" => map(msg_id, Field::MessageID)(value),
b"in-reply-to" => map(msg_list, Field::InReplyTo)(value),
b"references" => map(msg_list, Field::References)(value),
b"subject" => map(unstructured, Field::Subject)(value),
b"comments" => map(unstructured, Field::Comments)(value),
b"keywords" => map(phrase_list, Field::Keywords)(value),
b"return-path" => map(return_path, Field::ReturnPath)(value),
b"received" => map(received_log, Field::Received)(value),
b"mime-version" => map(version, Field::MIMEVersion)(value),
_ => return Err(()),
}
}
_ => return Err(()),
};

View file

@ -1,5 +1,4 @@
/// Parse and represent IMF (Internet Message Format) headers (RFC822, RFC5322)
pub mod address;
pub mod datetime;
pub mod field;
@ -8,10 +7,7 @@ pub mod mailbox;
pub mod mime;
pub mod trace;
use nom::{
combinator::map,
IResult,
};
use nom::{combinator::map, IResult};
use crate::header;
use crate::imf::address::AddressRef;
@ -61,7 +57,8 @@ pub struct Imf<'a> {
}
impl<'a> Imf<'a> {
pub fn with_kv(mut self, v: Vec<header::Field<'a>>) -> Self {
self.kv = v; self
self.kv = v;
self
}
}
@ -94,12 +91,15 @@ impl<'a> FromIterator<Field<'a>> for Imf<'a> {
}
pub fn imf(input: &[u8]) -> IResult<&[u8], Imf> {
map(header::header_kv, |fields| {
fields.iter().flat_map(Field::try_from).into_iter().collect::<Imf>()
map(header::header_kv, |fields| {
fields
.iter()
.flat_map(Field::try_from)
.into_iter()
.collect::<Imf>()
})(input)
}
#[cfg(test)]
mod tests {
use super::*;

View file

@ -18,7 +18,7 @@ pub mod text;
/// Manipulate buffer of bytes
mod pointers;
use nom::{IResult, combinator::into};
use nom::{combinator::into, IResult};
/// Parse a whole email including its (MIME) body
///
@ -37,7 +37,7 @@ use nom::{IResult, combinator::into};
/// * `msg` - The parsed message
///
/// # Examples
///
///
/// ```
/// let input = br#"Date: 7 Mar 2023 08:00:00 +0200
/// From: deuxfleurs@example.com
@ -57,7 +57,9 @@ use nom::{IResult, combinator::into};
/// );
/// ```
pub fn parse_message(input: &[u8]) -> IResult<&[u8], part::composite::Message> {
into(part::composite::message(mime::MIME::<mime::r#type::DeductibleMessage>::default()))(input)
into(part::composite::message(mime::MIME::<
mime::r#type::DeductibleMessage,
>::default()))(input)
}
/// Only extract the headers of the email that are part of the Internet Message Format spec

View file

@ -45,7 +45,10 @@ impl<'a> TryFrom<&header::Field<'a>> for Content<'a> {
type Error = ();
fn try_from(f: &header::Field<'a>) -> Result<Self, Self::Error> {
let content = match f {
header::Field::Good(header::Kv2(key, value)) => match key.to_ascii_lowercase().as_slice() {
header::Field::Good(header::Kv2(key, value)) => match key
.to_ascii_lowercase()
.as_slice()
{
b"content-type" => map(naive_type, Content::Type)(value),
b"content-transfer-encoding" => map(mechanism, Content::TransferEncoding)(value),
b"content-id" => map(msg_id, Content::ID)(value),
@ -107,7 +110,10 @@ This is a multipart message.
.as_bytes();
assert_eq!(
map(header::header_kv, |k| k.iter().flat_map(Content::try_from).collect())(fullmail),
map(header::header_kv, |k| k
.iter()
.flat_map(Content::try_from)
.collect())(fullmail),
Ok((
&b"This is a multipart message.\n\n"[..],
vec![

View file

@ -1,4 +1,4 @@
/// Parsed and represent an email character set
/// Parsed and represent an email character set
pub mod charset;
/// MIME specific headers
@ -13,17 +13,17 @@ pub mod r#type;
use std::fmt;
use std::marker::PhantomData;
use crate::header;
use crate::imf::identification::MessageID;
use crate::mime::field::Content;
use crate::mime::mechanism::Mechanism;
use crate::mime::r#type::{AnyType, NaiveType};
use crate::header;
use crate::text::misc_token::Unstructured; //Multipart, Message, Text, Binary};
#[derive(Debug, PartialEq, Clone)]
pub struct MIME<'a, T> {
pub interpreted_type: T,
pub fields: NaiveMIME<'a>
pub interpreted_type: T,
pub fields: NaiveMIME<'a>,
}
impl<'a> Default for MIME<'a, r#type::DeductibleText> {
fn default() -> Self {
@ -80,9 +80,8 @@ impl<'a> fmt::Debug for NaiveMIME<'a> {
impl<'a> FromIterator<Content<'a>> for NaiveMIME<'a> {
fn from_iter<I: IntoIterator<Item = Content<'a>>>(it: I) -> Self {
it.into_iter().fold(
NaiveMIME::default(),
|mut section, field| {
it.into_iter()
.fold(NaiveMIME::default(), |mut section, field| {
match field {
Content::Type(v) => section.ctype = Some(v),
Content::TransferEncoding(v) => section.transfer_encoding = v,
@ -90,25 +89,29 @@ impl<'a> FromIterator<Content<'a>> for NaiveMIME<'a> {
Content::Description(v) => section.description = Some(v),
};
section
},
)
})
}
}
impl<'a> NaiveMIME<'a> {
pub fn with_kv(mut self, fields: Vec<header::Field<'a>>) -> Self {
self.kv = fields; self
self.kv = fields;
self
}
pub fn with_raw(mut self, raw: &'a [u8]) -> Self {
self.raw = raw; self
self.raw = raw;
self
}
pub fn to_interpreted<T: WithDefaultType>(self) -> AnyMIME<'a> {
self.ctype.as_ref().map(|c| c.to_type()).unwrap_or(T::default_type()).to_mime(self).into()
self.ctype
.as_ref()
.map(|c| c.to_type())
.unwrap_or(T::default_type())
.to_mime(self)
.into()
}
}
pub trait WithDefaultType {
fn default_type() -> AnyType;
}

View file

@ -1,4 +1,3 @@
use std::fmt;
use nom::{
bytes::complete::tag,
combinator::{map, opt},
@ -6,11 +5,12 @@ use nom::{
sequence::{preceded, terminated, tuple},
IResult,
};
use std::fmt;
use crate::mime::charset::EmailCharset;
use crate::mime::{AnyMIME, NaiveMIME, MIME};
use crate::text::misc_token::{mime_word, MIMEWord};
use crate::text::words::mime_atom;
use crate::mime::{AnyMIME, MIME, NaiveMIME};
// --------- NAIVE TYPE
#[derive(PartialEq, Clone)]
@ -92,16 +92,27 @@ impl<'a> From<&'a NaiveType<'a>> for AnyType {
impl<'a> AnyType {
pub fn to_mime(self, fields: NaiveMIME<'a>) -> AnyMIME<'a> {
match self {
Self::Multipart(interpreted_type) => AnyMIME::Mult(MIME::<Multipart> { interpreted_type, fields }),
Self::Message(interpreted_type) => AnyMIME::Msg(MIME::<DeductibleMessage> { interpreted_type, fields }),
Self::Text(interpreted_type) => AnyMIME::Txt(MIME::<DeductibleText> { interpreted_type, fields }),
Self::Binary(interpreted_type) => AnyMIME::Bin(MIME::<Binary> { interpreted_type, fields }),
}
match self {
Self::Multipart(interpreted_type) => AnyMIME::Mult(MIME::<Multipart> {
interpreted_type,
fields,
}),
Self::Message(interpreted_type) => AnyMIME::Msg(MIME::<DeductibleMessage> {
interpreted_type,
fields,
}),
Self::Text(interpreted_type) => AnyMIME::Txt(MIME::<DeductibleText> {
interpreted_type,
fields,
}),
Self::Binary(interpreted_type) => AnyMIME::Bin(MIME::<Binary> {
interpreted_type,
fields,
}),
}
}
}
#[derive(Debug, PartialEq, Clone)]
pub enum Deductible<T: Default> {
Inferred(T),
@ -158,7 +169,8 @@ impl ToString for MultipartSubtype {
Self::Parallel => "parallel",
Self::Report => "report",
Self::Unknown => "mixed",
}.into()
}
.into()
}
}
impl<'a> From<&NaiveType<'a>> for MultipartSubtype {
@ -174,8 +186,6 @@ impl<'a> From<&NaiveType<'a>> for MultipartSubtype {
}
}
#[derive(Debug, PartialEq, Default, Clone)]
pub enum MessageSubtype {
#[default]
@ -191,7 +201,8 @@ impl ToString for MessageSubtype {
Self::Partial => "partial",
Self::External => "external",
Self::Unknown => "rfc822",
}.into()
}
.into()
}
}
@ -203,17 +214,25 @@ pub struct Message {
impl<'a> From<&NaiveType<'a>> for Message {
fn from(nt: &NaiveType<'a>) -> Self {
match nt.sub.to_ascii_lowercase().as_slice() {
b"rfc822" => Self { subtype: MessageSubtype::RFC822 },
b"partial" => Self { subtype: MessageSubtype::Partial },
b"external" => Self { subtype: MessageSubtype::External },
_ => Self { subtype: MessageSubtype::Unknown },
b"rfc822" => Self {
subtype: MessageSubtype::RFC822,
},
b"partial" => Self {
subtype: MessageSubtype::Partial,
},
b"external" => Self {
subtype: MessageSubtype::External,
},
_ => Self {
subtype: MessageSubtype::Unknown,
},
}
}
}
impl From<Deductible<Message>> for Message {
fn from(d: Deductible<Message>) -> Self {
match d {
Deductible::Inferred(t) | Deductible::Explicit(t) => t
Deductible::Inferred(t) | Deductible::Explicit(t) => t,
}
}
}
@ -240,7 +259,7 @@ impl<'a> From<&NaiveType<'a>> for Text {
impl From<Deductible<Text>> for Text {
fn from(d: Deductible<Text>) -> Self {
match d {
Deductible::Inferred(t) | Deductible::Explicit(t) => t
Deductible::Inferred(t) | Deductible::Explicit(t) => t,
}
}
}
@ -257,7 +276,8 @@ impl ToString for TextSubtype {
match self {
Self::Plain | Self::Unknown => "plain",
Self::Html => "html",
}.into()
}
.into()
}
}
impl<'a> From<&NaiveType<'a>> for TextSubtype {
@ -277,8 +297,8 @@ pub struct Binary {}
mod tests {
use super::*;
use crate::mime::charset::EmailCharset;
use crate::text::quoted::QuotedString;
use crate::mime::r#type::Deductible;
use crate::text::quoted::QuotedString;
#[test]
fn test_parameter() {
@ -336,7 +356,12 @@ mod tests {
let (rest, nt) = naive_type(b"message/rfc822").unwrap();
assert_eq!(rest, &[]);
assert_eq!(nt.to_type(), AnyType::Message(Deductible::Explicit(Message { subtype: MessageSubtype::RFC822 })));
assert_eq!(
nt.to_type(),
AnyType::Message(Deductible::Explicit(Message {
subtype: MessageSubtype::RFC822
}))
);
}
#[test]

View file

@ -1,12 +1,12 @@
use std::fmt;
use nom::IResult;
use std::fmt;
use crate::header;
use crate::imf;
use crate::mime;
use crate::part::{self, AnyPart};
use crate::text::boundary::{boundary, Delimiter};
use crate::pointers;
use crate::text::boundary::{boundary, Delimiter};
//--- Multipart
#[derive(PartialEq)]
@ -21,8 +21,14 @@ impl<'a> fmt::Debug for Multipart<'a> {
fmt.debug_struct("part::Multipart")
.field("mime", &self.mime)
.field("children", &self.children)
.field("raw_part_inner", &String::from_utf8_lossy(self.raw_part_inner))
.field("raw_part_outer", &String::from_utf8_lossy(self.raw_part_outer))
.field(
"raw_part_inner",
&String::from_utf8_lossy(self.raw_part_inner),
)
.field(
"raw_part_outer",
&String::from_utf8_lossy(self.raw_part_outer),
)
.finish()
}
}
@ -76,7 +82,10 @@ pub fn multipart<'a>(
mime: m.clone(),
children: mparts,
raw_part_inner: pointers::parsed(inner_orig, inp),
raw_part_outer: pointers::parsed(outer_orig, &outer_orig[outer_orig.len()..]),
raw_part_outer: pointers::parsed(
outer_orig,
&outer_orig[outer_orig.len()..],
),
},
))
}
@ -93,19 +102,21 @@ pub fn multipart<'a>(
.into_iter()
.collect::<mime::NaiveMIME>();
let mime = mime
.with_kv(fields)
.with_raw(raw_hdrs);
let mime = mime.with_kv(fields).with_raw(raw_hdrs);
(input_eom, mime)
},
}
Err(_) => (input, mime::NaiveMIME::default()),
};
// interpret mime according to context
let mime = match m.interpreted_type.subtype {
mime::r#type::MultipartSubtype::Digest => naive_mime.to_interpreted::<mime::WithDigestDefault>().into(),
_ => naive_mime.to_interpreted::<mime::WithGenericDefault>().into(),
mime::r#type::MultipartSubtype::Digest => naive_mime
.to_interpreted::<mime::WithDigestDefault>()
.into(),
_ => naive_mime
.to_interpreted::<mime::WithGenericDefault>()
.into(),
};
// parse raw part
@ -168,7 +179,10 @@ pub fn message<'a>(
let imf = imf.with_kv(headers);
// interpret headers to choose a mime type
let in_mime = naive_mime.with_raw(raw_headers).to_interpreted::<mime::WithGenericDefault>().into();
let in_mime = naive_mime
.with_raw(raw_headers)
.to_interpreted::<mime::WithGenericDefault>()
.into();
//---------------
// parse a part following this mime specification
@ -183,7 +197,9 @@ pub fn message<'a>(
Message {
mime: m.clone(),
imf,
raw_part, raw_headers, raw_body,
raw_part,
raw_headers,
raw_body,
child: Box::new(part),
},
))
@ -196,7 +212,7 @@ mod tests {
use crate::part::discrete::Text;
use crate::part::AnyPart;
use crate::text::encoding::{Base64Word, EncodedWord, QuotedChunk, QuotedWord};
use crate::text::misc_token::{Phrase, UnstrToken, Unstructured, Word, MIMEWord};
use crate::text::misc_token::{MIMEWord, Phrase, UnstrToken, Unstructured, Word};
use crate::text::quoted::QuotedString;
use chrono::{FixedOffset, TimeZone};
@ -265,7 +281,7 @@ It DOES end with a linebreak.
body: &b"This is implicitly typed plain US-ASCII text.\nIt does NOT end with a linebreak."[..],
}),
AnyPart::Txt(Text {
mime: mime::MIME {
mime: mime::MIME {
interpreted_type: mime::r#type::Deductible::Explicit(mime::r#type::Text {
subtype: mime::r#type::TextSubtype::Plain,
charset: mime::r#type::Deductible::Explicit(mime::charset::EmailCharset::US_ASCII),

View file

@ -12,10 +12,7 @@ impl<'a> fmt::Debug for Text<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt.debug_struct("part::Text")
.field("mime", &self.mime)
.field(
"body",
&String::from_utf8_lossy(self.body),
)
.field("body", &String::from_utf8_lossy(self.body))
.finish()
}
}
@ -30,10 +27,7 @@ impl<'a> fmt::Debug for Binary<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt.debug_struct("part::Binary")
.field("mime", &self.mime)
.field(
"body",
&String::from_utf8_lossy(self.body),
)
.field("body", &String::from_utf8_lossy(self.body))
.finish()
}
}

View file

@ -4,7 +4,10 @@ use crate::mime;
pub fn split_and_build<'a>(v: &Vec<header::Field<'a>>) -> (mime::NaiveMIME<'a>, imf::Imf<'a>) {
let (mimev, imfv) = v.iter().fold(
(Vec::<mime::field::Content>::new(), Vec::<imf::field::Field>::new()),
(
Vec::<mime::field::Content>::new(),
Vec::<imf::field::Field>::new(),
),
|(mut mime, mut imf), f| {
if let Ok(m) = mime::field::Content::try_from(f) {
mime.push(m);
@ -12,9 +15,9 @@ pub fn split_and_build<'a>(v: &Vec<header::Field<'a>>) -> (mime::NaiveMIME<'a>,
imf.push(i);
}
(mime, imf)
}
},
);
let fmime = mimev.into_iter().collect::<mime::NaiveMIME>();
let fimf = imfv.into_iter().collect::<imf::Imf>();
(fmime, fimf)

View file

@ -80,18 +80,19 @@ pub fn anypart<'a>(m: AnyMIME<'a>) -> impl FnOnce(&'a [u8]) -> IResult<&'a [u8],
move |input| {
let part = match m {
AnyMIME::Mult(a) => multipart(a)(input)
.map(|(_, multi)|
multi.into())
.unwrap_or(AnyPart::Txt(Text {
mime: mime::MIME::<mime::r#type::DeductibleText>::default(),
body: input,
})),
AnyMIME::Msg(a) => message(a)(input)
.map(|(_, msg)| msg.into())
.map(|(_, multi)| multi.into())
.unwrap_or(AnyPart::Txt(Text {
mime: mime::MIME::<mime::r#type::DeductibleText>::default(),
body: input,
})),
AnyMIME::Msg(a) => {
message(a)(input)
.map(|(_, msg)| msg.into())
.unwrap_or(AnyPart::Txt(Text {
mime: mime::MIME::<mime::r#type::DeductibleText>::default(),
body: input,
}))
}
AnyMIME::Txt(a) => AnyPart::Txt(Text {
mime: a,
body: input,
@ -101,7 +102,7 @@ pub fn anypart<'a>(m: AnyMIME<'a>) -> impl FnOnce(&'a [u8]) -> IResult<&'a [u8],
body: input,
}),
};
// This function always consumes the whole input
Ok((&input[input.len()..], part))
}

View file

@ -22,7 +22,12 @@ use nom::{
/// \r or \n is allowed nowhere else, so we also add this support.
pub fn obs_crlf(input: &[u8]) -> IResult<&[u8], &[u8]> {
alt((tag(ascii::CRLF), tag(ascii::CRCRLF), tag(&[ascii::CR]), tag(&[ascii::LF])))(input)
alt((
tag(ascii::CRLF),
tag(ascii::CRCRLF),
tag(&[ascii::CR]),
tag(&[ascii::LF]),
))(input)
}
/// ```abnf