refactor headers

This commit is contained in:
Quentin 2023-08-30 19:00:08 +02:00
parent d9cf6b225d
commit 18bb04340a
Signed by: quentin
GPG key ID: E9602264D639FF68
7 changed files with 107 additions and 221 deletions

View file

@ -1,38 +1,31 @@
use crate::text::misc_token::{unstructured, Unstructured};
use crate::text::whitespace::{foldable_line, obs_crlf};
use nom::{ use nom::{
branch::alt, branch::alt,
bytes::complete::{tag, tag_no_case, take_while1}, bytes::complete::{tag, take_while1},
character::complete::space0, character::complete::space0,
combinator::{into, map}, combinator::{into, recognize},
multi::{fold_many0, many0}, multi::many0,
sequence::{pair, terminated, tuple}, sequence::{pair, terminated, tuple},
IResult, IResult,
}; };
#[derive(Debug, PartialEq)] use crate::text::whitespace::{foldable_line, obs_crlf};
pub enum CompField<'a, T> { use crate::text::misc_token::unstructured;
Known(T),
Unknown(Kv<'a>),
Bad(&'a [u8]),
}
#[derive(Debug, PartialEq, Clone)] #[derive(Debug, PartialEq, Clone)]
pub struct Kv<'a>(pub &'a [u8], pub Unstructured<'a>); pub struct Kv2<'a>(pub &'a [u8], pub &'a [u8]);
impl<'a> From<(&'a [u8], Unstructured<'a>)> for Kv<'a> { impl<'a> From<(&'a [u8], &'a [u8])> for Kv2<'a> {
fn from(pair: (&'a [u8], Unstructured<'a>)) -> Self { fn from(pair: (&'a [u8], &'a [u8])) -> Self {
Self(pair.0, pair.1) Self(pair.0, pair.1)
} }
} }
#[derive(Debug, PartialEq, Clone)] #[derive(Debug, PartialEq, Clone)]
pub enum Field<'a> { pub enum Field<'a> {
Good(Kv<'a>), Good(Kv2<'a>),
Bad(&'a [u8]), Bad(&'a [u8]),
} }
impl<'a> From<Kv<'a>> for Field<'a> { impl<'a> From<Kv2<'a>> for Field<'a> {
fn from(kv: Kv<'a>) -> Self { fn from(kv: Kv2<'a>) -> Self {
Self::Good(kv) Self::Good(kv)
} }
} }
@ -47,7 +40,7 @@ pub fn header_kv(input: &[u8]) -> IResult<&[u8], Vec<Field>> {
terminated( terminated(
many0( many0(
alt(( alt((
into(opt_field), into(correct_field),
into(foldable_line), into(foldable_line),
)) ))
), ),
@ -55,37 +48,6 @@ pub fn header_kv(input: &[u8]) -> IResult<&[u8], Vec<Field>> {
)(input) )(input)
} }
pub fn header<'a, T>(
fx: impl Fn(&'a [u8]) -> IResult<&'a [u8], T> + Copy,
) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], (Vec::<T>, Vec::<Kv>, Vec<&'a [u8]>)> {
move |input| {
terminated(
fold_many0(
alt((
map(fx, CompField::Known),
map(opt_field, CompField::Unknown),
map(foldable_line, CompField::Bad),
)),
|| (Vec::<T>::new(), Vec::<Kv>::new(), Vec::<&'a [u8]>::new()),
|(mut known, mut unknown, mut bad), item| {
match item {
CompField::Known(v) => known.push(v),
CompField::Unknown(v) => unknown.push(v),
CompField::Bad(v) => bad.push(v),
};
(known, unknown, bad)
}
),
obs_crlf,
)(input)
}
}
pub fn field_name<'a>(name: &'static [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
move |input| terminated(tag_no_case(name), tuple((space0, tag(b":"), space0)))(input)
}
pub fn field_any(input: &[u8]) -> IResult<&[u8], &[u8]> { pub fn field_any(input: &[u8]) -> IResult<&[u8], &[u8]> {
terminated( terminated(
take_while1(|c| (0x21..=0x7E).contains(&c) && c != 0x3A), take_while1(|c| (0x21..=0x7E).contains(&c) && c != 0x3A),
@ -102,11 +64,11 @@ pub fn field_any(input: &[u8]) -> IResult<&[u8], &[u8]> {
/// %d59-126 ; characters not including /// %d59-126 ; characters not including
/// ; ":". /// ; ":".
/// ``` /// ```
pub fn opt_field(input: &[u8]) -> IResult<&[u8], Kv> { pub fn correct_field(input: &[u8]) -> IResult<&[u8], Kv2> {
terminated( terminated(
into(pair( into(pair(
field_any, field_any,
unstructured, recognize(unstructured),
)), )),
obs_crlf, obs_crlf,
)(input) )(input)

View file

@ -1,12 +1,7 @@
use chrono::{DateTime, FixedOffset}; use chrono::{DateTime, FixedOffset};
use nom::{ use nom::combinator::map;
branch::alt,
combinator::map,
sequence::{preceded, terminated},
IResult,
};
use crate::header::{field_name}; use crate::header;
use crate::imf::address::{address_list, mailbox_list, nullable_address_list, AddressList}; use crate::imf::address::{address_list, mailbox_list, nullable_address_list, AddressList};
use crate::imf::datetime::section as date; use crate::imf::datetime::section as date;
use crate::imf::identification::{msg_id, msg_list, MessageID, MessageIDList}; use crate::imf::identification::{msg_id, msg_list, MessageID, MessageIDList};
@ -14,7 +9,6 @@ use crate::imf::mailbox::{mailbox, AddrSpec, MailboxList, MailboxRef};
use crate::imf::mime::{version, Version}; use crate::imf::mime::{version, Version};
use crate::imf::trace::{received_log, return_path, ReceivedLog}; use crate::imf::trace::{received_log, return_path, ReceivedLog};
use crate::text::misc_token::{phrase_list, unstructured, PhraseList, Unstructured}; use crate::text::misc_token::{phrase_list, unstructured, PhraseList, Unstructured};
use crate::text::whitespace::obs_crlf;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum Field<'a> { pub enum Field<'a> {
@ -48,36 +42,32 @@ pub enum Field<'a> {
MIMEVersion(Version), MIMEVersion(Version),
} }
/*impl<'a> From<header::Field<'a>> for Field<'a> { impl<'a> TryFrom<&header::Field<'a>> for Field<'a> {
fn from(raw: header::Field type Error = ();
}*/ fn try_from(f: &header::Field<'a>) -> Result<Self, Self::Error> {
let content = match f {
header::Field::Good(header::Kv2(key, value)) => match key.to_ascii_lowercase().as_slice() {
b"date" => map(date, Field::Date)(value),
b"from" => map(mailbox_list, Field::From)(value),
b"sender" => map(mailbox, Field::Sender)(value),
b"reply-to" => map(address_list, Field::ReplyTo)(value),
b"to" => map(address_list, Field::To)(value),
b"cc" => map(address_list, Field::Cc)(value),
b"bcc" => map(nullable_address_list, Field::Bcc)(value),
b"message-id" => map(msg_id, Field::MessageID)(value),
b"in-reply-to" => map(msg_list, Field::InReplyTo)(value),
b"references" => map(msg_list, Field::References)(value),
b"subject" => map(unstructured, Field::Subject)(value),
b"comments" => map(unstructured, Field::Comments)(value),
b"keywords" => map(phrase_list, Field::Keywords)(value),
b"return-path" => map(return_path, Field::ReturnPath)(value),
b"received" => map(received_log, Field::Received)(value),
b"mime-version" => map(version, Field::MIMEVersion)(value),
_ => return Err(()),
},
_ => return Err(()),
};
pub fn field(input: &[u8]) -> IResult<&[u8], Field> { content.map(|(_, content)| content).or(Err(()))
terminated( }
alt((
preceded(field_name(b"date"), map(date, Field::Date)),
preceded(field_name(b"from"), map(mailbox_list, Field::From)),
preceded(field_name(b"sender"), map(mailbox, Field::Sender)),
preceded(field_name(b"reply-to"), map(address_list, Field::ReplyTo)),
preceded(field_name(b"to"), map(address_list, Field::To)),
preceded(field_name(b"cc"), map(address_list, Field::Cc)),
preceded(field_name(b"bcc"), map(nullable_address_list, Field::Bcc)),
preceded(field_name(b"message-id"), map(msg_id, Field::MessageID)),
preceded(field_name(b"in-reply-to"), map(msg_list, Field::InReplyTo)),
preceded(field_name(b"references"), map(msg_list, Field::References)),
preceded(field_name(b"subject"), map(unstructured, Field::Subject)),
preceded(field_name(b"comments"), map(unstructured, Field::Comments)),
preceded(field_name(b"keywords"), map(phrase_list, Field::Keywords)),
preceded(
field_name(b"return-path"),
map(return_path, Field::ReturnPath),
),
preceded(field_name(b"received"), map(received_log, Field::Received)),
preceded(
field_name(b"mime-version"),
map(version, Field::MIMEVersion),
),
)),
obs_crlf,
)(input)
} }

View file

@ -13,14 +13,13 @@ use nom::{
IResult, IResult,
}; };
use crate::header::header; use crate::header;
use crate::imf::address::AddressRef; use crate::imf::address::AddressRef;
use crate::imf::field::{field, Field}; use crate::imf::field::Field;
use crate::imf::identification::MessageID; use crate::imf::identification::MessageID;
use crate::imf::mailbox::{AddrSpec, MailboxRef}; use crate::imf::mailbox::{AddrSpec, MailboxRef};
use crate::imf::mime::Version; use crate::imf::mime::Version;
use crate::imf::trace::ReceivedLog; use crate::imf::trace::ReceivedLog;
use crate::header;
use crate::text::misc_token::{PhraseList, Unstructured}; use crate::text::misc_token::{PhraseList, Unstructured};
use chrono::{DateTime, FixedOffset}; use chrono::{DateTime, FixedOffset};
@ -56,19 +55,6 @@ pub struct Imf<'a> {
// MIME // MIME
pub mime_version: Option<Version>, pub mime_version: Option<Version>,
// Junk
pub header_ext: Vec<header::Kv<'a>>,
pub header_bad: Vec<&'a [u8]>,
}
impl<'a> Imf<'a> {
pub fn with_opt(mut self, opt: Vec<header::Kv<'a>>) -> Self {
self.header_ext = opt; self
}
pub fn with_bad(mut self, bad: Vec<&'a [u8]>) -> Self {
self.header_bad = bad; self
}
} }
//@FIXME min and max limits are not enforced, //@FIXME min and max limits are not enforced,
@ -100,11 +86,8 @@ impl<'a> FromIterator<Field<'a>> for Imf<'a> {
} }
pub fn imf(input: &[u8]) -> IResult<&[u8], Imf> { pub fn imf(input: &[u8]) -> IResult<&[u8], Imf> {
map(header(field), |(known, unknown, bad)| { map(header::header_kv, |fields| {
let mut imf = Imf::from_iter(known); fields.iter().flat_map(Field::try_from).into_iter().collect::<Imf>()
imf.header_ext = unknown;
imf.header_bad = bad;
imf
})(input) })(input)
} }

View file

@ -1,16 +1,10 @@
use nom::{ use nom::combinator::map;
branch::alt,
combinator::map,
sequence::{preceded, terminated},
IResult,
};
use crate::header::{field_name}; use crate::header;
use crate::imf::identification::{msg_id, MessageID}; use crate::imf::identification::{msg_id, MessageID};
use crate::mime::mechanism::{mechanism, Mechanism}; use crate::mime::mechanism::{mechanism, Mechanism};
use crate::mime::r#type::{naive_type, NaiveType}; use crate::mime::r#type::{naive_type, NaiveType};
use crate::text::misc_token::{unstructured, Unstructured}; use crate::text::misc_token::{unstructured, Unstructured};
use crate::text::whitespace::obs_crlf;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum Content<'a> { pub enum Content<'a> {
@ -47,38 +41,35 @@ impl<'a> Content<'a> {
} }
} }
/* impl<'a> TryFrom<&header::Field<'a>> for Content<'a> {
pub fn to_mime<'a, T: WithDefaultType>(list: Vec<Content<'a>>) -> AnyMIMEWithDefault<'a, T> { type Error = ();
list.into_iter().collect::<AnyMIMEWithDefault<T>>() fn try_from(f: &header::Field<'a>) -> Result<Self, Self::Error> {
}*/ let content = match f {
header::Field::Good(header::Kv2(key, value)) => match key.to_ascii_lowercase().as_slice() {
b"content-type" => map(naive_type, Content::Type)(value),
b"content-transfer-encoding" => map(mechanism, Content::TransferEncoding)(value),
b"content-id" => map(msg_id, Content::ID)(value),
b"content-description" => map(unstructured, Content::Description)(value),
_ => return Err(()),
},
_ => return Err(()),
};
pub fn content(input: &[u8]) -> IResult<&[u8], Content> { //@TODO check that the full value is parsed, otherwise maybe log an error ?!
terminated( content.map(|(_, content)| content).or(Err(()))
alt(( }
preceded(field_name(b"content-type"), map(naive_type, Content::Type)),
preceded(
field_name(b"content-transfer-encoding"),
map(mechanism, Content::TransferEncoding),
),
preceded(field_name(b"content-id"), map(msg_id, Content::ID)),
preceded(
field_name(b"content-description"),
map(unstructured, Content::Description),
),
)),
obs_crlf,
)(input)
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use crate::header::{header}; use crate::header;
use crate::mime::charset::EmailCharset; //use crate::mime::charset::EmailCharset;
use crate::mime::r#type::*; use crate::mime::r#type::*;
use crate::text::misc_token::MIMEWord; use crate::text::misc_token::MIMEWord;
use crate::text::quoted::QuotedString; use crate::text::quoted::QuotedString;
/*
#[test] #[test]
fn test_content_type() { fn test_content_type() {
let (rest, content) = let (rest, content) =
@ -96,7 +87,7 @@ mod tests {
} else { } else {
panic!("Expected Content::Type, got {:?}", content); panic!("Expected Content::Type, got {:?}", content);
} }
} }*/
#[test] #[test]
fn test_header() { fn test_header() {
@ -116,7 +107,7 @@ This is a multipart message.
.as_bytes(); .as_bytes();
assert_eq!( assert_eq!(
map(header(content), |(k, _, _)| k)(fullmail), map(header::header_kv, |k| k.iter().flat_map(Content::try_from).collect())(fullmail),
Ok(( Ok((
&b"This is a multipart message.\n\n"[..], &b"This is a multipart message.\n\n"[..],
vec![ vec![

View file

@ -61,8 +61,7 @@ pub struct NaiveMIME<'a> {
pub transfer_encoding: Mechanism<'a>, pub transfer_encoding: Mechanism<'a>,
pub id: Option<MessageID<'a>>, pub id: Option<MessageID<'a>>,
pub description: Option<Unstructured<'a>>, pub description: Option<Unstructured<'a>>,
pub header_ext: Vec<header::Kv<'a>>, pub fields: Vec<header::Field<'a>>,
pub header_bad: Vec<&'a [u8]>,
pub raw: &'a [u8], pub raw: &'a [u8],
} }
@ -84,11 +83,8 @@ impl<'a> FromIterator<Content<'a>> for NaiveMIME<'a> {
} }
impl<'a> NaiveMIME<'a> { impl<'a> NaiveMIME<'a> {
pub fn with_opt(mut self, opt: Vec<header::Kv<'a>>) -> Self { pub fn with_fields(mut self, fields: Vec<header::Field<'a>>) -> Self {
self.header_ext = opt; self self.fields = fields; self
}
pub fn with_bad(mut self, bad: Vec<&'a [u8]>) -> Self {
self.header_bad = bad; self
} }
pub fn with_raw(mut self, raw: &'a [u8]) -> Self { pub fn with_raw(mut self, raw: &'a [u8]) -> Self {
self.raw = raw; self self.raw = raw; self

View file

@ -1,9 +1,9 @@
use nom::IResult; use nom::IResult;
use crate::header::{header, self}; use crate::header;
use crate::imf; use crate::imf;
use crate::mime; use crate::mime;
use crate::part::{self, AnyPart, field::MixedField}; use crate::part::{self, AnyPart};
use crate::text::boundary::{boundary, Delimiter}; use crate::text::boundary::{boundary, Delimiter};
use crate::pointers; use crate::pointers;
@ -73,15 +73,19 @@ pub fn multipart<'a>(
}; };
// parse mime headers, otherwise pick default mime // parse mime headers, otherwise pick default mime
let (input, naive_mime) = match header(mime::field::content)(input) { let (input, naive_mime) = match header::header_kv(input) {
Ok((input_eom, (known, unknown, bad))) => { Ok((input_eom, fields)) => {
let raw_hdrs = pointers::parsed(input, input_eom); let raw_hdrs = pointers::parsed(input, input_eom);
let mime = known let mime = fields
.iter()
.flat_map(mime::field::Content::try_from)
.into_iter() .into_iter()
.collect::<mime::NaiveMIME>() .collect::<mime::NaiveMIME>();
.with_opt(unknown)
.with_bad(bad) let mime = mime
.with_fields(fields)
.with_raw(raw_hdrs); .with_raw(raw_hdrs);
(input_eom, mime) (input_eom, mime)
}, },
Err(_) => (input, mime::NaiveMIME::default()), Err(_) => (input, mime::NaiveMIME::default()),
@ -127,23 +131,21 @@ pub fn message<'a>(
let orig = input; let orig = input;
// parse header fields // parse header fields
let (input, (known, unknown, bad)): (_, (Vec::<MixedField>, Vec<header::Kv>, Vec<&[u8]>)) = let (input, headers) = header::header_kv(input)?;
header(part::field::mixed_field)(input)?;
// extract raw parts 1/2 // extract raw parts 1/2
let raw_headers = pointers::parsed(orig, input); let raw_headers = pointers::parsed(orig, input);
let body_orig = input; let body_orig = input;
//---------------
// aggregate header fields // aggregate header fields
let (naive_mime, imf) = part::field::sections(known); let (naive_mime, imf) = part::field::split_and_build(&headers);
// attach bad headers to imf
let imf = imf.with_opt(unknown).with_bad(bad);
// interpret headers to choose a mime type // interpret headers to choose a mime type
let in_mime = naive_mime.with_raw(raw_headers).to_interpreted::<mime::WithGenericDefault>().into(); let in_mime = naive_mime.with_fields(headers).with_raw(raw_headers).to_interpreted::<mime::WithGenericDefault>().into();
//---------------
// parse this mimetype // parse a part following this mime specification
let (input, part) = part::anypart(in_mime)(input)?; let (input, part) = part::anypart(in_mime)(input)?;
// extract raw parts 2/2 // extract raw parts 2/2
@ -459,15 +461,6 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
right: &b"www.grrrndzero.org"[..], right: &b"www.grrrndzero.org"[..],
}), }),
mime_version: Some(imf::mime::Version { major: 1, minor: 0}), mime_version: Some(imf::mime::Version { major: 1, minor: 0}),
header_ext: vec![
header::Kv(&b"X-Unknown"[..], Unstructured(vec![
UnstrToken::Plain(&b"something"[..]),
UnstrToken::Plain(&b"something"[..]),
]))
],
header_bad: vec![
&b"Bad entry\n on multiple lines\n"[..],
],
..imf::Imf::default() ..imf::Imf::default()
}, },
child: Box::new(AnyPart::Mult(Multipart { child: Box::new(AnyPart::Mult(Multipart {

View file

@ -1,50 +1,21 @@
use nom::{branch::alt, combinator::map, IResult}; use crate::header;
use crate::imf; use crate::imf;
use crate::mime; use crate::mime;
pub enum MixedField<'a> { pub fn split_and_build<'a>(v: &Vec<header::Field<'a>>) -> (mime::NaiveMIME<'a>, imf::Imf<'a>) {
MIME(mime::field::Content<'a>), let (mimev, imfv) = v.iter().fold(
IMF(imf::field::Field<'a>), (Vec::<mime::field::Content>::new(), Vec::<imf::field::Field>::new()),
|(mut mime, mut imf), f| {
if let Ok(m) = mime::field::Content::try_from(f) {
mime.push(m);
} else if let Ok(i) = imf::field::Field::try_from(f) {
imf.push(i);
} }
#[allow(dead_code)]
impl<'a> MixedField<'a> {
pub fn mime(&self) -> Option<&mime::field::Content<'a>> {
match self {
Self::MIME(v) => Some(v),
_ => None,
}
}
pub fn to_mime(self) -> Option<mime::field::Content<'a>> {
match self {
Self::MIME(v) => Some(v),
_ => None,
}
}
pub fn imf(&self) -> Option<&imf::field::Field<'a>> {
match self {
Self::IMF(v) => Some(v),
_ => None,
}
}
pub fn to_imf(self) -> Option<imf::field::Field<'a>> {
match self {
Self::IMF(v) => Some(v),
_ => None,
}
}
}
pub fn sections<'a>(list: Vec<MixedField<'a>>) -> (mime::NaiveMIME<'a>, imf::Imf<'a>) {
let (v1, v2): (Vec<MixedField>, Vec<_>) = list.into_iter().partition(|v| v.mime().is_some());
let mime = v1.into_iter().flat_map(MixedField::to_mime).collect::<mime::NaiveMIME>();
let imf = v2.into_iter().flat_map(MixedField::to_imf).collect::<imf::Imf>();
(mime, imf) (mime, imf)
} }
);
pub fn mixed_field(input: &[u8]) -> IResult<&[u8], MixedField> { let fmime = mimev.into_iter().collect::<mime::NaiveMIME>();
alt(( let fimf = imfv.into_iter().collect::<imf::Imf>();
map(mime::field::content, MixedField::MIME), (fmime, fimf)
map(imf::field::field, MixedField::IMF),
))(input)
} }