From 6b3343f1370a56c7a4af9748a5082f73f0ee3fcf Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Fri, 14 Jul 2023 10:43:31 +0200 Subject: [PATCH] implement mime headers --- src/fragments/mime.rs | 20 +++++------ src/fragments/misc_token.rs | 40 ++++++++++++++------- src/fragments/section.rs | 14 +++++++- src/fragments/whitespace.rs | 11 +++++- tests/known.rs | 70 +++++++++++++++++++++++++++++++++++++ 5 files changed, 131 insertions(+), 24 deletions(-) diff --git a/src/fragments/mime.rs b/src/fragments/mime.rs index 0004e4b..14ece11 100644 --- a/src/fragments/mime.rs +++ b/src/fragments/mime.rs @@ -17,8 +17,8 @@ use crate::fragments::quoted::quoted_string; #[derive(Debug, PartialEq)] pub struct Version { - major: u32, - minor: u32, + pub major: u32, + pub minor: u32, } #[derive(Debug, PartialEq)] @@ -40,9 +40,9 @@ pub enum Type<'a> { #[derive(Debug, PartialEq)] pub struct MultipartDesc<'a> { - boundary: String, - subtype: MultipartSubtype<'a>, - unknown_parameters: Vec>, + pub boundary: String, + pub subtype: MultipartSubtype<'a>, + pub unknown_parameters: Vec>, } #[derive(Debug, PartialEq)] @@ -57,8 +57,8 @@ pub enum MultipartSubtype<'a> { #[derive(Debug, PartialEq)] pub struct MessageDesc<'a> { - subtype: MessageSubtype<'a>, - unknown_parameters: Vec>, + pub subtype: MessageSubtype<'a>, + pub unknown_parameters: Vec>, } #[derive(Debug, PartialEq)] @@ -71,9 +71,9 @@ pub enum MessageSubtype<'a> { #[derive(Debug, PartialEq)] pub struct TextDesc<'a> { - charset: Option>, - subtype: TextSubtype<'a>, - unknown_parameters: Vec>, + pub charset: Option>, + pub subtype: TextSubtype<'a>, + pub unknown_parameters: Vec>, } #[derive(Debug, PartialEq)] diff --git a/src/fragments/misc_token.rs b/src/fragments/misc_token.rs index 3f18213..11e25af 100644 --- a/src/fragments/misc_token.rs +++ b/src/fragments/misc_token.rs @@ -2,7 +2,7 @@ use nom::{ branch::alt, bytes::complete::{tag, take_while1}, character::complete::space0, - combinator::{into, opt}, + combinator::{into, map, 
opt}, multi::{many0, many1, separated_list1}, sequence::tuple, IResult, @@ -14,6 +14,7 @@ use crate::fragments::lazy; use crate::fragments::quoted::quoted_string; use crate::fragments::whitespace::{fws, is_obs_no_ws_ctl}; use crate::fragments::words::{atom, is_vchar}; +use crate::fragments::encoding::encoded_word; #[derive(Debug, PartialEq, Default)] pub struct Unstructured(pub String); @@ -47,7 +48,7 @@ impl<'a> TryFrom<&'a lazy::PhraseList<'a>> for PhraseList { /// word = atom / quoted-string /// ``` pub fn word(input: &str) -> IResult<&str, Cow<str>> { - alt((into(quoted_string), into(atom)))(input) + alt((into(quoted_string), into(encoded_word), into(atom)))(input) } /// Phrase @@ -70,31 +71,46 @@ fn is_unstructured(c: char) -> bool { is_vchar(c) || is_obs_no_ws_ctl(c) || c == '\x00' } +enum UnstrToken { + Init, + Encoded, + Plain, +} + /// Unstructured header field body /// /// ```abnf /// unstructured = (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct /// ``` pub fn unstructured(input: &str) -> IResult<&str, String> { - let (input, r) = many0(tuple((opt(fws), take_while1(is_unstructured))))(input)?; + let (input, r) = many0(tuple((opt(fws), alt(( + map(encoded_word, |v| (Cow::Owned(v), UnstrToken::Encoded)), + map(take_while1(is_unstructured), |v| (Cow::Borrowed(v), UnstrToken::Plain)), + )))))(input)?; + let (input, _) = space0(input)?; // Try to optimize for the most common cases let body = match r.as_slice() { - [(None, content)] => content.to_string(), - [(Some(_), content)] => " ".to_string() + content, - lines => lines.iter().fold(String::with_capacity(255), |acc, item| { - let (may_ws, content) = item; - match may_ws { - Some(_) => acc + " " + content, - None => acc + content, - } - }), + // Optimization when there is only one line + [(None, (content, _))] | [(_, (content, UnstrToken::Encoded))] => content.to_string(), + [(Some(_), (content, _))] => " ".to_string() + content, + // Generic case, with multiple lines + lines => lines.iter().fold( + (&UnstrToken::Init, 
String::with_capacity(255)), + |(prev_token, result), (may_ws, (content, current_token))| { + let new_res = match (may_ws, prev_token, current_token) { + (_, UnstrToken::Encoded, UnstrToken::Encoded) | (None, _, _) => result + content, + _ => result + " " + content, + }; + (current_token, new_res) + }).1, }; Ok((input, body)) } + #[cfg(test)] mod tests { use super::*; diff --git a/src/fragments/section.rs b/src/fragments/section.rs index efdd19f..4d66867 100644 --- a/src/fragments/section.rs +++ b/src/fragments/section.rs @@ -3,6 +3,7 @@ use std::collections::HashMap; use crate::fragments::eager::Field; use crate::fragments::lazy; use crate::fragments::misc_token::{PhraseList, Unstructured}; +use crate::fragments::mime::{Version,Type,Mechanism}; use crate::fragments::model::{AddressRef, MailboxRef, MessageId}; use crate::fragments::trace::ReceivedLog; use chrono::{DateTime, FixedOffset}; @@ -40,6 +41,13 @@ pub struct Section<'a> { // 3.6.8. Optional Fields pub optional: HashMap<&'a str, &'a Unstructured>, + // MIME + pub mime_version: Option<&'a Version>, + pub content_type: Option<&'a Type<'a>>, + pub content_transfer_encoding: Option<&'a Mechanism<'a>>, + pub content_id: Option<&'a MessageId<'a>>, + pub content_description: Option<&'a Unstructured>, + // Recovery pub bad_fields: Vec<&'a lazy::Field<'a>>, pub unparsed: Vec<&'a str>, @@ -71,7 +79,11 @@ impl<'a> FromIterator<&'a Field<'a>> for Section<'a> { section.optional.insert(k, v); } Field::Rescue(v) => section.unparsed.push(v), - _ => todo!(), + Field::MIMEVersion(v) => section.mime_version = Some(v), + Field::ContentType(v) => section.content_type = Some(v), + Field::ContentTransferEncoding(v) => section.content_transfer_encoding = Some(v), + Field::ContentID(v) => section.content_id = Some(v), + Field::ContentDescription(v) => section.content_description = Some(v), } } section diff --git a/src/fragments/whitespace.rs b/src/fragments/whitespace.rs index 4acb8e8..57aec12 100644 --- 
a/src/fragments/whitespace.rs +++ b/src/fragments/whitespace.rs @@ -8,6 +8,7 @@ use nom::{ sequence::tuple, IResult, }; +use crate::fragments::encoding::encoded_word; // --- whitespaces and comments @@ -75,7 +76,7 @@ pub fn comment(input: &str) -> IResult<&str, ()> { } pub fn ccontent(input: &str) -> IResult<&str, &str> { - alt((recognize(ctext), recognize(quoted_pair), recognize(comment)))(input) + alt((recognize(ctext), recognize(quoted_pair), recognize(encoded_word), recognize(comment)))(input) } pub fn ctext(input: &str) -> IResult<&str, char> { @@ -155,4 +156,12 @@ mod tests { Ok(("wouch", "(double (comment) is fun) ")) ); } + + #[test] + fn test_cfws_encoded_word() { + assert_eq!( + cfws("(=?US-ASCII?Q?Keith_Moore?=)"), + Ok(("", "(=?US-ASCII?Q?Keith_Moore?=)")), + ); + } } diff --git a/tests/known.rs b/tests/known.rs index 9eac7c8..03ef6a8 100644 --- a/tests/known.rs +++ b/tests/known.rs @@ -172,7 +172,77 @@ This is a reply to your hello. "Héron: Raté\n Raté raté\n", "Not a real header but should still recover\n", ], + ..section::Section::default() } ) }) } + +#[test] +fn test_headers_mime() { + use imf_codec::fragments::mime; + let fullmail: &[u8] = r#"From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu> +To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk> +CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be> +Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= + =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?= +MIME-Version: 1.0 +Content-Type: text/plain; charset=ISO-8859-1 +Content-Transfer-Encoding: quoted-printable +Content-ID: <a@example.com> +Content-Description: hello + +Now's the time = +for all folk to come= + to the aid of their country. 
+"# + .as_bytes(); + + parser(fullmail, |parsed_section| { + assert_eq!( + parsed_section, + &section::Section { + from: vec![ + &model::MailboxRef { + name: Some("Keith Moore".into()), + addrspec: model::AddrSpec { + local_part: "moore".into(), + domain: "cs.utk.edu".into(), + } + }, + ], + + to: vec![&model::AddressRef::Single(model::MailboxRef { + name: Some("Keld Jørn Simonsen".into()), + addrspec: model::AddrSpec { + local_part: "keld".into(), + domain: "dkuug.dk".into(), + } + })], + + cc: vec![&model::AddressRef::Single(model::MailboxRef { + name: Some("André Pirard".into()), + addrspec: model::AddrSpec { + local_part: "PIRARD".into(), + domain: "vm1.ulg.ac.be".into(), + } + })], + + subject: Some(&misc_token::Unstructured("If you can read this you understand the example.".into())), + mime_version: Some(&mime::Version{ major: 1, minor: 0 }), + content_type: Some(&mime::Type::Text(mime::TextDesc { + charset: Some(mime::EmailCharset::ISO_8859_1), + subtype: mime::TextSubtype::Plain, + unknown_parameters: vec![] + })), + content_transfer_encoding: Some(&mime::Mechanism::QuotedPrintable), + content_id: Some(&model::MessageId { + left: "a", + right: "example.com" + }), + content_description: Some(&misc_token::Unstructured("hello".into())), + ..section::Section::default() + } + ); + }) +}