From 0d4e472d41433867f0e4b3499a34060ead180566 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Sat, 22 Jul 2023 11:53:26 +0200 Subject: [PATCH] implement content-type --- src/mime/charset.rs | 21 +++-- src/mime/content_fields.rs | 42 ---------- src/mime/mod.rs | 1 + src/mime/type.rs | 162 +++++++++++++++++++++++++------------ src/rfc5322/field.rs | 2 +- src/text/misc_token.rs | 7 +- 6 files changed, 134 insertions(+), 101 deletions(-) diff --git a/src/mime/charset.rs b/src/mime/charset.rs index 1514e51..bb49765 100644 --- a/src/mime/charset.rs +++ b/src/mime/charset.rs @@ -7,8 +7,9 @@ use encoding_rs::Encoding; /// using encoding_rs datastructures directly would lead to a loss of information. /// https://www.iana.org/assignments/character-sets/character-sets.xhtml #[allow(non_camel_case_types)] -#[derive(Debug, PartialEq)] -pub enum EmailCharset<'a> { +#[derive(Debug, PartialEq, Default)] +pub enum EmailCharset { + #[default] US_ASCII, ISO_8859_1, ISO_8859_2, @@ -34,10 +35,16 @@ pub enum EmailCharset<'a> { Big5, KOI8_R, UTF_8, - Other(&'a [u8]), + Unknown, } -impl<'a> From<&'a [u8]> for EmailCharset<'a> { +impl<'a> From<&'a str> for EmailCharset { + fn from(s: &'a str) -> Self { + Self::from(s.as_bytes()) + } +} + +impl<'a> From<&'a [u8]> for EmailCharset { fn from(s: &'a [u8]) -> Self { match s.to_ascii_lowercase().as_slice() { b"us-ascii" | b"ascii" => EmailCharset::US_ASCII, @@ -65,13 +72,13 @@ impl<'a> From<&'a [u8]> for EmailCharset<'a> { b"big5" => EmailCharset::Big5, b"koi8-r" => EmailCharset::KOI8_R, b"utf-8" | b"utf8" => EmailCharset::UTF_8, - _ => EmailCharset::Other(s) + _ => EmailCharset::Unknown, } } } -impl<'a> EmailCharset<'a> { +impl EmailCharset { pub fn as_str(&self) -> &'static str { use EmailCharset::*; match self { @@ -100,7 +107,7 @@ impl<'a> EmailCharset<'a> { Big5 => "Big5", KOI8_R => "KOI8-R", UTF_8 => "UTF-8", - Other(_) => "UTF-8", //@FIXME bad idea... + Unknown => "UTF-8", } } diff --git a/src/mime/content_fields.rs b/src/mime/content_fields.rs index 272ca94..c9617be 100644 --- a/src/mime/content_fields.rs +++ b/src/mime/content_fields.rs @@ -60,49 +60,7 @@ mod tests { use super::*; use crate::fragments::lazy; - #[test] - fn test_version() { - assert_eq!(version("1.0"), Ok(("", Version { major: 1, minor: 0 })),); - assert_eq!( - version(" 1.0 (produced by MetaSend Vx.x)"), - Ok(("", Version { major: 1, minor: 0 })), - ); - - assert_eq!( - version("(produced by MetaSend Vx.x) 1.0"), - Ok(("", Version { major: 1, minor: 0 })), - ); - - assert_eq!( - version("1.(produced by MetaSend Vx.x)0"), - Ok(("", Version { major: 1, minor: 0 })), - ); - } - - #[test] - fn test_parameter() { - assert_eq!( - parameter("charset=utf-8"), - Ok(("", Parameter::Charset(EmailCharset::UTF_8))), - ); - assert_eq!( - parameter("charset=\"utf-8\""), - Ok(("", Parameter::Charset(EmailCharset::UTF_8))), - ); - } - - #[test] - fn test_content_type_plaintext() { - assert_eq!( - Type::try_from(&lazy::Type("text/plain; charset=utf-8")), - Ok(Type::Text(TextDesc { - charset: Some(EmailCharset::UTF_8), - subtype: TextSubtype::Plain, - unknown_parameters: vec![], - })) - ); - } #[test] fn test_content_type_multipart() { diff --git a/src/mime/mod.rs b/src/mime/mod.rs index fb8d367..7f7117a 100644 --- a/src/mime/mod.rs +++ b/src/mime/mod.rs @@ -1,3 +1,4 @@ pub mod charset; pub mod mechanism; +pub mod r#type; //pub mod field; diff --git a/src/mime/type.rs b/src/mime/type.rs index e1e918b..c8bb1c5 100644 --- a/src/mime/type.rs +++ b/src/mime/type.rs @@ -1,84 +1,87 @@ -use std::borrow::Cow; -use encoding_rs::Encoding; use nom::{ - branch::alt, - bytes::complete::{tag,take_while1}, - character::complete as character, - combinator::{into, opt}, + bytes::complete::tag, + combinator::map, multi::many0, - sequence::{delimited, preceded, tuple}, + sequence::{preceded, tuple}, IResult, }; -use crate::text::whitespace::cfws; -use crate::text::quoted::quoted_string; +use crate::mime::charset::EmailCharset; use crate::text::misc_token::{MIMEWord, mime_word}; use crate::text::words::{mime_atom}; // --------- NAIVE TYPE #[derive(Debug, PartialEq)] pub struct NaiveType<'a> { - main: MIMEWord<'a>, - sub: MIMEWord<'a>, - params: Parameter<'a>, + main: &'a [u8], + sub: &'a [u8], + params: Vec>, } impl<'a> NaiveType<'a> { - pub fn decode(&self) -> Type<'a> { - Type::from_naive_type(self) - } + pub fn to_type(&self) -> Type { self.into() } } -pub fn naive_type(input: &[u8]) -> IResult<&[u8], Type> { +pub fn naive_type(input: &[u8]) -> IResult<&[u8], NaiveType> { map( tuple((mime_atom, tag("/"), mime_atom, parameter_list)), - |(main, _, sub, params)| Type { main, sub, params }, + |(main, _, sub, params)| NaiveType { main, sub, params }, )(input) } #[derive(Debug, PartialEq)] -pub enum Parameter<'a> { +pub struct Parameter<'a> { name: &'a [u8], value: MIMEWord<'a>, } pub fn parameter(input: &[u8]) -> IResult<&[u8], Parameter> { - map(tuple((mime_atom, tag(b"="), mime_word)), |(name, value)| Parameter { name, value })(input) + map(tuple((mime_atom, tag(b"="), mime_word)), |(name, _, value)| Parameter { name, value })(input) } pub fn parameter_list(input: &[u8]) -> IResult<&[u8], Vec> { many0(preceded(tag(";"), parameter))(input) } // -------- TYPE -#[derive(Debug, PartialEq, Default)] -pub enum Type<'a> { +#[derive(Debug, PartialEq)] +pub enum Type { // Composite types - Multipart(MultipartDesc<'a>), - Message(MessageSubtype<'a>), + Multipart(MultipartDesc), + Message(MessageSubtype), // Discrete types - Text(TextDesc<'a>), + Text(TextDesc), Binary, } -impl<'a> Type<'a> { - pub fn from_naive_type(nt: &NaiveType<'a>) -> Self { +impl Default for Type { + fn default() -> Self { + Self::Text(TextDesc::default()) + } +} +impl<'a> From<&'a NaiveType<'a>> for Type { + fn from(nt: &'a NaiveType<'a>) -> Self { match nt.main.to_ascii_lowercase().as_slice() { - b"multipart" => MultipartDesc::from_naive_type(nt).map(Self::Multipart).unwrap_or(Self::default()), - b"message" => Self::Message(MessageDesc::from_naive_type(nt)), - b"text" => Self::Text(TextDesc::from_naive_type(nt)), + b"multipart" => MultipartDesc::try_from(nt).map(Self::Multipart).unwrap_or(Self::default()), + b"message" => Self::Message(MessageSubtype::from(nt)), + b"text" => Self::Text(TextDesc::from(nt)), _ => Self::Binary, } } } #[derive(Debug, PartialEq)] -pub struct MultipartDesc<'a> { - pub subtype: MultipartSubtype<'a>, - pub boundary: &'a [u8], +pub struct MultipartDesc { + pub subtype: MultipartSubtype, + pub boundary: String, } -impl<'a> MultipartDesc<'a> { - pub fn from_naive_type(nt: &NaiveType<'a>) -> Option { - MultipartDesc { - subtype: MultipartSubtype::from_naive_type(nt), - boundary: nt.iter().find(|x| x.name.as_ascii_lowercase().as_slice() == b"boundary").unwrap_or(&[]), - } +impl<'a> TryFrom<&'a NaiveType<'a>> for MultipartDesc { + type Error = (); + + fn try_from(nt: &'a NaiveType<'a>) -> Result { + nt.params.iter() + .find(|x| x.name.to_ascii_lowercase().as_slice() == b"boundary") + .map(|boundary| MultipartDesc { + subtype: MultipartSubtype::from(nt), + boundary: boundary.value.to_string(), + }) + .ok_or(()) } } @@ -91,9 +94,9 @@ pub enum MultipartSubtype { Report, Unknown, } -impl<'a> From<&NaiveType<'a>> for MultipartSubtype<'a> { - pub fn from(nt: &NaiveType<'a>) -> Self { - match nt.sub.as_ascii_lowercase().as_slice() { +impl<'a> From<&NaiveType<'a>> for MultipartSubtype { + fn from(nt: &NaiveType<'a>) -> Self { + match nt.sub.to_ascii_lowercase().as_slice() { b"alternative" => Self::Alternative, b"mixed" => Self::Mixed, b"digest" => Self::Digest, @@ -113,24 +116,83 @@ pub enum MessageSubtype { } impl<'a> From<&NaiveType<'a>> for MessageSubtype { fn from(nt: &NaiveType<'a>) -> Self { - match csub.to_lowercase().as_ref() { - "rfc822" => MessageSubtype::RFC822, - "partial" => MessageSubtype::Partial, - "external" => MessageSubtype::External, + match nt.sub.to_ascii_lowercase().as_slice() { + b"rfc822" => Self::RFC822, + b"partial" => Self::Partial, + b"external" => Self::External, _ => Self::Unknown, } } } #[derive(Debug, PartialEq, Default)] -pub struct TextDesc<'a> { - pub charset: Option>, - pub subtype: TextSubtype<'a>, +pub struct TextDesc { + pub subtype: TextSubtype, + pub charset: EmailCharset, +} +impl<'a> From<&NaiveType<'a>> for TextDesc { + fn from(nt: &NaiveType<'a>) -> Self { + TextDesc { + subtype: TextSubtype::from(nt), + charset: nt.params.iter() + .find(|x| x.name.to_ascii_lowercase().as_slice() == b"charset") + .map(|x| EmailCharset::from(x.value.to_string().as_bytes())) + .unwrap_or(EmailCharset::US_ASCII), + } + } } #[derive(Debug, PartialEq, Default)] -pub enum TextSubtype<'a> { +pub enum TextSubtype { + #[default] Plain, Html, - Other(&'a str), + Unknown, +} +impl<'a> From<&NaiveType<'a>> for TextSubtype { + fn from(nt: &NaiveType<'a>) -> Self { + match nt.sub.to_ascii_lowercase().as_slice() { + b"plain" => Self::Plain, + b"html" => Self::Html, + _ => Self::Unknown, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::text::quoted::QuotedString; + + #[test] + fn test_parameter() { + assert_eq!( + parameter(b"charset=utf-8"), + Ok((&b""[..], Parameter { + name: &b"charset"[..], + value: MIMEWord::Atom(&b"utf-8"[..]), + })), + ); + assert_eq!( + parameter(b"charset=\"utf-8\""), + Ok((&b""[..], Parameter { + name: &b"charset"[..], + value: MIMEWord::Quoted(QuotedString(vec![&b"utf-8"[..]])), + })), + ); + } + + #[test] + fn test_content_type_plaintext() { + let (rest, nt) = naive_type(b"text/plain;\r\n charset=utf-8").unwrap(); + assert_eq!(rest, &b""[..]); + + assert_eq!( + nt.to_type(), + Type::Text(TextDesc { + charset: EmailCharset::UTF_8, + subtype: TextSubtype::Plain, + }) + ); + } } diff --git a/src/rfc5322/field.rs b/src/rfc5322/field.rs index 8baf467..fa4142a 100644 --- a/src/rfc5322/field.rs +++ b/src/rfc5322/field.rs @@ -99,7 +99,7 @@ pub fn header(input: &[u8]) -> IResult<&[u8], CompFieldList> { #[cfg(test)] mod tests { use super::*; - use chrono::{DateTime, FixedOffset, TimeZone}; + use chrono::{FixedOffset, TimeZone}; use crate::rfc5322::mailbox::*; use crate::rfc5322::address::*; use crate::text::misc_token::*; diff --git a/src/text/misc_token.rs b/src/text/misc_token.rs index 1e619eb..b3e0708 100644 --- a/src/text/misc_token.rs +++ b/src/text/misc_token.rs @@ -27,11 +27,16 @@ pub enum MIMEWord<'a> { Quoted(QuotedString<'a>), Atom(&'a [u8]), } +impl Default for MIMEWord<'static> { + fn default() -> Self { + Self::Atom(&[]) + } +} impl<'a> MIMEWord<'a> { pub fn to_string(&self) -> String { match self { Self::Quoted(v) => v.to_string(), - Self::Atom(v) => encoding_rs::UTF_8.decode_without_bom_handling(v).1.to_string(), + Self::Atom(v) => encoding_rs::UTF_8.decode_without_bom_handling(v).0.to_string(), } } }