implement content-type

2023-07-22 11:53:26 +02:00 · 2023-07-22 11:53:26 +02:00 · 0d4e472d41
commit 0d4e472d41
parent 8fff581fb4
6 changed files with 134 additions and 101 deletions
--- a/src/mime/charset.rs
+++ b/src/mime/charset.rs
@@ -7,8 +7,9 @@ use encoding_rs::Encoding;
 /// using encoding_rs datastructures directly would lead to a loss of information.
 /// https://www.iana.org/assignments/character-sets/character-sets.xhtml
 #[allow(non_camel_case_types)]
-#[derive(Debug, PartialEq)]
+#[derive(Debug, PartialEq, Default)]
-pub enum EmailCharset<'a> {
+pub enum EmailCharset {
    #[default]
    US_ASCII,
    ISO_8859_1,
    ISO_8859_2,
@@ -34,10 +35,16 @@ pub enum EmailCharset<'a> {
    Big5,
    KOI8_R,
    UTF_8,
-    Other(&'a [u8]),
+    Unknown,
 }
-impl<'a> From<&'a [u8]> for EmailCharset<'a> {
+impl<'a> From<&'a str> for EmailCharset {
    fn from(s: &'a str) -> Self {
        Self::from(s.as_bytes())
    }
 }
 impl<'a> From<&'a [u8]> for EmailCharset {
    fn from(s: &'a [u8]) -> Self {
        match s.to_ascii_lowercase().as_slice() {
            b"us-ascii" | b"ascii" => EmailCharset::US_ASCII,
@@ -65,13 +72,13 @@ impl<'a> From<&'a [u8]> for EmailCharset<'a> {
            b"big5" => EmailCharset::Big5,
            b"koi8-r" => EmailCharset::KOI8_R,
            b"utf-8" | b"utf8" => EmailCharset::UTF_8,
-            _ => EmailCharset::Other(s)
+            _ => EmailCharset::Unknown,
        }
    }
 }
-impl<'a> EmailCharset<'a> {
+impl EmailCharset {
    pub fn as_str(&self) -> &'static str {
        use EmailCharset::*;
        match self {
@@ -100,7 +107,7 @@ impl<'a> EmailCharset<'a> {
            Big5 => "Big5",
            KOI8_R => "KOI8-R",
            UTF_8 => "UTF-8",
-            Other(_) => "UTF-8", //@FIXME bad idea...
+            Unknown => "UTF-8",
        }
    }
--- a/src/mime/content_fields.rs
+++ b/src/mime/content_fields.rs
@@ -60,49 +60,7 @@ mod tests {
    use super::*;
    use crate::fragments::lazy;
    #[test]
    fn test_version() {
        assert_eq!(version("1.0"), Ok(("", Version { major: 1, minor: 0 })),);
        assert_eq!(
            version(" 1.0 (produced by MetaSend Vx.x)"),
            Ok(("", Version { major: 1, minor: 0 })),
        );
        assert_eq!(
            version("(produced by MetaSend Vx.x) 1.0"),
            Ok(("", Version { major: 1, minor: 0 })),
        );
        assert_eq!(
            version("1.(produced by MetaSend Vx.x)0"),
            Ok(("", Version { major: 1, minor: 0 })),
        );
    }
    #[test]
    fn test_parameter() {
        assert_eq!(
            parameter("charset=utf-8"),
            Ok(("", Parameter::Charset(EmailCharset::UTF_8))),
        );
        assert_eq!(
            parameter("charset=\"utf-8\""),
            Ok(("", Parameter::Charset(EmailCharset::UTF_8))),
        );
    }
    #[test]
    fn test_content_type_plaintext() {
        assert_eq!(
            Type::try_from(&lazy::Type("text/plain; charset=utf-8")),
            Ok(Type::Text(TextDesc {
                charset: Some(EmailCharset::UTF_8),
                subtype: TextSubtype::Plain,
                unknown_parameters: vec![],
            }))
        );
    }
    #[test]
    fn test_content_type_multipart() {
--- a/src/mime/mod.rs
+++ b/src/mime/mod.rs
@@ -1,3 +1,4 @@
 pub mod charset;
 pub mod mechanism;
 pub mod r#type;
 //pub mod field;
--- a/src/mime/type.rs
+++ b/src/mime/type.rs
@@ -1,84 +1,87 @@
 use std::borrow::Cow;
 use encoding_rs::Encoding;
 use nom::{
-    branch::alt,
+    bytes::complete::tag, 
-    bytes::complete::{tag,take_while1}, 
+    combinator::map, 
    character::complete as character, 
    combinator::{into, opt}, 
    multi::many0,
-    sequence::{delimited, preceded, tuple},
+    sequence::{preceded, tuple},
    IResult,
 };
-use crate::text::whitespace::cfws;
+use crate::mime::charset::EmailCharset;
 use crate::text::quoted::quoted_string;
 use crate::text::misc_token::{MIMEWord, mime_word};
 use crate::text::words::{mime_atom};
 // --------- NAIVE TYPE
 #[derive(Debug, PartialEq)]
 pub struct NaiveType<'a> {
-    main: MIMEWord<'a>,
+    main: &'a [u8],
-    sub: MIMEWord<'a>,
+    sub: &'a [u8],
-    params: Parameter<'a>,
+    params: Vec<Parameter<'a>>,
 }
 impl<'a> NaiveType<'a> {
-    pub fn decode(&self) -> Type<'a> {
+    pub fn to_type(&self) -> Type { self.into() } 
        Type::from_naive_type(self)
    } 
 }
-pub fn naive_type(input: &[u8]) -> IResult<&[u8], Type> {
+pub fn naive_type(input: &[u8]) -> IResult<&[u8], NaiveType> {
    map(
        tuple((mime_atom, tag("/"), mime_atom, parameter_list)),
-        |(main, _, sub, params)| Type { main, sub, params },
+        |(main, _, sub, params)| NaiveType { main, sub, params },
    )(input)
 }
 #[derive(Debug, PartialEq)]
-pub enum Parameter<'a> {
+pub struct Parameter<'a> {
    name: &'a [u8],
    value: MIMEWord<'a>,
 }
 pub fn parameter(input: &[u8]) -> IResult<&[u8], Parameter> {
-    map(tuple((mime_atom, tag(b"="), mime_word)), |(name, value)| Parameter { name, value })(input)
+    map(tuple((mime_atom, tag(b"="), mime_word)), |(name, _, value)| Parameter { name, value })(input)
 }
 pub fn parameter_list(input: &[u8]) -> IResult<&[u8], Vec<Parameter>> {
    many0(preceded(tag(";"), parameter))(input)
 }
 // -------- TYPE
-#[derive(Debug, PartialEq, Default)]
+#[derive(Debug, PartialEq)]
-pub enum Type<'a> {
+pub enum Type {
    // Composite types
-    Multipart(MultipartDesc<'a>),
+    Multipart(MultipartDesc),
-    Message(MessageSubtype<'a>),
+    Message(MessageSubtype),
    // Discrete types
-    Text(TextDesc<'a>),
+    Text(TextDesc),
    Binary,
 }
-impl<'a> Type<'a> {
+impl Default for Type {
-    pub fn from_naive_type(nt: &NaiveType<'a>) -> Self {
+    fn default() -> Self {
        Self::Text(TextDesc::default())
    }
 }
 impl<'a> From<&'a NaiveType<'a>> for Type {
    fn from(nt: &'a NaiveType<'a>) -> Self {
        match nt.main.to_ascii_lowercase().as_slice() {
-            b"multipart" => MultipartDesc::from_naive_type(nt).map(Self::Multipart).unwrap_or(Self::default()),
+            b"multipart" => MultipartDesc::try_from(nt).map(Self::Multipart).unwrap_or(Self::default()),
-            b"message" => Self::Message(MessageDesc::from_naive_type(nt)),
+            b"message" => Self::Message(MessageSubtype::from(nt)),
-            b"text" => Self::Text(TextDesc::from_naive_type(nt)),
+            b"text" => Self::Text(TextDesc::from(nt)),
            _ => Self::Binary,
        }
    }
 }
 #[derive(Debug, PartialEq)]
-pub struct MultipartDesc<'a> {
+pub struct MultipartDesc {
-    pub subtype: MultipartSubtype<'a>,
+    pub subtype: MultipartSubtype,
-    pub boundary: &'a [u8],
+    pub boundary: String,
 }
-impl<'a> MultipartDesc<'a> {
+impl<'a> TryFrom<&'a NaiveType<'a>> for MultipartDesc {
-    pub fn from_naive_type(nt: &NaiveType<'a>) -> Option<Self> {
+    type Error = ();
-        MultipartDesc {
+
-            subtype: MultipartSubtype::from_naive_type(nt),
+    fn try_from(nt: &'a NaiveType<'a>) -> Result<Self, Self::Error> {
-            boundary: nt.iter().find(|x| x.name.as_ascii_lowercase().as_slice() == b"boundary").unwrap_or(&[]),
+        nt.params.iter()
-        }
+            .find(|x| x.name.to_ascii_lowercase().as_slice() == b"boundary")
            .map(|boundary| MultipartDesc {
                subtype: MultipartSubtype::from(nt),
                boundary: boundary.value.to_string(),
            })
            .ok_or(())
    }
 }
@@ -91,9 +94,9 @@ pub enum MultipartSubtype {
    Report,
    Unknown,
 }
-impl<'a> From<&NaiveType<'a>> for MultipartSubtype<'a> {
+impl<'a> From<&NaiveType<'a>> for MultipartSubtype {
-    pub fn from(nt: &NaiveType<'a>) -> Self {
+    fn from(nt: &NaiveType<'a>) -> Self {
-        match nt.sub.as_ascii_lowercase().as_slice() {
+        match nt.sub.to_ascii_lowercase().as_slice() {
            b"alternative" => Self::Alternative,
            b"mixed" => Self::Mixed,
            b"digest" => Self::Digest,
@@ -113,24 +116,83 @@ pub enum MessageSubtype {
 }
 impl<'a> From<&NaiveType<'a>> for MessageSubtype {
    fn from(nt: &NaiveType<'a>) -> Self {
-        match csub.to_lowercase().as_ref() {
+        match nt.sub.to_ascii_lowercase().as_slice() {
-            "rfc822" => MessageSubtype::RFC822,
+            b"rfc822" => Self::RFC822,
-            "partial" => MessageSubtype::Partial,
+            b"partial" => Self::Partial,
-            "external" => MessageSubtype::External,
+            b"external" => Self::External,
            _ => Self::Unknown,
        }
    }
 }
 #[derive(Debug, PartialEq, Default)]
-pub struct TextDesc<'a> {
+pub struct TextDesc {
-    pub charset: Option<EmailCharset<'a>>,
+    pub subtype: TextSubtype,
-    pub subtype: TextSubtype<'a>,
+    pub charset: EmailCharset,
 }
 impl<'a> From<&NaiveType<'a>> for TextDesc {
    fn from(nt: &NaiveType<'a>) -> Self {
        TextDesc {
            subtype: TextSubtype::from(nt),
            charset: nt.params.iter()
                .find(|x| x.name.to_ascii_lowercase().as_slice() == b"charset")
                .map(|x| EmailCharset::from(x.value.to_string().as_bytes()))
                .unwrap_or(EmailCharset::US_ASCII),
        }
    }
 }
 #[derive(Debug, PartialEq, Default)]
-pub enum TextSubtype<'a> {
+pub enum TextSubtype {
    #[default]
    Plain,
    Html,
-    Other(&'a str),
+    Unknown,
 }
 impl<'a> From<&NaiveType<'a>> for TextSubtype {
    fn from(nt: &NaiveType<'a>) -> Self {
        match nt.sub.to_ascii_lowercase().as_slice() {
            b"plain" => Self::Plain,
            b"html" => Self::Html,
            _ => Self::Unknown,
        }
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use crate::text::quoted::QuotedString;
    #[test]
    fn test_parameter() {
        assert_eq!(
            parameter(b"charset=utf-8"),
            Ok((&b""[..], Parameter { 
                name: &b"charset"[..], 
                value: MIMEWord::Atom(&b"utf-8"[..]), 
            })),
        );
        assert_eq!(
            parameter(b"charset=\"utf-8\""),
            Ok((&b""[..], Parameter {
                name: &b"charset"[..],
                value: MIMEWord::Quoted(QuotedString(vec![&b"utf-8"[..]])),
            })),
        );
    }
    #[test]
    fn test_content_type_plaintext() {
        let (rest, nt) = naive_type(b"text/plain;\r\n charset=utf-8").unwrap();
        assert_eq!(rest, &b""[..]);
        assert_eq!(
            nt.to_type(), 
            Type::Text(TextDesc {
                charset: EmailCharset::UTF_8,
                subtype: TextSubtype::Plain,
            })
        );
    }
 }
--- a/src/rfc5322/field.rs
+++ b/src/rfc5322/field.rs
@@ -99,7 +99,7 @@ pub fn header(input: &[u8]) -> IResult<&[u8], CompFieldList<Field>> {
 #[cfg(test)]
 mod tests {
    use super::*;
-    use chrono::{DateTime, FixedOffset, TimeZone};
+    use chrono::{FixedOffset, TimeZone};
    use crate::rfc5322::mailbox::*;
    use crate::rfc5322::address::*;
    use crate::text::misc_token::*;
--- a/src/text/misc_token.rs
+++ b/src/text/misc_token.rs
@@ -27,11 +27,16 @@ pub enum MIMEWord<'a> {
    Quoted(QuotedString<'a>),
    Atom(&'a [u8]),
 }
 impl Default for MIMEWord<'static> {
    fn default() -> Self {
        Self::Atom(&[])
    }
 }
 impl<'a> MIMEWord<'a> {
    pub fn to_string(&self) -> String {
        match self {
            Self::Quoted(v) => v.to_string(),
-            Self::Atom(v) => encoding_rs::UTF_8.decode_without_bom_handling(v).1.to_string(),
+            Self::Atom(v) => encoding_rs::UTF_8.decode_without_bom_handling(v).0.to_string(),
        }
    }
 }