implement content-type

This commit is contained in:
Quentin 2023-07-22 11:53:26 +02:00
parent 8fff581fb4
commit 0d4e472d41
Signed by: quentin
GPG key ID: E9602264D639FF68
6 changed files with 134 additions and 101 deletions

View file

@ -7,8 +7,9 @@ use encoding_rs::Encoding;
/// using encoding_rs datastructures directly would lead to a loss of information. /// using encoding_rs datastructures directly would lead to a loss of information.
/// https://www.iana.org/assignments/character-sets/character-sets.xhtml /// https://www.iana.org/assignments/character-sets/character-sets.xhtml
#[allow(non_camel_case_types)] #[allow(non_camel_case_types)]
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq, Default)]
pub enum EmailCharset<'a> { pub enum EmailCharset {
#[default]
US_ASCII, US_ASCII,
ISO_8859_1, ISO_8859_1,
ISO_8859_2, ISO_8859_2,
@ -34,10 +35,16 @@ pub enum EmailCharset<'a> {
Big5, Big5,
KOI8_R, KOI8_R,
UTF_8, UTF_8,
Other(&'a [u8]), Unknown,
} }
impl<'a> From<&'a [u8]> for EmailCharset<'a> { impl<'a> From<&'a str> for EmailCharset {
/// Resolves a textual charset label by handing its bytes to the
/// byte-slice conversion, which does the actual label matching.
fn from(s: &'a str) -> Self {
    s.as_bytes().into()
}
}
impl<'a> From<&'a [u8]> for EmailCharset {
fn from(s: &'a [u8]) -> Self { fn from(s: &'a [u8]) -> Self {
match s.to_ascii_lowercase().as_slice() { match s.to_ascii_lowercase().as_slice() {
b"us-ascii" | b"ascii" => EmailCharset::US_ASCII, b"us-ascii" | b"ascii" => EmailCharset::US_ASCII,
@ -65,13 +72,13 @@ impl<'a> From<&'a [u8]> for EmailCharset<'a> {
b"big5" => EmailCharset::Big5, b"big5" => EmailCharset::Big5,
b"koi8-r" => EmailCharset::KOI8_R, b"koi8-r" => EmailCharset::KOI8_R,
b"utf-8" | b"utf8" => EmailCharset::UTF_8, b"utf-8" | b"utf8" => EmailCharset::UTF_8,
_ => EmailCharset::Other(s) _ => EmailCharset::Unknown,
} }
} }
} }
impl<'a> EmailCharset<'a> { impl EmailCharset {
pub fn as_str(&self) -> &'static str { pub fn as_str(&self) -> &'static str {
use EmailCharset::*; use EmailCharset::*;
match self { match self {
@ -100,7 +107,7 @@ impl<'a> EmailCharset<'a> {
Big5 => "Big5", Big5 => "Big5",
KOI8_R => "KOI8-R", KOI8_R => "KOI8-R",
UTF_8 => "UTF-8", UTF_8 => "UTF-8",
Other(_) => "UTF-8", //@FIXME bad idea... Unknown => "UTF-8",
} }
} }

View file

@ -60,49 +60,7 @@ mod tests {
use super::*; use super::*;
use crate::fragments::lazy; use crate::fragments::lazy;
#[test]
fn test_version() {
    // Every spelling below must parse to version 1.0 and consume the whole
    // input; the parenthesised text is expected to be skipped wherever it
    // appears (after, before, or in the middle of the number).
    let inputs = [
        "1.0",
        " 1.0 (produced by MetaSend Vx.x)",
        "(produced by MetaSend Vx.x) 1.0",
        "1.(produced by MetaSend Vx.x)0",
    ];

    for &raw in &inputs {
        assert_eq!(version(raw), Ok(("", Version { major: 1, minor: 0 })));
    }
}
#[test]
fn test_parameter() {
    // The charset value must be recognised identically whether it is written
    // bare or wrapped in double quotes, with nothing left unparsed.
    let inputs = ["charset=utf-8", "charset=\"utf-8\""];

    for &raw in &inputs {
        assert_eq!(
            parameter(raw),
            Ok(("", Parameter::Charset(EmailCharset::UTF_8))),
        );
    }
}
#[test]
fn test_content_type_plaintext() {
    // A plain-text content type with an explicit UTF-8 charset and no
    // additional parameters.
    let raw = lazy::Type("text/plain; charset=utf-8");

    let expected = Type::Text(TextDesc {
        charset: Some(EmailCharset::UTF_8),
        subtype: TextSubtype::Plain,
        unknown_parameters: vec![],
    });

    assert_eq!(Type::try_from(&raw), Ok(expected));
}
#[test] #[test]
fn test_content_type_multipart() { fn test_content_type_multipart() {

View file

@ -1,3 +1,4 @@
pub mod charset; pub mod charset;
pub mod mechanism; pub mod mechanism;
pub mod r#type;
//pub mod field; //pub mod field;

View file

@ -1,84 +1,87 @@
use std::borrow::Cow;
use encoding_rs::Encoding;
use nom::{ use nom::{
branch::alt, bytes::complete::tag,
bytes::complete::{tag,take_while1}, combinator::map,
character::complete as character,
combinator::{into, opt},
multi::many0, multi::many0,
sequence::{delimited, preceded, tuple}, sequence::{preceded, tuple},
IResult, IResult,
}; };
use crate::text::whitespace::cfws; use crate::mime::charset::EmailCharset;
use crate::text::quoted::quoted_string;
use crate::text::misc_token::{MIMEWord, mime_word}; use crate::text::misc_token::{MIMEWord, mime_word};
use crate::text::words::{mime_atom}; use crate::text::words::{mime_atom};
// --------- NAIVE TYPE // --------- NAIVE TYPE
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct NaiveType<'a> { pub struct NaiveType<'a> {
main: MIMEWord<'a>, main: &'a [u8],
sub: MIMEWord<'a>, sub: &'a [u8],
params: Parameter<'a>, params: Vec<Parameter<'a>>,
} }
impl<'a> NaiveType<'a> { impl<'a> NaiveType<'a> {
pub fn decode(&self) -> Type<'a> { pub fn to_type(&self) -> Type { self.into() }
Type::from_naive_type(self)
}
} }
pub fn naive_type(input: &[u8]) -> IResult<&[u8], Type> { pub fn naive_type(input: &[u8]) -> IResult<&[u8], NaiveType> {
map( map(
tuple((mime_atom, tag("/"), mime_atom, parameter_list)), tuple((mime_atom, tag("/"), mime_atom, parameter_list)),
|(main, _, sub, params)| Type { main, sub, params }, |(main, _, sub, params)| NaiveType { main, sub, params },
)(input) )(input)
} }
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum Parameter<'a> { pub struct Parameter<'a> {
name: &'a [u8], name: &'a [u8],
value: MIMEWord<'a>, value: MIMEWord<'a>,
} }
pub fn parameter(input: &[u8]) -> IResult<&[u8], Parameter> { pub fn parameter(input: &[u8]) -> IResult<&[u8], Parameter> {
map(tuple((mime_atom, tag(b"="), mime_word)), |(name, value)| Parameter { name, value })(input) map(tuple((mime_atom, tag(b"="), mime_word)), |(name, _, value)| Parameter { name, value })(input)
} }
pub fn parameter_list(input: &[u8]) -> IResult<&[u8], Vec<Parameter>> { pub fn parameter_list(input: &[u8]) -> IResult<&[u8], Vec<Parameter>> {
many0(preceded(tag(";"), parameter))(input) many0(preceded(tag(";"), parameter))(input)
} }
// -------- TYPE // -------- TYPE
#[derive(Debug, PartialEq, Default)] #[derive(Debug, PartialEq)]
pub enum Type<'a> { pub enum Type {
// Composite types // Composite types
Multipart(MultipartDesc<'a>), Multipart(MultipartDesc),
Message(MessageSubtype<'a>), Message(MessageSubtype),
// Discrete types // Discrete types
Text(TextDesc<'a>), Text(TextDesc),
Binary, Binary,
} }
impl<'a> Type<'a> { impl Default for Type {
pub fn from_naive_type(nt: &NaiveType<'a>) -> Self { fn default() -> Self {
Self::Text(TextDesc::default())
}
}
impl<'a> From<&'a NaiveType<'a>> for Type {
fn from(nt: &'a NaiveType<'a>) -> Self {
match nt.main.to_ascii_lowercase().as_slice() { match nt.main.to_ascii_lowercase().as_slice() {
b"multipart" => MultipartDesc::from_naive_type(nt).map(Self::Multipart).unwrap_or(Self::default()), b"multipart" => MultipartDesc::try_from(nt).map(Self::Multipart).unwrap_or(Self::default()),
b"message" => Self::Message(MessageDesc::from_naive_type(nt)), b"message" => Self::Message(MessageSubtype::from(nt)),
b"text" => Self::Text(TextDesc::from_naive_type(nt)), b"text" => Self::Text(TextDesc::from(nt)),
_ => Self::Binary, _ => Self::Binary,
} }
} }
} }
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct MultipartDesc<'a> { pub struct MultipartDesc {
pub subtype: MultipartSubtype<'a>, pub subtype: MultipartSubtype,
pub boundary: &'a [u8], pub boundary: String,
} }
impl<'a> MultipartDesc<'a> { impl<'a> TryFrom<&'a NaiveType<'a>> for MultipartDesc {
pub fn from_naive_type(nt: &NaiveType<'a>) -> Option<Self> { type Error = ();
MultipartDesc {
subtype: MultipartSubtype::from_naive_type(nt), fn try_from(nt: &'a NaiveType<'a>) -> Result<Self, Self::Error> {
boundary: nt.iter().find(|x| x.name.as_ascii_lowercase().as_slice() == b"boundary").unwrap_or(&[]), nt.params.iter()
} .find(|x| x.name.to_ascii_lowercase().as_slice() == b"boundary")
.map(|boundary| MultipartDesc {
subtype: MultipartSubtype::from(nt),
boundary: boundary.value.to_string(),
})
.ok_or(())
} }
} }
@ -91,9 +94,9 @@ pub enum MultipartSubtype {
Report, Report,
Unknown, Unknown,
} }
impl<'a> From<&NaiveType<'a>> for MultipartSubtype<'a> { impl<'a> From<&NaiveType<'a>> for MultipartSubtype {
pub fn from(nt: &NaiveType<'a>) -> Self { fn from(nt: &NaiveType<'a>) -> Self {
match nt.sub.as_ascii_lowercase().as_slice() { match nt.sub.to_ascii_lowercase().as_slice() {
b"alternative" => Self::Alternative, b"alternative" => Self::Alternative,
b"mixed" => Self::Mixed, b"mixed" => Self::Mixed,
b"digest" => Self::Digest, b"digest" => Self::Digest,
@ -113,24 +116,83 @@ pub enum MessageSubtype {
} }
impl<'a> From<&NaiveType<'a>> for MessageSubtype { impl<'a> From<&NaiveType<'a>> for MessageSubtype {
fn from(nt: &NaiveType<'a>) -> Self { fn from(nt: &NaiveType<'a>) -> Self {
match csub.to_lowercase().as_ref() { match nt.sub.to_ascii_lowercase().as_slice() {
"rfc822" => MessageSubtype::RFC822, b"rfc822" => Self::RFC822,
"partial" => MessageSubtype::Partial, b"partial" => Self::Partial,
"external" => MessageSubtype::External, b"external" => Self::External,
_ => Self::Unknown, _ => Self::Unknown,
} }
} }
} }
#[derive(Debug, PartialEq, Default)] #[derive(Debug, PartialEq, Default)]
pub struct TextDesc<'a> { pub struct TextDesc {
pub charset: Option<EmailCharset<'a>>, pub subtype: TextSubtype,
pub subtype: TextSubtype<'a>, pub charset: EmailCharset,
}
impl<'a> From<&NaiveType<'a>> for TextDesc {
/// Builds the text description from a parsed content type.
///
/// The subtype is derived from `nt` itself. The charset comes from the first
/// parameter whose name equals `charset` (ASCII case-insensitively); when no
/// such parameter exists the charset falls back to `US_ASCII`.
fn from(nt: &NaiveType<'a>) -> Self {
    let subtype = TextSubtype::from(nt);

    let declared = nt
        .params
        .iter()
        .find(|p| p.name.eq_ignore_ascii_case(b"charset"));

    let charset = match declared {
        // The parameter value is rendered to an owned string first, then its
        // bytes are matched against the known charset labels.
        Some(p) => EmailCharset::from(p.value.to_string().as_bytes()),
        None => EmailCharset::US_ASCII,
    };

    TextDesc { subtype, charset }
}
} }
#[derive(Debug, PartialEq, Default)] #[derive(Debug, PartialEq, Default)]
pub enum TextSubtype<'a> { pub enum TextSubtype {
#[default]
Plain, Plain,
Html, Html,
Other(&'a str), Unknown,
}
impl<'a> From<&NaiveType<'a>> for TextSubtype {
    /// Classifies the `sub` part of a parsed content type, ignoring ASCII
    /// case: `plain` and `html` map to their variants, anything else is
    /// `Unknown`.
    fn from(nt: &NaiveType<'a>) -> Self {
        let sub = nt.sub;
        if sub.eq_ignore_ascii_case(b"plain") {
            Self::Plain
        } else if sub.eq_ignore_ascii_case(b"html") {
            Self::Html
        } else {
            Self::Unknown
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::text::quoted::QuotedString;
#[test]
fn test_parameter() {
    let nothing_left: &[u8] = b"";
    let name: &[u8] = b"charset";

    // Unquoted value: expected to come back as a bare atom.
    let bare = Parameter {
        name,
        value: MIMEWord::Atom(&b"utf-8"[..]),
    };
    assert_eq!(parameter(b"charset=utf-8"), Ok((nothing_left, bare)));

    // Quoted value: expected to come back as a quoted string whose single
    // segment holds the bytes between the quotes.
    let quoted = Parameter {
        name,
        value: MIMEWord::Quoted(QuotedString(vec![&b"utf-8"[..]])),
    };
    assert_eq!(parameter(b"charset=\"utf-8\""), Ok((nothing_left, quoted)));
}
#[test]
fn test_content_type_plaintext() {
    // The charset parameter sits on a continuation line after the semicolon;
    // the whole input must still be consumed.
    let (remaining, parsed) = naive_type(b"text/plain;\r\n charset=utf-8").unwrap();
    assert_eq!(remaining, &b""[..]);

    let expected = Type::Text(TextDesc {
        subtype: TextSubtype::Plain,
        charset: EmailCharset::UTF_8,
    });
    assert_eq!(parsed.to_type(), expected);
}
} }

View file

@ -99,7 +99,7 @@ pub fn header(input: &[u8]) -> IResult<&[u8], CompFieldList<Field>> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use chrono::{DateTime, FixedOffset, TimeZone}; use chrono::{FixedOffset, TimeZone};
use crate::rfc5322::mailbox::*; use crate::rfc5322::mailbox::*;
use crate::rfc5322::address::*; use crate::rfc5322::address::*;
use crate::text::misc_token::*; use crate::text::misc_token::*;

View file

@ -27,11 +27,16 @@ pub enum MIMEWord<'a> {
Quoted(QuotedString<'a>), Quoted(QuotedString<'a>),
Atom(&'a [u8]), Atom(&'a [u8]),
} }
impl Default for MIMEWord<'static> {
fn default() -> Self {
Self::Atom(&[])
}
}
impl<'a> MIMEWord<'a> { impl<'a> MIMEWord<'a> {
pub fn to_string(&self) -> String { pub fn to_string(&self) -> String {
match self { match self {
Self::Quoted(v) => v.to_string(), Self::Quoted(v) => v.to_string(),
Self::Atom(v) => encoding_rs::UTF_8.decode_without_bom_handling(v).1.to_string(), Self::Atom(v) => encoding_rs::UTF_8.decode_without_bom_handling(v).0.to_string(),
} }
} }
} }