implement content-type

This commit is contained in:
Quentin 2023-07-22 11:53:26 +02:00
parent 8fff581fb4
commit 0d4e472d41
Signed by: quentin
GPG key ID: E9602264D639FF68
6 changed files with 134 additions and 101 deletions

View file

@ -7,8 +7,9 @@ use encoding_rs::Encoding;
/// using encoding_rs data structures directly would lead to a loss of information.
/// https://www.iana.org/assignments/character-sets/character-sets.xhtml
#[allow(non_camel_case_types)]
#[derive(Debug, PartialEq)]
pub enum EmailCharset<'a> {
#[derive(Debug, PartialEq, Default)]
pub enum EmailCharset {
#[default]
US_ASCII,
ISO_8859_1,
ISO_8859_2,
@ -34,10 +35,16 @@ pub enum EmailCharset<'a> {
Big5,
KOI8_R,
UTF_8,
Other(&'a [u8]),
Unknown,
}
impl<'a> From<&'a [u8]> for EmailCharset<'a> {
/// Resolves a charset label given as text by delegating to the byte-slice
/// conversion (`From<&[u8]>`), so both entry points share one lookup table.
impl<'a> From<&'a str> for EmailCharset {
    fn from(s: &'a str) -> Self {
        let raw: &'a [u8] = s.as_bytes();
        raw.into()
    }
}
impl<'a> From<&'a [u8]> for EmailCharset {
fn from(s: &'a [u8]) -> Self {
match s.to_ascii_lowercase().as_slice() {
b"us-ascii" | b"ascii" => EmailCharset::US_ASCII,
@ -65,13 +72,13 @@ impl<'a> From<&'a [u8]> for EmailCharset<'a> {
b"big5" => EmailCharset::Big5,
b"koi8-r" => EmailCharset::KOI8_R,
b"utf-8" | b"utf8" => EmailCharset::UTF_8,
_ => EmailCharset::Other(s)
_ => EmailCharset::Unknown,
}
}
}
impl<'a> EmailCharset<'a> {
impl EmailCharset {
pub fn as_str(&self) -> &'static str {
use EmailCharset::*;
match self {
@ -100,7 +107,7 @@ impl<'a> EmailCharset<'a> {
Big5 => "Big5",
KOI8_R => "KOI8-R",
UTF_8 => "UTF-8",
Other(_) => "UTF-8", //@FIXME bad idea...
Unknown => "UTF-8",
}
}

View file

@ -60,49 +60,7 @@ mod tests {
use super::*;
use crate::fragments::lazy;
#[test]
fn test_version() {
assert_eq!(version("1.0"), Ok(("", Version { major: 1, minor: 0 })),);
assert_eq!(
version(" 1.0 (produced by MetaSend Vx.x)"),
Ok(("", Version { major: 1, minor: 0 })),
);
assert_eq!(
version("(produced by MetaSend Vx.x) 1.0"),
Ok(("", Version { major: 1, minor: 0 })),
);
assert_eq!(
version("1.(produced by MetaSend Vx.x)0"),
Ok(("", Version { major: 1, minor: 0 })),
);
}
#[test]
fn test_parameter() {
assert_eq!(
parameter("charset=utf-8"),
Ok(("", Parameter::Charset(EmailCharset::UTF_8))),
);
assert_eq!(
parameter("charset=\"utf-8\""),
Ok(("", Parameter::Charset(EmailCharset::UTF_8))),
);
}
#[test]
fn test_content_type_plaintext() {
assert_eq!(
Type::try_from(&lazy::Type("text/plain; charset=utf-8")),
Ok(Type::Text(TextDesc {
charset: Some(EmailCharset::UTF_8),
subtype: TextSubtype::Plain,
unknown_parameters: vec![],
}))
);
}
#[test]
fn test_content_type_multipart() {

View file

@ -1,3 +1,4 @@
pub mod charset;
pub mod mechanism;
pub mod r#type;
//pub mod field;

View file

@ -1,84 +1,87 @@
use std::borrow::Cow;
use encoding_rs::Encoding;
use nom::{
branch::alt,
bytes::complete::{tag,take_while1},
character::complete as character,
combinator::{into, opt},
bytes::complete::tag,
combinator::map,
multi::many0,
sequence::{delimited, preceded, tuple},
sequence::{preceded, tuple},
IResult,
};
use crate::text::whitespace::cfws;
use crate::text::quoted::quoted_string;
use crate::mime::charset::EmailCharset;
use crate::text::misc_token::{MIMEWord, mime_word};
use crate::text::words::{mime_atom};
// --------- NAIVE TYPE
#[derive(Debug, PartialEq)]
pub struct NaiveType<'a> {
main: MIMEWord<'a>,
sub: MIMEWord<'a>,
params: Parameter<'a>,
main: &'a [u8],
sub: &'a [u8],
params: Vec<Parameter<'a>>,
}
impl<'a> NaiveType<'a> {
pub fn decode(&self) -> Type<'a> {
Type::from_naive_type(self)
}
pub fn to_type(&self) -> Type { self.into() }
}
pub fn naive_type(input: &[u8]) -> IResult<&[u8], Type> {
pub fn naive_type(input: &[u8]) -> IResult<&[u8], NaiveType> {
map(
tuple((mime_atom, tag("/"), mime_atom, parameter_list)),
|(main, _, sub, params)| Type { main, sub, params },
|(main, _, sub, params)| NaiveType { main, sub, params },
)(input)
}
#[derive(Debug, PartialEq)]
pub enum Parameter<'a> {
pub struct Parameter<'a> {
name: &'a [u8],
value: MIMEWord<'a>,
}
pub fn parameter(input: &[u8]) -> IResult<&[u8], Parameter> {
map(tuple((mime_atom, tag(b"="), mime_word)), |(name, value)| Parameter { name, value })(input)
map(tuple((mime_atom, tag(b"="), mime_word)), |(name, _, value)| Parameter { name, value })(input)
}
/// Parses zero or more `;`-prefixed parameters, collecting them in the order
/// they appear in `input`.
pub fn parameter_list(input: &[u8]) -> IResult<&[u8], Vec<Parameter>> {
    // One list entry is a literal `;` followed by a single `parameter`.
    let mut parameters = many0(preceded(tag(";"), parameter));
    parameters(input)
}
// -------- TYPE
#[derive(Debug, PartialEq, Default)]
pub enum Type<'a> {
#[derive(Debug, PartialEq)]
pub enum Type {
// Composite types
Multipart(MultipartDesc<'a>),
Message(MessageSubtype<'a>),
Multipart(MultipartDesc),
Message(MessageSubtype),
// Discrete types
Text(TextDesc<'a>),
Text(TextDesc),
Binary,
}
impl<'a> Type<'a> {
pub fn from_naive_type(nt: &NaiveType<'a>) -> Self {
impl Default for Type {
fn default() -> Self {
Self::Text(TextDesc::default())
}
}
impl<'a> From<&'a NaiveType<'a>> for Type {
fn from(nt: &'a NaiveType<'a>) -> Self {
match nt.main.to_ascii_lowercase().as_slice() {
b"multipart" => MultipartDesc::from_naive_type(nt).map(Self::Multipart).unwrap_or(Self::default()),
b"message" => Self::Message(MessageDesc::from_naive_type(nt)),
b"text" => Self::Text(TextDesc::from_naive_type(nt)),
b"multipart" => MultipartDesc::try_from(nt).map(Self::Multipart).unwrap_or(Self::default()),
b"message" => Self::Message(MessageSubtype::from(nt)),
b"text" => Self::Text(TextDesc::from(nt)),
_ => Self::Binary,
}
}
}
#[derive(Debug, PartialEq)]
pub struct MultipartDesc<'a> {
pub subtype: MultipartSubtype<'a>,
pub boundary: &'a [u8],
pub struct MultipartDesc {
pub subtype: MultipartSubtype,
pub boundary: String,
}
impl<'a> MultipartDesc<'a> {
pub fn from_naive_type(nt: &NaiveType<'a>) -> Option<Self> {
MultipartDesc {
subtype: MultipartSubtype::from_naive_type(nt),
boundary: nt.iter().find(|x| x.name.as_ascii_lowercase().as_slice() == b"boundary").unwrap_or(&[]),
}
/// Builds a multipart description from a naive `main/sub; params` type.
///
/// The conversion succeeds only when a `boundary` parameter is present (its
/// name is matched ASCII case-insensitively); the first such parameter wins.
///
/// # Errors
///
/// Returns `Err(())` when no `boundary` parameter is found.
impl<'a> TryFrom<&'a NaiveType<'a>> for MultipartDesc {
    type Error = ();

    fn try_from(nt: &'a NaiveType<'a>) -> Result<Self, Self::Error> {
        nt.params
            .iter()
            // Compare in place instead of allocating a lowercased copy of
            // every parameter name.
            .find(|x| x.name.eq_ignore_ascii_case(b"boundary"))
            .map(|boundary| MultipartDesc {
                subtype: MultipartSubtype::from(nt),
                boundary: boundary.value.to_string(),
            })
            .ok_or(())
    }
}
@ -91,9 +94,9 @@ pub enum MultipartSubtype {
Report,
Unknown,
}
impl<'a> From<&NaiveType<'a>> for MultipartSubtype<'a> {
pub fn from(nt: &NaiveType<'a>) -> Self {
match nt.sub.as_ascii_lowercase().as_slice() {
impl<'a> From<&NaiveType<'a>> for MultipartSubtype {
fn from(nt: &NaiveType<'a>) -> Self {
match nt.sub.to_ascii_lowercase().as_slice() {
b"alternative" => Self::Alternative,
b"mixed" => Self::Mixed,
b"digest" => Self::Digest,
@ -113,24 +116,83 @@ pub enum MessageSubtype {
}
impl<'a> From<&NaiveType<'a>> for MessageSubtype {
fn from(nt: &NaiveType<'a>) -> Self {
match csub.to_lowercase().as_ref() {
"rfc822" => MessageSubtype::RFC822,
"partial" => MessageSubtype::Partial,
"external" => MessageSubtype::External,
match nt.sub.to_ascii_lowercase().as_slice() {
b"rfc822" => Self::RFC822,
b"partial" => Self::Partial,
b"external" => Self::External,
_ => Self::Unknown,
}
}
}
#[derive(Debug, PartialEq, Default)]
pub struct TextDesc<'a> {
pub charset: Option<EmailCharset<'a>>,
pub subtype: TextSubtype<'a>,
pub struct TextDesc {
pub subtype: TextSubtype,
pub charset: EmailCharset,
}
/// Builds a text description from a naive `main/sub; params` type.
///
/// The subtype is derived from `nt` and the charset from the first parameter
/// whose name matches `charset` ASCII case-insensitively. When no such
/// parameter exists, the charset falls back to `US_ASCII`.
impl<'a> From<&NaiveType<'a>> for TextDesc {
    fn from(nt: &NaiveType<'a>) -> Self {
        let charset = nt
            .params
            .iter()
            // Compare in place instead of allocating a lowercased copy of
            // every parameter name.
            .find(|param| param.name.eq_ignore_ascii_case(b"charset"))
            .map(|param| EmailCharset::from(param.value.to_string().as_bytes()))
            .unwrap_or(EmailCharset::US_ASCII);

        TextDesc {
            subtype: TextSubtype::from(nt),
            charset,
        }
    }
}
#[derive(Debug, PartialEq, Default)]
pub enum TextSubtype<'a> {
pub enum TextSubtype {
#[default]
Plain,
Html,
Other(&'a str),
Unknown,
}
impl<'a> From<&NaiveType<'a>> for TextSubtype {
fn from(nt: &NaiveType<'a>) -> Self {
match nt.sub.to_ascii_lowercase().as_slice() {
b"plain" => Self::Plain,
b"html" => Self::Html,
_ => Self::Unknown,
}
}
}
// Unit tests for the parameter parser and the naive-type to `Type` conversion.
#[cfg(test)]
mod tests {
use super::*;
use crate::text::quoted::QuotedString;
// `parameter` must accept the value both as a bare atom and as a quoted
// string; in each case the whole input is consumed and the name is kept as
// raw bytes.
#[test]
fn test_parameter() {
assert_eq!(
parameter(b"charset=utf-8"),
Ok((&b""[..], Parameter {
name: &b"charset"[..],
value: MIMEWord::Atom(&b"utf-8"[..]),
})),
);
assert_eq!(
parameter(b"charset=\"utf-8\""),
Ok((&b""[..], Parameter {
name: &b"charset"[..],
value: MIMEWord::Quoted(QuotedString(vec![&b"utf-8"[..]])),
})),
);
}
// End-to-end check: the input has a CRLF followed by a space between the `;`
// and the `charset` parameter. It must parse with nothing left over and
// convert to a plain-text type with the UTF-8 charset.
#[test]
fn test_content_type_plaintext() {
let (rest, nt) = naive_type(b"text/plain;\r\n charset=utf-8").unwrap();
assert_eq!(rest, &b""[..]);
assert_eq!(
nt.to_type(),
Type::Text(TextDesc {
charset: EmailCharset::UTF_8,
subtype: TextSubtype::Plain,
})
);
}
}

View file

@ -99,7 +99,7 @@ pub fn header(input: &[u8]) -> IResult<&[u8], CompFieldList<Field>> {
#[cfg(test)]
mod tests {
use super::*;
use chrono::{DateTime, FixedOffset, TimeZone};
use chrono::{FixedOffset, TimeZone};
use crate::rfc5322::mailbox::*;
use crate::rfc5322::address::*;
use crate::text::misc_token::*;

View file

@ -27,11 +27,16 @@ pub enum MIMEWord<'a> {
Quoted(QuotedString<'a>),
Atom(&'a [u8]),
}
impl Default for MIMEWord<'static> {
fn default() -> Self {
Self::Atom(&[])
}
}
impl<'a> MIMEWord<'a> {
pub fn to_string(&self) -> String {
match self {
Self::Quoted(v) => v.to_string(),
Self::Atom(v) => encoding_rs::UTF_8.decode_without_bom_handling(v).1.to_string(),
Self::Atom(v) => encoding_rs::UTF_8.decode_without_bom_handling(v).0.to_string(),
}
}
}