implement content-type
This commit is contained in:
parent
8fff581fb4
commit
0d4e472d41
6 changed files with 134 additions and 101 deletions
|
@ -7,8 +7,9 @@ use encoding_rs::Encoding;
|
|||
/// using encoding_rs datastructures directly would lead to a loss of information.
|
||||
/// https://www.iana.org/assignments/character-sets/character-sets.xhtml
|
||||
#[allow(non_camel_case_types)]
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum EmailCharset<'a> {
|
||||
#[derive(Debug, PartialEq, Default)]
|
||||
pub enum EmailCharset {
|
||||
#[default]
|
||||
US_ASCII,
|
||||
ISO_8859_1,
|
||||
ISO_8859_2,
|
||||
|
@ -34,10 +35,16 @@ pub enum EmailCharset<'a> {
|
|||
Big5,
|
||||
KOI8_R,
|
||||
UTF_8,
|
||||
Other(&'a [u8]),
|
||||
Unknown,
|
||||
}
|
||||
|
||||
impl<'a> From<&'a [u8]> for EmailCharset<'a> {
|
||||
impl<'a> From<&'a str> for EmailCharset {
|
||||
fn from(s: &'a str) -> Self {
|
||||
Self::from(s.as_bytes())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<&'a [u8]> for EmailCharset {
|
||||
fn from(s: &'a [u8]) -> Self {
|
||||
match s.to_ascii_lowercase().as_slice() {
|
||||
b"us-ascii" | b"ascii" => EmailCharset::US_ASCII,
|
||||
|
@ -65,13 +72,13 @@ impl<'a> From<&'a [u8]> for EmailCharset<'a> {
|
|||
b"big5" => EmailCharset::Big5,
|
||||
b"koi8-r" => EmailCharset::KOI8_R,
|
||||
b"utf-8" | b"utf8" => EmailCharset::UTF_8,
|
||||
_ => EmailCharset::Other(s)
|
||||
_ => EmailCharset::Unknown,
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> EmailCharset<'a> {
|
||||
impl EmailCharset {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
use EmailCharset::*;
|
||||
match self {
|
||||
|
@ -100,7 +107,7 @@ impl<'a> EmailCharset<'a> {
|
|||
Big5 => "Big5",
|
||||
KOI8_R => "KOI8-R",
|
||||
UTF_8 => "UTF-8",
|
||||
Other(_) => "UTF-8", //@FIXME bad idea...
|
||||
Unknown => "UTF-8",
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -60,49 +60,7 @@ mod tests {
|
|||
use super::*;
|
||||
use crate::fragments::lazy;
|
||||
|
||||
#[test]
|
||||
fn test_version() {
|
||||
assert_eq!(version("1.0"), Ok(("", Version { major: 1, minor: 0 })),);
|
||||
|
||||
assert_eq!(
|
||||
version(" 1.0 (produced by MetaSend Vx.x)"),
|
||||
Ok(("", Version { major: 1, minor: 0 })),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
version("(produced by MetaSend Vx.x) 1.0"),
|
||||
Ok(("", Version { major: 1, minor: 0 })),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
version("1.(produced by MetaSend Vx.x)0"),
|
||||
Ok(("", Version { major: 1, minor: 0 })),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parameter() {
|
||||
assert_eq!(
|
||||
parameter("charset=utf-8"),
|
||||
Ok(("", Parameter::Charset(EmailCharset::UTF_8))),
|
||||
);
|
||||
assert_eq!(
|
||||
parameter("charset=\"utf-8\""),
|
||||
Ok(("", Parameter::Charset(EmailCharset::UTF_8))),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_content_type_plaintext() {
|
||||
assert_eq!(
|
||||
Type::try_from(&lazy::Type("text/plain; charset=utf-8")),
|
||||
Ok(Type::Text(TextDesc {
|
||||
charset: Some(EmailCharset::UTF_8),
|
||||
subtype: TextSubtype::Plain,
|
||||
unknown_parameters: vec![],
|
||||
}))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_content_type_multipart() {
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
pub mod charset;
|
||||
pub mod mechanism;
|
||||
pub mod r#type;
|
||||
//pub mod field;
|
||||
|
|
162
src/mime/type.rs
162
src/mime/type.rs
|
@ -1,84 +1,87 @@
|
|||
use std::borrow::Cow;
|
||||
use encoding_rs::Encoding;
|
||||
use nom::{
|
||||
branch::alt,
|
||||
bytes::complete::{tag,take_while1},
|
||||
character::complete as character,
|
||||
combinator::{into, opt},
|
||||
bytes::complete::tag,
|
||||
combinator::map,
|
||||
multi::many0,
|
||||
sequence::{delimited, preceded, tuple},
|
||||
sequence::{preceded, tuple},
|
||||
IResult,
|
||||
};
|
||||
|
||||
use crate::text::whitespace::cfws;
|
||||
use crate::text::quoted::quoted_string;
|
||||
use crate::mime::charset::EmailCharset;
|
||||
use crate::text::misc_token::{MIMEWord, mime_word};
|
||||
use crate::text::words::{mime_atom};
|
||||
|
||||
// --------- NAIVE TYPE
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct NaiveType<'a> {
|
||||
main: MIMEWord<'a>,
|
||||
sub: MIMEWord<'a>,
|
||||
params: Parameter<'a>,
|
||||
main: &'a [u8],
|
||||
sub: &'a [u8],
|
||||
params: Vec<Parameter<'a>>,
|
||||
}
|
||||
impl<'a> NaiveType<'a> {
|
||||
pub fn decode(&self) -> Type<'a> {
|
||||
Type::from_naive_type(self)
|
||||
}
|
||||
pub fn to_type(&self) -> Type { self.into() }
|
||||
}
|
||||
pub fn naive_type(input: &[u8]) -> IResult<&[u8], Type> {
|
||||
pub fn naive_type(input: &[u8]) -> IResult<&[u8], NaiveType> {
|
||||
map(
|
||||
tuple((mime_atom, tag("/"), mime_atom, parameter_list)),
|
||||
|(main, _, sub, params)| Type { main, sub, params },
|
||||
|(main, _, sub, params)| NaiveType { main, sub, params },
|
||||
)(input)
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum Parameter<'a> {
|
||||
pub struct Parameter<'a> {
|
||||
name: &'a [u8],
|
||||
value: MIMEWord<'a>,
|
||||
}
|
||||
pub fn parameter(input: &[u8]) -> IResult<&[u8], Parameter> {
|
||||
map(tuple((mime_atom, tag(b"="), mime_word)), |(name, value)| Parameter { name, value })(input)
|
||||
map(tuple((mime_atom, tag(b"="), mime_word)), |(name, _, value)| Parameter { name, value })(input)
|
||||
}
|
||||
pub fn parameter_list(input: &[u8]) -> IResult<&[u8], Vec<Parameter>> {
|
||||
many0(preceded(tag(";"), parameter))(input)
|
||||
}
|
||||
|
||||
// -------- TYPE
|
||||
#[derive(Debug, PartialEq, Default)]
|
||||
pub enum Type<'a> {
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum Type {
|
||||
// Composite types
|
||||
Multipart(MultipartDesc<'a>),
|
||||
Message(MessageSubtype<'a>),
|
||||
Multipart(MultipartDesc),
|
||||
Message(MessageSubtype),
|
||||
|
||||
// Discrete types
|
||||
Text(TextDesc<'a>),
|
||||
Text(TextDesc),
|
||||
Binary,
|
||||
}
|
||||
impl<'a> Type<'a> {
|
||||
pub fn from_naive_type(nt: &NaiveType<'a>) -> Self {
|
||||
impl Default for Type {
|
||||
fn default() -> Self {
|
||||
Self::Text(TextDesc::default())
|
||||
}
|
||||
}
|
||||
impl<'a> From<&'a NaiveType<'a>> for Type {
|
||||
fn from(nt: &'a NaiveType<'a>) -> Self {
|
||||
match nt.main.to_ascii_lowercase().as_slice() {
|
||||
b"multipart" => MultipartDesc::from_naive_type(nt).map(Self::Multipart).unwrap_or(Self::default()),
|
||||
b"message" => Self::Message(MessageDesc::from_naive_type(nt)),
|
||||
b"text" => Self::Text(TextDesc::from_naive_type(nt)),
|
||||
b"multipart" => MultipartDesc::try_from(nt).map(Self::Multipart).unwrap_or(Self::default()),
|
||||
b"message" => Self::Message(MessageSubtype::from(nt)),
|
||||
b"text" => Self::Text(TextDesc::from(nt)),
|
||||
_ => Self::Binary,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct MultipartDesc<'a> {
|
||||
pub subtype: MultipartSubtype<'a>,
|
||||
pub boundary: &'a [u8],
|
||||
pub struct MultipartDesc {
|
||||
pub subtype: MultipartSubtype,
|
||||
pub boundary: String,
|
||||
}
|
||||
impl<'a> MultipartDesc<'a> {
|
||||
pub fn from_naive_type(nt: &NaiveType<'a>) -> Option<Self> {
|
||||
MultipartDesc {
|
||||
subtype: MultipartSubtype::from_naive_type(nt),
|
||||
boundary: nt.iter().find(|x| x.name.as_ascii_lowercase().as_slice() == b"boundary").unwrap_or(&[]),
|
||||
}
|
||||
impl<'a> TryFrom<&'a NaiveType<'a>> for MultipartDesc {
|
||||
type Error = ();
|
||||
|
||||
fn try_from(nt: &'a NaiveType<'a>) -> Result<Self, Self::Error> {
|
||||
nt.params.iter()
|
||||
.find(|x| x.name.to_ascii_lowercase().as_slice() == b"boundary")
|
||||
.map(|boundary| MultipartDesc {
|
||||
subtype: MultipartSubtype::from(nt),
|
||||
boundary: boundary.value.to_string(),
|
||||
})
|
||||
.ok_or(())
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -91,9 +94,9 @@ pub enum MultipartSubtype {
|
|||
Report,
|
||||
Unknown,
|
||||
}
|
||||
impl<'a> From<&NaiveType<'a>> for MultipartSubtype<'a> {
|
||||
pub fn from(nt: &NaiveType<'a>) -> Self {
|
||||
match nt.sub.as_ascii_lowercase().as_slice() {
|
||||
impl<'a> From<&NaiveType<'a>> for MultipartSubtype {
|
||||
fn from(nt: &NaiveType<'a>) -> Self {
|
||||
match nt.sub.to_ascii_lowercase().as_slice() {
|
||||
b"alternative" => Self::Alternative,
|
||||
b"mixed" => Self::Mixed,
|
||||
b"digest" => Self::Digest,
|
||||
|
@ -113,24 +116,83 @@ pub enum MessageSubtype {
|
|||
}
|
||||
impl<'a> From<&NaiveType<'a>> for MessageSubtype {
|
||||
fn from(nt: &NaiveType<'a>) -> Self {
|
||||
match csub.to_lowercase().as_ref() {
|
||||
"rfc822" => MessageSubtype::RFC822,
|
||||
"partial" => MessageSubtype::Partial,
|
||||
"external" => MessageSubtype::External,
|
||||
match nt.sub.to_ascii_lowercase().as_slice() {
|
||||
b"rfc822" => Self::RFC822,
|
||||
b"partial" => Self::Partial,
|
||||
b"external" => Self::External,
|
||||
_ => Self::Unknown,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Default)]
|
||||
pub struct TextDesc<'a> {
|
||||
pub charset: Option<EmailCharset<'a>>,
|
||||
pub subtype: TextSubtype<'a>,
|
||||
pub struct TextDesc {
|
||||
pub subtype: TextSubtype,
|
||||
pub charset: EmailCharset,
|
||||
}
|
||||
impl<'a> From<&NaiveType<'a>> for TextDesc {
|
||||
fn from(nt: &NaiveType<'a>) -> Self {
|
||||
TextDesc {
|
||||
subtype: TextSubtype::from(nt),
|
||||
charset: nt.params.iter()
|
||||
.find(|x| x.name.to_ascii_lowercase().as_slice() == b"charset")
|
||||
.map(|x| EmailCharset::from(x.value.to_string().as_bytes()))
|
||||
.unwrap_or(EmailCharset::US_ASCII),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Default)]
|
||||
pub enum TextSubtype<'a> {
|
||||
pub enum TextSubtype {
|
||||
#[default]
|
||||
Plain,
|
||||
Html,
|
||||
Other(&'a str),
|
||||
Unknown,
|
||||
}
|
||||
impl<'a> From<&NaiveType<'a>> for TextSubtype {
|
||||
fn from(nt: &NaiveType<'a>) -> Self {
|
||||
match nt.sub.to_ascii_lowercase().as_slice() {
|
||||
b"plain" => Self::Plain,
|
||||
b"html" => Self::Html,
|
||||
_ => Self::Unknown,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::text::quoted::QuotedString;
|
||||
|
||||
#[test]
|
||||
fn test_parameter() {
|
||||
assert_eq!(
|
||||
parameter(b"charset=utf-8"),
|
||||
Ok((&b""[..], Parameter {
|
||||
name: &b"charset"[..],
|
||||
value: MIMEWord::Atom(&b"utf-8"[..]),
|
||||
})),
|
||||
);
|
||||
assert_eq!(
|
||||
parameter(b"charset=\"utf-8\""),
|
||||
Ok((&b""[..], Parameter {
|
||||
name: &b"charset"[..],
|
||||
value: MIMEWord::Quoted(QuotedString(vec![&b"utf-8"[..]])),
|
||||
})),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_content_type_plaintext() {
|
||||
let (rest, nt) = naive_type(b"text/plain;\r\n charset=utf-8").unwrap();
|
||||
assert_eq!(rest, &b""[..]);
|
||||
|
||||
assert_eq!(
|
||||
nt.to_type(),
|
||||
Type::Text(TextDesc {
|
||||
charset: EmailCharset::UTF_8,
|
||||
subtype: TextSubtype::Plain,
|
||||
})
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -99,7 +99,7 @@ pub fn header(input: &[u8]) -> IResult<&[u8], CompFieldList<Field>> {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use chrono::{DateTime, FixedOffset, TimeZone};
|
||||
use chrono::{FixedOffset, TimeZone};
|
||||
use crate::rfc5322::mailbox::*;
|
||||
use crate::rfc5322::address::*;
|
||||
use crate::text::misc_token::*;
|
||||
|
|
|
@ -27,11 +27,16 @@ pub enum MIMEWord<'a> {
|
|||
Quoted(QuotedString<'a>),
|
||||
Atom(&'a [u8]),
|
||||
}
|
||||
impl Default for MIMEWord<'static> {
|
||||
fn default() -> Self {
|
||||
Self::Atom(&[])
|
||||
}
|
||||
}
|
||||
impl<'a> MIMEWord<'a> {
|
||||
pub fn to_string(&self) -> String {
|
||||
match self {
|
||||
Self::Quoted(v) => v.to_string(),
|
||||
Self::Atom(v) => encoding_rs::UTF_8.decode_without_bom_handling(v).1.to_string(),
|
||||
Self::Atom(v) => encoding_rs::UTF_8.decode_without_bom_handling(v).0.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue