implement content-type
This commit is contained in:
parent
8fff581fb4
commit
0d4e472d41
6 changed files with 134 additions and 101 deletions
|
@ -7,8 +7,9 @@ use encoding_rs::Encoding;
|
||||||
/// using encoding_rs datastructures directly would lead to a loss of information.
|
/// using encoding_rs datastructures directly would lead to a loss of information.
|
||||||
/// https://www.iana.org/assignments/character-sets/character-sets.xhtml
|
/// https://www.iana.org/assignments/character-sets/character-sets.xhtml
|
||||||
#[allow(non_camel_case_types)]
|
#[allow(non_camel_case_types)]
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq, Default)]
|
||||||
pub enum EmailCharset<'a> {
|
pub enum EmailCharset {
|
||||||
|
#[default]
|
||||||
US_ASCII,
|
US_ASCII,
|
||||||
ISO_8859_1,
|
ISO_8859_1,
|
||||||
ISO_8859_2,
|
ISO_8859_2,
|
||||||
|
@ -34,10 +35,16 @@ pub enum EmailCharset<'a> {
|
||||||
Big5,
|
Big5,
|
||||||
KOI8_R,
|
KOI8_R,
|
||||||
UTF_8,
|
UTF_8,
|
||||||
Other(&'a [u8]),
|
Unknown,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> From<&'a [u8]> for EmailCharset<'a> {
|
impl<'a> From<&'a str> for EmailCharset {
|
||||||
|
fn from(s: &'a str) -> Self {
|
||||||
|
Self::from(s.as_bytes())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> From<&'a [u8]> for EmailCharset {
|
||||||
fn from(s: &'a [u8]) -> Self {
|
fn from(s: &'a [u8]) -> Self {
|
||||||
match s.to_ascii_lowercase().as_slice() {
|
match s.to_ascii_lowercase().as_slice() {
|
||||||
b"us-ascii" | b"ascii" => EmailCharset::US_ASCII,
|
b"us-ascii" | b"ascii" => EmailCharset::US_ASCII,
|
||||||
|
@ -65,13 +72,13 @@ impl<'a> From<&'a [u8]> for EmailCharset<'a> {
|
||||||
b"big5" => EmailCharset::Big5,
|
b"big5" => EmailCharset::Big5,
|
||||||
b"koi8-r" => EmailCharset::KOI8_R,
|
b"koi8-r" => EmailCharset::KOI8_R,
|
||||||
b"utf-8" | b"utf8" => EmailCharset::UTF_8,
|
b"utf-8" | b"utf8" => EmailCharset::UTF_8,
|
||||||
_ => EmailCharset::Other(s)
|
_ => EmailCharset::Unknown,
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> EmailCharset<'a> {
|
impl EmailCharset {
|
||||||
pub fn as_str(&self) -> &'static str {
|
pub fn as_str(&self) -> &'static str {
|
||||||
use EmailCharset::*;
|
use EmailCharset::*;
|
||||||
match self {
|
match self {
|
||||||
|
@ -100,7 +107,7 @@ impl<'a> EmailCharset<'a> {
|
||||||
Big5 => "Big5",
|
Big5 => "Big5",
|
||||||
KOI8_R => "KOI8-R",
|
KOI8_R => "KOI8-R",
|
||||||
UTF_8 => "UTF-8",
|
UTF_8 => "UTF-8",
|
||||||
Other(_) => "UTF-8", //@FIXME bad idea...
|
Unknown => "UTF-8",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -60,49 +60,7 @@ mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::fragments::lazy;
|
use crate::fragments::lazy;
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_version() {
|
|
||||||
assert_eq!(version("1.0"), Ok(("", Version { major: 1, minor: 0 })),);
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
version(" 1.0 (produced by MetaSend Vx.x)"),
|
|
||||||
Ok(("", Version { major: 1, minor: 0 })),
|
|
||||||
);
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
version("(produced by MetaSend Vx.x) 1.0"),
|
|
||||||
Ok(("", Version { major: 1, minor: 0 })),
|
|
||||||
);
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
version("1.(produced by MetaSend Vx.x)0"),
|
|
||||||
Ok(("", Version { major: 1, minor: 0 })),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_parameter() {
|
|
||||||
assert_eq!(
|
|
||||||
parameter("charset=utf-8"),
|
|
||||||
Ok(("", Parameter::Charset(EmailCharset::UTF_8))),
|
|
||||||
);
|
|
||||||
assert_eq!(
|
|
||||||
parameter("charset=\"utf-8\""),
|
|
||||||
Ok(("", Parameter::Charset(EmailCharset::UTF_8))),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_content_type_plaintext() {
|
|
||||||
assert_eq!(
|
|
||||||
Type::try_from(&lazy::Type("text/plain; charset=utf-8")),
|
|
||||||
Ok(Type::Text(TextDesc {
|
|
||||||
charset: Some(EmailCharset::UTF_8),
|
|
||||||
subtype: TextSubtype::Plain,
|
|
||||||
unknown_parameters: vec![],
|
|
||||||
}))
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_content_type_multipart() {
|
fn test_content_type_multipart() {
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
pub mod charset;
|
pub mod charset;
|
||||||
pub mod mechanism;
|
pub mod mechanism;
|
||||||
|
pub mod r#type;
|
||||||
//pub mod field;
|
//pub mod field;
|
||||||
|
|
162
src/mime/type.rs
162
src/mime/type.rs
|
@ -1,84 +1,87 @@
|
||||||
use std::borrow::Cow;
|
|
||||||
use encoding_rs::Encoding;
|
|
||||||
use nom::{
|
use nom::{
|
||||||
branch::alt,
|
bytes::complete::tag,
|
||||||
bytes::complete::{tag,take_while1},
|
combinator::map,
|
||||||
character::complete as character,
|
|
||||||
combinator::{into, opt},
|
|
||||||
multi::many0,
|
multi::many0,
|
||||||
sequence::{delimited, preceded, tuple},
|
sequence::{preceded, tuple},
|
||||||
IResult,
|
IResult,
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::text::whitespace::cfws;
|
use crate::mime::charset::EmailCharset;
|
||||||
use crate::text::quoted::quoted_string;
|
|
||||||
use crate::text::misc_token::{MIMEWord, mime_word};
|
use crate::text::misc_token::{MIMEWord, mime_word};
|
||||||
use crate::text::words::{mime_atom};
|
use crate::text::words::{mime_atom};
|
||||||
|
|
||||||
// --------- NAIVE TYPE
|
// --------- NAIVE TYPE
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub struct NaiveType<'a> {
|
pub struct NaiveType<'a> {
|
||||||
main: MIMEWord<'a>,
|
main: &'a [u8],
|
||||||
sub: MIMEWord<'a>,
|
sub: &'a [u8],
|
||||||
params: Parameter<'a>,
|
params: Vec<Parameter<'a>>,
|
||||||
}
|
}
|
||||||
impl<'a> NaiveType<'a> {
|
impl<'a> NaiveType<'a> {
|
||||||
pub fn decode(&self) -> Type<'a> {
|
pub fn to_type(&self) -> Type { self.into() }
|
||||||
Type::from_naive_type(self)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
pub fn naive_type(input: &[u8]) -> IResult<&[u8], Type> {
|
pub fn naive_type(input: &[u8]) -> IResult<&[u8], NaiveType> {
|
||||||
map(
|
map(
|
||||||
tuple((mime_atom, tag("/"), mime_atom, parameter_list)),
|
tuple((mime_atom, tag("/"), mime_atom, parameter_list)),
|
||||||
|(main, _, sub, params)| Type { main, sub, params },
|
|(main, _, sub, params)| NaiveType { main, sub, params },
|
||||||
)(input)
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub enum Parameter<'a> {
|
pub struct Parameter<'a> {
|
||||||
name: &'a [u8],
|
name: &'a [u8],
|
||||||
value: MIMEWord<'a>,
|
value: MIMEWord<'a>,
|
||||||
}
|
}
|
||||||
pub fn parameter(input: &[u8]) -> IResult<&[u8], Parameter> {
|
pub fn parameter(input: &[u8]) -> IResult<&[u8], Parameter> {
|
||||||
map(tuple((mime_atom, tag(b"="), mime_word)), |(name, value)| Parameter { name, value })(input)
|
map(tuple((mime_atom, tag(b"="), mime_word)), |(name, _, value)| Parameter { name, value })(input)
|
||||||
}
|
}
|
||||||
pub fn parameter_list(input: &[u8]) -> IResult<&[u8], Vec<Parameter>> {
|
pub fn parameter_list(input: &[u8]) -> IResult<&[u8], Vec<Parameter>> {
|
||||||
many0(preceded(tag(";"), parameter))(input)
|
many0(preceded(tag(";"), parameter))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
// -------- TYPE
|
// -------- TYPE
|
||||||
#[derive(Debug, PartialEq, Default)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub enum Type<'a> {
|
pub enum Type {
|
||||||
// Composite types
|
// Composite types
|
||||||
Multipart(MultipartDesc<'a>),
|
Multipart(MultipartDesc),
|
||||||
Message(MessageSubtype<'a>),
|
Message(MessageSubtype),
|
||||||
|
|
||||||
// Discrete types
|
// Discrete types
|
||||||
Text(TextDesc<'a>),
|
Text(TextDesc),
|
||||||
Binary,
|
Binary,
|
||||||
}
|
}
|
||||||
impl<'a> Type<'a> {
|
impl Default for Type {
|
||||||
pub fn from_naive_type(nt: &NaiveType<'a>) -> Self {
|
fn default() -> Self {
|
||||||
|
Self::Text(TextDesc::default())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl<'a> From<&'a NaiveType<'a>> for Type {
|
||||||
|
fn from(nt: &'a NaiveType<'a>) -> Self {
|
||||||
match nt.main.to_ascii_lowercase().as_slice() {
|
match nt.main.to_ascii_lowercase().as_slice() {
|
||||||
b"multipart" => MultipartDesc::from_naive_type(nt).map(Self::Multipart).unwrap_or(Self::default()),
|
b"multipart" => MultipartDesc::try_from(nt).map(Self::Multipart).unwrap_or(Self::default()),
|
||||||
b"message" => Self::Message(MessageDesc::from_naive_type(nt)),
|
b"message" => Self::Message(MessageSubtype::from(nt)),
|
||||||
b"text" => Self::Text(TextDesc::from_naive_type(nt)),
|
b"text" => Self::Text(TextDesc::from(nt)),
|
||||||
_ => Self::Binary,
|
_ => Self::Binary,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub struct MultipartDesc<'a> {
|
pub struct MultipartDesc {
|
||||||
pub subtype: MultipartSubtype<'a>,
|
pub subtype: MultipartSubtype,
|
||||||
pub boundary: &'a [u8],
|
pub boundary: String,
|
||||||
}
|
}
|
||||||
impl<'a> MultipartDesc<'a> {
|
impl<'a> TryFrom<&'a NaiveType<'a>> for MultipartDesc {
|
||||||
pub fn from_naive_type(nt: &NaiveType<'a>) -> Option<Self> {
|
type Error = ();
|
||||||
MultipartDesc {
|
|
||||||
subtype: MultipartSubtype::from_naive_type(nt),
|
fn try_from(nt: &'a NaiveType<'a>) -> Result<Self, Self::Error> {
|
||||||
boundary: nt.iter().find(|x| x.name.as_ascii_lowercase().as_slice() == b"boundary").unwrap_or(&[]),
|
nt.params.iter()
|
||||||
}
|
.find(|x| x.name.to_ascii_lowercase().as_slice() == b"boundary")
|
||||||
|
.map(|boundary| MultipartDesc {
|
||||||
|
subtype: MultipartSubtype::from(nt),
|
||||||
|
boundary: boundary.value.to_string(),
|
||||||
|
})
|
||||||
|
.ok_or(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -91,9 +94,9 @@ pub enum MultipartSubtype {
|
||||||
Report,
|
Report,
|
||||||
Unknown,
|
Unknown,
|
||||||
}
|
}
|
||||||
impl<'a> From<&NaiveType<'a>> for MultipartSubtype<'a> {
|
impl<'a> From<&NaiveType<'a>> for MultipartSubtype {
|
||||||
pub fn from(nt: &NaiveType<'a>) -> Self {
|
fn from(nt: &NaiveType<'a>) -> Self {
|
||||||
match nt.sub.as_ascii_lowercase().as_slice() {
|
match nt.sub.to_ascii_lowercase().as_slice() {
|
||||||
b"alternative" => Self::Alternative,
|
b"alternative" => Self::Alternative,
|
||||||
b"mixed" => Self::Mixed,
|
b"mixed" => Self::Mixed,
|
||||||
b"digest" => Self::Digest,
|
b"digest" => Self::Digest,
|
||||||
|
@ -113,24 +116,83 @@ pub enum MessageSubtype {
|
||||||
}
|
}
|
||||||
impl<'a> From<&NaiveType<'a>> for MessageSubtype {
|
impl<'a> From<&NaiveType<'a>> for MessageSubtype {
|
||||||
fn from(nt: &NaiveType<'a>) -> Self {
|
fn from(nt: &NaiveType<'a>) -> Self {
|
||||||
match csub.to_lowercase().as_ref() {
|
match nt.sub.to_ascii_lowercase().as_slice() {
|
||||||
"rfc822" => MessageSubtype::RFC822,
|
b"rfc822" => Self::RFC822,
|
||||||
"partial" => MessageSubtype::Partial,
|
b"partial" => Self::Partial,
|
||||||
"external" => MessageSubtype::External,
|
b"external" => Self::External,
|
||||||
_ => Self::Unknown,
|
_ => Self::Unknown,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Default)]
|
#[derive(Debug, PartialEq, Default)]
|
||||||
pub struct TextDesc<'a> {
|
pub struct TextDesc {
|
||||||
pub charset: Option<EmailCharset<'a>>,
|
pub subtype: TextSubtype,
|
||||||
pub subtype: TextSubtype<'a>,
|
pub charset: EmailCharset,
|
||||||
|
}
|
||||||
|
impl<'a> From<&NaiveType<'a>> for TextDesc {
|
||||||
|
fn from(nt: &NaiveType<'a>) -> Self {
|
||||||
|
TextDesc {
|
||||||
|
subtype: TextSubtype::from(nt),
|
||||||
|
charset: nt.params.iter()
|
||||||
|
.find(|x| x.name.to_ascii_lowercase().as_slice() == b"charset")
|
||||||
|
.map(|x| EmailCharset::from(x.value.to_string().as_bytes()))
|
||||||
|
.unwrap_or(EmailCharset::US_ASCII),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Default)]
|
#[derive(Debug, PartialEq, Default)]
|
||||||
pub enum TextSubtype<'a> {
|
pub enum TextSubtype {
|
||||||
|
#[default]
|
||||||
Plain,
|
Plain,
|
||||||
Html,
|
Html,
|
||||||
Other(&'a str),
|
Unknown,
|
||||||
|
}
|
||||||
|
impl<'a> From<&NaiveType<'a>> for TextSubtype {
|
||||||
|
fn from(nt: &NaiveType<'a>) -> Self {
|
||||||
|
match nt.sub.to_ascii_lowercase().as_slice() {
|
||||||
|
b"plain" => Self::Plain,
|
||||||
|
b"html" => Self::Html,
|
||||||
|
_ => Self::Unknown,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use crate::text::quoted::QuotedString;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_parameter() {
|
||||||
|
assert_eq!(
|
||||||
|
parameter(b"charset=utf-8"),
|
||||||
|
Ok((&b""[..], Parameter {
|
||||||
|
name: &b"charset"[..],
|
||||||
|
value: MIMEWord::Atom(&b"utf-8"[..]),
|
||||||
|
})),
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
parameter(b"charset=\"utf-8\""),
|
||||||
|
Ok((&b""[..], Parameter {
|
||||||
|
name: &b"charset"[..],
|
||||||
|
value: MIMEWord::Quoted(QuotedString(vec![&b"utf-8"[..]])),
|
||||||
|
})),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_content_type_plaintext() {
|
||||||
|
let (rest, nt) = naive_type(b"text/plain;\r\n charset=utf-8").unwrap();
|
||||||
|
assert_eq!(rest, &b""[..]);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
nt.to_type(),
|
||||||
|
Type::Text(TextDesc {
|
||||||
|
charset: EmailCharset::UTF_8,
|
||||||
|
subtype: TextSubtype::Plain,
|
||||||
|
})
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -99,7 +99,7 @@ pub fn header(input: &[u8]) -> IResult<&[u8], CompFieldList<Field>> {
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use chrono::{DateTime, FixedOffset, TimeZone};
|
use chrono::{FixedOffset, TimeZone};
|
||||||
use crate::rfc5322::mailbox::*;
|
use crate::rfc5322::mailbox::*;
|
||||||
use crate::rfc5322::address::*;
|
use crate::rfc5322::address::*;
|
||||||
use crate::text::misc_token::*;
|
use crate::text::misc_token::*;
|
||||||
|
|
|
@ -27,11 +27,16 @@ pub enum MIMEWord<'a> {
|
||||||
Quoted(QuotedString<'a>),
|
Quoted(QuotedString<'a>),
|
||||||
Atom(&'a [u8]),
|
Atom(&'a [u8]),
|
||||||
}
|
}
|
||||||
|
impl Default for MIMEWord<'static> {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::Atom(&[])
|
||||||
|
}
|
||||||
|
}
|
||||||
impl<'a> MIMEWord<'a> {
|
impl<'a> MIMEWord<'a> {
|
||||||
pub fn to_string(&self) -> String {
|
pub fn to_string(&self) -> String {
|
||||||
match self {
|
match self {
|
||||||
Self::Quoted(v) => v.to_string(),
|
Self::Quoted(v) => v.to_string(),
|
||||||
Self::Atom(v) => encoding_rs::UTF_8.decode_without_bom_handling(v).1.to_string(),
|
Self::Atom(v) => encoding_rs::UTF_8.decode_without_bom_handling(v).0.to_string(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue