content type header
This commit is contained in:
parent
e3b10a994b
commit
cb7b767271
3 changed files with 223 additions and 26 deletions
|
@ -8,6 +8,7 @@
|
||||||
- Maintainability - modifying the code does not create regression and is possible for someone exterior to the project. Keep cyclomatic complexity low.
|
- Maintainability - modifying the code does not create regression and is possible for someone exterior to the project. Keep cyclomatic complexity low.
|
||||||
- Composability - build your own parser by picking the relevant passes, avoid work that is not needed.
|
- Composability - build your own parser by picking the relevant passes, avoid work that is not needed.
|
||||||
- Compatibility - always try to parse something, do not panic or return an error.
|
- Compatibility - always try to parse something, do not panic or return an error.
|
||||||
|
- Exhaustivity - serve as a common project to encode knowledge about emails (existing mime types, existing headers, etc.).
|
||||||
|
|
||||||
## Non goals
|
## Non goals
|
||||||
|
|
||||||
|
|
|
@ -16,4 +16,5 @@ pub enum IMFError<'a> {
|
||||||
PhraseList(nom::Err<nom::error::Error<&'a str>>),
|
PhraseList(nom::Err<nom::error::Error<&'a str>>),
|
||||||
ReceivedLog(nom::Err<nom::error::Error<&'a str>>),
|
ReceivedLog(nom::Err<nom::error::Error<&'a str>>),
|
||||||
Version(nom::Err<nom::error::Error<&'a str>>),
|
Version(nom::Err<nom::error::Error<&'a str>>),
|
||||||
|
ContentType(nom::Err<nom::error::Error<&'a str>>),
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,12 +1,19 @@
|
||||||
|
use std::borrow::Cow;
|
||||||
use encoding_rs::Encoding;
|
use encoding_rs::Encoding;
|
||||||
use nom::{
|
use nom::{
|
||||||
bytes::complete::tag, character::complete as character, combinator::opt, sequence::tuple,
|
branch::alt,
|
||||||
|
bytes::complete::{tag,take_while1},
|
||||||
|
character::complete as character,
|
||||||
|
combinator::{into, opt},
|
||||||
|
multi::many0,
|
||||||
|
sequence::{delimited, preceded, tuple},
|
||||||
IResult,
|
IResult,
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::error::IMFError;
|
use crate::error::IMFError;
|
||||||
use crate::fragments::lazy;
|
use crate::fragments::lazy;
|
||||||
use crate::fragments::whitespace::cfws;
|
use crate::fragments::whitespace::cfws;
|
||||||
|
use crate::fragments::quoted::quoted_string;
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub struct Version {
|
pub struct Version {
|
||||||
|
@ -17,42 +24,69 @@ pub struct Version {
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub enum Type<'a> {
|
pub enum Type<'a> {
|
||||||
// Composite types
|
// Composite types
|
||||||
Multipart(MultipartSubtype<'a>),
|
Multipart(MultipartDesc<'a>),
|
||||||
Message(MessageSubtype<'a>),
|
Message(MessageDesc<'a>),
|
||||||
|
|
||||||
// Discrete types
|
// Discrete types
|
||||||
Text(&'a str),
|
Text(TextDesc<'a>),
|
||||||
Image(&'a str),
|
Image(&'a str, Vec<Parameter<'a>>),
|
||||||
Audio(&'a str),
|
Audio(&'a str, Vec<Parameter<'a>>),
|
||||||
Video(&'a str),
|
Video(&'a str, Vec<Parameter<'a>>),
|
||||||
Application(&'a str),
|
Application(&'a str, Vec<Parameter<'a>>),
|
||||||
|
|
||||||
// Unknown
|
// Unknown
|
||||||
Other(&'a str, &'a str, Vec<Parameter<'a>>),
|
Other(&'a str, &'a str, Vec<Parameter<'a>>),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
pub struct MultipartDesc<'a> {
|
||||||
|
boundary: String,
|
||||||
|
subtype: MultipartSubtype<'a>,
|
||||||
|
unknown_parameters: Vec<Parameter<'a>>,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub enum MultipartSubtype<'a> {
|
pub enum MultipartSubtype<'a> {
|
||||||
Alternative(Parameter<'a>),
|
Alternative,
|
||||||
Mixed(Parameter<'a>),
|
Mixed,
|
||||||
Digest(Parameter<'a>),
|
Digest,
|
||||||
Parallel(Parameter<'a>),
|
Parallel,
|
||||||
Other(&'a str, Parameter<'a>),
|
Other(&'a str),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
pub struct MessageDesc<'a> {
|
||||||
|
subtype: MessageSubtype<'a>,
|
||||||
|
unknown_parameters: Vec<Parameter<'a>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub enum MessageSubtype<'a> {
|
pub enum MessageSubtype<'a> {
|
||||||
RFC822(Vec<Parameter<'a>>),
|
RFC822,
|
||||||
Partial(Vec<Parameter<'a>>),
|
Partial,
|
||||||
External(Vec<Parameter<'a>>),
|
External,
|
||||||
Other(&'a str, Vec<Parameter<'a>>),
|
Other(&'a str),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
pub struct TextDesc<'a> {
|
||||||
|
charset: Option<EmailCharset<'a>>,
|
||||||
|
subtype: TextSubtype<'a>,
|
||||||
|
unknown_parameters: Vec<Parameter<'a>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
pub enum TextSubtype<'a> {
|
||||||
|
Plain,
|
||||||
|
Html,
|
||||||
|
Other(&'a str),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub enum Parameter<'a> {
|
pub enum Parameter<'a> {
|
||||||
Charset(EmailCharset<'a>),
|
Charset(EmailCharset<'a>),
|
||||||
Boundary(&'a str),
|
Boundary(String),
|
||||||
Other(&'a str, &'a str),
|
Other(&'a str, String),
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Specific implementation of charset
|
/// Specific implementation of charset
|
||||||
|
@ -89,7 +123,7 @@ pub enum EmailCharset<'a> {
|
||||||
Big5,
|
Big5,
|
||||||
KOI8_R,
|
KOI8_R,
|
||||||
UTF_8,
|
UTF_8,
|
||||||
Other(&'a str),
|
Other(Cow<'a, str>),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
|
@ -101,11 +135,16 @@ pub enum Mechanism<'a> {
|
||||||
Base64,
|
Base64,
|
||||||
Other(&'a str),
|
Other(&'a str),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> From<&'a str> for EmailCharset<'a> {
|
impl<'a> From<&'a str> for EmailCharset<'a> {
|
||||||
fn from(s: &'a str) -> Self {
|
fn from(s: &'a str) -> Self {
|
||||||
|
EmailCharset::from(Cow::Borrowed(s))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> From<Cow<'a, str>> for EmailCharset<'a> {
|
||||||
|
fn from(s: Cow<'a, str>) -> Self {
|
||||||
match s.to_lowercase().as_ref() {
|
match s.to_lowercase().as_ref() {
|
||||||
"us-ascii" => EmailCharset::US_ASCII,
|
"us-ascii" | "ascii" => EmailCharset::US_ASCII,
|
||||||
"iso-8859-1" => EmailCharset::ISO_8859_1,
|
"iso-8859-1" => EmailCharset::ISO_8859_1,
|
||||||
"iso-8859-2" => EmailCharset::ISO_8859_2,
|
"iso-8859-2" => EmailCharset::ISO_8859_2,
|
||||||
"iso-8859-3" => EmailCharset::ISO_8859_3,
|
"iso-8859-3" => EmailCharset::ISO_8859_3,
|
||||||
|
@ -129,14 +168,14 @@ impl<'a> From<&'a str> for EmailCharset<'a> {
|
||||||
"gb2312" => EmailCharset::GB2312,
|
"gb2312" => EmailCharset::GB2312,
|
||||||
"big5" => EmailCharset::Big5,
|
"big5" => EmailCharset::Big5,
|
||||||
"koi8-r" => EmailCharset::KOI8_R,
|
"koi8-r" => EmailCharset::KOI8_R,
|
||||||
"utf-8" => EmailCharset::UTF_8,
|
"utf-8" | "utf8" => EmailCharset::UTF_8,
|
||||||
_ => EmailCharset::Other(s)
|
_ => EmailCharset::Other(s)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> EmailCharset<'a> {
|
impl<'a> EmailCharset<'a> {
|
||||||
pub fn as_str(&self) -> &'a str {
|
pub fn as_str(&'a self) -> &'a str {
|
||||||
use EmailCharset::*;
|
use EmailCharset::*;
|
||||||
match self {
|
match self {
|
||||||
US_ASCII => "US-ASCII",
|
US_ASCII => "US-ASCII",
|
||||||
|
@ -164,7 +203,7 @@ impl<'a> EmailCharset<'a> {
|
||||||
Big5 => "Big5",
|
Big5 => "Big5",
|
||||||
KOI8_R => "KOI8-R",
|
KOI8_R => "KOI8-R",
|
||||||
UTF_8 => "UTF-8",
|
UTF_8 => "UTF-8",
|
||||||
Other(raw) => raw,
|
Other(raw) => raw.as_ref(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -188,7 +227,9 @@ impl<'a> TryFrom<&'a lazy::Type<'a>> for Type<'a> {
|
||||||
type Error = IMFError<'a>;
|
type Error = IMFError<'a>;
|
||||||
|
|
||||||
fn try_from(tp: &'a lazy::Type<'a>) -> Result<Self, Self::Error> {
|
fn try_from(tp: &'a lazy::Type<'a>) -> Result<Self, Self::Error> {
|
||||||
Ok(Type::Other("", "", vec![]))
|
content_type(tp.0)
|
||||||
|
.map(|(_, v)| v)
|
||||||
|
.map_err(|e| IMFError::ContentType(e))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -200,6 +241,40 @@ impl<'a> TryFrom<&'a lazy::Mechanism<'a>> for Mechanism<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'a> From<&'a str> for MultipartSubtype<'a> {
|
||||||
|
fn from(csub: &'a str) -> Self {
|
||||||
|
match csub.to_lowercase().as_ref() {
|
||||||
|
"alternative" => MultipartSubtype::Alternative,
|
||||||
|
"mixed" => MultipartSubtype::Mixed,
|
||||||
|
"digest" => MultipartSubtype::Digest,
|
||||||
|
"parallel" => MultipartSubtype::Parallel,
|
||||||
|
_ => MultipartSubtype::Other(csub),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> From<&'a str> for MessageSubtype<'a> {
|
||||||
|
fn from(csub: &'a str) -> Self {
|
||||||
|
match csub.to_lowercase().as_ref() {
|
||||||
|
"rfc822" => MessageSubtype::RFC822,
|
||||||
|
"partial" => MessageSubtype::Partial,
|
||||||
|
"external" => MessageSubtype::External,
|
||||||
|
_ => MessageSubtype::Other(csub),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> From<&'a str> for TextSubtype<'a> {
|
||||||
|
fn from(csub: &'a str) -> Self {
|
||||||
|
match csub.to_lowercase().as_ref() {
|
||||||
|
"html" => TextSubtype::Html,
|
||||||
|
"plain" => TextSubtype::Plain,
|
||||||
|
_ => TextSubtype::Other(csub),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
pub fn version(input: &str) -> IResult<&str, Version> {
|
pub fn version(input: &str) -> IResult<&str, Version> {
|
||||||
let (rest, (_, major, _, _, _, minor, _)) = tuple((
|
let (rest, (_, major, _, _, _, minor, _)) = tuple((
|
||||||
opt(cfws),
|
opt(cfws),
|
||||||
|
@ -213,9 +288,105 @@ pub fn version(input: &str) -> IResult<&str, Version> {
|
||||||
Ok((rest, Version { major, minor }))
|
Ok((rest, Version { major, minor }))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Token allowed characters
|
||||||
|
fn is_token_text(c: char) -> bool {
|
||||||
|
c.is_ascii() && !c.is_ascii_control() && !c.is_ascii_whitespace() && !"()<>@,;:\\\"/[]?=".contains(c)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Token
|
||||||
|
///
|
||||||
|
/// `[CFWS] 1*token_text [CFWS]`
|
||||||
|
pub fn token(input: &str) -> IResult<&str, &str> {
|
||||||
|
delimited(opt(cfws), take_while1(is_token_text), opt(cfws))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parameter(input: &str) -> IResult<&str, Parameter> {
|
||||||
|
let (rest, (pname, _, pvalue)) = tuple((
|
||||||
|
token,
|
||||||
|
tag("="),
|
||||||
|
alt((quoted_string, into(token)))
|
||||||
|
))(input)?;
|
||||||
|
|
||||||
|
let param = match pname.to_lowercase().as_ref() {
|
||||||
|
"charset" => Parameter::Charset(EmailCharset::from(Cow::Owned(pvalue))),
|
||||||
|
"boundary" => Parameter::Boundary(pvalue),
|
||||||
|
_ => Parameter::Other(pname, pvalue),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok((rest, param))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn content_type(input: &str) -> IResult<&str, Type> {
|
||||||
|
let (rest, (ctype, _, csub, params)) = tuple((
|
||||||
|
token, tag("/"), token,
|
||||||
|
many0(preceded(tag(";"), parameter))
|
||||||
|
))(input)?;
|
||||||
|
|
||||||
|
let parsed = match ctype.to_lowercase().as_ref() {
|
||||||
|
"multipart" => {
|
||||||
|
let (boundary_param, unknown_parameters): (Vec<Parameter>, Vec<Parameter>) = params
|
||||||
|
.into_iter()
|
||||||
|
.partition(|p| matches!(p, Parameter::Boundary(_)));
|
||||||
|
|
||||||
|
// @FIXME: if multiple boundary value is set, only the
|
||||||
|
// first one is picked. We should check that it makes
|
||||||
|
// sense with other implementation.
|
||||||
|
match boundary_param.into_iter().next() {
|
||||||
|
// @FIXME boundary is mandatory. If it is missing,
|
||||||
|
// fallback to text/plain. Must check that this behavior
|
||||||
|
// is standard...
|
||||||
|
None => Type::Text(TextDesc {
|
||||||
|
charset: None,
|
||||||
|
subtype: TextSubtype::Plain,
|
||||||
|
unknown_parameters
|
||||||
|
}),
|
||||||
|
Some(Parameter::Boundary(v)) => Type::Multipart(MultipartDesc {
|
||||||
|
subtype: MultipartSubtype::from(csub),
|
||||||
|
unknown_parameters,
|
||||||
|
boundary: v.into(),
|
||||||
|
}),
|
||||||
|
Some(_) => unreachable!(), // checked above
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
"message" => {
|
||||||
|
Type::Message(MessageDesc {
|
||||||
|
subtype: MessageSubtype::from(csub),
|
||||||
|
unknown_parameters: params,
|
||||||
|
})
|
||||||
|
},
|
||||||
|
|
||||||
|
"text" => {
|
||||||
|
let (charset_param, unknown_parameters): (Vec<Parameter>, Vec<Parameter>) = params
|
||||||
|
.into_iter()
|
||||||
|
.partition(|p| matches!(p, Parameter::Charset(_)));
|
||||||
|
|
||||||
|
let charset = match charset_param.into_iter().next() {
|
||||||
|
Some(Parameter::Charset(emlchar)) => Some(emlchar),
|
||||||
|
_ => None,
|
||||||
|
};
|
||||||
|
|
||||||
|
Type::Text(TextDesc {
|
||||||
|
subtype: TextSubtype::from(csub),
|
||||||
|
charset: charset,
|
||||||
|
unknown_parameters,
|
||||||
|
})
|
||||||
|
},
|
||||||
|
|
||||||
|
"image" => Type::Image(csub, params),
|
||||||
|
"audio" => Type::Audio(csub, params),
|
||||||
|
"video" => Type::Video(csub, params),
|
||||||
|
"application" => Type::Application(csub, params),
|
||||||
|
_ => Type::Other(ctype, csub, params),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok((rest, parsed))
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::fragments::lazy;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_version() {
|
fn test_version() {
|
||||||
|
@ -265,6 +436,30 @@ mod tests {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_parameter() {
|
||||||
|
assert_eq!(
|
||||||
|
parameter("charset=utf-8"),
|
||||||
|
Ok(("", Parameter::Charset(EmailCharset::UTF_8))),
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
parameter("charset=\"utf-8\""),
|
||||||
|
Ok(("", Parameter::Charset(EmailCharset::UTF_8))),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_content_type_plaintext() {
|
||||||
|
assert_eq!(
|
||||||
|
Type::try_from(&lazy::Type("text/plain; charset=utf-8")),
|
||||||
|
Ok(Type::Text(TextDesc {
|
||||||
|
charset: Some(EmailCharset::UTF_8),
|
||||||
|
subtype: TextSubtype::Plain,
|
||||||
|
unknown_parameters: vec![],
|
||||||
|
}))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
/* #[test]
|
/* #[test]
|
||||||
fn test_parameter() {
|
fn test_parameter() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
|
Loading…
Reference in a new issue