wip type
This commit is contained in:
parent
42a5f928f6
commit
d4b1853513
5 changed files with 177 additions and 183 deletions
|
@ -15,75 +15,6 @@ use crate::fragments::lazy;
|
|||
use crate::fragments::whitespace::cfws;
|
||||
use crate::fragments::quoted::quoted_string;
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum Type<'a> {
|
||||
// Composite types
|
||||
Multipart(MultipartDesc<'a>),
|
||||
Message(MessageDesc<'a>),
|
||||
|
||||
// Discrete types
|
||||
Text(TextDesc<'a>),
|
||||
Image(&'a str, Vec<Parameter<'a>>),
|
||||
Audio(&'a str, Vec<Parameter<'a>>),
|
||||
Video(&'a str, Vec<Parameter<'a>>),
|
||||
Application(&'a str, Vec<Parameter<'a>>),
|
||||
|
||||
// Unknown
|
||||
Other(&'a str, &'a str, Vec<Parameter<'a>>),
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct MultipartDesc<'a> {
|
||||
pub boundary: String,
|
||||
pub subtype: MultipartSubtype<'a>,
|
||||
pub unknown_parameters: Vec<Parameter<'a>>,
|
||||
}
|
||||
|
||||
/// Subtype of a `multipart/*` content type.
#[derive(Debug, PartialEq)]
pub enum MultipartSubtype<'a> {
    /// `multipart/alternative`
    Alternative,
    /// `multipart/mixed`
    Mixed,
    /// `multipart/digest`
    Digest,
    /// `multipart/parallel`
    Parallel,
    /// `multipart/report`
    Report,
    /// Any other subtype, kept as written in the header.
    Other(&'a str),
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct MessageDesc<'a> {
|
||||
pub subtype: MessageSubtype<'a>,
|
||||
pub unknown_parameters: Vec<Parameter<'a>>,
|
||||
}
|
||||
|
||||
/// Subtype of a `message/*` content type.
#[derive(Debug, PartialEq)]
pub enum MessageSubtype<'a> {
    /// `message/rfc822`
    RFC822,
    /// `message/partial`
    Partial,
    /// External message body.
    External,
    /// Any other subtype, kept as written in the header.
    Other(&'a str),
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct TextDesc<'a> {
|
||||
pub charset: Option<EmailCharset<'a>>,
|
||||
pub subtype: TextSubtype<'a>,
|
||||
pub unknown_parameters: Vec<Parameter<'a>>,
|
||||
}
|
||||
|
||||
/// Subtype of a `text/*` content type.
#[derive(Debug, PartialEq)]
pub enum TextSubtype<'a> {
    /// `text/plain`
    Plain,
    /// `text/html`
    Html,
    /// Any other subtype, kept as written in the header.
    Other(&'a str),
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum Parameter<'a> {
|
||||
Charset(EmailCharset<'a>),
|
||||
Boundary(String),
|
||||
Other(&'a str, String),
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -124,89 +55,6 @@ impl<'a> From<&'a str> for TextSubtype<'a> {
|
|||
}
|
||||
*/
|
||||
|
||||
pub fn parameter(input: &str) -> IResult<&str, Parameter> {
|
||||
let (rest, (pname, _, pvalue)) = tuple((
|
||||
token,
|
||||
tag("="),
|
||||
alt((quoted_string, into(token)))
|
||||
))(input)?;
|
||||
|
||||
let param = match pname.to_lowercase().as_ref() {
|
||||
"charset" => Parameter::Charset(EmailCharset::from(Cow::Owned(pvalue))),
|
||||
"boundary" => Parameter::Boundary(pvalue),
|
||||
_ => Parameter::Other(pname, pvalue),
|
||||
};
|
||||
|
||||
Ok((rest, param))
|
||||
}
|
||||
|
||||
pub fn content_type(input: &str) -> IResult<&str, Type> {
|
||||
let (rest, (ctype, _, csub, params)) = tuple((
|
||||
token, tag("/"), token,
|
||||
many0(preceded(tag(";"), parameter))
|
||||
))(input)?;
|
||||
|
||||
let parsed = match ctype.to_lowercase().as_ref() {
|
||||
"multipart" => {
|
||||
let (boundary_param, unknown_parameters): (Vec<Parameter>, Vec<Parameter>) = params
|
||||
.into_iter()
|
||||
.partition(|p| matches!(p, Parameter::Boundary(_)));
|
||||
|
||||
// @FIXME: if multiple boundary value is set, only the
|
||||
// first one is picked. We should check that it makes
|
||||
// sense with other implementation.
|
||||
match boundary_param.into_iter().next() {
|
||||
// @FIXME boundary is mandatory. If it is missing,
|
||||
// fallback to text/plain. Must check that this behavior
|
||||
// is standard...
|
||||
None => Type::Text(TextDesc {
|
||||
charset: None,
|
||||
subtype: TextSubtype::Plain,
|
||||
unknown_parameters
|
||||
}),
|
||||
Some(Parameter::Boundary(v)) => Type::Multipart(MultipartDesc {
|
||||
subtype: MultipartSubtype::from(csub),
|
||||
unknown_parameters,
|
||||
boundary: v.into(),
|
||||
}),
|
||||
Some(_) => unreachable!(), // checked above
|
||||
}
|
||||
},
|
||||
|
||||
"message" => {
|
||||
Type::Message(MessageDesc {
|
||||
subtype: MessageSubtype::from(csub),
|
||||
unknown_parameters: params,
|
||||
})
|
||||
},
|
||||
|
||||
"text" => {
|
||||
let (charset_param, unknown_parameters): (Vec<Parameter>, Vec<Parameter>) = params
|
||||
.into_iter()
|
||||
.partition(|p| matches!(p, Parameter::Charset(_)));
|
||||
|
||||
let charset = match charset_param.into_iter().next() {
|
||||
Some(Parameter::Charset(emlchar)) => Some(emlchar),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
Type::Text(TextDesc {
|
||||
subtype: TextSubtype::from(csub),
|
||||
charset: charset,
|
||||
unknown_parameters,
|
||||
})
|
||||
},
|
||||
|
||||
"image" => Type::Image(csub, params),
|
||||
"audio" => Type::Audio(csub, params),
|
||||
"video" => Type::Video(csub, params),
|
||||
"application" => Type::Application(csub, params),
|
||||
_ => Type::Other(ctype, csub, params),
|
||||
};
|
||||
|
||||
Ok((rest, parsed))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
|
|
@ -9,7 +9,7 @@ use crate::text::whitespace::cfws;
|
|||
use crate::text::words::mime_token as token;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum DecodedMechanism<'a> {
|
||||
pub enum Mechanism<'a> {
|
||||
_7Bit,
|
||||
_8Bit,
|
||||
Binary,
|
||||
|
@ -18,23 +18,23 @@ pub enum DecodedMechanism<'a> {
|
|||
Other(&'a [u8]),
|
||||
}
|
||||
|
||||
pub struct Mechanism<'a>(pub &'a [u8]);
|
||||
impl<'a> Mechanism<'a> {
|
||||
pub fn decode(&self) -> DecodedMechanism {
|
||||
use DecodedMechanism::*;
|
||||
match self.0.to_ascii_lowercase().as_slice() {
|
||||
b"7bit" => _7Bit,
|
||||
b"8bit" => _8Bit,
|
||||
b"binary" => Binary,
|
||||
b"quoted-printable" => QuotedPrintable,
|
||||
b"base64" => Base64,
|
||||
_ => Other(self.0),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn mechanism(input: &[u8]) -> IResult<&[u8], Mechanism> {
|
||||
map(token, Mechanism)(input)
|
||||
use Mechanism::*;
|
||||
|
||||
alt((
|
||||
delimited(
|
||||
opt(cfws),
|
||||
alt((
|
||||
value(_7Bit, tag_no_case("7bit")),
|
||||
value(_8Bit, tag_no_case("8bit")),
|
||||
value(Binary, tag_no_case("binary")),
|
||||
value(QuotedPrintable, tag_no_case("quoted-printable")),
|
||||
value(Base64, tag_no_case("base64")),
|
||||
)),
|
||||
opt(cfws),
|
||||
),
|
||||
map(token, Other),
|
||||
))(input)
|
||||
}
|
||||
|
||||
|
||||
|
@ -44,28 +44,28 @@ mod tests {
|
|||
#[test]
fn test_mechanism() {
    // `mechanism` now yields the `Mechanism` enum directly, so the parser
    // result is compared as a whole: the input must be fully consumed and
    // the expected variant returned.
    assert_eq!(
        mechanism(b"7bit"),
        Ok((&b""[..], Mechanism::_7Bit)),
    );

    // A leading comment is skipped.
    assert_eq!(
        mechanism(b"(youhou) 8bit"),
        Ok((&b""[..], Mechanism::_8Bit)),
    );

    // Matching is case-insensitive and comments may surround the token.
    assert_eq!(
        mechanism(b"(blip) bInArY (blip blip)"),
        Ok((&b""[..], Mechanism::Binary)),
    );

    assert_eq!(
        mechanism(b" base64 "),
        Ok((&b""[..], Mechanism::Base64)),
    );

    assert_eq!(
        mechanism(b" Quoted-Printable "),
        Ok((&b""[..], Mechanism::QuotedPrintable)),
    );

    // Unknown mechanisms are preserved verbatim.
    assert_eq!(
        mechanism(b"uuencode"),
        Ok((&b""[..], Mechanism::Other(&b"uuencode"[..]))),
    );
}
|
||||
}
|
||||
|
|
|
@ -11,7 +11,7 @@ use nom::{
|
|||
use crate::text::{
|
||||
quoted::{QuotedString, quoted_string},
|
||||
whitespace::{fws, is_obs_no_ws_ctl},
|
||||
words::{atom, is_vchar},
|
||||
words::{atom, mime_atom, is_vchar},
|
||||
encoding::{self, encoded_word},
|
||||
ascii,
|
||||
};
|
||||
|
@ -22,6 +22,26 @@ pub fn phrase_list(input: &[u8]) -> IResult<&[u8], PhraseList> {
|
|||
map(separated_list1(tag(","), phrase), PhraseList)(input)
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum MIMEWord<'a> {
|
||||
Quoted(QuotedString<'a>),
|
||||
Atom(&'a [u8]),
|
||||
}
|
||||
impl<'a> MIMEWord<'a> {
|
||||
pub fn to_string(&self) -> String {
|
||||
match self {
|
||||
Quoted(v) => v.to_string(),
|
||||
Atom(v) => encoding_rs::UTF_8.decode_without_bom_handling(v).1.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
pub fn mime_word(input: &[u8]) -> IResult<&[u8], MIMEWord> {
|
||||
alt((
|
||||
map(quoted_string, MIMEWord::Quoted),
|
||||
map(mime_atom, MIMEWord::Atom),
|
||||
))(input)
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum Word<'a> {
|
||||
Quoted(QuotedString<'a>),
|
||||
|
|
|
@ -16,7 +16,7 @@ pub fn is_vchar(c: u8) -> bool {
|
|||
/// MIME Token allowed characters
|
||||
///
|
||||
/// forbidden: ()<>@,;:\"/[]?=
|
||||
fn is_mime_token_text(c: u8) -> bool {
|
||||
fn is_mime_atom_text(c: u8) -> bool {
|
||||
is_alphanumeric(c)
|
||||
|| c == ascii::EXCLAMATION
|
||||
|| c == ascii::NUM
|
||||
|
@ -40,8 +40,8 @@ fn is_mime_token_text(c: u8) -> bool {
|
|||
/// MIME Token
|
||||
///
|
||||
/// `[CFWS] 1*token_text [CFWS]`
|
||||
pub fn mime_token(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
||||
delimited(opt(cfws), take_while1(is_mime_token_text), opt(cfws))(input)
|
||||
pub fn mime_atom(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
||||
delimited(opt(cfws), take_while1(is_mime_atom_text), opt(cfws))(input)
|
||||
}
|
||||
|
||||
/// Atom allowed characters
|
||||
|
|
126
src/type.rs
Normal file
126
src/type.rs
Normal file
|
@ -0,0 +1,126 @@
|
|||
use std::borrow::Cow;

use encoding_rs::Encoding;
use nom::{
    branch::alt,
    bytes::complete::{tag,take_while1},
    character::complete as character,
    combinator::{into, map, opt},
    multi::many0,
    sequence::{delimited, preceded, tuple},
    IResult,
};

use crate::text::whitespace::cfws;
use crate::text::quoted::quoted_string;
use crate::text::misc_token::{MIMEWord, mime_word};
use crate::text::words::{mime_atom};
|
||||
|
||||
// --------- NAIVE TYPE
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct NaiveType<'a> {
|
||||
main: MIMEWord<'a>,
|
||||
sub: MIMEWord<'a>,
|
||||
params: Parameter<'a>,
|
||||
}
|
||||
impl<'a> NaiveType<'a> {
|
||||
pub fn decode(&self) -> Type<'a> {
|
||||
Type::from_naive_type(self)
|
||||
}
|
||||
}
|
||||
pub fn naive_type(input: &[u8]) -> IResult<&[u8], Type> {
|
||||
map(
|
||||
tuple((mime_atom, tag("/"), mime_atom, parameter_list)),
|
||||
|(main, _, sub, params)| Type { main, sub, params },
|
||||
)(input)
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum Parameter<'a> {
|
||||
name: &'a [u8],
|
||||
value: MIMEWord<'a>,
|
||||
}
|
||||
pub fn parameter(input: &[u8]) -> IResult<&[u8], Parameter> {
|
||||
map(tuple((mime_atom, tag(b"="), mime_word)), |(name, value)| Parameter { name, value })(input)
|
||||
}
|
||||
pub fn parameter_list(input: &[u8]) -> IResult<&[u8], Vec<Parameter>> {
|
||||
many0(preceded(tag(";"), parameter))(input)
|
||||
}
|
||||
|
||||
// -------- TYPE
|
||||
#[derive(Debug, PartialEq, Default)]
|
||||
pub enum Type<'a> {
|
||||
// Composite types
|
||||
Multipart(MultipartDesc<'a>),
|
||||
Message(MessageSubtype<'a>),
|
||||
|
||||
// Discrete types
|
||||
Text(TextDesc<'a>),
|
||||
Binary,
|
||||
}
|
||||
impl<'a> Type<'a> {
|
||||
pub fn from_naive_type(nt: &NaiveType<'a>) -> Self {
|
||||
match nt.main.to_ascii_lowercase().as_slice() {
|
||||
b"multipart" => MultipartDesc::from_naive_type(nt).map(Self::Multipart).unwrap_or(Self::default()),
|
||||
b"message" => Self::Message(MessageDesc::from_naive_type(nt)),
|
||||
b"text" => Self::Text(TextDesc::from_naive_type(nt)),
|
||||
_ => Self::Binary,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct MultipartDesc<'a> {
|
||||
pub subtype: MultipartSubtype<'a>,
|
||||
pub boundary: &'a [u8],
|
||||
}
|
||||
impl<'a> MultipartDesc<'a> {
|
||||
pub fn from_naive_type(nt: &NaiveType<'a>) -> Option<Self> {
|
||||
MultipartDesc {
|
||||
subtype: MultipartSubtype::from_naive_type(nt),
|
||||
boundary: nt.iter().find(|x| x.name.as_ascii_lowercase().as_slice() == b"boundary").unwrap_or(&[]),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Subtype of a `multipart/*` content type.
///
/// Unrecognised subtypes collapse into [`MultipartSubtype::Unknown`]; the
/// original text is not retained.
#[derive(Debug, PartialEq)]
pub enum MultipartSubtype {
    /// `multipart/alternative`
    Alternative,
    /// `multipart/mixed`
    Mixed,
    /// `multipart/digest`
    Digest,
    /// `multipart/parallel`
    Parallel,
    /// `multipart/report`
    Report,
    /// Any other subtype.
    Unknown,
}
|
||||
impl<'a> MultipartSubtype<'a> {
|
||||
pub fn from_naive_type(nt: &NaiveType<'a>) -> Self {
|
||||
match nt.sub.as_ascii_lowercase().as_slice() {
|
||||
b"alternative" => Self::Alternative,
|
||||
b"mixed" => Self::Mixed,
|
||||
b"digest" => Self::Digest,
|
||||
b"parallel" => Self::Parallel,
|
||||
b"report" => Self::Report,
|
||||
_ => Self::Unknown,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Subtype of a `message/*` content type.
#[derive(Debug, PartialEq)]
pub enum MessageSubtype<'a> {
    /// `message/rfc822`
    RFC822,
    /// `message/partial`
    Partial,
    /// External message body.
    External,
    /// Any other subtype, kept as text.
    Other(&'a str),
}
|
||||
|
||||
#[derive(Debug, PartialEq, Default)]
|
||||
pub struct TextDesc<'a> {
|
||||
pub charset: Option<EmailCharset<'a>>,
|
||||
pub subtype: TextSubtype<'a>,
|
||||
}
|
||||
|
||||
/// Subtype of a `text/*` content type.
///
/// Deriving `Default` on an enum requires exactly one unit variant marked
/// `#[default]`; without it the derive is rejected. `Plain` is the default,
/// matching the text/plain fallback used when a type cannot be interpreted.
#[derive(Debug, PartialEq, Default)]
pub enum TextSubtype<'a> {
    /// `text/plain`
    #[default]
    Plain,
    /// `text/html`
    Html,
    /// Any other subtype, kept as text.
    Other(&'a str),
}
|
Loading…
Reference in a new issue