This commit is contained in:
Quentin 2023-07-21 18:31:56 +02:00
parent 42a5f928f6
commit d4b1853513
Signed by: quentin
GPG key ID: E9602264D639FF68
5 changed files with 177 additions and 183 deletions

View file

@ -15,75 +15,6 @@ use crate::fragments::lazy;
use crate::fragments::whitespace::cfws;
use crate::fragments::quoted::quoted_string;
/// Content type as produced by `content_type`, grouped by top-level type.
#[derive(Debug, PartialEq)]
pub enum Type<'a> {
// Composite types
/// `multipart/*`, with the boundary parameter pulled out of the parameter list.
Multipart(MultipartDesc<'a>),
/// `message/*`.
Message(MessageDesc<'a>),
// Discrete types
/// `text/*`, with the charset parameter pulled out of the parameter list.
Text(TextDesc<'a>),
/// `image/*`: subtype token and parameter list.
Image(&'a str, Vec<Parameter<'a>>),
/// `audio/*`: subtype token and parameter list.
Audio(&'a str, Vec<Parameter<'a>>),
/// `video/*`: subtype token and parameter list.
Video(&'a str, Vec<Parameter<'a>>),
/// `application/*`: subtype token and parameter list.
Application(&'a str, Vec<Parameter<'a>>),
// Unknown
/// Any other top-level type: type token, subtype token and parameter list.
Other(&'a str, &'a str, Vec<Parameter<'a>>),
}
/// Description of a `multipart/*` content type.
#[derive(Debug, PartialEq)]
pub struct MultipartDesc<'a> {
/// Value of the first `boundary` parameter found by `content_type`.
pub boundary: String,
/// Subtype converted from the subtype token.
pub subtype: MultipartSubtype<'a>,
/// Parameters that are not `boundary` parameters.
pub unknown_parameters: Vec<Parameter<'a>>,
}
/// Subtype of a `multipart` content type.
#[derive(Debug, PartialEq)]
pub enum MultipartSubtype<'a> {
Alternative,
Mixed,
Digest,
Parallel,
Report,
/// Subtype without a dedicated variant; presumably the subtype as written in
/// the input — TODO confirm against the `From<&str>` conversion.
Other(&'a str),
}
/// Description of a `message/*` content type.
#[derive(Debug, PartialEq)]
pub struct MessageDesc<'a> {
/// Subtype converted from the subtype token.
pub subtype: MessageSubtype<'a>,
/// Every parameter that followed the type; `content_type` extracts none of them.
pub unknown_parameters: Vec<Parameter<'a>>,
}
/// Subtype of a `message` content type.
#[derive(Debug, PartialEq)]
pub enum MessageSubtype<'a> {
RFC822,
Partial,
External,
/// Subtype without a dedicated variant; presumably the subtype as written in
/// the input — TODO confirm against the `From<&str>` conversion.
Other(&'a str),
}
/// Description of a `text/*` content type.
#[derive(Debug, PartialEq)]
pub struct TextDesc<'a> {
/// Charset from the first `charset` parameter, or `None` when there is none.
pub charset: Option<EmailCharset<'a>>,
/// Subtype converted from the subtype token.
pub subtype: TextSubtype<'a>,
/// Parameters that are not `charset` parameters.
pub unknown_parameters: Vec<Parameter<'a>>,
}
/// Subtype of a `text` content type.
#[derive(Debug, PartialEq)]
pub enum TextSubtype<'a> {
Plain,
Html,
/// Subtype without a dedicated variant; presumably the subtype as written in
/// the input — TODO confirm against the `From<&str>` conversion.
Other(&'a str),
}
/// One `name=value` parameter of a content type, as classified by `parameter`.
#[derive(Debug, PartialEq)]
pub enum Parameter<'a> {
/// A `charset` parameter, with its value converted to an `EmailCharset`.
Charset(EmailCharset<'a>),
/// A `boundary` parameter and its value.
Boundary(String),
/// Any other parameter: name as written, and value.
Other(&'a str, String),
}
@ -124,89 +55,6 @@ impl<'a> From<&'a str> for TextSubtype<'a> {
}
*/
/// Parses a single `name=value` content-type parameter.
///
/// The value is either a quoted string or a bare token. The name is compared
/// in lowercase: `charset` and `boundary` get their dedicated variants, any
/// other name is kept as written in `Parameter::Other`.
pub fn parameter(input: &str) -> IResult<&str, Parameter> {
    let (remaining, (name, _eq, value)) =
        tuple((token, tag("="), alt((quoted_string, into(token)))))(input)?;

    let lowered = name.to_lowercase();
    let parsed = if lowered == "charset" {
        Parameter::Charset(EmailCharset::from(Cow::Owned(value)))
    } else if lowered == "boundary" {
        Parameter::Boundary(value)
    } else {
        Parameter::Other(name, value)
    };

    Ok((remaining, parsed))
}
pub fn content_type(input: &str) -> IResult<&str, Type> {
let (rest, (ctype, _, csub, params)) = tuple((
token, tag("/"), token,
many0(preceded(tag(";"), parameter))
))(input)?;
let parsed = match ctype.to_lowercase().as_ref() {
"multipart" => {
let (boundary_param, unknown_parameters): (Vec<Parameter>, Vec<Parameter>) = params
.into_iter()
.partition(|p| matches!(p, Parameter::Boundary(_)));
// @FIXME: if multiple boundary value is set, only the
// first one is picked. We should check that it makes
// sense with other implementation.
match boundary_param.into_iter().next() {
// @FIXME boundary is mandatory. If it is missing,
// fallback to text/plain. Must check that this behavior
// is standard...
None => Type::Text(TextDesc {
charset: None,
subtype: TextSubtype::Plain,
unknown_parameters
}),
Some(Parameter::Boundary(v)) => Type::Multipart(MultipartDesc {
subtype: MultipartSubtype::from(csub),
unknown_parameters,
boundary: v.into(),
}),
Some(_) => unreachable!(), // checked above
}
},
"message" => {
Type::Message(MessageDesc {
subtype: MessageSubtype::from(csub),
unknown_parameters: params,
})
},
"text" => {
let (charset_param, unknown_parameters): (Vec<Parameter>, Vec<Parameter>) = params
.into_iter()
.partition(|p| matches!(p, Parameter::Charset(_)));
let charset = match charset_param.into_iter().next() {
Some(Parameter::Charset(emlchar)) => Some(emlchar),
_ => None,
};
Type::Text(TextDesc {
subtype: TextSubtype::from(csub),
charset: charset,
unknown_parameters,
})
},
"image" => Type::Image(csub, params),
"audio" => Type::Audio(csub, params),
"video" => Type::Video(csub, params),
"application" => Type::Application(csub, params),
_ => Type::Other(ctype, csub, params),
};
Ok((rest, parsed))
}
#[cfg(test)]
mod tests {
use super::*;

View file

@ -9,7 +9,7 @@ use crate::text::whitespace::cfws;
use crate::text::words::mime_token as token;
#[derive(Debug, Clone, PartialEq)]
pub enum DecodedMechanism<'a> {
pub enum Mechanism<'a> {
_7Bit,
_8Bit,
Binary,
@ -18,23 +18,23 @@ pub enum DecodedMechanism<'a> {
Other(&'a [u8]),
}
pub struct Mechanism<'a>(pub &'a [u8]);
impl<'a> Mechanism<'a> {
pub fn decode(&self) -> DecodedMechanism {
use DecodedMechanism::*;
match self.0.to_ascii_lowercase().as_slice() {
b"7bit" => _7Bit,
b"8bit" => _8Bit,
b"binary" => Binary,
b"quoted-printable" => QuotedPrintable,
b"base64" => Base64,
_ => Other(self.0),
}
}
}
pub fn mechanism(input: &[u8]) -> IResult<&[u8], Mechanism> {
map(token, Mechanism)(input)
use Mechanism::*;
alt((
delimited(
opt(cfws),
alt((
value(_7Bit, tag_no_case("7bit")),
value(_8Bit, tag_no_case("8bit")),
value(Binary, tag_no_case("binary")),
value(QuotedPrintable, tag_no_case("quoted-printable")),
value(Base64, tag_no_case("base64")),
)),
opt(cfws),
),
map(token, Other),
))(input)
}
@ -44,28 +44,28 @@ mod tests {
/// Known mechanism names parse to their variant regardless of case or
/// surrounding comments/whitespace, and consume the whole input.
#[test]
fn test_mechanism() {
    assert_eq!(
        mechanism(b"7bit"),
        Ok((&b""[..], Mechanism::_7Bit)),
    );

    assert_eq!(
        mechanism(b"(youhou) 8bit"),
        Ok((&b""[..], Mechanism::_8Bit)),
    );

    assert_eq!(
        mechanism(b"(blip) bInArY (blip blip)"),
        Ok((&b""[..], Mechanism::Binary)),
    );

    assert_eq!(
        mechanism(b" base64 "),
        Ok((&b""[..], Mechanism::Base64)),
    );

    assert_eq!(
        mechanism(b" Quoted-Printable "),
        Ok((&b""[..], Mechanism::QuotedPrintable)),
    );
}
}

View file

@ -11,7 +11,7 @@ use nom::{
use crate::text::{
quoted::{QuotedString, quoted_string},
whitespace::{fws, is_obs_no_ws_ctl},
words::{atom, is_vchar},
words::{atom, mime_atom, is_vchar},
encoding::{self, encoded_word},
ascii,
};
@ -22,6 +22,26 @@ pub fn phrase_list(input: &[u8]) -> IResult<&[u8], PhraseList> {
map(separated_list1(tag(","), phrase), PhraseList)(input)
}
/// A MIME word as parsed by `mime_word`: a quoted string or a bare MIME atom.
#[derive(Debug, PartialEq)]
pub enum MIMEWord<'a> {
/// Result of the `quoted_string` parser.
Quoted(QuotedString<'a>),
/// Bytes matched by the `mime_atom` parser.
Atom(&'a [u8]),
}
impl<'a> MIMEWord<'a> {
    /// Renders the word as an owned string.
    ///
    /// Quoted words use the quoted string's own `to_string`. Atoms are decoded
    /// as UTF-8; malformed sequences are replaced rather than reported.
    pub fn to_string(&self) -> String {
        match self {
            Self::Quoted(v) => v.to_string(),
            // `decode_without_bom_handling` returns `(text, had_errors)`:
            // the decoded text is field 0, field 1 is only the error flag.
            Self::Atom(v) => encoding_rs::UTF_8
                .decode_without_bom_handling(v)
                .0
                .into_owned(),
        }
    }
}
/// Parses a MIME word: a quoted string is tried first, then a bare MIME atom.
pub fn mime_word(input: &[u8]) -> IResult<&[u8], MIMEWord> {
    let quoted = map(quoted_string, MIMEWord::Quoted);
    let atom = map(mime_atom, MIMEWord::Atom);
    alt((quoted, atom))(input)
}
#[derive(Debug, PartialEq)]
pub enum Word<'a> {
Quoted(QuotedString<'a>),

View file

@ -16,7 +16,7 @@ pub fn is_vchar(c: u8) -> bool {
/// MIME Token allowed characters
///
/// forbidden: ()<>@,;:\"/[]?=
fn is_mime_token_text(c: u8) -> bool {
fn is_mime_atom_text(c: u8) -> bool {
is_alphanumeric(c)
|| c == ascii::EXCLAMATION
|| c == ascii::NUM
@ -40,8 +40,8 @@ fn is_mime_token_text(c: u8) -> bool {
/// MIME Token
///
/// `[CFWS] 1*token_text [CFWS]`
pub fn mime_token(input: &[u8]) -> IResult<&[u8], &[u8]> {
delimited(opt(cfws), take_while1(is_mime_token_text), opt(cfws))(input)
pub fn mime_atom(input: &[u8]) -> IResult<&[u8], &[u8]> {
delimited(opt(cfws), take_while1(is_mime_atom_text), opt(cfws))(input)
}
/// Atom allowed characters

126
src/type.rs Normal file
View file

@ -0,0 +1,126 @@
use std::borrow::Cow;

use encoding_rs::Encoding;
use nom::{
    branch::alt,
    bytes::complete::{tag,take_while1},
    character::complete as character,
    combinator::{into, map, opt},
    multi::many0,
    sequence::{delimited, preceded, tuple},
    IResult,
};

use crate::text::whitespace::cfws;
use crate::text::quoted::quoted_string;
use crate::text::misc_token::{MIMEWord, mime_word};
use crate::text::words::{mime_atom};
// --------- NAIVE TYPE
/// Content type as written in the input, before any interpretation: main
/// type, subtype and the parameter list.
///
/// `main` and `sub` are the byte slices matched by `mime_atom`, and `params`
/// is the whole list returned by `parameter_list`.
#[derive(Debug, PartialEq)]
pub struct NaiveType<'a> {
    main: &'a [u8],
    sub: &'a [u8],
    params: Vec<Parameter<'a>>,
}

impl<'a> NaiveType<'a> {
    /// Interprets the raw type through `Type::from_naive_type`.
    pub fn decode(&self) -> Type<'a> {
        Type::from_naive_type(self)
    }
}

/// Parses `main "/" sub` followed by zero or more `";" parameter`.
pub fn naive_type(input: &[u8]) -> IResult<&[u8], NaiveType> {
    map(
        tuple((mime_atom, tag("/"), mime_atom, parameter_list)),
        |(main, _, sub, params)| NaiveType { main, sub, params },
    )(input)
}
#[derive(Debug, PartialEq)]
pub enum Parameter<'a> {
name: &'a [u8],
value: MIMEWord<'a>,
}
/// Parses a single `name "=" value` parameter.
pub fn parameter(input: &[u8]) -> IResult<&[u8], Parameter> {
    map(
        tuple((mime_atom, tag(b"="), mime_word)),
        // The tuple has three elements; the matched `=` is discarded.
        |(name, _, value)| Parameter { name, value },
    )(input)
}
/// Parses zero or more parameters, each introduced by a `;`.
pub fn parameter_list(input: &[u8]) -> IResult<&[u8], Vec<Parameter>> {
    let prefixed_parameter = preceded(tag(";"), parameter);
    many0(prefixed_parameter)(input)
}
// -------- TYPE
/// Interpreted content type.
#[derive(Debug, PartialEq)]
pub enum Type<'a> {
    // Composite types
    Multipart(MultipartDesc<'a>),
    Message(MessageSubtype<'a>),

    // Discrete types
    Text(TextDesc<'a>),
    Binary,
}

/// The default content type is a default `text` description.
///
/// Written by hand because `#[derive(Default)]` on an enum needs a unit
/// variant marked `#[default]` and cannot select a variant that carries data.
impl<'a> Default for Type<'a> {
    fn default() -> Self {
        Self::Text(TextDesc::default())
    }
}

impl<'a> Type<'a> {
    /// Interprets a raw content type, comparing the main type in ASCII lowercase.
    ///
    /// A `multipart` type that `MultipartDesc::from_naive_type` rejects falls
    /// back to `Type::default()`. Main types other than `multipart`,
    /// `message` and `text` become `Binary`.
    pub fn from_naive_type(nt: &NaiveType<'a>) -> Self {
        match nt.main.to_ascii_lowercase().as_slice() {
            b"multipart" => MultipartDesc::from_naive_type(nt)
                .map(Self::Multipart)
                .unwrap_or_default(),
            // NOTE(review): `MessageDesc` is not defined in the visible part of
            // this file and `Message` wraps a `MessageSubtype` — confirm which
            // constructor is intended here.
            b"message" => Self::Message(MessageDesc::from_naive_type(nt)),
            // NOTE(review): `TextDesc::from_naive_type` is not visible here — confirm.
            b"text" => Self::Text(TextDesc::from_naive_type(nt)),
            _ => Self::Binary,
        }
    }
}
#[derive(Debug, PartialEq)]
pub struct MultipartDesc<'a> {
pub subtype: MultipartSubtype<'a>,
pub boundary: &'a [u8],
}
impl<'a> MultipartDesc<'a> {
pub fn from_naive_type(nt: &NaiveType<'a>) -> Option<Self> {
MultipartDesc {
subtype: MultipartSubtype::from_naive_type(nt),
boundary: nt.iter().find(|x| x.name.as_ascii_lowercase().as_slice() == b"boundary").unwrap_or(&[]),
}
}
}
/// Subtype of a `multipart` content type.
#[derive(Debug, PartialEq)]
pub enum MultipartSubtype {
Alternative,
Mixed,
Digest,
Parallel,
Report,
/// Any subtype without a dedicated variant; the original name is not kept.
Unknown,
}
impl MultipartSubtype {
    /// Classifies the subtype of a raw content type, comparing it in ASCII
    /// lowercase. Unrecognised subtypes map to `Unknown`.
    pub fn from_naive_type(nt: &NaiveType<'_>) -> Self {
        match nt.sub.to_ascii_lowercase().as_slice() {
            b"alternative" => Self::Alternative,
            b"mixed" => Self::Mixed,
            b"digest" => Self::Digest,
            b"parallel" => Self::Parallel,
            b"report" => Self::Report,
            _ => Self::Unknown,
        }
    }
}
/// Subtype of a `message` content type; carried by `Type::Message`.
#[derive(Debug, PartialEq)]
pub enum MessageSubtype<'a> {
RFC822,
Partial,
External,
/// Subtype without a dedicated variant; presumably the subtype as written in
/// the input — TODO confirm once a constructor exists.
Other(&'a str),
}
/// Description of a `text` content type; carried by `Type::Text`.
// NOTE(review): `EmailCharset` does not appear in the visible `use` lines of
// this file — confirm it is in scope.
#[derive(Debug, PartialEq, Default)]
pub struct TextDesc<'a> {
/// Declared charset, if any.
pub charset: Option<EmailCharset<'a>>,
pub subtype: TextSubtype<'a>,
}
/// Subtype of a `text` content type.
///
/// `Plain` is the default: `#[derive(Default)]` on an enum requires exactly
/// one unit variant to be marked `#[default]`.
#[derive(Debug, PartialEq, Default)]
pub enum TextSubtype<'a> {
    #[default]
    Plain,
    Html,
    /// Subtype without a dedicated variant.
    Other(&'a str),
}