format code

This commit is contained in:
Quentin 2023-07-23 16:37:47 +02:00
parent 6e2b29ec57
commit c97595c128
Signed by: quentin
GPG key ID: E9602264D639FF68
26 changed files with 610 additions and 439 deletions

View file

@ -1,14 +1,14 @@
use crate::text::misc_token::{unstructured, Unstructured};
use crate::text::whitespace::{foldable_line, obs_crlf};
use nom::{
IResult,
branch::alt,
bytes::complete::{tag_no_case, tag, take_while1},
bytes::complete::{tag, tag_no_case, take_while1},
character::complete::space0,
combinator::{map},
combinator::map,
multi::many0,
sequence::{pair, terminated, tuple},
IResult,
};
use crate::text::whitespace::{foldable_line, obs_crlf};
use crate::text::misc_token::{Unstructured, unstructured};
#[derive(Debug, PartialEq)]
pub enum CompField<'a, T> {
@ -21,21 +21,33 @@ pub enum CompField<'a, T> {
pub struct CompFieldList<'a, T>(pub Vec<CompField<'a, T>>);
impl<'a, T> CompFieldList<'a, T> {
pub fn known(self) -> Vec<T> {
self.0.into_iter().map(|v| match v {
CompField::Known(f) => Some(f),
_ => None,
}).flatten().collect()
self.0
.into_iter()
.map(|v| match v {
CompField::Known(f) => Some(f),
_ => None,
})
.flatten()
.collect()
}
}
pub fn header<'a, T>(fx: impl Fn(&'a [u8]) -> IResult<&'a [u8], T> + Copy)
-> impl Fn(&'a [u8]) -> IResult<&'a [u8], CompFieldList<T>>
{
move |input| map(terminated(many0(alt((
map(fx, CompField::Known),
map(opt_field, |(k,v)| CompField::Unknown(k,v)),
map(foldable_line, CompField::Bad),
))), obs_crlf), CompFieldList)(input)
pub fn header<'a, T>(
fx: impl Fn(&'a [u8]) -> IResult<&'a [u8], T> + Copy,
) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], CompFieldList<T>> {
move |input| {
map(
terminated(
many0(alt((
map(fx, CompField::Known),
map(opt_field, |(k, v)| CompField::Unknown(k, v)),
map(foldable_line, CompField::Bad),
))),
obs_crlf,
),
CompFieldList,
)(input)
}
}
/*
@ -53,12 +65,7 @@ pub fn header_in_boundaries<'a, T>(bound: &'a [u8], fx: impl Fn(&'a [u8]) -> IRe
*/
pub fn field_name<'a>(name: &'static [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
move |input| {
terminated(
tag_no_case(name),
tuple((space0, tag(b":"), space0)),
)(input)
}
move |input| terminated(tag_no_case(name), tuple((space0, tag(b":"), space0)))(input)
}
/// Optional field
@ -78,7 +85,7 @@ pub fn opt_field(input: &[u8]) -> IResult<&[u8], (&[u8], Unstructured)> {
tuple((space0, tag(b":"), space0)),
),
unstructured,
), obs_crlf)(input)
),
obs_crlf,
)(input)
}

View file

@ -1,9 +1,9 @@
mod error;
mod text;
mod header;
mod rfc5322;
mod mime;
mod part;
mod rfc5322;
mod text;
pub fn email(input: &[u8]) -> Result<part::part::Message, error::EMLError> {
part::part::message(mime::mime::Message::default())(input)

View file

@ -74,7 +74,6 @@ impl<'a> From<&'a [u8]> for EmailCharset {
b"utf-8" | b"utf8" => EmailCharset::UTF_8,
_ => EmailCharset::Unknown,
}
}
}
@ -112,21 +111,16 @@ impl EmailCharset {
}
pub fn as_encoding(&self) -> &'static Encoding {
Encoding::for_label(self.as_str().as_bytes())
.unwrap_or(encoding_rs::WINDOWS_1252)
Encoding::for_label(self.as_str().as_bytes()).unwrap_or(encoding_rs::WINDOWS_1252)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_charset() {
assert_eq!(
EmailCharset::from(&b"Us-Ascii"[..]).as_str(),
"US-ASCII",
);
assert_eq!(EmailCharset::from(&b"Us-Ascii"[..]).as_str(), "US-ASCII",);
assert_eq!(
EmailCharset::from(&b"Us-Ascii"[..]).as_encoding(),

View file

@ -1,17 +1,17 @@
use nom::{
IResult,
branch::alt,
combinator::map,
sequence::{preceded, terminated},
IResult,
};
use crate::text::whitespace::obs_crlf;
use crate::text::misc_token::{Unstructured, unstructured};
use crate::rfc5322::identification::{MessageID, msg_id};
use crate::header::{field_name, CompFieldList};
use crate::mime::r#type::{NaiveType, naive_type};
use crate::mime::mechanism::{Mechanism, mechanism};
use crate::mime::mechanism::{mechanism, Mechanism};
use crate::mime::mime::AnyMIME;
use crate::mime::r#type::{naive_type, NaiveType};
use crate::rfc5322::identification::{msg_id, MessageID};
use crate::text::misc_token::{unstructured, Unstructured};
use crate::text::whitespace::obs_crlf;
#[derive(Debug, PartialEq)]
pub enum Content<'a> {
@ -22,16 +22,28 @@ pub enum Content<'a> {
}
impl<'a> Content<'a> {
pub fn ctype(&'a self) -> Option<&'a NaiveType<'a>> {
match self { Content::Type(v) => Some(v), _ => None }
match self {
Content::Type(v) => Some(v),
_ => None,
}
}
pub fn transfer_encoding(&'a self) -> Option<&'a Mechanism<'a>> {
match self { Content::TransferEncoding(v) => Some(v), _ => None }
match self {
Content::TransferEncoding(v) => Some(v),
_ => None,
}
}
pub fn id(&'a self) -> Option<&'a MessageID<'a>> {
match self { Content::ID(v) => Some(v), _ => None }
match self {
Content::ID(v) => Some(v),
_ => None,
}
}
pub fn description(&'a self) -> Option<&'a Unstructured<'a>> {
match self { Content::Description(v) => Some(v), _ => None }
match self {
Content::Description(v) => Some(v),
_ => None,
}
}
}
@ -42,26 +54,36 @@ impl<'a> CompFieldList<'a, Content<'a>> {
}
pub fn content(input: &[u8]) -> IResult<&[u8], Content> {
terminated(alt((
preceded(field_name(b"content-type"), map(naive_type, Content::Type)),
preceded(field_name(b"content-transfer-encoding"), map(mechanism, Content::TransferEncoding)),
preceded(field_name(b"content-id"), map(msg_id, Content::ID)),
preceded(field_name(b"content-description"), map(unstructured, Content::Description)),
)), obs_crlf)(input)
terminated(
alt((
preceded(field_name(b"content-type"), map(naive_type, Content::Type)),
preceded(
field_name(b"content-transfer-encoding"),
map(mechanism, Content::TransferEncoding),
),
preceded(field_name(b"content-id"), map(msg_id, Content::ID)),
preceded(
field_name(b"content-description"),
map(unstructured, Content::Description),
),
)),
obs_crlf,
)(input)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::mime::r#type::*;
use crate::header::{header, CompFieldList};
use crate::mime::charset::EmailCharset;
use crate::mime::r#type::*;
use crate::text::misc_token::MIMEWord;
use crate::text::quoted::QuotedString;
use crate::header::{header, CompFieldList};
#[test]
fn test_content_type() {
let (rest, content) = content(b"Content-Type: text/plain; charset=UTF-8; format=flowed\r\n").unwrap();
let (rest, content) =
content(b"Content-Type: text/plain; charset=UTF-8; format=flowed\r\n").unwrap();
assert_eq!(&b""[..], rest);
if let Content::Type(nt) = content {
@ -91,7 +113,8 @@ Content-Transfer-Encoding: 7bit
This is a multipart message.
"#.as_bytes();
"#
.as_bytes();
assert_eq!(
map(header(content), CompFieldList::known)(fullmail),
@ -101,12 +124,12 @@ This is a multipart message.
Content::Type(NaiveType {
main: &b"multipart"[..],
sub: &b"alternative"[..],
params: vec![
Parameter {
name: &b"boundary"[..],
value: MIMEWord::Quoted(QuotedString(vec![&b"b1_e376dc71bafc953c0b0fdeb9983a9956"[..]])),
}
]
params: vec![Parameter {
name: &b"boundary"[..],
value: MIMEWord::Quoted(QuotedString(vec![
&b"b1_e376dc71bafc953c0b0fdeb9983a9956"[..]
])),
}]
}),
Content::TransferEncoding(Mechanism::_7Bit),
],

View file

@ -1,12 +1,12 @@
use crate::text::whitespace::cfws;
use crate::text::words::mime_atom as token;
use nom::{
IResult,
branch::alt,
bytes::complete::tag_no_case,
combinator::{map, opt, value},
sequence::delimited,
IResult,
};
use crate::text::whitespace::cfws;
use crate::text::words::mime_atom as token;
#[derive(Debug, Clone, PartialEq, Default)]
pub enum Mechanism<'a> {
@ -38,16 +38,12 @@ pub fn mechanism(input: &[u8]) -> IResult<&[u8], Mechanism> {
))(input)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_mechanism() {
assert_eq!(
mechanism(b"7bit"),
Ok((&b""[..], Mechanism::_7Bit)),
);
assert_eq!(mechanism(b"7bit"), Ok((&b""[..], Mechanism::_7Bit)),);
assert_eq!(
mechanism(b"(youhou) 8bit"),
@ -59,10 +55,7 @@ mod tests {
Ok((&b""[..], Mechanism::Binary)),
);
assert_eq!(
mechanism(b" base64 "),
Ok((&b""[..], Mechanism::Base64)),
);
assert_eq!(mechanism(b" base64 "), Ok((&b""[..], Mechanism::Base64)),);
assert_eq!(
mechanism(b" Quoted-Printable "),

View file

@ -1,8 +1,8 @@
use crate::mime::mechanism::Mechanism;
use crate::rfc5322::identification::MessageID;
use crate::text::misc_token::Unstructured;
use crate::mime::field::Content;
use crate::mime::r#type::{AnyType, self as ctype}; //Multipart, Message, Text, Binary};
use crate::mime::mechanism::Mechanism;
use crate::mime::r#type::{self as ctype, AnyType};
use crate::rfc5322::identification::MessageID;
use crate::text::misc_token::Unstructured; //Multipart, Message, Text, Binary};
#[derive(Debug, PartialEq, Clone)]
pub struct Multipart<'a>(pub ctype::Multipart, pub Generic<'a>);
@ -24,7 +24,6 @@ pub enum AnyMIME<'a> {
Bin(Binary<'a>),
}
impl<'a> AnyMIME<'a> {
pub fn from_pair(at: AnyType, gen: Generic<'a>) -> Self {
match at {
@ -38,7 +37,7 @@ impl<'a> AnyMIME<'a> {
impl<'a> FromIterator<Content<'a>> for AnyMIME<'a> {
fn from_iter<I: IntoIterator<Item = Content<'a>>>(it: I) -> Self {
let (at, gen) = it.into_iter().fold(
let (at, gen) = it.into_iter().fold(
(AnyType::default(), Generic::default()),
|(mut at, mut section), field| {
match field {
@ -48,7 +47,7 @@ impl<'a> FromIterator<Content<'a>> for AnyMIME<'a> {
Content::Description(v) => section.description = Some(v),
};
(at, section)
}
},
);
Self::from_pair(at, gen)
@ -61,4 +60,3 @@ pub struct Generic<'a> {
pub id: Option<MessageID<'a>>,
pub description: Option<Unstructured<'a>>,
}

View file

@ -1,5 +1,5 @@
pub mod charset;
pub mod mechanism;
pub mod r#type;
pub mod field;
pub mod mechanism;
pub mod mime;
pub mod r#type;

View file

@ -6,9 +6,9 @@ use nom::{
IResult,
};
use crate::text::misc_token::{MIMEWord, mime_word};
use crate::text::words::{mime_atom};
use crate::mime::charset::EmailCharset;
use crate::text::misc_token::{mime_word, MIMEWord};
use crate::text::words::mime_atom;
// --------- NAIVE TYPE
#[derive(Debug, PartialEq)]
@ -18,7 +18,9 @@ pub struct NaiveType<'a> {
pub params: Vec<Parameter<'a>>,
}
impl<'a> NaiveType<'a> {
pub fn to_type(&self) -> AnyType { self.into() }
pub fn to_type(&self) -> AnyType {
self.into()
}
}
pub fn naive_type(input: &[u8]) -> IResult<&[u8], NaiveType> {
map(
@ -33,7 +35,10 @@ pub struct Parameter<'a> {
pub value: MIMEWord<'a>,
}
pub fn parameter(input: &[u8]) -> IResult<&[u8], Parameter> {
map(tuple((mime_atom, tag(b"="), mime_word)), |(name, _, value)| Parameter { name, value })(input)
map(
tuple((mime_atom, tag(b"="), mime_word)),
|(name, _, value)| Parameter { name, value },
)(input)
}
pub fn parameter_list(input: &[u8]) -> IResult<&[u8], Vec<Parameter>> {
many0(preceded(tag(";"), parameter))(input)
@ -60,7 +65,9 @@ impl Default for AnyType {
impl<'a> From<&'a NaiveType<'a>> for AnyType {
fn from(nt: &'a NaiveType<'a>) -> Self {
match nt.main.to_ascii_lowercase().as_slice() {
b"multipart" => Multipart::try_from(nt).map(Self::Multipart).unwrap_or(Self::default()),
b"multipart" => Multipart::try_from(nt)
.map(Self::Multipart)
.unwrap_or(Self::default()),
b"message" => Self::Message(Message::from(nt)),
b"text" => Self::Text(Text::from(nt)),
_ => Self::Binary(Binary::default()),
@ -77,7 +84,8 @@ impl<'a> TryFrom<&'a NaiveType<'a>> for Multipart {
type Error = ();
fn try_from(nt: &'a NaiveType<'a>) -> Result<Self, Self::Error> {
nt.params.iter()
nt.params
.iter()
.find(|x| x.name.to_ascii_lowercase().as_slice() == b"boundary")
.map(|boundary| Multipart {
subtype: MultipartSubtype::from(nt),
@ -137,7 +145,9 @@ impl<'a> From<&NaiveType<'a>> for Text {
fn from(nt: &NaiveType<'a>) -> Self {
Self {
subtype: TextSubtype::from(nt),
charset: nt.params.iter()
charset: nt
.params
.iter()
.find(|x| x.name.to_ascii_lowercase().as_slice() == b"charset")
.map(|x| EmailCharset::from(x.value.to_string().as_bytes()))
.unwrap_or(EmailCharset::US_ASCII),
@ -175,17 +185,23 @@ mod tests {
fn test_parameter() {
assert_eq!(
parameter(b"charset=utf-8"),
Ok((&b""[..], Parameter {
name: &b"charset"[..],
value: MIMEWord::Atom(&b"utf-8"[..]),
})),
Ok((
&b""[..],
Parameter {
name: &b"charset"[..],
value: MIMEWord::Atom(&b"utf-8"[..]),
}
)),
);
assert_eq!(
parameter(b"charset=\"utf-8\""),
Ok((&b""[..], Parameter {
name: &b"charset"[..],
value: MIMEWord::Quoted(QuotedString(vec![&b"utf-8"[..]])),
})),
Ok((
&b""[..],
Parameter {
name: &b"charset"[..],
value: MIMEWord::Quoted(QuotedString(vec![&b"utf-8"[..]])),
}
)),
);
}
@ -203,7 +219,6 @@ mod tests {
);
}
#[test]
fn test_content_type_multipart() {
let (rest, nt) = naive_type(b"multipart/mixed;\r\n\tboundary=\"--==_mimepart_64a3f2c69114f_2a13d020975fe\";\r\n\tcharset=UTF-8").unwrap();
@ -222,20 +237,20 @@ mod tests {
let (rest, nt) = naive_type(b"message/rfc822").unwrap();
assert_eq!(rest, &[]);
assert_eq!(
nt.to_type(),
AnyType::Message(Message::RFC822),
);
assert_eq!(nt.to_type(), AnyType::Message(Message::RFC822),);
}
#[test]
fn test_parameter_ascii() {
assert_eq!(
parameter(b"charset = (simple) us-ascii (Plain text)"),
Ok((&b""[..], Parameter {
name: &b"charset"[..],
value: MIMEWord::Atom(&b"us-ascii"[..]),
}))
Ok((
&b""[..],
Parameter {
name: &b"charset"[..],
value: MIMEWord::Atom(&b"us-ascii"[..]),
}
))
);
}
}

View file

@ -1,25 +1,29 @@
use nom::{
IResult,
branch::alt,
bytes::complete::{is_not},
multi::many0,
sequence::{pair},
bytes::complete::is_not,
combinator::{map, not, recognize},
multi::many0,
sequence::pair,
IResult,
};
use crate::mime;
use crate::mime::mime::{AnyMIME};
use crate::rfc5322::{self as imf};
use crate::text::boundary::{Delimiter, boundary};
use crate::text::whitespace::obs_crlf;
use crate::text::ascii::CRLF;
use crate::header::{header, CompFieldList};
use crate::mime;
use crate::mime::mime::AnyMIME;
use crate::rfc5322::{self as imf};
use crate::text::ascii::CRLF;
use crate::text::boundary::{boundary, Delimiter};
use crate::text::whitespace::obs_crlf;
#[derive(Debug, PartialEq)]
pub struct Multipart<'a>(pub mime::mime::Multipart<'a>, pub Vec<AnyPart<'a>>);
#[derive(Debug, PartialEq)]
pub struct Message<'a>(pub mime::mime::Message<'a>, pub imf::message::Message<'a>, pub Box<AnyPart<'a>>);
pub struct Message<'a>(
pub mime::mime::Message<'a>,
pub imf::message::Message<'a>,
pub Box<AnyPart<'a>>,
);
#[derive(Debug, PartialEq)]
pub struct Text<'a>(pub mime::mime::Text<'a>, pub &'a [u8]);
@ -68,9 +72,18 @@ impl<'a> MixedField<'a> {
impl<'a> CompFieldList<'a, MixedField<'a>> {
pub fn sections(self) -> (mime::mime::AnyMIME<'a>, imf::message::Message<'a>) {
let k = self.known();
let (v1, v2): (Vec<MixedField>, Vec<MixedField>) = k.into_iter().partition(|v| v.mime().is_some());
let mime = v1.into_iter().map(|v| v.to_mime()).flatten().collect::<mime::mime::AnyMIME>();
let imf = v2.into_iter().map(|v| v.to_imf()).flatten().collect::<imf::message::Message>();
let (v1, v2): (Vec<MixedField>, Vec<MixedField>) =
k.into_iter().partition(|v| v.mime().is_some());
let mime = v1
.into_iter()
.map(|v| v.to_mime())
.flatten()
.collect::<mime::mime::AnyMIME>();
let imf = v2
.into_iter()
.map(|v| v.to_imf())
.flatten()
.collect::<imf::message::Message>();
(mime, imf)
}
}
@ -81,7 +94,9 @@ pub fn mixed_field(input: &[u8]) -> IResult<&[u8], MixedField> {
))(input)
}
pub fn message<'a>(m: mime::mime::Message<'a>) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Message<'a>> {
pub fn message<'a>(
m: mime::mime::Message<'a>,
) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Message<'a>> {
move |input: &[u8]| {
let (input, fields) = header(mixed_field)(input)?;
let (in_mime, imf) = fields.sections();
@ -92,7 +107,9 @@ pub fn message<'a>(m: mime::mime::Message<'a>) -> impl Fn(&'a [u8]) -> IResult<&
}
}
pub fn multipart<'a>(m: mime::mime::Multipart<'a>) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Multipart<'a>> {
pub fn multipart<'a>(
m: mime::mime::Multipart<'a>,
) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Multipart<'a>> {
let m = m.clone();
move |input| {
@ -124,17 +141,16 @@ pub fn multipart<'a>(m: mime::mime::Multipart<'a>) -> impl Fn(&'a [u8]) -> IResu
pub fn to_anypart<'a>(m: AnyMIME<'a>, rpart: &'a [u8]) -> AnyPart<'a> {
match m {
AnyMIME::Mult(a) => map(multipart(a), AnyPart::Mult)(rpart)
.map(|v| v.1)
.unwrap_or(AnyPart::Txt(Text(mime::mime::Text::default(), rpart))),
.map(|v| v.1)
.unwrap_or(AnyPart::Txt(Text(mime::mime::Text::default(), rpart))),
AnyMIME::Msg(a) => map(message(a), AnyPart::Msg)(rpart)
.map(|v| v.1)
.unwrap_or(AnyPart::Txt(Text(mime::mime::Text::default(), rpart))),
.map(|v| v.1)
.unwrap_or(AnyPart::Txt(Text(mime::mime::Text::default(), rpart))),
AnyMIME::Txt(a) => AnyPart::Txt(Text(a, rpart)),
AnyMIME::Bin(a) => AnyPart::Bin(Binary(a, rpart)),
}
}
pub fn part_raw<'a>(bound: &[u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> + '_ {
move |input| {
recognize(many0(pair(
@ -147,14 +163,15 @@ pub fn part_raw<'a>(bound: &[u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a
#[cfg(test)]
mod tests {
use super::*;
use crate::text::encoding::{Base64Word, EncodedWord, QuotedChunk, QuotedWord};
use crate::text::misc_token::{Phrase, UnstrToken, Unstructured, Word};
use chrono::{FixedOffset, TimeZone};
use crate::text::misc_token::{Word, Phrase, Unstructured, UnstrToken};
use crate::text::encoding::{EncodedWord, QuotedChunk, Base64Word, QuotedWord};
#[test]
fn test_preamble() {
assert_eq!(
part_raw(b"hello")(b"blip
part_raw(b"hello")(
b"blip
bloup
blip
@ -164,7 +181,8 @@ bloup--
--hello
Field: Body
"),
"
),
Ok((
&b"\n--hello\nField: Body\n"[..],
&b"blip\nbloup\n\nblip\nbloup--\n--bim\n--bim--\n"[..],
@ -292,7 +310,8 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
</div>
--b1_e376dc71bafc953c0b0fdeb9983a9956--
"#.as_bytes();
"#
.as_bytes();
let base_mime = mime::mime::Message::default();
assert_eq!(

View file

@ -141,21 +141,22 @@ pub fn address_list_cfws(input: &[u8]) -> IResult<&[u8], Vec<AddressRef>> {
}
pub fn nullable_address_list(input: &[u8]) -> IResult<&[u8], Vec<AddressRef>> {
map(
opt(alt((address_list, address_list_cfws))),
|v| v.unwrap_or(vec![]),
)(input)
map(opt(alt((address_list, address_list_cfws))), |v| {
v.unwrap_or(vec![])
})(input)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::text::misc_token::{Phrase, Word};
use crate::rfc5322::mailbox::{AddrSpec, Domain, LocalPart, LocalPartToken};
use crate::text::misc_token::{Phrase, Word};
#[test]
fn test_mailbox_list() {
match mailbox_list(r#"Pete(A nice \) chap) <pete(his account)@silly.test(his host)>"#.as_bytes()) {
match mailbox_list(
r#"Pete(A nice \) chap) <pete(his account)@silly.test(his host)>"#.as_bytes(),
) {
Ok((rest, _)) => assert_eq!(&b""[..], rest),
_ => panic!(),
};

View file

@ -145,7 +145,13 @@ fn strict_year(input: &[u8]) -> IResult<&[u8], i32> {
fws,
map(
terminated(take_while_m_n(4, 9, |c| c >= 0x30 && c <= 0x39), digit0),
|d: &[u8]| encoding_rs::UTF_8.decode_without_bom_handling(d).0.parse::<i32>().unwrap_or(0),
|d: &[u8]| {
encoding_rs::UTF_8
.decode_without_bom_handling(d)
.0
.parse::<i32>()
.unwrap_or(0)
},
),
fws,
)(input)
@ -225,8 +231,10 @@ fn strict_zone(input: &[u8]) -> IResult<&[u8], Option<FixedOffset>> {
take_while_m_n(2, 2, |c| c >= 0x30 && c <= 0x39),
)),
|(_, op, dig_zone_hour, dig_zone_min)| {
let zone_hour: i32 = ((dig_zone_hour[0] - 0x30) * 10 + (dig_zone_hour[1] - 0x30)) as i32 * HOUR;
let zone_min: i32 = ((dig_zone_min[0] - 0x30) * 10 + (dig_zone_min[1] - 0x30)) as i32 * MIN;
let zone_hour: i32 =
((dig_zone_hour[0] - 0x30) * 10 + (dig_zone_hour[1] - 0x30)) as i32 * HOUR;
let zone_min: i32 =
((dig_zone_min[0] - 0x30) * 10 + (dig_zone_min[1] - 0x30)) as i32 * MIN;
match op {
b"+" => FixedOffset::east_opt(zone_hour + zone_min),
b"-" => FixedOffset::west_opt(zone_hour + zone_min),
@ -298,7 +306,6 @@ fn obs_zone(input: &[u8]) -> IResult<&[u8], Option<FixedOffset>> {
value(FixedOffset::east_opt(11 * HOUR), tag_no_case(b"L")),
value(FixedOffset::east_opt(12 * HOUR), tag_no_case(b"M")),
)),
// Military Timezones West
alt((
value(FixedOffset::west_opt(1 * HOUR), tag_no_case(b"N")),
@ -314,7 +321,6 @@ fn obs_zone(input: &[u8]) -> IResult<&[u8], Option<FixedOffset>> {
value(FixedOffset::west_opt(11 * HOUR), tag_no_case(b"X")),
value(FixedOffset::west_opt(12 * HOUR), tag_no_case(b"Y")),
)),
// Unknown timezone
value(FixedOffset::west_opt(0 * HOUR), alphanumeric1),
)),
@ -367,7 +373,8 @@ mod tests {
Feb
1969
23:32
-0330 (Newfoundland Time)"#.as_bytes()
-0330 (Newfoundland Time)"#
.as_bytes()
),
Ok((
&b""[..],

View file

@ -1,21 +1,21 @@
use chrono::{DateTime, FixedOffset};
use nom::{
IResult,
branch::alt,
combinator::map,
sequence::{preceded, terminated},
IResult,
};
use crate::text::whitespace::{obs_crlf};
use crate::rfc5322::address::{AddressList, address_list, nullable_address_list, mailbox_list};
use crate::rfc5322::datetime::section as date;
use crate::rfc5322::mailbox::{MailboxRef, MailboxList, AddrSpec, mailbox};
use crate::rfc5322::identification::{MessageID, MessageIDList, msg_id, msg_list};
use crate::rfc5322::trace::{ReceivedLog, return_path, received_log};
use crate::rfc5322::mime::{Version, version};
use crate::rfc5322::message::Message;
use crate::header::{field_name, header};
use crate::text::misc_token::{Unstructured, PhraseList, unstructured, phrase_list};
use crate::rfc5322::address::{address_list, mailbox_list, nullable_address_list, AddressList};
use crate::rfc5322::datetime::section as date;
use crate::rfc5322::identification::{msg_id, msg_list, MessageID, MessageIDList};
use crate::rfc5322::mailbox::{mailbox, AddrSpec, MailboxList, MailboxRef};
use crate::rfc5322::message::Message;
use crate::rfc5322::mime::{version, Version};
use crate::rfc5322::trace::{received_log, return_path, ReceivedLog};
use crate::text::misc_token::{phrase_list, unstructured, PhraseList, Unstructured};
use crate::text::whitespace::obs_crlf;
#[derive(Debug, PartialEq)]
pub enum Field<'a> {
@ -50,7 +50,6 @@ pub enum Field<'a> {
MIMEVersion(Version),
}
#[derive(Debug, PartialEq)]
pub struct FieldList<'a>(pub Vec<Field<'a>>);
impl<'a> FieldList<'a> {
@ -60,30 +59,33 @@ impl<'a> FieldList<'a> {
}
pub fn field(input: &[u8]) -> IResult<&[u8], Field> {
terminated(alt((
preceded(field_name(b"date"), map(date, Field::Date)),
preceded(field_name(b"from"), map(mailbox_list, Field::From)),
preceded(field_name(b"sender"), map(mailbox, Field::Sender)),
preceded(field_name(b"reply-to"), map(address_list, Field::ReplyTo)),
preceded(field_name(b"to"), map(address_list, Field::To)),
preceded(field_name(b"cc"), map(address_list, Field::Cc)),
preceded(field_name(b"bcc"), map(nullable_address_list, Field::Bcc)),
preceded(field_name(b"message-id"), map(msg_id, Field::MessageID)),
preceded(field_name(b"in-reply-to"), map(msg_list, Field::InReplyTo)),
preceded(field_name(b"references"), map(msg_list, Field::References)),
preceded(field_name(b"subject"), map(unstructured, Field::Subject)),
preceded(field_name(b"comments"), map(unstructured, Field::Comments)),
preceded(field_name(b"keywords"), map(phrase_list, Field::Keywords)),
preceded(field_name(b"return-path"), map(return_path, Field::ReturnPath)),
preceded(field_name(b"received"), map(received_log, Field::Received)),
preceded(field_name(b"mime-version"), map(version, Field::MIMEVersion)),
)), obs_crlf)(input)
terminated(
alt((
preceded(field_name(b"date"), map(date, Field::Date)),
preceded(field_name(b"from"), map(mailbox_list, Field::From)),
preceded(field_name(b"sender"), map(mailbox, Field::Sender)),
preceded(field_name(b"reply-to"), map(address_list, Field::ReplyTo)),
preceded(field_name(b"to"), map(address_list, Field::To)),
preceded(field_name(b"cc"), map(address_list, Field::Cc)),
preceded(field_name(b"bcc"), map(nullable_address_list, Field::Bcc)),
preceded(field_name(b"message-id"), map(msg_id, Field::MessageID)),
preceded(field_name(b"in-reply-to"), map(msg_list, Field::InReplyTo)),
preceded(field_name(b"references"), map(msg_list, Field::References)),
preceded(field_name(b"subject"), map(unstructured, Field::Subject)),
preceded(field_name(b"comments"), map(unstructured, Field::Comments)),
preceded(field_name(b"keywords"), map(phrase_list, Field::Keywords)),
preceded(
field_name(b"return-path"),
map(return_path, Field::ReturnPath),
),
preceded(field_name(b"received"), map(received_log, Field::Received)),
preceded(
field_name(b"mime-version"),
map(version, Field::MIMEVersion),
),
)),
obs_crlf,
)(input)
}
pub fn message(input: &[u8]) -> IResult<&[u8], Message> {
@ -93,10 +95,10 @@ pub fn message(input: &[u8]) -> IResult<&[u8], Message> {
#[cfg(test)]
mod tests {
use super::*;
use chrono::{FixedOffset, TimeZone};
use crate::rfc5322::mailbox::*;
use crate::rfc5322::address::*;
use crate::rfc5322::mailbox::*;
use crate::text::misc_token::*;
use chrono::{FixedOffset, TimeZone};
#[test]
fn test_header() {

View file

@ -11,7 +11,6 @@ use crate::rfc5322::mailbox::is_dtext;
use crate::text::whitespace::cfws;
use crate::text::words::dot_atom_text;
#[derive(Debug, PartialEq, Clone)]
pub struct MessageID<'a> {
pub left: &'a [u8],

View file

@ -7,10 +7,10 @@ use nom::{
IResult,
};
use crate::text::misc_token::{phrase, word, Word, Phrase};
use crate::text::whitespace::{cfws, fws, is_obs_no_ws_ctl};
use crate::text::words::{atom};
use crate::text::ascii;
use crate::text::misc_token::{phrase, word, Phrase, Word};
use crate::text::whitespace::{cfws, fws, is_obs_no_ws_ctl};
use crate::text::words::atom;
#[derive(Debug, PartialEq)]
pub struct AddrSpec<'a> {
@ -19,7 +19,11 @@ pub struct AddrSpec<'a> {
}
impl<'a> AddrSpec<'a> {
pub fn to_string(&self) -> String {
format!("{}@{}", self.local_part.to_string(), self.domain.to_string())
format!(
"{}@{}",
self.local_part.to_string(),
self.domain.to_string()
)
}
}
@ -33,7 +37,7 @@ impl<'a> MailboxRef<'a> {
pub fn to_string(&self) -> String {
match &self.name {
Some(n) => format!("{} <{}>", n.to_string(), self.addrspec.to_string()),
None => self.addrspec.to_string()
None => self.addrspec.to_string(),
}
}
}
@ -96,7 +100,8 @@ fn obs_domain_list(input: &[u8]) -> IResult<&[u8], Vec<Option<Domain>>> {
separated_list1(
tag(&[ascii::COMMA]),
preceded(many0(cfws), opt(preceded(tag(&[ascii::AT]), obs_domain))),
))(input)
),
)(input)
}
/// AddrSpec
@ -129,16 +134,13 @@ pub struct LocalPart<'a>(pub Vec<LocalPartToken<'a>>);
impl<'a> LocalPart<'a> {
pub fn to_string(&self) -> String {
self.0.iter().fold(
String::new(),
|mut acc, token| {
match token {
LocalPartToken::Dot => acc.push('.'),
LocalPartToken::Word(v) => acc.push_str(v.to_string().as_ref()),
}
acc
self.0.iter().fold(String::new(), |mut acc, token| {
match token {
LocalPartToken::Dot => acc.push('.'),
LocalPartToken::Word(v) => acc.push_str(v.to_string().as_ref()),
}
)
acc
})
}
}
@ -173,9 +175,27 @@ pub enum Domain<'a> {
impl<'a> Domain<'a> {
pub fn to_string(&self) -> String {
match self {
Domain::Atoms(v) => v.iter().map(|v| encoding_rs::UTF_8.decode_without_bom_handling(v).0.to_string()).collect::<Vec<String>>().join("."),
Domain::Atoms(v) => v
.iter()
.map(|v| {
encoding_rs::UTF_8
.decode_without_bom_handling(v)
.0
.to_string()
})
.collect::<Vec<String>>()
.join("."),
Domain::Litteral(v) => {
let inner = v.iter().map(|v| encoding_rs::UTF_8.decode_without_bom_handling(v).0.to_string()).collect::<Vec<String>>().join(" ");
let inner = v
.iter()
.map(|v| {
encoding_rs::UTF_8
.decode_without_bom_handling(v)
.0
.to_string()
})
.collect::<Vec<String>>()
.join(" ");
format!("[{}]", inner)
}
}
@ -211,10 +231,10 @@ fn domain_litteral(input: &[u8]) -> IResult<&[u8], Domain> {
}
fn inner_domain_litteral(input: &[u8]) -> IResult<&[u8], Domain> {
map(
terminated(many0(preceded(opt(fws), take_while1(is_dtext))), opt(fws)),
|v| Domain::Litteral(v),
)(input)
map(
terminated(many0(preceded(opt(fws), take_while1(is_dtext))), opt(fws)),
|v| Domain::Litteral(v),
)(input)
}
fn is_strict_dtext(c: u8) -> bool {
@ -257,7 +277,10 @@ mod tests {
"jsmith@[192.168.2.1]".to_string(),
);
assert_eq!(
addr_spec(b"jsmith@[IPv6:2001:db8::1]").unwrap().1.to_string(),
addr_spec(b"jsmith@[IPv6:2001:db8::1]")
.unwrap()
.1
.to_string(),
"jsmith@[IPv6:2001:db8::1]".to_string(),
);
@ -276,12 +299,18 @@ mod tests {
// ASCII Edge cases
assert_eq!(
addr_spec(b"user+mailbox/department=shipping@example.com").unwrap().1.to_string(),
addr_spec(b"user+mailbox/department=shipping@example.com")
.unwrap()
.1
.to_string(),
"user+mailbox/department=shipping@example.com".to_string(),
);
assert_eq!(
addr_spec(b"!#$%&'*+-/=?^_`.{|}~@example.com").unwrap().1.to_string(),
addr_spec(b"!#$%&'*+-/=?^_`.{|}~@example.com")
.unwrap()
.1
.to_string(),
"!#$%&'*+-/=?^_`.{|}~@example.com".to_string(),
);
@ -290,7 +319,9 @@ mod tests {
Ok((
&b""[..],
AddrSpec {
local_part: LocalPart(vec![LocalPartToken::Word(Word::Quoted(QuotedString(vec![b"Abc@def"])))]),
local_part: LocalPart(vec![LocalPartToken::Word(Word::Quoted(QuotedString(
vec![b"Abc@def"]
)))]),
domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]),
}
))
@ -300,7 +331,9 @@ mod tests {
Ok((
&b""[..],
AddrSpec {
local_part: LocalPart(vec![LocalPartToken::Word(Word::Quoted(QuotedString(vec![b"Fred", b" ", b"Bloggs"])))]),
local_part: LocalPart(vec![LocalPartToken::Word(Word::Quoted(QuotedString(
vec![b"Fred", b" ", b"Bloggs"]
)))]),
domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]),
}
))
@ -310,7 +343,9 @@ mod tests {
Ok((
&b""[..],
AddrSpec {
local_part: LocalPart(vec![LocalPartToken::Word(Word::Quoted(QuotedString(vec![b"Joe.", &[ascii::BACKSLASH], b"Blow"])))]),
local_part: LocalPart(vec![LocalPartToken::Word(Word::Quoted(QuotedString(
vec![b"Joe.", &[ascii::BACKSLASH], b"Blow"]
)))]),
domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]),
}
))
@ -324,7 +359,13 @@ mod tests {
Ok((
&b""[..],
MailboxRef {
name: Some(Phrase(vec![Word::Quoted(QuotedString(vec![&b"Joe"[..], &[ascii::SP], &b"Q."[..], &[ascii::SP], &b"Public"[..]]))])),
name: Some(Phrase(vec![Word::Quoted(QuotedString(vec![
&b"Joe"[..],
&[ascii::SP],
&b"Q."[..],
&[ascii::SP],
&b"Public"[..]
]))])),
addrspec: AddrSpec {
local_part: LocalPart(vec![
LocalPartToken::Word(Word::Atom(&b"john"[..])),
@ -344,7 +385,10 @@ mod tests {
Ok((
&b""[..],
MailboxRef {
name: Some(Phrase(vec![Word::Atom(&b"Mary"[..]), Word::Atom(&b"Smith"[..])])),
name: Some(Phrase(vec![
Word::Atom(&b"Mary"[..]),
Word::Atom(&b"Smith"[..])
])),
addrspec: AddrSpec {
local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"mary"[..]))]),
domain: Domain::Atoms(vec![&b"x"[..], &b"test"[..]]),
@ -410,7 +454,9 @@ mod tests {
&b"Box"[..]
]))])),
addrspec: AddrSpec {
local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"sysservices"[..]))]),
local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(
&b"sysservices"[..]
))]),
domain: Domain::Atoms(vec![&b"example"[..], &b"net"[..]]),
}
}
@ -428,19 +474,27 @@ mod tests {
@33+4.com,,,,
,,,,
(again)
@example.com,@yep.com,@a,@b,,,@c"#.as_bytes()
@example.com,@yep.com,@a,@b,,,@c"#
.as_bytes()
),
Ok((
&b""[..],
vec![
None,
Some(Domain::Atoms(vec![&b"33+4"[..], &b"com"[..]])),
None, None, None, None, None, None, None,
None,
None,
None,
None,
None,
None,
None,
Some(Domain::Atoms(vec![&b"example"[..], &b"com"[..]])),
Some(Domain::Atoms(vec![&b"yep"[..], &b"com"[..]])),
Some(Domain::Atoms(vec![&b"a"[..]])),
Some(Domain::Atoms(vec![&b"b"[..]])),
None, None,
None,
None,
Some(Domain::Atoms(vec![&b"c"[..]])),
]
))
@ -505,17 +559,18 @@ mod tests {
#[test]
fn test_enron4() {
assert_eq!(
mailbox(r#"<"mark_kopinski/intl/acim/americancentury"@americancentury.com@enron.com>"#.as_bytes()),
mailbox(
r#"<"mark_kopinski/intl/acim/americancentury"@americancentury.com@enron.com>"#
.as_bytes()
),
Ok((
&b""[..],
MailboxRef {
name: None,
addrspec: AddrSpec {
local_part: LocalPart(vec![
LocalPartToken::Word(Word::Quoted(QuotedString(vec![
&b"mark_kopinski/intl/acim/americancentury"[..],
])))
]),
local_part: LocalPart(vec![LocalPartToken::Word(Word::Quoted(
QuotedString(vec![&b"mark_kopinski/intl/acim/americancentury"[..],])
))]),
domain: Domain::Atoms(vec![&b"americancentury"[..], &b"com"[..]]),
}
}

View file

@ -1,10 +1,10 @@
use crate::text::misc_token::{PhraseList, Unstructured};
use crate::rfc5322::mime::Version;
use crate::rfc5322::mailbox::{AddrSpec, MailboxRef};
use crate::rfc5322::address::{AddressRef};
use crate::rfc5322::identification::{MessageID};
use crate::rfc5322::address::AddressRef;
use crate::rfc5322::field::Field;
use crate::rfc5322::identification::MessageID;
use crate::rfc5322::mailbox::{AddrSpec, MailboxRef};
use crate::rfc5322::mime::Version;
use crate::rfc5322::trace::ReceivedLog;
use crate::text::misc_token::{PhraseList, Unstructured};
use chrono::{DateTime, FixedOffset};
#[derive(Debug, PartialEq, Default)]
@ -45,9 +45,8 @@ pub struct Message<'a> {
// it may result in missing data or silently overridden data.
impl<'a> FromIterator<Field<'a>> for Message<'a> {
fn from_iter<I: IntoIterator<Item = Field<'a>>>(iter: I) -> Self {
iter.into_iter().fold(
Message::default(),
|mut section, field| {
iter.into_iter()
.fold(Message::default(), |mut section, field| {
match field {
Field::Date(v) => section.date = v,
Field::From(v) => section.from.extend(v),
@ -67,7 +66,6 @@ impl<'a> FromIterator<Field<'a>> for Message<'a> {
Field::MIMEVersion(v) => section.mime_version = Some(v),
};
section
}
)
})
}
}

View file

@ -1,8 +1,8 @@
use nom::{
IResult,
sequence::tuple,
bytes::complete::{tag, take},
combinator::{map, opt, verify},
sequence::tuple,
IResult,
};
use crate::text::ascii;
@ -41,7 +41,10 @@ mod tests {
#[test]
fn test_version() {
assert_eq!(version(b"1.0"), Ok((&b""[..], Version { major: 1, minor: 0 })),);
assert_eq!(
version(b"1.0"),
Ok((&b""[..], Version { major: 1, minor: 0 })),
);
assert_eq!(
version(b" 1.0 (produced by MetaSend Vx.x)"),

View file

@ -1,8 +1,8 @@
pub mod mailbox;
pub mod address;
pub mod datetime;
pub mod trace;
pub mod identification;
pub mod mime;
pub mod field;
pub mod identification;
pub mod mailbox;
pub mod message;
pub mod mime;
pub mod trace;

View file

@ -1,21 +1,21 @@
use chrono::{DateTime, FixedOffset};
use nom::{
branch::alt,
bytes::complete::{is_a, tag},
combinator::{map, opt, not},
combinator::{map, not, opt},
multi::many0,
sequence::{tuple, terminated},
sequence::{terminated, tuple},
IResult,
};
use chrono::{DateTime, FixedOffset};
use crate::rfc5322::{datetime, mailbox};
use crate::text::{ascii, whitespace, misc_token };
use crate::text::{ascii, misc_token, whitespace};
#[derive(Debug, PartialEq)]
pub enum ReceivedLogToken<'a> {
Addr(mailbox::AddrSpec<'a>),
Domain(mailbox::Domain<'a>),
Word(misc_token::Word<'a>)
Word(misc_token::Word<'a>),
}
#[derive(Debug, PartialEq)]
@ -37,12 +37,11 @@ impl<'a> TryFrom<&'a lazy::ReceivedLog<'a>> for ReceivedLog<'a> {
pub fn received_log(input: &[u8]) -> IResult<&[u8], ReceivedLog> {
map(
tuple((
many0(received_tokens),
tag(";"),
datetime::section,
)),
|(tokens, _, dt)| ReceivedLog { log: tokens, date: dt } ,
tuple((many0(received_tokens), tag(";"), datetime::section)),
|(tokens, _, dt)| ReceivedLog {
log: tokens,
date: dt,
},
)(input)
}
@ -63,7 +62,10 @@ fn empty_path(input: &[u8]) -> IResult<&[u8], Option<mailbox::AddrSpec>> {
fn received_tokens(input: &[u8]) -> IResult<&[u8], ReceivedLogToken> {
alt((
terminated(map(misc_token::word, |x| ReceivedLogToken::Word(x)), not(is_a([ascii::PERIOD, ascii::AT]))),
terminated(
map(misc_token::word, |x| ReceivedLogToken::Word(x)),
not(is_a([ascii::PERIOD, ascii::AT])),
),
map(mailbox::angle_addr, |x| ReceivedLogToken::Addr(x)),
map(mailbox::addr_spec, |x| ReceivedLogToken::Addr(x)),
map(mailbox::obs_domain, |x| ReceivedLogToken::Domain(x)),
@ -73,8 +75,8 @@ fn received_tokens(input: &[u8]) -> IResult<&[u8], ReceivedLogToken> {
#[cfg(test)]
mod tests {
use super::*;
use chrono::TimeZone;
use crate::rfc5322::trace::misc_token::Word;
use chrono::TimeZone;
#[test]
fn test_received_body() {
@ -82,17 +84,27 @@ mod tests {
by server with LMTP
id xxxxxxxxx
(envelope-from <gitlab@example.com>)
for <me@example.com>; Tue, 13 Jun 2023 19:01:08 +0000"#.as_bytes();
for <me@example.com>; Tue, 13 Jun 2023 19:01:08 +0000"#
.as_bytes();
assert_eq!(
received_log(hdrs),
Ok((
&b""[..],
ReceivedLog {
date: Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 06, 13, 19, 1, 8).unwrap()),
date: Some(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(2023, 06, 13, 19, 1, 8)
.unwrap()
),
log: vec![
ReceivedLogToken::Word(Word::Atom(&b"from"[..])),
ReceivedLogToken::Domain(mailbox::Domain::Atoms(vec![&b"smtp"[..], &b"example"[..], &b"com"[..]])),
ReceivedLogToken::Domain(mailbox::Domain::Atoms(vec![
&b"smtp"[..],
&b"example"[..],
&b"com"[..]
])),
ReceivedLogToken::Word(Word::Atom(&b"by"[..])),
ReceivedLogToken::Word(Word::Atom(&b"server"[..])),
ReceivedLogToken::Word(Word::Atom(&b"with"[..])),
@ -101,7 +113,9 @@ mod tests {
ReceivedLogToken::Word(Word::Atom(&b"xxxxxxxxx"[..])),
ReceivedLogToken::Word(Word::Atom(&b"for"[..])),
ReceivedLogToken::Addr(mailbox::AddrSpec {
local_part: mailbox::LocalPart(vec![mailbox::LocalPartToken::Word(Word::Atom(&b"me"[..]))]),
local_part: mailbox::LocalPart(vec![mailbox::LocalPartToken::Word(
Word::Atom(&b"me"[..])
)]),
domain: mailbox::Domain::Atoms(vec![&b"example"[..], &b"com"[..]]),
})
],

View file

@ -20,7 +20,7 @@ pub const DLE: u8 = 0x10;
pub const DC1: u8 = 0x11;
pub const DC2: u8 = 0x12;
pub const DC3: u8 = 0x13;
pub const DC4 : u8 = 0x14;
pub const DC4: u8 = 0x14;
pub const NAK: u8 = 0x15;
pub const SYN: u8 = 0x16;
pub const ETB: u8 = 0x17;

View file

@ -1,21 +1,22 @@
use nom::{
IResult,
bytes::complete::tag,
sequence::tuple,
combinator::opt,
};
use nom::{bytes::complete::tag, combinator::opt, sequence::tuple, IResult};
use crate::text::whitespace::obs_crlf;
#[derive(Debug, PartialEq)]
pub enum Delimiter {
Next,
Last
Last,
}
pub fn boundary<'a>(boundary: &[u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Delimiter> + '_ {
move |input: &[u8]| {
let (rest, (_, _, _, last, _)) = tuple((opt(obs_crlf), tag(b"--"), tag(boundary), opt(tag(b"--")), opt(obs_crlf)))(input)?;
let (rest, (_, _, _, last, _)) = tuple((
opt(obs_crlf),
tag(b"--"),
tag(boundary),
opt(tag(b"--")),
opt(obs_crlf),
))(input)?;
match last {
Some(_) => Ok((rest, Delimiter::Last)),
None => Ok((rest, Delimiter::Next)),

View file

@ -1,19 +1,19 @@
use encoding_rs::Encoding;
use base64::{engine::general_purpose, Engine as _};
use nom::{
IResult,
branch::alt,
bytes::complete::{tag, take, take_while1, take_while},
character::complete::{one_of},
bytes::complete::{tag, take, take_while, take_while1},
character::complete::one_of,
character::is_alphanumeric,
combinator::map,
sequence::{preceded, terminated, tuple},
multi::many0,
sequence::{preceded, terminated, tuple},
IResult,
};
use base64::{Engine as _, engine::general_purpose};
use crate::text::words;
use crate::text::ascii;
use crate::text::words;
pub fn encoded_word(input: &[u8]) -> IResult<&[u8], EncodedWord> {
alt((encoded_word_quoted, encoded_word_base64))(input)
@ -21,35 +21,49 @@ pub fn encoded_word(input: &[u8]) -> IResult<&[u8], EncodedWord> {
pub fn encoded_word_quoted(input: &[u8]) -> IResult<&[u8], EncodedWord> {
let (rest, (_, charset, _, _, _, txt, _)) = tuple((
tag("=?"), words::mime_atom,
tag("?"), one_of("Qq"),
tag("?"), ptext,
tag("?=")))(input)?;
tag("=?"),
words::mime_atom,
tag("?"),
one_of("Qq"),
tag("?"),
ptext,
tag("?="),
))(input)?;
let renc = Encoding::for_label(charset).unwrap_or(encoding_rs::WINDOWS_1252);
let parsed = EncodedWord::Quoted(QuotedWord { enc: renc, chunks: txt });
let parsed = EncodedWord::Quoted(QuotedWord {
enc: renc,
chunks: txt,
});
Ok((rest, parsed))
}
pub fn encoded_word_base64(input: &[u8]) -> IResult<&[u8], EncodedWord> {
let (rest, (_, charset, _, _, _, txt, _)) = tuple((
tag("=?"), words::mime_atom,
tag("?"), one_of("Bb"),
tag("?"), btext,
tag("?=")))(input)?;
tag("=?"),
words::mime_atom,
tag("?"),
one_of("Bb"),
tag("?"),
btext,
tag("?="),
))(input)?;
let renc = Encoding::for_label(charset).unwrap_or(encoding_rs::WINDOWS_1252);
let parsed = EncodedWord::Base64(Base64Word { enc: renc, content: txt });
let parsed = EncodedWord::Base64(Base64Word {
enc: renc,
content: txt,
});
Ok((rest, parsed))
}
#[derive(PartialEq,Debug, Clone)]
#[derive(PartialEq, Debug, Clone)]
pub enum EncodedWord<'a> {
Quoted(QuotedWord<'a>),
Base64(Base64Word<'a>),
}
impl<'a> EncodedWord<'a> {
pub fn to_string(&self) -> String {
pub fn to_string(&self) -> String {
match self {
EncodedWord::Quoted(v) => v.to_string(),
EncodedWord::Base64(v) => v.to_string(),
@ -57,7 +71,7 @@ impl<'a> EncodedWord<'a> {
}
}
#[derive(PartialEq,Debug,Clone)]
#[derive(PartialEq, Debug, Clone)]
pub struct Base64Word<'a> {
pub enc: &'static Encoding,
pub content: &'a [u8],
@ -72,7 +86,7 @@ impl<'a> Base64Word<'a> {
}
}
#[derive(PartialEq,Debug,Clone)]
#[derive(PartialEq, Debug, Clone)]
pub struct QuotedWord<'a> {
pub enc: &'static Encoding,
pub chunks: Vec<QuotedChunk<'a>>,
@ -80,27 +94,25 @@ pub struct QuotedWord<'a> {
impl<'a> QuotedWord<'a> {
pub fn to_string(&self) -> String {
self.chunks.iter().fold(
String::new(),
|mut acc, c| {
match c {
QuotedChunk::Safe(v) => {
let (content, _) = encoding_rs::UTF_8.decode_without_bom_handling(v);
acc.push_str(content.as_ref());
}
QuotedChunk::Space => acc.push(' '),
QuotedChunk::Encoded(v) => {
let w = &[*v];
let (d, _) = self.enc.decode_without_bom_handling(w);
acc.push_str(d.as_ref());
},
};
acc
})
self.chunks.iter().fold(String::new(), |mut acc, c| {
match c {
QuotedChunk::Safe(v) => {
let (content, _) = encoding_rs::UTF_8.decode_without_bom_handling(v);
acc.push_str(content.as_ref());
}
QuotedChunk::Space => acc.push(' '),
QuotedChunk::Encoded(v) => {
let w = &[*v];
let (d, _) = self.enc.decode_without_bom_handling(w);
acc.push_str(d.as_ref());
}
};
acc
})
}
}
#[derive(PartialEq,Debug,Clone)]
#[derive(PartialEq, Debug, Clone)]
pub enum QuotedChunk<'a> {
Safe(&'a [u8]),
Encoded(u8),
@ -112,12 +124,10 @@ pub fn ptext(input: &[u8]) -> IResult<&[u8], Vec<QuotedChunk>> {
many0(alt((safe_char2, encoded_space, hex_octet)))(input)
}
fn safe_char2(input: &[u8]) -> IResult<&[u8], QuotedChunk> {
map(take_while1(is_safe_char2), |v| QuotedChunk::Safe(v))(input)
map(take_while1(is_safe_char2), |v| QuotedChunk::Safe(v))(input)
}
/// RFC2047 section 4.2
/// 8-bit values which correspond to printable ASCII characters other
/// than "=", "?", and "_" (underscore), MAY be represented as those
@ -167,28 +177,33 @@ mod tests {
fn test_ptext() {
assert_eq!(
ptext(b"Accus=E9_de_r=E9ception_(affich=E9)"),
Ok((&b""[..], vec![
QuotedChunk::Safe(&b"Accus"[..]),
QuotedChunk::Encoded(0xe9),
QuotedChunk::Space,
QuotedChunk::Safe(&b"de"[..]),
QuotedChunk::Space,
QuotedChunk::Safe(&b"r"[..]),
QuotedChunk::Encoded(0xe9),
QuotedChunk::Safe(&b"ception"[..]),
QuotedChunk::Space,
QuotedChunk::Safe(&b"(affich"[..]),
QuotedChunk::Encoded(0xe9),
QuotedChunk::Safe(&b")"[..]),
]))
Ok((
&b""[..],
vec![
QuotedChunk::Safe(&b"Accus"[..]),
QuotedChunk::Encoded(0xe9),
QuotedChunk::Space,
QuotedChunk::Safe(&b"de"[..]),
QuotedChunk::Space,
QuotedChunk::Safe(&b"r"[..]),
QuotedChunk::Encoded(0xe9),
QuotedChunk::Safe(&b"ception"[..]),
QuotedChunk::Space,
QuotedChunk::Safe(&b"(affich"[..]),
QuotedChunk::Encoded(0xe9),
QuotedChunk::Safe(&b")"[..]),
]
))
);
}
#[test]
fn test_decode_word() {
assert_eq!(
encoded_word(b"=?iso8859-1?Q?Accus=E9_de_r=E9ception_(affich=E9)?=").unwrap().1.to_string(),
encoded_word(b"=?iso8859-1?Q?Accus=E9_de_r=E9ception_(affich=E9)?=")
.unwrap()
.1
.to_string(),
"Accusé de réception (affiché)".to_string(),
);
}
@ -197,7 +212,10 @@ mod tests {
#[test]
fn test_decode_word_b64() {
assert_eq!(
encoded_word(b"=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=").unwrap().1.to_string(),
encoded_word(b"=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=")
.unwrap()
.1
.to_string(),
"If you can read this yo".to_string(),
);
}

View file

@ -4,16 +4,16 @@ use nom::{
character::complete::space0,
combinator::{map, opt},
multi::{many0, many1, separated_list1},
sequence::{preceded},
sequence::preceded,
IResult,
};
use crate::text::{
quoted::{QuotedString, quoted_string},
whitespace::{fws, is_obs_no_ws_ctl},
words::{atom, mime_atom, is_vchar},
encoding::{self, encoded_word},
ascii,
encoding::{self, encoded_word},
quoted::{quoted_string, QuotedString},
whitespace::{fws, is_obs_no_ws_ctl},
words::{atom, is_vchar, mime_atom},
};
#[derive(Debug, PartialEq, Default)]
@ -36,7 +36,10 @@ impl<'a> MIMEWord<'a> {
pub fn to_string(&self) -> String {
match self {
Self::Quoted(v) => v.to_string(),
Self::Atom(v) => encoding_rs::UTF_8.decode_without_bom_handling(v).0.to_string(),
Self::Atom(v) => encoding_rs::UTF_8
.decode_without_bom_handling(v)
.0
.to_string(),
}
}
}
@ -59,7 +62,10 @@ impl<'a> Word<'a> {
match self {
Word::Quoted(v) => v.to_string(),
Word::Encoded(v) => v.to_string(),
Word::Atom(v) => encoding_rs::UTF_8.decode_without_bom_handling(v).0.to_string(),
Word::Atom(v) => encoding_rs::UTF_8
.decode_without_bom_handling(v)
.0
.to_string(),
}
}
}
@ -73,7 +79,7 @@ pub fn word(input: &[u8]) -> IResult<&[u8], Word> {
alt((
map(quoted_string, |v| Word::Quoted(v)),
map(encoded_word, |v| Word::Encoded(v)),
map(atom, |v| Word::Atom(v))
map(atom, |v| Word::Atom(v)),
))(input)
}
@ -82,7 +88,11 @@ pub struct Phrase<'a>(pub Vec<Word<'a>>);
impl<'a> Phrase<'a> {
pub fn to_string(&self) -> String {
self.0.iter().map(|v| v.to_string()).collect::<Vec<String>>().join(" ")
self.0
.iter()
.map(|v| v.to_string())
.collect::<Vec<String>>()
.join(" ")
}
}
@ -117,7 +127,10 @@ impl<'a> UnstrToken<'a> {
match self {
UnstrToken::Init => "".into(),
UnstrToken::Encoded(e) => e.to_string(),
UnstrToken::Plain(e) => encoding_rs::UTF_8.decode_without_bom_handling(e).0.into_owned(),
UnstrToken::Plain(e) => encoding_rs::UTF_8
.decode_without_bom_handling(e)
.0
.into_owned(),
}
}
}
@ -127,21 +140,26 @@ pub struct Unstructured<'a>(pub Vec<UnstrToken<'a>>);
impl<'a> Unstructured<'a> {
pub fn to_string(&self) -> String {
self.0.iter().fold(
(&UnstrToken::Init, String::new()),
|(prev_token, mut result), current_token| {
match (prev_token, current_token) {
(UnstrToken::Init, v) => result.push_str(v.to_string().as_ref()),
(UnstrToken::Encoded(_), UnstrToken::Encoded(v)) => result.push_str(v.to_string().as_ref()),
(_, v) => {
result.push(' ');
result.push_str(v.to_string().as_ref())
},
};
self.0
.iter()
.fold(
(&UnstrToken::Init, String::new()),
|(prev_token, mut result), current_token| {
match (prev_token, current_token) {
(UnstrToken::Init, v) => result.push_str(v.to_string().as_ref()),
(UnstrToken::Encoded(_), UnstrToken::Encoded(v)) => {
result.push_str(v.to_string().as_ref())
}
(_, v) => {
result.push(' ');
result.push_str(v.to_string().as_ref())
}
};
(current_token, result)
}
).1
(current_token, result)
},
)
.1
}
}
@ -151,16 +169,18 @@ impl<'a> Unstructured<'a> {
/// unstructured = (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct
/// ```
pub fn unstructured(input: &[u8]) -> IResult<&[u8], Unstructured> {
let (input, r) = many0(preceded(opt(fws), alt((
map(encoded_word, |v| UnstrToken::Encoded(v)),
map(take_while1(is_unstructured), |v| UnstrToken::Plain(v)),
))))(input)?;
let (input, r) = many0(preceded(
opt(fws),
alt((
map(encoded_word, |v| UnstrToken::Encoded(v)),
map(take_while1(is_unstructured), |v| UnstrToken::Plain(v)),
)),
))(input)?;
let (input, _) = space0(input)?;
Ok((input, Unstructured(r)))
}
#[cfg(test)]
mod tests {
use super::*;

View file

@ -1,7 +1,7 @@
pub mod ascii;
pub mod boundary;
pub mod encoding;
pub mod misc_token;
pub mod quoted;
pub mod whitespace;
pub mod words;
pub mod boundary;

View file

@ -1,14 +1,14 @@
use nom::{
branch::alt,
bytes::complete::{take_while1, take, tag},
combinator::{opt},
bytes::complete::{tag, take, take_while1},
combinator::opt,
multi::many0,
sequence::{pair, preceded},
IResult,
};
use crate::text::whitespace::{cfws, fws, is_obs_no_ws_ctl};
use crate::text::ascii;
use crate::text::whitespace::{cfws, fws, is_obs_no_ws_ctl};
#[derive(Debug, PartialEq, Default)]
pub struct QuotedString<'a>(pub Vec<&'a [u8]>);
@ -22,14 +22,13 @@ impl<'a> QuotedString<'a> {
let enc = encoding_rs::UTF_8;
let size = self.0.iter().fold(0, |acc, v| acc + v.len());
self.0.iter().fold(
String::with_capacity(size),
|mut acc, v| {
self.0
.iter()
.fold(String::with_capacity(size), |mut acc, v| {
let (content, _) = enc.decode_without_bom_handling(v);
acc.push_str(content.as_ref());
acc
},
)
})
}
}
@ -43,8 +42,6 @@ pub fn quoted_pair(input: &[u8]) -> IResult<&[u8], &[u8]> {
preceded(tag(&[ascii::BACKSLASH]), take(1usize))(input)
}
/// Allowed characters in quote
///
/// ```abnf
@ -54,7 +51,9 @@ pub fn quoted_pair(input: &[u8]) -> IResult<&[u8], &[u8]> {
/// obs-qtext
/// ```
fn is_restr_qtext(c: u8) -> bool {
c == ascii::EXCLAMATION || (c >= ascii::NUM && c <= ascii::LEFT_BRACKET) || (c >= ascii::RIGHT_BRACKET && c <= ascii::TILDE)
c == ascii::EXCLAMATION
|| (c >= ascii::NUM && c <= ascii::LEFT_BRACKET)
|| (c >= ascii::RIGHT_BRACKET && c <= ascii::TILDE)
}
fn is_qtext(c: u8) -> bool {
@ -116,7 +115,10 @@ mod tests {
assert_eq!(
quoted_string(b"\"hello\r\n world\""),
Ok((&b""[..], QuotedString(vec![b"hello", &[ascii::SP], b"world"]))),
Ok((
&b""[..],
QuotedString(vec![b"hello", &[ascii::SP], b"world"])
)),
);
}

View file

@ -1,3 +1,6 @@
use crate::text::ascii;
use crate::text::encoding::encoded_word;
use crate::text::quoted::quoted_pair;
use nom::{
branch::alt,
bytes::complete::{is_not, tag, take_while1},
@ -7,9 +10,6 @@ use nom::{
sequence::{pair, tuple},
IResult,
};
use crate::text::encoding::encoded_word;
use crate::text::quoted::quoted_pair;
use crate::text::ascii;
/// Whitespace (space, new line, tab) content and
/// delimited content (e.g. comment, line, sections, etc.)
@ -37,10 +37,7 @@ pub fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
pub fn foldable_line(input: &[u8]) -> IResult<&[u8], &[u8]> {
recognize(tuple((
is_not(ascii::CRLF),
many0(pair(
many1(pair(obs_crlf, space1)),
is_not(ascii::CRLF),
)),
many0(pair(many1(pair(obs_crlf, space1)), is_not(ascii::CRLF))),
obs_crlf,
)))(input)
}
@ -101,7 +98,12 @@ pub fn comment(input: &[u8]) -> IResult<&[u8], ()> {
}
pub fn ccontent(input: &[u8]) -> IResult<&[u8], &[u8]> {
alt((ctext, recognize(quoted_pair), recognize(encoded_word), recognize(comment)))(input)
alt((
ctext,
recognize(quoted_pair),
recognize(encoded_word),
recognize(comment),
))(input)
}
pub fn ctext(input: &[u8]) -> IResult<&[u8], &[u8]> {
@ -183,7 +185,7 @@ mod tests {
#[test]
fn test_cfws_encoded_word() {
assert_eq!(
assert_eq!(
cfws(b"(=?US-ASCII?Q?Keith_Moore?=)"),
Ok((&b""[..], &b"(=?US-ASCII?Q?Keith_Moore?=)"[..])),
);

View file

@ -1,5 +1,5 @@
use crate::text::whitespace::cfws;
use crate::text::ascii;
use crate::text::whitespace::cfws;
use nom::{
bytes::complete::{tag, take_while1},
character::is_alphanumeric,
@ -18,23 +18,23 @@ pub fn is_vchar(c: u8) -> bool {
/// forbidden: ()<>@,;:\"/[]?=
fn is_mime_atom_text(c: u8) -> bool {
is_alphanumeric(c)
|| c == ascii::EXCLAMATION
|| c == ascii::NUM
|| c == ascii::DOLLAR
|| c == ascii::PERCENT
|| c == ascii::AMPERSAND
|| c == ascii::SQUOTE
|| c == ascii::ASTERISK
|| c == ascii::PLUS
|| c == ascii::MINUS
|| c == ascii::PERIOD
|| c == ascii::CARRET
|| c == ascii::UNDERSCORE
|| c == ascii::GRAVE
|| c == ascii::LEFT_CURLY
|| c == ascii::PIPE
|| c == ascii::RIGHT_CURLY
|| c == ascii::TILDE
|| c == ascii::EXCLAMATION
|| c == ascii::NUM
|| c == ascii::DOLLAR
|| c == ascii::PERCENT
|| c == ascii::AMPERSAND
|| c == ascii::SQUOTE
|| c == ascii::ASTERISK
|| c == ascii::PLUS
|| c == ascii::MINUS
|| c == ascii::PERIOD
|| c == ascii::CARRET
|| c == ascii::UNDERSCORE
|| c == ascii::GRAVE
|| c == ascii::LEFT_CURLY
|| c == ascii::PIPE
|| c == ascii::RIGHT_CURLY
|| c == ascii::TILDE
}
/// MIME Token
@ -49,25 +49,25 @@ pub fn mime_atom(input: &[u8]) -> IResult<&[u8], &[u8]> {
/// authorized: !#$%&'*+-/=?^_`{|}~
fn is_atext(c: u8) -> bool {
is_alphanumeric(c)
|| c == ascii::EXCLAMATION
|| c == ascii::NUM
|| c == ascii::DOLLAR
|| c == ascii::PERCENT
|| c == ascii::AMPERSAND
|| c == ascii::SQUOTE
|| c == ascii::ASTERISK
|| c == ascii::PLUS
|| c == ascii::MINUS
|| c == ascii::SLASH
|| c == ascii::EQ
|| c == ascii::QUESTION
|| c == ascii::CARRET
|| c == ascii::UNDERSCORE
|| c == ascii::GRAVE
|| c == ascii::LEFT_CURLY
|| c == ascii::PIPE
|| c == ascii::RIGHT_CURLY
|| c == ascii::TILDE
|| c == ascii::EXCLAMATION
|| c == ascii::NUM
|| c == ascii::DOLLAR
|| c == ascii::PERCENT
|| c == ascii::AMPERSAND
|| c == ascii::SQUOTE
|| c == ascii::ASTERISK
|| c == ascii::PLUS
|| c == ascii::MINUS
|| c == ascii::SLASH
|| c == ascii::EQ
|| c == ascii::QUESTION
|| c == ascii::CARRET
|| c == ascii::UNDERSCORE
|| c == ascii::GRAVE
|| c == ascii::LEFT_CURLY
|| c == ascii::PIPE
|| c == ascii::RIGHT_CURLY
|| c == ascii::TILDE
}
/// Atom