format code

This commit is contained in:
Quentin 2023-07-23 16:37:47 +02:00
parent 6e2b29ec57
commit c97595c128
Signed by: quentin
GPG key ID: E9602264D639FF68
26 changed files with 610 additions and 439 deletions

View file

@ -1,14 +1,14 @@
use crate::text::misc_token::{unstructured, Unstructured};
use crate::text::whitespace::{foldable_line, obs_crlf};
use nom::{ use nom::{
IResult,
branch::alt, branch::alt,
bytes::complete::{tag_no_case, tag, take_while1}, bytes::complete::{tag, tag_no_case, take_while1},
character::complete::space0, character::complete::space0,
combinator::{map}, combinator::map,
multi::many0, multi::many0,
sequence::{pair, terminated, tuple}, sequence::{pair, terminated, tuple},
IResult,
}; };
use crate::text::whitespace::{foldable_line, obs_crlf};
use crate::text::misc_token::{Unstructured, unstructured};
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum CompField<'a, T> { pub enum CompField<'a, T> {
@ -21,21 +21,33 @@ pub enum CompField<'a, T> {
pub struct CompFieldList<'a, T>(pub Vec<CompField<'a, T>>); pub struct CompFieldList<'a, T>(pub Vec<CompField<'a, T>>);
impl<'a, T> CompFieldList<'a, T> { impl<'a, T> CompFieldList<'a, T> {
pub fn known(self) -> Vec<T> { pub fn known(self) -> Vec<T> {
self.0.into_iter().map(|v| match v { self.0
.into_iter()
.map(|v| match v {
CompField::Known(f) => Some(f), CompField::Known(f) => Some(f),
_ => None, _ => None,
}).flatten().collect() })
.flatten()
.collect()
} }
} }
pub fn header<'a, T>(fx: impl Fn(&'a [u8]) -> IResult<&'a [u8], T> + Copy) pub fn header<'a, T>(
-> impl Fn(&'a [u8]) -> IResult<&'a [u8], CompFieldList<T>> fx: impl Fn(&'a [u8]) -> IResult<&'a [u8], T> + Copy,
{ ) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], CompFieldList<T>> {
move |input| map(terminated(many0(alt(( move |input| {
map(
terminated(
many0(alt((
map(fx, CompField::Known), map(fx, CompField::Known),
map(opt_field, |(k,v)| CompField::Unknown(k,v)), map(opt_field, |(k, v)| CompField::Unknown(k, v)),
map(foldable_line, CompField::Bad), map(foldable_line, CompField::Bad),
))), obs_crlf), CompFieldList)(input) ))),
obs_crlf,
),
CompFieldList,
)(input)
}
} }
/* /*
@ -53,12 +65,7 @@ pub fn header_in_boundaries<'a, T>(bound: &'a [u8], fx: impl Fn(&'a [u8]) -> IRe
*/ */
pub fn field_name<'a>(name: &'static [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> { pub fn field_name<'a>(name: &'static [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
move |input| { move |input| terminated(tag_no_case(name), tuple((space0, tag(b":"), space0)))(input)
terminated(
tag_no_case(name),
tuple((space0, tag(b":"), space0)),
)(input)
}
} }
/// Optional field /// Optional field
@ -78,7 +85,7 @@ pub fn opt_field(input: &[u8]) -> IResult<&[u8], (&[u8], Unstructured)> {
tuple((space0, tag(b":"), space0)), tuple((space0, tag(b":"), space0)),
), ),
unstructured, unstructured,
), obs_crlf)(input) ),
obs_crlf,
)(input)
} }

View file

@ -1,9 +1,9 @@
mod error; mod error;
mod text;
mod header; mod header;
mod rfc5322;
mod mime; mod mime;
mod part; mod part;
mod rfc5322;
mod text;
pub fn email(input: &[u8]) -> Result<part::part::Message, error::EMLError> { pub fn email(input: &[u8]) -> Result<part::part::Message, error::EMLError> {
part::part::message(mime::mime::Message::default())(input) part::part::message(mime::mime::Message::default())(input)

View file

@ -74,7 +74,6 @@ impl<'a> From<&'a [u8]> for EmailCharset {
b"utf-8" | b"utf8" => EmailCharset::UTF_8, b"utf-8" | b"utf8" => EmailCharset::UTF_8,
_ => EmailCharset::Unknown, _ => EmailCharset::Unknown,
} }
} }
} }
@ -112,21 +111,16 @@ impl EmailCharset {
} }
pub fn as_encoding(&self) -> &'static Encoding { pub fn as_encoding(&self) -> &'static Encoding {
Encoding::for_label(self.as_str().as_bytes()) Encoding::for_label(self.as_str().as_bytes()).unwrap_or(encoding_rs::WINDOWS_1252)
.unwrap_or(encoding_rs::WINDOWS_1252)
} }
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
#[test] #[test]
fn test_charset() { fn test_charset() {
assert_eq!( assert_eq!(EmailCharset::from(&b"Us-Ascii"[..]).as_str(), "US-ASCII",);
EmailCharset::from(&b"Us-Ascii"[..]).as_str(),
"US-ASCII",
);
assert_eq!( assert_eq!(
EmailCharset::from(&b"Us-Ascii"[..]).as_encoding(), EmailCharset::from(&b"Us-Ascii"[..]).as_encoding(),

View file

@ -1,17 +1,17 @@
use nom::{ use nom::{
IResult,
branch::alt, branch::alt,
combinator::map, combinator::map,
sequence::{preceded, terminated}, sequence::{preceded, terminated},
IResult,
}; };
use crate::text::whitespace::obs_crlf;
use crate::text::misc_token::{Unstructured, unstructured};
use crate::rfc5322::identification::{MessageID, msg_id};
use crate::header::{field_name, CompFieldList}; use crate::header::{field_name, CompFieldList};
use crate::mime::r#type::{NaiveType, naive_type}; use crate::mime::mechanism::{mechanism, Mechanism};
use crate::mime::mechanism::{Mechanism, mechanism};
use crate::mime::mime::AnyMIME; use crate::mime::mime::AnyMIME;
use crate::mime::r#type::{naive_type, NaiveType};
use crate::rfc5322::identification::{msg_id, MessageID};
use crate::text::misc_token::{unstructured, Unstructured};
use crate::text::whitespace::obs_crlf;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum Content<'a> { pub enum Content<'a> {
@ -22,16 +22,28 @@ pub enum Content<'a> {
} }
impl<'a> Content<'a> { impl<'a> Content<'a> {
pub fn ctype(&'a self) -> Option<&'a NaiveType<'a>> { pub fn ctype(&'a self) -> Option<&'a NaiveType<'a>> {
match self { Content::Type(v) => Some(v), _ => None } match self {
Content::Type(v) => Some(v),
_ => None,
}
} }
pub fn transfer_encoding(&'a self) -> Option<&'a Mechanism<'a>> { pub fn transfer_encoding(&'a self) -> Option<&'a Mechanism<'a>> {
match self { Content::TransferEncoding(v) => Some(v), _ => None } match self {
Content::TransferEncoding(v) => Some(v),
_ => None,
}
} }
pub fn id(&'a self) -> Option<&'a MessageID<'a>> { pub fn id(&'a self) -> Option<&'a MessageID<'a>> {
match self { Content::ID(v) => Some(v), _ => None } match self {
Content::ID(v) => Some(v),
_ => None,
}
} }
pub fn description(&'a self) -> Option<&'a Unstructured<'a>> { pub fn description(&'a self) -> Option<&'a Unstructured<'a>> {
match self { Content::Description(v) => Some(v), _ => None } match self {
Content::Description(v) => Some(v),
_ => None,
}
} }
} }
@ -42,26 +54,36 @@ impl<'a> CompFieldList<'a, Content<'a>> {
} }
pub fn content(input: &[u8]) -> IResult<&[u8], Content> { pub fn content(input: &[u8]) -> IResult<&[u8], Content> {
terminated(alt(( terminated(
alt((
preceded(field_name(b"content-type"), map(naive_type, Content::Type)), preceded(field_name(b"content-type"), map(naive_type, Content::Type)),
preceded(field_name(b"content-transfer-encoding"), map(mechanism, Content::TransferEncoding)), preceded(
field_name(b"content-transfer-encoding"),
map(mechanism, Content::TransferEncoding),
),
preceded(field_name(b"content-id"), map(msg_id, Content::ID)), preceded(field_name(b"content-id"), map(msg_id, Content::ID)),
preceded(field_name(b"content-description"), map(unstructured, Content::Description)), preceded(
)), obs_crlf)(input) field_name(b"content-description"),
map(unstructured, Content::Description),
),
)),
obs_crlf,
)(input)
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use crate::mime::r#type::*; use crate::header::{header, CompFieldList};
use crate::mime::charset::EmailCharset; use crate::mime::charset::EmailCharset;
use crate::mime::r#type::*;
use crate::text::misc_token::MIMEWord; use crate::text::misc_token::MIMEWord;
use crate::text::quoted::QuotedString; use crate::text::quoted::QuotedString;
use crate::header::{header, CompFieldList};
#[test] #[test]
fn test_content_type() { fn test_content_type() {
let (rest, content) = content(b"Content-Type: text/plain; charset=UTF-8; format=flowed\r\n").unwrap(); let (rest, content) =
content(b"Content-Type: text/plain; charset=UTF-8; format=flowed\r\n").unwrap();
assert_eq!(&b""[..], rest); assert_eq!(&b""[..], rest);
if let Content::Type(nt) = content { if let Content::Type(nt) = content {
@ -91,7 +113,8 @@ Content-Transfer-Encoding: 7bit
This is a multipart message. This is a multipart message.
"#.as_bytes(); "#
.as_bytes();
assert_eq!( assert_eq!(
map(header(content), CompFieldList::known)(fullmail), map(header(content), CompFieldList::known)(fullmail),
@ -101,12 +124,12 @@ This is a multipart message.
Content::Type(NaiveType { Content::Type(NaiveType {
main: &b"multipart"[..], main: &b"multipart"[..],
sub: &b"alternative"[..], sub: &b"alternative"[..],
params: vec![ params: vec![Parameter {
Parameter {
name: &b"boundary"[..], name: &b"boundary"[..],
value: MIMEWord::Quoted(QuotedString(vec![&b"b1_e376dc71bafc953c0b0fdeb9983a9956"[..]])), value: MIMEWord::Quoted(QuotedString(vec![
} &b"b1_e376dc71bafc953c0b0fdeb9983a9956"[..]
] ])),
}]
}), }),
Content::TransferEncoding(Mechanism::_7Bit), Content::TransferEncoding(Mechanism::_7Bit),
], ],

View file

@ -1,12 +1,12 @@
use crate::text::whitespace::cfws;
use crate::text::words::mime_atom as token;
use nom::{ use nom::{
IResult,
branch::alt, branch::alt,
bytes::complete::tag_no_case, bytes::complete::tag_no_case,
combinator::{map, opt, value}, combinator::{map, opt, value},
sequence::delimited, sequence::delimited,
IResult,
}; };
use crate::text::whitespace::cfws;
use crate::text::words::mime_atom as token;
#[derive(Debug, Clone, PartialEq, Default)] #[derive(Debug, Clone, PartialEq, Default)]
pub enum Mechanism<'a> { pub enum Mechanism<'a> {
@ -38,16 +38,12 @@ pub fn mechanism(input: &[u8]) -> IResult<&[u8], Mechanism> {
))(input) ))(input)
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
#[test] #[test]
fn test_mechanism() { fn test_mechanism() {
assert_eq!( assert_eq!(mechanism(b"7bit"), Ok((&b""[..], Mechanism::_7Bit)),);
mechanism(b"7bit"),
Ok((&b""[..], Mechanism::_7Bit)),
);
assert_eq!( assert_eq!(
mechanism(b"(youhou) 8bit"), mechanism(b"(youhou) 8bit"),
@ -59,10 +55,7 @@ mod tests {
Ok((&b""[..], Mechanism::Binary)), Ok((&b""[..], Mechanism::Binary)),
); );
assert_eq!( assert_eq!(mechanism(b" base64 "), Ok((&b""[..], Mechanism::Base64)),);
mechanism(b" base64 "),
Ok((&b""[..], Mechanism::Base64)),
);
assert_eq!( assert_eq!(
mechanism(b" Quoted-Printable "), mechanism(b" Quoted-Printable "),

View file

@ -1,8 +1,8 @@
use crate::mime::mechanism::Mechanism;
use crate::rfc5322::identification::MessageID;
use crate::text::misc_token::Unstructured;
use crate::mime::field::Content; use crate::mime::field::Content;
use crate::mime::r#type::{AnyType, self as ctype}; //Multipart, Message, Text, Binary}; use crate::mime::mechanism::Mechanism;
use crate::mime::r#type::{self as ctype, AnyType};
use crate::rfc5322::identification::MessageID;
use crate::text::misc_token::Unstructured; //Multipart, Message, Text, Binary};
#[derive(Debug, PartialEq, Clone)] #[derive(Debug, PartialEq, Clone)]
pub struct Multipart<'a>(pub ctype::Multipart, pub Generic<'a>); pub struct Multipart<'a>(pub ctype::Multipart, pub Generic<'a>);
@ -24,7 +24,6 @@ pub enum AnyMIME<'a> {
Bin(Binary<'a>), Bin(Binary<'a>),
} }
impl<'a> AnyMIME<'a> { impl<'a> AnyMIME<'a> {
pub fn from_pair(at: AnyType, gen: Generic<'a>) -> Self { pub fn from_pair(at: AnyType, gen: Generic<'a>) -> Self {
match at { match at {
@ -48,7 +47,7 @@ impl<'a> FromIterator<Content<'a>> for AnyMIME<'a> {
Content::Description(v) => section.description = Some(v), Content::Description(v) => section.description = Some(v),
}; };
(at, section) (at, section)
} },
); );
Self::from_pair(at, gen) Self::from_pair(at, gen)
@ -61,4 +60,3 @@ pub struct Generic<'a> {
pub id: Option<MessageID<'a>>, pub id: Option<MessageID<'a>>,
pub description: Option<Unstructured<'a>>, pub description: Option<Unstructured<'a>>,
} }

View file

@ -1,5 +1,5 @@
pub mod charset; pub mod charset;
pub mod mechanism;
pub mod r#type;
pub mod field; pub mod field;
pub mod mechanism;
pub mod mime; pub mod mime;
pub mod r#type;

View file

@ -6,9 +6,9 @@ use nom::{
IResult, IResult,
}; };
use crate::text::misc_token::{MIMEWord, mime_word};
use crate::text::words::{mime_atom};
use crate::mime::charset::EmailCharset; use crate::mime::charset::EmailCharset;
use crate::text::misc_token::{mime_word, MIMEWord};
use crate::text::words::mime_atom;
// --------- NAIVE TYPE // --------- NAIVE TYPE
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
@ -18,7 +18,9 @@ pub struct NaiveType<'a> {
pub params: Vec<Parameter<'a>>, pub params: Vec<Parameter<'a>>,
} }
impl<'a> NaiveType<'a> { impl<'a> NaiveType<'a> {
pub fn to_type(&self) -> AnyType { self.into() } pub fn to_type(&self) -> AnyType {
self.into()
}
} }
pub fn naive_type(input: &[u8]) -> IResult<&[u8], NaiveType> { pub fn naive_type(input: &[u8]) -> IResult<&[u8], NaiveType> {
map( map(
@ -33,7 +35,10 @@ pub struct Parameter<'a> {
pub value: MIMEWord<'a>, pub value: MIMEWord<'a>,
} }
pub fn parameter(input: &[u8]) -> IResult<&[u8], Parameter> { pub fn parameter(input: &[u8]) -> IResult<&[u8], Parameter> {
map(tuple((mime_atom, tag(b"="), mime_word)), |(name, _, value)| Parameter { name, value })(input) map(
tuple((mime_atom, tag(b"="), mime_word)),
|(name, _, value)| Parameter { name, value },
)(input)
} }
pub fn parameter_list(input: &[u8]) -> IResult<&[u8], Vec<Parameter>> { pub fn parameter_list(input: &[u8]) -> IResult<&[u8], Vec<Parameter>> {
many0(preceded(tag(";"), parameter))(input) many0(preceded(tag(";"), parameter))(input)
@ -60,7 +65,9 @@ impl Default for AnyType {
impl<'a> From<&'a NaiveType<'a>> for AnyType { impl<'a> From<&'a NaiveType<'a>> for AnyType {
fn from(nt: &'a NaiveType<'a>) -> Self { fn from(nt: &'a NaiveType<'a>) -> Self {
match nt.main.to_ascii_lowercase().as_slice() { match nt.main.to_ascii_lowercase().as_slice() {
b"multipart" => Multipart::try_from(nt).map(Self::Multipart).unwrap_or(Self::default()), b"multipart" => Multipart::try_from(nt)
.map(Self::Multipart)
.unwrap_or(Self::default()),
b"message" => Self::Message(Message::from(nt)), b"message" => Self::Message(Message::from(nt)),
b"text" => Self::Text(Text::from(nt)), b"text" => Self::Text(Text::from(nt)),
_ => Self::Binary(Binary::default()), _ => Self::Binary(Binary::default()),
@ -77,7 +84,8 @@ impl<'a> TryFrom<&'a NaiveType<'a>> for Multipart {
type Error = (); type Error = ();
fn try_from(nt: &'a NaiveType<'a>) -> Result<Self, Self::Error> { fn try_from(nt: &'a NaiveType<'a>) -> Result<Self, Self::Error> {
nt.params.iter() nt.params
.iter()
.find(|x| x.name.to_ascii_lowercase().as_slice() == b"boundary") .find(|x| x.name.to_ascii_lowercase().as_slice() == b"boundary")
.map(|boundary| Multipart { .map(|boundary| Multipart {
subtype: MultipartSubtype::from(nt), subtype: MultipartSubtype::from(nt),
@ -137,7 +145,9 @@ impl<'a> From<&NaiveType<'a>> for Text {
fn from(nt: &NaiveType<'a>) -> Self { fn from(nt: &NaiveType<'a>) -> Self {
Self { Self {
subtype: TextSubtype::from(nt), subtype: TextSubtype::from(nt),
charset: nt.params.iter() charset: nt
.params
.iter()
.find(|x| x.name.to_ascii_lowercase().as_slice() == b"charset") .find(|x| x.name.to_ascii_lowercase().as_slice() == b"charset")
.map(|x| EmailCharset::from(x.value.to_string().as_bytes())) .map(|x| EmailCharset::from(x.value.to_string().as_bytes()))
.unwrap_or(EmailCharset::US_ASCII), .unwrap_or(EmailCharset::US_ASCII),
@ -175,17 +185,23 @@ mod tests {
fn test_parameter() { fn test_parameter() {
assert_eq!( assert_eq!(
parameter(b"charset=utf-8"), parameter(b"charset=utf-8"),
Ok((&b""[..], Parameter { Ok((
&b""[..],
Parameter {
name: &b"charset"[..], name: &b"charset"[..],
value: MIMEWord::Atom(&b"utf-8"[..]), value: MIMEWord::Atom(&b"utf-8"[..]),
})), }
)),
); );
assert_eq!( assert_eq!(
parameter(b"charset=\"utf-8\""), parameter(b"charset=\"utf-8\""),
Ok((&b""[..], Parameter { Ok((
&b""[..],
Parameter {
name: &b"charset"[..], name: &b"charset"[..],
value: MIMEWord::Quoted(QuotedString(vec![&b"utf-8"[..]])), value: MIMEWord::Quoted(QuotedString(vec![&b"utf-8"[..]])),
})), }
)),
); );
} }
@ -203,7 +219,6 @@ mod tests {
); );
} }
#[test] #[test]
fn test_content_type_multipart() { fn test_content_type_multipart() {
let (rest, nt) = naive_type(b"multipart/mixed;\r\n\tboundary=\"--==_mimepart_64a3f2c69114f_2a13d020975fe\";\r\n\tcharset=UTF-8").unwrap(); let (rest, nt) = naive_type(b"multipart/mixed;\r\n\tboundary=\"--==_mimepart_64a3f2c69114f_2a13d020975fe\";\r\n\tcharset=UTF-8").unwrap();
@ -222,20 +237,20 @@ mod tests {
let (rest, nt) = naive_type(b"message/rfc822").unwrap(); let (rest, nt) = naive_type(b"message/rfc822").unwrap();
assert_eq!(rest, &[]); assert_eq!(rest, &[]);
assert_eq!( assert_eq!(nt.to_type(), AnyType::Message(Message::RFC822),);
nt.to_type(),
AnyType::Message(Message::RFC822),
);
} }
#[test] #[test]
fn test_parameter_ascii() { fn test_parameter_ascii() {
assert_eq!( assert_eq!(
parameter(b"charset = (simple) us-ascii (Plain text)"), parameter(b"charset = (simple) us-ascii (Plain text)"),
Ok((&b""[..], Parameter { Ok((
&b""[..],
Parameter {
name: &b"charset"[..], name: &b"charset"[..],
value: MIMEWord::Atom(&b"us-ascii"[..]), value: MIMEWord::Atom(&b"us-ascii"[..]),
})) }
))
); );
} }
} }

View file

@ -1,25 +1,29 @@
use nom::{ use nom::{
IResult,
branch::alt, branch::alt,
bytes::complete::{is_not}, bytes::complete::is_not,
multi::many0,
sequence::{pair},
combinator::{map, not, recognize}, combinator::{map, not, recognize},
multi::many0,
sequence::pair,
IResult,
}; };
use crate::mime;
use crate::mime::mime::{AnyMIME};
use crate::rfc5322::{self as imf};
use crate::text::boundary::{Delimiter, boundary};
use crate::text::whitespace::obs_crlf;
use crate::text::ascii::CRLF;
use crate::header::{header, CompFieldList}; use crate::header::{header, CompFieldList};
use crate::mime;
use crate::mime::mime::AnyMIME;
use crate::rfc5322::{self as imf};
use crate::text::ascii::CRLF;
use crate::text::boundary::{boundary, Delimiter};
use crate::text::whitespace::obs_crlf;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct Multipart<'a>(pub mime::mime::Multipart<'a>, pub Vec<AnyPart<'a>>); pub struct Multipart<'a>(pub mime::mime::Multipart<'a>, pub Vec<AnyPart<'a>>);
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct Message<'a>(pub mime::mime::Message<'a>, pub imf::message::Message<'a>, pub Box<AnyPart<'a>>); pub struct Message<'a>(
pub mime::mime::Message<'a>,
pub imf::message::Message<'a>,
pub Box<AnyPart<'a>>,
);
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct Text<'a>(pub mime::mime::Text<'a>, pub &'a [u8]); pub struct Text<'a>(pub mime::mime::Text<'a>, pub &'a [u8]);
@ -68,9 +72,18 @@ impl<'a> MixedField<'a> {
impl<'a> CompFieldList<'a, MixedField<'a>> { impl<'a> CompFieldList<'a, MixedField<'a>> {
pub fn sections(self) -> (mime::mime::AnyMIME<'a>, imf::message::Message<'a>) { pub fn sections(self) -> (mime::mime::AnyMIME<'a>, imf::message::Message<'a>) {
let k = self.known(); let k = self.known();
let (v1, v2): (Vec<MixedField>, Vec<MixedField>) = k.into_iter().partition(|v| v.mime().is_some()); let (v1, v2): (Vec<MixedField>, Vec<MixedField>) =
let mime = v1.into_iter().map(|v| v.to_mime()).flatten().collect::<mime::mime::AnyMIME>(); k.into_iter().partition(|v| v.mime().is_some());
let imf = v2.into_iter().map(|v| v.to_imf()).flatten().collect::<imf::message::Message>(); let mime = v1
.into_iter()
.map(|v| v.to_mime())
.flatten()
.collect::<mime::mime::AnyMIME>();
let imf = v2
.into_iter()
.map(|v| v.to_imf())
.flatten()
.collect::<imf::message::Message>();
(mime, imf) (mime, imf)
} }
} }
@ -81,7 +94,9 @@ pub fn mixed_field(input: &[u8]) -> IResult<&[u8], MixedField> {
))(input) ))(input)
} }
pub fn message<'a>(m: mime::mime::Message<'a>) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Message<'a>> { pub fn message<'a>(
m: mime::mime::Message<'a>,
) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Message<'a>> {
move |input: &[u8]| { move |input: &[u8]| {
let (input, fields) = header(mixed_field)(input)?; let (input, fields) = header(mixed_field)(input)?;
let (in_mime, imf) = fields.sections(); let (in_mime, imf) = fields.sections();
@ -92,7 +107,9 @@ pub fn message<'a>(m: mime::mime::Message<'a>) -> impl Fn(&'a [u8]) -> IResult<&
} }
} }
pub fn multipart<'a>(m: mime::mime::Multipart<'a>) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Multipart<'a>> { pub fn multipart<'a>(
m: mime::mime::Multipart<'a>,
) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Multipart<'a>> {
let m = m.clone(); let m = m.clone();
move |input| { move |input| {
@ -134,7 +151,6 @@ pub fn to_anypart<'a>(m: AnyMIME<'a>, rpart: &'a [u8]) -> AnyPart<'a> {
} }
} }
pub fn part_raw<'a>(bound: &[u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> + '_ { pub fn part_raw<'a>(bound: &[u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> + '_ {
move |input| { move |input| {
recognize(many0(pair( recognize(many0(pair(
@ -147,14 +163,15 @@ pub fn part_raw<'a>(bound: &[u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use crate::text::encoding::{Base64Word, EncodedWord, QuotedChunk, QuotedWord};
use crate::text::misc_token::{Phrase, UnstrToken, Unstructured, Word};
use chrono::{FixedOffset, TimeZone}; use chrono::{FixedOffset, TimeZone};
use crate::text::misc_token::{Word, Phrase, Unstructured, UnstrToken};
use crate::text::encoding::{EncodedWord, QuotedChunk, Base64Word, QuotedWord};
#[test] #[test]
fn test_preamble() { fn test_preamble() {
assert_eq!( assert_eq!(
part_raw(b"hello")(b"blip part_raw(b"hello")(
b"blip
bloup bloup
blip blip
@ -164,7 +181,8 @@ bloup--
--hello --hello
Field: Body Field: Body
"), "
),
Ok(( Ok((
&b"\n--hello\nField: Body\n"[..], &b"\n--hello\nField: Body\n"[..],
&b"blip\nbloup\n\nblip\nbloup--\n--bim\n--bim--\n"[..], &b"blip\nbloup\n\nblip\nbloup--\n--bim\n--bim--\n"[..],
@ -292,7 +310,8 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
</div> </div>
--b1_e376dc71bafc953c0b0fdeb9983a9956-- --b1_e376dc71bafc953c0b0fdeb9983a9956--
"#.as_bytes(); "#
.as_bytes();
let base_mime = mime::mime::Message::default(); let base_mime = mime::mime::Message::default();
assert_eq!( assert_eq!(

View file

@ -141,21 +141,22 @@ pub fn address_list_cfws(input: &[u8]) -> IResult<&[u8], Vec<AddressRef>> {
} }
pub fn nullable_address_list(input: &[u8]) -> IResult<&[u8], Vec<AddressRef>> { pub fn nullable_address_list(input: &[u8]) -> IResult<&[u8], Vec<AddressRef>> {
map( map(opt(alt((address_list, address_list_cfws))), |v| {
opt(alt((address_list, address_list_cfws))), v.unwrap_or(vec![])
|v| v.unwrap_or(vec![]), })(input)
)(input)
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use crate::text::misc_token::{Phrase, Word};
use crate::rfc5322::mailbox::{AddrSpec, Domain, LocalPart, LocalPartToken}; use crate::rfc5322::mailbox::{AddrSpec, Domain, LocalPart, LocalPartToken};
use crate::text::misc_token::{Phrase, Word};
#[test] #[test]
fn test_mailbox_list() { fn test_mailbox_list() {
match mailbox_list(r#"Pete(A nice \) chap) <pete(his account)@silly.test(his host)>"#.as_bytes()) { match mailbox_list(
r#"Pete(A nice \) chap) <pete(his account)@silly.test(his host)>"#.as_bytes(),
) {
Ok((rest, _)) => assert_eq!(&b""[..], rest), Ok((rest, _)) => assert_eq!(&b""[..], rest),
_ => panic!(), _ => panic!(),
}; };

View file

@ -145,7 +145,13 @@ fn strict_year(input: &[u8]) -> IResult<&[u8], i32> {
fws, fws,
map( map(
terminated(take_while_m_n(4, 9, |c| c >= 0x30 && c <= 0x39), digit0), terminated(take_while_m_n(4, 9, |c| c >= 0x30 && c <= 0x39), digit0),
|d: &[u8]| encoding_rs::UTF_8.decode_without_bom_handling(d).0.parse::<i32>().unwrap_or(0), |d: &[u8]| {
encoding_rs::UTF_8
.decode_without_bom_handling(d)
.0
.parse::<i32>()
.unwrap_or(0)
},
), ),
fws, fws,
)(input) )(input)
@ -225,8 +231,10 @@ fn strict_zone(input: &[u8]) -> IResult<&[u8], Option<FixedOffset>> {
take_while_m_n(2, 2, |c| c >= 0x30 && c <= 0x39), take_while_m_n(2, 2, |c| c >= 0x30 && c <= 0x39),
)), )),
|(_, op, dig_zone_hour, dig_zone_min)| { |(_, op, dig_zone_hour, dig_zone_min)| {
let zone_hour: i32 = ((dig_zone_hour[0] - 0x30) * 10 + (dig_zone_hour[1] - 0x30)) as i32 * HOUR; let zone_hour: i32 =
let zone_min: i32 = ((dig_zone_min[0] - 0x30) * 10 + (dig_zone_min[1] - 0x30)) as i32 * MIN; ((dig_zone_hour[0] - 0x30) * 10 + (dig_zone_hour[1] - 0x30)) as i32 * HOUR;
let zone_min: i32 =
((dig_zone_min[0] - 0x30) * 10 + (dig_zone_min[1] - 0x30)) as i32 * MIN;
match op { match op {
b"+" => FixedOffset::east_opt(zone_hour + zone_min), b"+" => FixedOffset::east_opt(zone_hour + zone_min),
b"-" => FixedOffset::west_opt(zone_hour + zone_min), b"-" => FixedOffset::west_opt(zone_hour + zone_min),
@ -298,7 +306,6 @@ fn obs_zone(input: &[u8]) -> IResult<&[u8], Option<FixedOffset>> {
value(FixedOffset::east_opt(11 * HOUR), tag_no_case(b"L")), value(FixedOffset::east_opt(11 * HOUR), tag_no_case(b"L")),
value(FixedOffset::east_opt(12 * HOUR), tag_no_case(b"M")), value(FixedOffset::east_opt(12 * HOUR), tag_no_case(b"M")),
)), )),
// Military Timezones West // Military Timezones West
alt(( alt((
value(FixedOffset::west_opt(1 * HOUR), tag_no_case(b"N")), value(FixedOffset::west_opt(1 * HOUR), tag_no_case(b"N")),
@ -314,7 +321,6 @@ fn obs_zone(input: &[u8]) -> IResult<&[u8], Option<FixedOffset>> {
value(FixedOffset::west_opt(11 * HOUR), tag_no_case(b"X")), value(FixedOffset::west_opt(11 * HOUR), tag_no_case(b"X")),
value(FixedOffset::west_opt(12 * HOUR), tag_no_case(b"Y")), value(FixedOffset::west_opt(12 * HOUR), tag_no_case(b"Y")),
)), )),
// Unknown timezone // Unknown timezone
value(FixedOffset::west_opt(0 * HOUR), alphanumeric1), value(FixedOffset::west_opt(0 * HOUR), alphanumeric1),
)), )),
@ -367,7 +373,8 @@ mod tests {
Feb Feb
1969 1969
23:32 23:32
-0330 (Newfoundland Time)"#.as_bytes() -0330 (Newfoundland Time)"#
.as_bytes()
), ),
Ok(( Ok((
&b""[..], &b""[..],

View file

@ -1,21 +1,21 @@
use chrono::{DateTime, FixedOffset}; use chrono::{DateTime, FixedOffset};
use nom::{ use nom::{
IResult,
branch::alt, branch::alt,
combinator::map, combinator::map,
sequence::{preceded, terminated}, sequence::{preceded, terminated},
IResult,
}; };
use crate::text::whitespace::{obs_crlf};
use crate::rfc5322::address::{AddressList, address_list, nullable_address_list, mailbox_list};
use crate::rfc5322::datetime::section as date;
use crate::rfc5322::mailbox::{MailboxRef, MailboxList, AddrSpec, mailbox};
use crate::rfc5322::identification::{MessageID, MessageIDList, msg_id, msg_list};
use crate::rfc5322::trace::{ReceivedLog, return_path, received_log};
use crate::rfc5322::mime::{Version, version};
use crate::rfc5322::message::Message;
use crate::header::{field_name, header}; use crate::header::{field_name, header};
use crate::text::misc_token::{Unstructured, PhraseList, unstructured, phrase_list}; use crate::rfc5322::address::{address_list, mailbox_list, nullable_address_list, AddressList};
use crate::rfc5322::datetime::section as date;
use crate::rfc5322::identification::{msg_id, msg_list, MessageID, MessageIDList};
use crate::rfc5322::mailbox::{mailbox, AddrSpec, MailboxList, MailboxRef};
use crate::rfc5322::message::Message;
use crate::rfc5322::mime::{version, Version};
use crate::rfc5322::trace::{received_log, return_path, ReceivedLog};
use crate::text::misc_token::{phrase_list, unstructured, PhraseList, Unstructured};
use crate::text::whitespace::obs_crlf;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum Field<'a> { pub enum Field<'a> {
@ -50,7 +50,6 @@ pub enum Field<'a> {
MIMEVersion(Version), MIMEVersion(Version),
} }
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct FieldList<'a>(pub Vec<Field<'a>>); pub struct FieldList<'a>(pub Vec<Field<'a>>);
impl<'a> FieldList<'a> { impl<'a> FieldList<'a> {
@ -60,30 +59,33 @@ impl<'a> FieldList<'a> {
} }
pub fn field(input: &[u8]) -> IResult<&[u8], Field> { pub fn field(input: &[u8]) -> IResult<&[u8], Field> {
terminated(alt(( terminated(
alt((
preceded(field_name(b"date"), map(date, Field::Date)), preceded(field_name(b"date"), map(date, Field::Date)),
preceded(field_name(b"from"), map(mailbox_list, Field::From)), preceded(field_name(b"from"), map(mailbox_list, Field::From)),
preceded(field_name(b"sender"), map(mailbox, Field::Sender)), preceded(field_name(b"sender"), map(mailbox, Field::Sender)),
preceded(field_name(b"reply-to"), map(address_list, Field::ReplyTo)), preceded(field_name(b"reply-to"), map(address_list, Field::ReplyTo)),
preceded(field_name(b"to"), map(address_list, Field::To)), preceded(field_name(b"to"), map(address_list, Field::To)),
preceded(field_name(b"cc"), map(address_list, Field::Cc)), preceded(field_name(b"cc"), map(address_list, Field::Cc)),
preceded(field_name(b"bcc"), map(nullable_address_list, Field::Bcc)), preceded(field_name(b"bcc"), map(nullable_address_list, Field::Bcc)),
preceded(field_name(b"message-id"), map(msg_id, Field::MessageID)), preceded(field_name(b"message-id"), map(msg_id, Field::MessageID)),
preceded(field_name(b"in-reply-to"), map(msg_list, Field::InReplyTo)), preceded(field_name(b"in-reply-to"), map(msg_list, Field::InReplyTo)),
preceded(field_name(b"references"), map(msg_list, Field::References)), preceded(field_name(b"references"), map(msg_list, Field::References)),
preceded(field_name(b"subject"), map(unstructured, Field::Subject)), preceded(field_name(b"subject"), map(unstructured, Field::Subject)),
preceded(field_name(b"comments"), map(unstructured, Field::Comments)), preceded(field_name(b"comments"), map(unstructured, Field::Comments)),
preceded(field_name(b"keywords"), map(phrase_list, Field::Keywords)), preceded(field_name(b"keywords"), map(phrase_list, Field::Keywords)),
preceded(
preceded(field_name(b"return-path"), map(return_path, Field::ReturnPath)), field_name(b"return-path"),
map(return_path, Field::ReturnPath),
),
preceded(field_name(b"received"), map(received_log, Field::Received)), preceded(field_name(b"received"), map(received_log, Field::Received)),
preceded(
preceded(field_name(b"mime-version"), map(version, Field::MIMEVersion)), field_name(b"mime-version"),
)), obs_crlf)(input) map(version, Field::MIMEVersion),
),
)),
obs_crlf,
)(input)
} }
pub fn message(input: &[u8]) -> IResult<&[u8], Message> { pub fn message(input: &[u8]) -> IResult<&[u8], Message> {
@ -93,10 +95,10 @@ pub fn message(input: &[u8]) -> IResult<&[u8], Message> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use chrono::{FixedOffset, TimeZone};
use crate::rfc5322::mailbox::*;
use crate::rfc5322::address::*; use crate::rfc5322::address::*;
use crate::rfc5322::mailbox::*;
use crate::text::misc_token::*; use crate::text::misc_token::*;
use chrono::{FixedOffset, TimeZone};
#[test] #[test]
fn test_header() { fn test_header() {

View file

@ -11,7 +11,6 @@ use crate::rfc5322::mailbox::is_dtext;
use crate::text::whitespace::cfws; use crate::text::whitespace::cfws;
use crate::text::words::dot_atom_text; use crate::text::words::dot_atom_text;
#[derive(Debug, PartialEq, Clone)] #[derive(Debug, PartialEq, Clone)]
pub struct MessageID<'a> { pub struct MessageID<'a> {
pub left: &'a [u8], pub left: &'a [u8],

View file

@ -7,10 +7,10 @@ use nom::{
IResult, IResult,
}; };
use crate::text::misc_token::{phrase, word, Word, Phrase};
use crate::text::whitespace::{cfws, fws, is_obs_no_ws_ctl};
use crate::text::words::{atom};
use crate::text::ascii; use crate::text::ascii;
use crate::text::misc_token::{phrase, word, Phrase, Word};
use crate::text::whitespace::{cfws, fws, is_obs_no_ws_ctl};
use crate::text::words::atom;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct AddrSpec<'a> { pub struct AddrSpec<'a> {
@ -19,7 +19,11 @@ pub struct AddrSpec<'a> {
} }
impl<'a> AddrSpec<'a> { impl<'a> AddrSpec<'a> {
pub fn to_string(&self) -> String { pub fn to_string(&self) -> String {
format!("{}@{}", self.local_part.to_string(), self.domain.to_string()) format!(
"{}@{}",
self.local_part.to_string(),
self.domain.to_string()
)
} }
} }
@ -33,7 +37,7 @@ impl<'a> MailboxRef<'a> {
pub fn to_string(&self) -> String { pub fn to_string(&self) -> String {
match &self.name { match &self.name {
Some(n) => format!("{} <{}>", n.to_string(), self.addrspec.to_string()), Some(n) => format!("{} <{}>", n.to_string(), self.addrspec.to_string()),
None => self.addrspec.to_string() None => self.addrspec.to_string(),
} }
} }
} }
@ -96,7 +100,8 @@ fn obs_domain_list(input: &[u8]) -> IResult<&[u8], Vec<Option<Domain>>> {
separated_list1( separated_list1(
tag(&[ascii::COMMA]), tag(&[ascii::COMMA]),
preceded(many0(cfws), opt(preceded(tag(&[ascii::AT]), obs_domain))), preceded(many0(cfws), opt(preceded(tag(&[ascii::AT]), obs_domain))),
))(input) ),
)(input)
} }
/// AddrSpec /// AddrSpec
@ -129,16 +134,13 @@ pub struct LocalPart<'a>(pub Vec<LocalPartToken<'a>>);
impl<'a> LocalPart<'a> { impl<'a> LocalPart<'a> {
pub fn to_string(&self) -> String { pub fn to_string(&self) -> String {
self.0.iter().fold( self.0.iter().fold(String::new(), |mut acc, token| {
String::new(),
|mut acc, token| {
match token { match token {
LocalPartToken::Dot => acc.push('.'), LocalPartToken::Dot => acc.push('.'),
LocalPartToken::Word(v) => acc.push_str(v.to_string().as_ref()), LocalPartToken::Word(v) => acc.push_str(v.to_string().as_ref()),
} }
acc acc
} })
)
} }
} }
@ -173,9 +175,27 @@ pub enum Domain<'a> {
impl<'a> Domain<'a> { impl<'a> Domain<'a> {
pub fn to_string(&self) -> String { pub fn to_string(&self) -> String {
match self { match self {
Domain::Atoms(v) => v.iter().map(|v| encoding_rs::UTF_8.decode_without_bom_handling(v).0.to_string()).collect::<Vec<String>>().join("."), Domain::Atoms(v) => v
.iter()
.map(|v| {
encoding_rs::UTF_8
.decode_without_bom_handling(v)
.0
.to_string()
})
.collect::<Vec<String>>()
.join("."),
Domain::Litteral(v) => { Domain::Litteral(v) => {
let inner = v.iter().map(|v| encoding_rs::UTF_8.decode_without_bom_handling(v).0.to_string()).collect::<Vec<String>>().join(" "); let inner = v
.iter()
.map(|v| {
encoding_rs::UTF_8
.decode_without_bom_handling(v)
.0
.to_string()
})
.collect::<Vec<String>>()
.join(" ");
format!("[{}]", inner) format!("[{}]", inner)
} }
} }
@ -257,7 +277,10 @@ mod tests {
"jsmith@[192.168.2.1]".to_string(), "jsmith@[192.168.2.1]".to_string(),
); );
assert_eq!( assert_eq!(
addr_spec(b"jsmith@[IPv6:2001:db8::1]").unwrap().1.to_string(), addr_spec(b"jsmith@[IPv6:2001:db8::1]")
.unwrap()
.1
.to_string(),
"jsmith@[IPv6:2001:db8::1]".to_string(), "jsmith@[IPv6:2001:db8::1]".to_string(),
); );
@ -276,12 +299,18 @@ mod tests {
// ASCII Edge cases // ASCII Edge cases
assert_eq!( assert_eq!(
addr_spec(b"user+mailbox/department=shipping@example.com").unwrap().1.to_string(), addr_spec(b"user+mailbox/department=shipping@example.com")
.unwrap()
.1
.to_string(),
"user+mailbox/department=shipping@example.com".to_string(), "user+mailbox/department=shipping@example.com".to_string(),
); );
assert_eq!( assert_eq!(
addr_spec(b"!#$%&'*+-/=?^_`.{|}~@example.com").unwrap().1.to_string(), addr_spec(b"!#$%&'*+-/=?^_`.{|}~@example.com")
.unwrap()
.1
.to_string(),
"!#$%&'*+-/=?^_`.{|}~@example.com".to_string(), "!#$%&'*+-/=?^_`.{|}~@example.com".to_string(),
); );
@ -290,7 +319,9 @@ mod tests {
Ok(( Ok((
&b""[..], &b""[..],
AddrSpec { AddrSpec {
local_part: LocalPart(vec![LocalPartToken::Word(Word::Quoted(QuotedString(vec![b"Abc@def"])))]), local_part: LocalPart(vec![LocalPartToken::Word(Word::Quoted(QuotedString(
vec![b"Abc@def"]
)))]),
domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]), domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]),
} }
)) ))
@ -300,7 +331,9 @@ mod tests {
Ok(( Ok((
&b""[..], &b""[..],
AddrSpec { AddrSpec {
local_part: LocalPart(vec![LocalPartToken::Word(Word::Quoted(QuotedString(vec![b"Fred", b" ", b"Bloggs"])))]), local_part: LocalPart(vec![LocalPartToken::Word(Word::Quoted(QuotedString(
vec![b"Fred", b" ", b"Bloggs"]
)))]),
domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]), domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]),
} }
)) ))
@ -310,7 +343,9 @@ mod tests {
Ok(( Ok((
&b""[..], &b""[..],
AddrSpec { AddrSpec {
local_part: LocalPart(vec![LocalPartToken::Word(Word::Quoted(QuotedString(vec![b"Joe.", &[ascii::BACKSLASH], b"Blow"])))]), local_part: LocalPart(vec![LocalPartToken::Word(Word::Quoted(QuotedString(
vec![b"Joe.", &[ascii::BACKSLASH], b"Blow"]
)))]),
domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]), domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]),
} }
)) ))
@ -324,7 +359,13 @@ mod tests {
Ok(( Ok((
&b""[..], &b""[..],
MailboxRef { MailboxRef {
name: Some(Phrase(vec![Word::Quoted(QuotedString(vec![&b"Joe"[..], &[ascii::SP], &b"Q."[..], &[ascii::SP], &b"Public"[..]]))])), name: Some(Phrase(vec![Word::Quoted(QuotedString(vec![
&b"Joe"[..],
&[ascii::SP],
&b"Q."[..],
&[ascii::SP],
&b"Public"[..]
]))])),
addrspec: AddrSpec { addrspec: AddrSpec {
local_part: LocalPart(vec![ local_part: LocalPart(vec![
LocalPartToken::Word(Word::Atom(&b"john"[..])), LocalPartToken::Word(Word::Atom(&b"john"[..])),
@ -344,7 +385,10 @@ mod tests {
Ok(( Ok((
&b""[..], &b""[..],
MailboxRef { MailboxRef {
name: Some(Phrase(vec![Word::Atom(&b"Mary"[..]), Word::Atom(&b"Smith"[..])])), name: Some(Phrase(vec![
Word::Atom(&b"Mary"[..]),
Word::Atom(&b"Smith"[..])
])),
addrspec: AddrSpec { addrspec: AddrSpec {
local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"mary"[..]))]), local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"mary"[..]))]),
domain: Domain::Atoms(vec![&b"x"[..], &b"test"[..]]), domain: Domain::Atoms(vec![&b"x"[..], &b"test"[..]]),
@ -410,7 +454,9 @@ mod tests {
&b"Box"[..] &b"Box"[..]
]))])), ]))])),
addrspec: AddrSpec { addrspec: AddrSpec {
local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"sysservices"[..]))]), local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(
&b"sysservices"[..]
))]),
domain: Domain::Atoms(vec![&b"example"[..], &b"net"[..]]), domain: Domain::Atoms(vec![&b"example"[..], &b"net"[..]]),
} }
} }
@ -428,19 +474,27 @@ mod tests {
@33+4.com,,,, @33+4.com,,,,
,,,, ,,,,
(again) (again)
@example.com,@yep.com,@a,@b,,,@c"#.as_bytes() @example.com,@yep.com,@a,@b,,,@c"#
.as_bytes()
), ),
Ok(( Ok((
&b""[..], &b""[..],
vec![ vec![
None, None,
Some(Domain::Atoms(vec![&b"33+4"[..], &b"com"[..]])), Some(Domain::Atoms(vec![&b"33+4"[..], &b"com"[..]])),
None, None, None, None, None, None, None, None,
None,
None,
None,
None,
None,
None,
Some(Domain::Atoms(vec![&b"example"[..], &b"com"[..]])), Some(Domain::Atoms(vec![&b"example"[..], &b"com"[..]])),
Some(Domain::Atoms(vec![&b"yep"[..], &b"com"[..]])), Some(Domain::Atoms(vec![&b"yep"[..], &b"com"[..]])),
Some(Domain::Atoms(vec![&b"a"[..]])), Some(Domain::Atoms(vec![&b"a"[..]])),
Some(Domain::Atoms(vec![&b"b"[..]])), Some(Domain::Atoms(vec![&b"b"[..]])),
None, None, None,
None,
Some(Domain::Atoms(vec![&b"c"[..]])), Some(Domain::Atoms(vec![&b"c"[..]])),
] ]
)) ))
@ -505,17 +559,18 @@ mod tests {
#[test] #[test]
fn test_enron4() { fn test_enron4() {
assert_eq!( assert_eq!(
mailbox(r#"<"mark_kopinski/intl/acim/americancentury"@americancentury.com@enron.com>"#.as_bytes()), mailbox(
r#"<"mark_kopinski/intl/acim/americancentury"@americancentury.com@enron.com>"#
.as_bytes()
),
Ok(( Ok((
&b""[..], &b""[..],
MailboxRef { MailboxRef {
name: None, name: None,
addrspec: AddrSpec { addrspec: AddrSpec {
local_part: LocalPart(vec![ local_part: LocalPart(vec![LocalPartToken::Word(Word::Quoted(
LocalPartToken::Word(Word::Quoted(QuotedString(vec![ QuotedString(vec![&b"mark_kopinski/intl/acim/americancentury"[..],])
&b"mark_kopinski/intl/acim/americancentury"[..], ))]),
])))
]),
domain: Domain::Atoms(vec![&b"americancentury"[..], &b"com"[..]]), domain: Domain::Atoms(vec![&b"americancentury"[..], &b"com"[..]]),
} }
} }

View file

@ -1,10 +1,10 @@
use crate::text::misc_token::{PhraseList, Unstructured}; use crate::rfc5322::address::AddressRef;
use crate::rfc5322::mime::Version;
use crate::rfc5322::mailbox::{AddrSpec, MailboxRef};
use crate::rfc5322::address::{AddressRef};
use crate::rfc5322::identification::{MessageID};
use crate::rfc5322::field::Field; use crate::rfc5322::field::Field;
use crate::rfc5322::identification::MessageID;
use crate::rfc5322::mailbox::{AddrSpec, MailboxRef};
use crate::rfc5322::mime::Version;
use crate::rfc5322::trace::ReceivedLog; use crate::rfc5322::trace::ReceivedLog;
use crate::text::misc_token::{PhraseList, Unstructured};
use chrono::{DateTime, FixedOffset}; use chrono::{DateTime, FixedOffset};
#[derive(Debug, PartialEq, Default)] #[derive(Debug, PartialEq, Default)]
@ -45,9 +45,8 @@ pub struct Message<'a> {
// it may result in missing data or silently overriden data. // it may result in missing data or silently overriden data.
impl<'a> FromIterator<Field<'a>> for Message<'a> { impl<'a> FromIterator<Field<'a>> for Message<'a> {
fn from_iter<I: IntoIterator<Item = Field<'a>>>(iter: I) -> Self { fn from_iter<I: IntoIterator<Item = Field<'a>>>(iter: I) -> Self {
iter.into_iter().fold( iter.into_iter()
Message::default(), .fold(Message::default(), |mut section, field| {
|mut section, field| {
match field { match field {
Field::Date(v) => section.date = v, Field::Date(v) => section.date = v,
Field::From(v) => section.from.extend(v), Field::From(v) => section.from.extend(v),
@ -67,7 +66,6 @@ impl<'a> FromIterator<Field<'a>> for Message<'a> {
Field::MIMEVersion(v) => section.mime_version = Some(v), Field::MIMEVersion(v) => section.mime_version = Some(v),
}; };
section section
} })
)
} }
} }

View file

@ -1,8 +1,8 @@
use nom::{ use nom::{
IResult,
sequence::tuple,
bytes::complete::{tag, take}, bytes::complete::{tag, take},
combinator::{map, opt, verify}, combinator::{map, opt, verify},
sequence::tuple,
IResult,
}; };
use crate::text::ascii; use crate::text::ascii;
@ -41,7 +41,10 @@ mod tests {
#[test] #[test]
fn test_version() { fn test_version() {
assert_eq!(version(b"1.0"), Ok((&b""[..], Version { major: 1, minor: 0 })),); assert_eq!(
version(b"1.0"),
Ok((&b""[..], Version { major: 1, minor: 0 })),
);
assert_eq!( assert_eq!(
version(b" 1.0 (produced by MetaSend Vx.x)"), version(b" 1.0 (produced by MetaSend Vx.x)"),

View file

@ -1,8 +1,8 @@
pub mod mailbox;
pub mod address; pub mod address;
pub mod datetime; pub mod datetime;
pub mod trace;
pub mod identification;
pub mod mime;
pub mod field; pub mod field;
pub mod identification;
pub mod mailbox;
pub mod message; pub mod message;
pub mod mime;
pub mod trace;

View file

@ -1,21 +1,21 @@
use chrono::{DateTime, FixedOffset};
use nom::{ use nom::{
branch::alt, branch::alt,
bytes::complete::{is_a, tag}, bytes::complete::{is_a, tag},
combinator::{map, opt, not}, combinator::{map, not, opt},
multi::many0, multi::many0,
sequence::{tuple, terminated}, sequence::{terminated, tuple},
IResult, IResult,
}; };
use chrono::{DateTime, FixedOffset};
use crate::rfc5322::{datetime, mailbox}; use crate::rfc5322::{datetime, mailbox};
use crate::text::{ascii, whitespace, misc_token }; use crate::text::{ascii, misc_token, whitespace};
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum ReceivedLogToken<'a> { pub enum ReceivedLogToken<'a> {
Addr(mailbox::AddrSpec<'a>), Addr(mailbox::AddrSpec<'a>),
Domain(mailbox::Domain<'a>), Domain(mailbox::Domain<'a>),
Word(misc_token::Word<'a>) Word(misc_token::Word<'a>),
} }
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
@ -37,12 +37,11 @@ impl<'a> TryFrom<&'a lazy::ReceivedLog<'a>> for ReceivedLog<'a> {
pub fn received_log(input: &[u8]) -> IResult<&[u8], ReceivedLog> { pub fn received_log(input: &[u8]) -> IResult<&[u8], ReceivedLog> {
map( map(
tuple(( tuple((many0(received_tokens), tag(";"), datetime::section)),
many0(received_tokens), |(tokens, _, dt)| ReceivedLog {
tag(";"), log: tokens,
datetime::section, date: dt,
)), },
|(tokens, _, dt)| ReceivedLog { log: tokens, date: dt } ,
)(input) )(input)
} }
@ -63,7 +62,10 @@ fn empty_path(input: &[u8]) -> IResult<&[u8], Option<mailbox::AddrSpec>> {
fn received_tokens(input: &[u8]) -> IResult<&[u8], ReceivedLogToken> { fn received_tokens(input: &[u8]) -> IResult<&[u8], ReceivedLogToken> {
alt(( alt((
terminated(map(misc_token::word, |x| ReceivedLogToken::Word(x)), not(is_a([ascii::PERIOD, ascii::AT]))), terminated(
map(misc_token::word, |x| ReceivedLogToken::Word(x)),
not(is_a([ascii::PERIOD, ascii::AT])),
),
map(mailbox::angle_addr, |x| ReceivedLogToken::Addr(x)), map(mailbox::angle_addr, |x| ReceivedLogToken::Addr(x)),
map(mailbox::addr_spec, |x| ReceivedLogToken::Addr(x)), map(mailbox::addr_spec, |x| ReceivedLogToken::Addr(x)),
map(mailbox::obs_domain, |x| ReceivedLogToken::Domain(x)), map(mailbox::obs_domain, |x| ReceivedLogToken::Domain(x)),
@ -73,8 +75,8 @@ fn received_tokens(input: &[u8]) -> IResult<&[u8], ReceivedLogToken> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use chrono::TimeZone;
use crate::rfc5322::trace::misc_token::Word; use crate::rfc5322::trace::misc_token::Word;
use chrono::TimeZone;
#[test] #[test]
fn test_received_body() { fn test_received_body() {
@ -82,17 +84,27 @@ mod tests {
by server with LMTP by server with LMTP
id xxxxxxxxx id xxxxxxxxx
(envelope-from <gitlab@example.com>) (envelope-from <gitlab@example.com>)
for <me@example.com>; Tue, 13 Jun 2023 19:01:08 +0000"#.as_bytes(); for <me@example.com>; Tue, 13 Jun 2023 19:01:08 +0000"#
.as_bytes();
assert_eq!( assert_eq!(
received_log(hdrs), received_log(hdrs),
Ok(( Ok((
&b""[..], &b""[..],
ReceivedLog { ReceivedLog {
date: Some(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2023, 06, 13, 19, 1, 8).unwrap()), date: Some(
FixedOffset::east_opt(0)
.unwrap()
.with_ymd_and_hms(2023, 06, 13, 19, 1, 8)
.unwrap()
),
log: vec![ log: vec![
ReceivedLogToken::Word(Word::Atom(&b"from"[..])), ReceivedLogToken::Word(Word::Atom(&b"from"[..])),
ReceivedLogToken::Domain(mailbox::Domain::Atoms(vec![&b"smtp"[..], &b"example"[..], &b"com"[..]])), ReceivedLogToken::Domain(mailbox::Domain::Atoms(vec![
&b"smtp"[..],
&b"example"[..],
&b"com"[..]
])),
ReceivedLogToken::Word(Word::Atom(&b"by"[..])), ReceivedLogToken::Word(Word::Atom(&b"by"[..])),
ReceivedLogToken::Word(Word::Atom(&b"server"[..])), ReceivedLogToken::Word(Word::Atom(&b"server"[..])),
ReceivedLogToken::Word(Word::Atom(&b"with"[..])), ReceivedLogToken::Word(Word::Atom(&b"with"[..])),
@ -101,7 +113,9 @@ mod tests {
ReceivedLogToken::Word(Word::Atom(&b"xxxxxxxxx"[..])), ReceivedLogToken::Word(Word::Atom(&b"xxxxxxxxx"[..])),
ReceivedLogToken::Word(Word::Atom(&b"for"[..])), ReceivedLogToken::Word(Word::Atom(&b"for"[..])),
ReceivedLogToken::Addr(mailbox::AddrSpec { ReceivedLogToken::Addr(mailbox::AddrSpec {
local_part: mailbox::LocalPart(vec![mailbox::LocalPartToken::Word(Word::Atom(&b"me"[..]))]), local_part: mailbox::LocalPart(vec![mailbox::LocalPartToken::Word(
Word::Atom(&b"me"[..])
)]),
domain: mailbox::Domain::Atoms(vec![&b"example"[..], &b"com"[..]]), domain: mailbox::Domain::Atoms(vec![&b"example"[..], &b"com"[..]]),
}) })
], ],

View file

@ -20,7 +20,7 @@ pub const DLE: u8 = 0x10;
pub const DC1: u8 = 0x11; pub const DC1: u8 = 0x11;
pub const DC2: u8 = 0x12; pub const DC2: u8 = 0x12;
pub const DC3: u8 = 0x13; pub const DC3: u8 = 0x13;
pub const DC4 : u8 = 0x14; pub const DC4: u8 = 0x14;
pub const NAK: u8 = 0x15; pub const NAK: u8 = 0x15;
pub const SYN: u8 = 0x16; pub const SYN: u8 = 0x16;
pub const ETB: u8 = 0x17; pub const ETB: u8 = 0x17;

View file

@ -1,21 +1,22 @@
use nom::{ use nom::{bytes::complete::tag, combinator::opt, sequence::tuple, IResult};
IResult,
bytes::complete::tag,
sequence::tuple,
combinator::opt,
};
use crate::text::whitespace::obs_crlf; use crate::text::whitespace::obs_crlf;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum Delimiter { pub enum Delimiter {
Next, Next,
Last Last,
} }
pub fn boundary<'a>(boundary: &[u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Delimiter> + '_ { pub fn boundary<'a>(boundary: &[u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Delimiter> + '_ {
move |input: &[u8]| { move |input: &[u8]| {
let (rest, (_, _, _, last, _)) = tuple((opt(obs_crlf), tag(b"--"), tag(boundary), opt(tag(b"--")), opt(obs_crlf)))(input)?; let (rest, (_, _, _, last, _)) = tuple((
opt(obs_crlf),
tag(b"--"),
tag(boundary),
opt(tag(b"--")),
opt(obs_crlf),
))(input)?;
match last { match last {
Some(_) => Ok((rest, Delimiter::Last)), Some(_) => Ok((rest, Delimiter::Last)),
None => Ok((rest, Delimiter::Next)), None => Ok((rest, Delimiter::Next)),

View file

@ -1,19 +1,19 @@
use encoding_rs::Encoding; use encoding_rs::Encoding;
use base64::{engine::general_purpose, Engine as _};
use nom::{ use nom::{
IResult,
branch::alt, branch::alt,
bytes::complete::{tag, take, take_while1, take_while}, bytes::complete::{tag, take, take_while, take_while1},
character::complete::{one_of}, character::complete::one_of,
character::is_alphanumeric, character::is_alphanumeric,
combinator::map, combinator::map,
sequence::{preceded, terminated, tuple},
multi::many0, multi::many0,
sequence::{preceded, terminated, tuple},
IResult,
}; };
use base64::{Engine as _, engine::general_purpose};
use crate::text::words;
use crate::text::ascii; use crate::text::ascii;
use crate::text::words;
pub fn encoded_word(input: &[u8]) -> IResult<&[u8], EncodedWord> { pub fn encoded_word(input: &[u8]) -> IResult<&[u8], EncodedWord> {
alt((encoded_word_quoted, encoded_word_base64))(input) alt((encoded_word_quoted, encoded_word_base64))(input)
@ -21,29 +21,43 @@ pub fn encoded_word(input: &[u8]) -> IResult<&[u8], EncodedWord> {
pub fn encoded_word_quoted(input: &[u8]) -> IResult<&[u8], EncodedWord> { pub fn encoded_word_quoted(input: &[u8]) -> IResult<&[u8], EncodedWord> {
let (rest, (_, charset, _, _, _, txt, _)) = tuple(( let (rest, (_, charset, _, _, _, txt, _)) = tuple((
tag("=?"), words::mime_atom, tag("=?"),
tag("?"), one_of("Qq"), words::mime_atom,
tag("?"), ptext, tag("?"),
tag("?=")))(input)?; one_of("Qq"),
tag("?"),
ptext,
tag("?="),
))(input)?;
let renc = Encoding::for_label(charset).unwrap_or(encoding_rs::WINDOWS_1252); let renc = Encoding::for_label(charset).unwrap_or(encoding_rs::WINDOWS_1252);
let parsed = EncodedWord::Quoted(QuotedWord { enc: renc, chunks: txt }); let parsed = EncodedWord::Quoted(QuotedWord {
enc: renc,
chunks: txt,
});
Ok((rest, parsed)) Ok((rest, parsed))
} }
pub fn encoded_word_base64(input: &[u8]) -> IResult<&[u8], EncodedWord> { pub fn encoded_word_base64(input: &[u8]) -> IResult<&[u8], EncodedWord> {
let (rest, (_, charset, _, _, _, txt, _)) = tuple(( let (rest, (_, charset, _, _, _, txt, _)) = tuple((
tag("=?"), words::mime_atom, tag("=?"),
tag("?"), one_of("Bb"), words::mime_atom,
tag("?"), btext, tag("?"),
tag("?=")))(input)?; one_of("Bb"),
tag("?"),
btext,
tag("?="),
))(input)?;
let renc = Encoding::for_label(charset).unwrap_or(encoding_rs::WINDOWS_1252); let renc = Encoding::for_label(charset).unwrap_or(encoding_rs::WINDOWS_1252);
let parsed = EncodedWord::Base64(Base64Word { enc: renc, content: txt }); let parsed = EncodedWord::Base64(Base64Word {
enc: renc,
content: txt,
});
Ok((rest, parsed)) Ok((rest, parsed))
} }
#[derive(PartialEq,Debug, Clone)] #[derive(PartialEq, Debug, Clone)]
pub enum EncodedWord<'a> { pub enum EncodedWord<'a> {
Quoted(QuotedWord<'a>), Quoted(QuotedWord<'a>),
Base64(Base64Word<'a>), Base64(Base64Word<'a>),
@ -57,7 +71,7 @@ impl<'a> EncodedWord<'a> {
} }
} }
#[derive(PartialEq,Debug,Clone)] #[derive(PartialEq, Debug, Clone)]
pub struct Base64Word<'a> { pub struct Base64Word<'a> {
pub enc: &'static Encoding, pub enc: &'static Encoding,
pub content: &'a [u8], pub content: &'a [u8],
@ -72,7 +86,7 @@ impl<'a> Base64Word<'a> {
} }
} }
#[derive(PartialEq,Debug,Clone)] #[derive(PartialEq, Debug, Clone)]
pub struct QuotedWord<'a> { pub struct QuotedWord<'a> {
pub enc: &'static Encoding, pub enc: &'static Encoding,
pub chunks: Vec<QuotedChunk<'a>>, pub chunks: Vec<QuotedChunk<'a>>,
@ -80,9 +94,7 @@ pub struct QuotedWord<'a> {
impl<'a> QuotedWord<'a> { impl<'a> QuotedWord<'a> {
pub fn to_string(&self) -> String { pub fn to_string(&self) -> String {
self.chunks.iter().fold( self.chunks.iter().fold(String::new(), |mut acc, c| {
String::new(),
|mut acc, c| {
match c { match c {
QuotedChunk::Safe(v) => { QuotedChunk::Safe(v) => {
let (content, _) = encoding_rs::UTF_8.decode_without_bom_handling(v); let (content, _) = encoding_rs::UTF_8.decode_without_bom_handling(v);
@ -93,14 +105,14 @@ impl<'a> QuotedWord<'a> {
let w = &[*v]; let w = &[*v];
let (d, _) = self.enc.decode_without_bom_handling(w); let (d, _) = self.enc.decode_without_bom_handling(w);
acc.push_str(d.as_ref()); acc.push_str(d.as_ref());
}, }
}; };
acc acc
}) })
} }
} }
#[derive(PartialEq,Debug,Clone)] #[derive(PartialEq, Debug, Clone)]
pub enum QuotedChunk<'a> { pub enum QuotedChunk<'a> {
Safe(&'a [u8]), Safe(&'a [u8]),
Encoded(u8), Encoded(u8),
@ -112,12 +124,10 @@ pub fn ptext(input: &[u8]) -> IResult<&[u8], Vec<QuotedChunk>> {
many0(alt((safe_char2, encoded_space, hex_octet)))(input) many0(alt((safe_char2, encoded_space, hex_octet)))(input)
} }
fn safe_char2(input: &[u8]) -> IResult<&[u8], QuotedChunk> { fn safe_char2(input: &[u8]) -> IResult<&[u8], QuotedChunk> {
map(take_while1(is_safe_char2), |v| QuotedChunk::Safe(v))(input) map(take_while1(is_safe_char2), |v| QuotedChunk::Safe(v))(input)
} }
/// RFC2047 section 4.2 /// RFC2047 section 4.2
/// 8-bit values which correspond to printable ASCII characters other /// 8-bit values which correspond to printable ASCII characters other
/// than "=", "?", and "_" (underscore), MAY be represented as those /// than "=", "?", and "_" (underscore), MAY be represented as those
@ -167,7 +177,9 @@ mod tests {
fn test_ptext() { fn test_ptext() {
assert_eq!( assert_eq!(
ptext(b"Accus=E9_de_r=E9ception_(affich=E9)"), ptext(b"Accus=E9_de_r=E9ception_(affich=E9)"),
Ok((&b""[..], vec![ Ok((
&b""[..],
vec![
QuotedChunk::Safe(&b"Accus"[..]), QuotedChunk::Safe(&b"Accus"[..]),
QuotedChunk::Encoded(0xe9), QuotedChunk::Encoded(0xe9),
QuotedChunk::Space, QuotedChunk::Space,
@ -180,15 +192,18 @@ mod tests {
QuotedChunk::Safe(&b"(affich"[..]), QuotedChunk::Safe(&b"(affich"[..]),
QuotedChunk::Encoded(0xe9), QuotedChunk::Encoded(0xe9),
QuotedChunk::Safe(&b")"[..]), QuotedChunk::Safe(&b")"[..]),
])) ]
))
); );
} }
#[test] #[test]
fn test_decode_word() { fn test_decode_word() {
assert_eq!( assert_eq!(
encoded_word(b"=?iso8859-1?Q?Accus=E9_de_r=E9ception_(affich=E9)?=").unwrap().1.to_string(), encoded_word(b"=?iso8859-1?Q?Accus=E9_de_r=E9ception_(affich=E9)?=")
.unwrap()
.1
.to_string(),
"Accusé de réception (affiché)".to_string(), "Accusé de réception (affiché)".to_string(),
); );
} }
@ -197,7 +212,10 @@ mod tests {
#[test] #[test]
fn test_decode_word_b64() { fn test_decode_word_b64() {
assert_eq!( assert_eq!(
encoded_word(b"=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=").unwrap().1.to_string(), encoded_word(b"=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=")
.unwrap()
.1
.to_string(),
"If you can read this yo".to_string(), "If you can read this yo".to_string(),
); );
} }

View file

@ -4,16 +4,16 @@ use nom::{
character::complete::space0, character::complete::space0,
combinator::{map, opt}, combinator::{map, opt},
multi::{many0, many1, separated_list1}, multi::{many0, many1, separated_list1},
sequence::{preceded}, sequence::preceded,
IResult, IResult,
}; };
use crate::text::{ use crate::text::{
quoted::{QuotedString, quoted_string},
whitespace::{fws, is_obs_no_ws_ctl},
words::{atom, mime_atom, is_vchar},
encoding::{self, encoded_word},
ascii, ascii,
encoding::{self, encoded_word},
quoted::{quoted_string, QuotedString},
whitespace::{fws, is_obs_no_ws_ctl},
words::{atom, is_vchar, mime_atom},
}; };
#[derive(Debug, PartialEq, Default)] #[derive(Debug, PartialEq, Default)]
@ -36,7 +36,10 @@ impl<'a> MIMEWord<'a> {
pub fn to_string(&self) -> String { pub fn to_string(&self) -> String {
match self { match self {
Self::Quoted(v) => v.to_string(), Self::Quoted(v) => v.to_string(),
Self::Atom(v) => encoding_rs::UTF_8.decode_without_bom_handling(v).0.to_string(), Self::Atom(v) => encoding_rs::UTF_8
.decode_without_bom_handling(v)
.0
.to_string(),
} }
} }
} }
@ -59,7 +62,10 @@ impl<'a> Word<'a> {
match self { match self {
Word::Quoted(v) => v.to_string(), Word::Quoted(v) => v.to_string(),
Word::Encoded(v) => v.to_string(), Word::Encoded(v) => v.to_string(),
Word::Atom(v) => encoding_rs::UTF_8.decode_without_bom_handling(v).0.to_string(), Word::Atom(v) => encoding_rs::UTF_8
.decode_without_bom_handling(v)
.0
.to_string(),
} }
} }
} }
@ -73,7 +79,7 @@ pub fn word(input: &[u8]) -> IResult<&[u8], Word> {
alt(( alt((
map(quoted_string, |v| Word::Quoted(v)), map(quoted_string, |v| Word::Quoted(v)),
map(encoded_word, |v| Word::Encoded(v)), map(encoded_word, |v| Word::Encoded(v)),
map(atom, |v| Word::Atom(v)) map(atom, |v| Word::Atom(v)),
))(input) ))(input)
} }
@ -82,7 +88,11 @@ pub struct Phrase<'a>(pub Vec<Word<'a>>);
impl<'a> Phrase<'a> { impl<'a> Phrase<'a> {
pub fn to_string(&self) -> String { pub fn to_string(&self) -> String {
self.0.iter().map(|v| v.to_string()).collect::<Vec<String>>().join(" ") self.0
.iter()
.map(|v| v.to_string())
.collect::<Vec<String>>()
.join(" ")
} }
} }
@ -117,7 +127,10 @@ impl<'a> UnstrToken<'a> {
match self { match self {
UnstrToken::Init => "".into(), UnstrToken::Init => "".into(),
UnstrToken::Encoded(e) => e.to_string(), UnstrToken::Encoded(e) => e.to_string(),
UnstrToken::Plain(e) => encoding_rs::UTF_8.decode_without_bom_handling(e).0.into_owned(), UnstrToken::Plain(e) => encoding_rs::UTF_8
.decode_without_bom_handling(e)
.0
.into_owned(),
} }
} }
} }
@ -127,21 +140,26 @@ pub struct Unstructured<'a>(pub Vec<UnstrToken<'a>>);
impl<'a> Unstructured<'a> { impl<'a> Unstructured<'a> {
pub fn to_string(&self) -> String { pub fn to_string(&self) -> String {
self.0.iter().fold( self.0
.iter()
.fold(
(&UnstrToken::Init, String::new()), (&UnstrToken::Init, String::new()),
|(prev_token, mut result), current_token| { |(prev_token, mut result), current_token| {
match (prev_token, current_token) { match (prev_token, current_token) {
(UnstrToken::Init, v) => result.push_str(v.to_string().as_ref()), (UnstrToken::Init, v) => result.push_str(v.to_string().as_ref()),
(UnstrToken::Encoded(_), UnstrToken::Encoded(v)) => result.push_str(v.to_string().as_ref()), (UnstrToken::Encoded(_), UnstrToken::Encoded(v)) => {
result.push_str(v.to_string().as_ref())
}
(_, v) => { (_, v) => {
result.push(' '); result.push(' ');
result.push_str(v.to_string().as_ref()) result.push_str(v.to_string().as_ref())
}, }
}; };
(current_token, result) (current_token, result)
} },
).1 )
.1
} }
} }
@ -151,16 +169,18 @@ impl<'a> Unstructured<'a> {
/// unstructured = (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct /// unstructured = (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct
/// ``` /// ```
pub fn unstructured(input: &[u8]) -> IResult<&[u8], Unstructured> { pub fn unstructured(input: &[u8]) -> IResult<&[u8], Unstructured> {
let (input, r) = many0(preceded(opt(fws), alt(( let (input, r) = many0(preceded(
opt(fws),
alt((
map(encoded_word, |v| UnstrToken::Encoded(v)), map(encoded_word, |v| UnstrToken::Encoded(v)),
map(take_while1(is_unstructured), |v| UnstrToken::Plain(v)), map(take_while1(is_unstructured), |v| UnstrToken::Plain(v)),
))))(input)?; )),
))(input)?;
let (input, _) = space0(input)?; let (input, _) = space0(input)?;
Ok((input, Unstructured(r))) Ok((input, Unstructured(r)))
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;

View file

@ -1,7 +1,7 @@
pub mod ascii; pub mod ascii;
pub mod boundary;
pub mod encoding; pub mod encoding;
pub mod misc_token; pub mod misc_token;
pub mod quoted; pub mod quoted;
pub mod whitespace; pub mod whitespace;
pub mod words; pub mod words;
pub mod boundary;

View file

@ -1,14 +1,14 @@
use nom::{ use nom::{
branch::alt, branch::alt,
bytes::complete::{take_while1, take, tag}, bytes::complete::{tag, take, take_while1},
combinator::{opt}, combinator::opt,
multi::many0, multi::many0,
sequence::{pair, preceded}, sequence::{pair, preceded},
IResult, IResult,
}; };
use crate::text::whitespace::{cfws, fws, is_obs_no_ws_ctl};
use crate::text::ascii; use crate::text::ascii;
use crate::text::whitespace::{cfws, fws, is_obs_no_ws_ctl};
#[derive(Debug, PartialEq, Default)] #[derive(Debug, PartialEq, Default)]
pub struct QuotedString<'a>(pub Vec<&'a [u8]>); pub struct QuotedString<'a>(pub Vec<&'a [u8]>);
@ -22,14 +22,13 @@ impl<'a> QuotedString<'a> {
let enc = encoding_rs::UTF_8; let enc = encoding_rs::UTF_8;
let size = self.0.iter().fold(0, |acc, v| acc + v.len()); let size = self.0.iter().fold(0, |acc, v| acc + v.len());
self.0.iter().fold( self.0
String::with_capacity(size), .iter()
|mut acc, v| { .fold(String::with_capacity(size), |mut acc, v| {
let (content, _) = enc.decode_without_bom_handling(v); let (content, _) = enc.decode_without_bom_handling(v);
acc.push_str(content.as_ref()); acc.push_str(content.as_ref());
acc acc
}, })
)
} }
} }
@ -43,8 +42,6 @@ pub fn quoted_pair(input: &[u8]) -> IResult<&[u8], &[u8]> {
preceded(tag(&[ascii::BACKSLASH]), take(1usize))(input) preceded(tag(&[ascii::BACKSLASH]), take(1usize))(input)
} }
/// Allowed characters in quote /// Allowed characters in quote
/// ///
/// ```abnf /// ```abnf
@ -54,7 +51,9 @@ pub fn quoted_pair(input: &[u8]) -> IResult<&[u8], &[u8]> {
/// obs-qtext /// obs-qtext
/// ``` /// ```
fn is_restr_qtext(c: u8) -> bool { fn is_restr_qtext(c: u8) -> bool {
c == ascii::EXCLAMATION || (c >= ascii::NUM && c <= ascii::LEFT_BRACKET) || (c >= ascii::RIGHT_BRACKET && c <= ascii::TILDE) c == ascii::EXCLAMATION
|| (c >= ascii::NUM && c <= ascii::LEFT_BRACKET)
|| (c >= ascii::RIGHT_BRACKET && c <= ascii::TILDE)
} }
fn is_qtext(c: u8) -> bool { fn is_qtext(c: u8) -> bool {
@ -116,7 +115,10 @@ mod tests {
assert_eq!( assert_eq!(
quoted_string(b"\"hello\r\n world\""), quoted_string(b"\"hello\r\n world\""),
Ok((&b""[..], QuotedString(vec![b"hello", &[ascii::SP], b"world"]))), Ok((
&b""[..],
QuotedString(vec![b"hello", &[ascii::SP], b"world"])
)),
); );
} }

View file

@ -1,3 +1,6 @@
use crate::text::ascii;
use crate::text::encoding::encoded_word;
use crate::text::quoted::quoted_pair;
use nom::{ use nom::{
branch::alt, branch::alt,
bytes::complete::{is_not, tag, take_while1}, bytes::complete::{is_not, tag, take_while1},
@ -7,9 +10,6 @@ use nom::{
sequence::{pair, tuple}, sequence::{pair, tuple},
IResult, IResult,
}; };
use crate::text::encoding::encoded_word;
use crate::text::quoted::quoted_pair;
use crate::text::ascii;
/// Whitespace (space, new line, tab) content and /// Whitespace (space, new line, tab) content and
/// delimited content (eg. comment, line, sections, etc.) /// delimited content (eg. comment, line, sections, etc.)
@ -37,10 +37,7 @@ pub fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
pub fn foldable_line(input: &[u8]) -> IResult<&[u8], &[u8]> { pub fn foldable_line(input: &[u8]) -> IResult<&[u8], &[u8]> {
recognize(tuple(( recognize(tuple((
is_not(ascii::CRLF), is_not(ascii::CRLF),
many0(pair( many0(pair(many1(pair(obs_crlf, space1)), is_not(ascii::CRLF))),
many1(pair(obs_crlf, space1)),
is_not(ascii::CRLF),
)),
obs_crlf, obs_crlf,
)))(input) )))(input)
} }
@ -101,7 +98,12 @@ pub fn comment(input: &[u8]) -> IResult<&[u8], ()> {
} }
pub fn ccontent(input: &[u8]) -> IResult<&[u8], &[u8]> { pub fn ccontent(input: &[u8]) -> IResult<&[u8], &[u8]> {
alt((ctext, recognize(quoted_pair), recognize(encoded_word), recognize(comment)))(input) alt((
ctext,
recognize(quoted_pair),
recognize(encoded_word),
recognize(comment),
))(input)
} }
pub fn ctext(input: &[u8]) -> IResult<&[u8], &[u8]> { pub fn ctext(input: &[u8]) -> IResult<&[u8], &[u8]> {

View file

@ -1,5 +1,5 @@
use crate::text::whitespace::cfws;
use crate::text::ascii; use crate::text::ascii;
use crate::text::whitespace::cfws;
use nom::{ use nom::{
bytes::complete::{tag, take_while1}, bytes::complete::{tag, take_while1},
character::is_alphanumeric, character::is_alphanumeric,