diff --git a/src/fragments/address.rs b/src/fragments/address.rs index 75d2fca..16515b8 100644 --- a/src/fragments/address.rs +++ b/src/fragments/address.rs @@ -14,40 +14,40 @@ use crate::fragments::misc_token::phrase; use crate::fragments::whitespace::{cfws}; use crate::error::IMFError; -impl<'a> TryFrom> for MailboxRef { +impl<'a> TryFrom<&'a lazy::Mailbox<'a>> for MailboxRef { type Error = IMFError<'a>; - fn try_from(mx: lazy::Mailbox<'a>) -> Result { + fn try_from(mx: &'a lazy::Mailbox<'a>) -> Result { mailbox(mx.0) .map(|(_, m)| m) .map_err(|e| IMFError::Mailbox(e)) } } -impl<'a> TryFrom> for MailboxList { +impl<'a> TryFrom<&'a lazy::MailboxList<'a>> for MailboxList { type Error = IMFError<'a>; - fn try_from(ml: lazy::MailboxList<'a>) -> Result { + fn try_from(ml: &'a lazy::MailboxList<'a>) -> Result { mailbox_list(ml.0) .map(|(_, m)| m) .map_err(|e| IMFError::MailboxList(e)) } } -impl<'a> TryFrom> for AddressList { +impl<'a> TryFrom<&'a lazy::AddressList<'a>> for AddressList { type Error = IMFError<'a>; - fn try_from(al: lazy::AddressList<'a>) -> Result { + fn try_from(al: &'a lazy::AddressList<'a>) -> Result { address_list(al.0) .map(|(_, a)| a) .map_err(|e| IMFError::AddressList(e)) } } -impl<'a> TryFrom> for AddressList { +impl<'a> TryFrom<&'a lazy::NullableAddressList<'a>> for AddressList { type Error = IMFError<'a>; - fn try_from(nal: lazy::NullableAddressList<'a>) -> Result { + fn try_from(nal: &'a lazy::NullableAddressList<'a>) -> Result { opt(alt((address_list, address_list_cfws)))(nal.0) .map(|(_, a)| a.unwrap_or(vec![])) .map_err(|e| IMFError::NullableAddressList(e)) diff --git a/src/fragments/datetime.rs b/src/fragments/datetime.rs index 7af6589..34d68e4 100644 --- a/src/fragments/datetime.rs +++ b/src/fragments/datetime.rs @@ -18,10 +18,10 @@ use crate::error::IMFError; const MIN: i32 = 60; const HOUR: i32 = 60 * MIN; -impl<'a> TryFrom> for DateTime { +impl<'a> TryFrom<&'a lazy::DateTime<'a>> for DateTime { type Error = IMFError<'a>; - fn 
try_from(value: lazy::DateTime<'a>) -> Result { + fn try_from(value: &'a lazy::DateTime<'a>) -> Result { match section(value.0) { Ok((_, Some(dt))) => Ok(dt), Err(e) => Err(IMFError::DateTimeParse(e)), diff --git a/src/fragments/eager.rs b/src/fragments/eager.rs index 513990e..ce602ea 100644 --- a/src/fragments/eager.rs +++ b/src/fragments/eager.rs @@ -45,10 +45,10 @@ pub enum Field<'a> { } use Field::*; -impl<'a> TryFrom> for Field<'a> { +impl<'a> TryFrom<&'a Lazy<'a>> for Field<'a> { type Error = IMFError<'a>; - fn try_from(l: Lazy<'a>) -> Result { + fn try_from(l: &'a Lazy<'a>) -> Result { match l { Lazy::Date(v) => v.try_into().map(|v| Date(v)), Lazy::From(v) => v.try_into().map(|v| From(v)), diff --git a/src/fragments/identification.rs b/src/fragments/identification.rs index f8d51c6..42fa843 100644 --- a/src/fragments/identification.rs +++ b/src/fragments/identification.rs @@ -15,20 +15,20 @@ use crate::fragments::mailbox::is_dtext; use crate::fragments::model::{MessageId, MessageIdList}; use crate::error::IMFError; -impl<'a> TryFrom> for MessageId<'a> { +impl<'a> TryFrom<&'a lazy::Identifier<'a>> for MessageId<'a> { type Error = IMFError<'a>; - fn try_from(id: lazy::Identifier<'a>) -> Result { + fn try_from(id: &'a lazy::Identifier<'a>) -> Result { msg_id(id.0) .map(|(_, i)| i) .map_err(|e| IMFError::MessageID(e)) } } -impl<'a> TryFrom> for MessageIdList<'a> { +impl<'a> TryFrom<&'a lazy::IdentifierList<'a>> for MessageIdList<'a> { type Error = IMFError<'a>; - fn try_from(id: lazy::IdentifierList<'a>) -> Result { + fn try_from(id: &'a lazy::IdentifierList<'a>) -> Result { many1(msg_id)(id.0) .map(|(_, i)| i) .map_err(|e| IMFError::MessageIDList(e)) diff --git a/src/fragments/misc_token.rs b/src/fragments/misc_token.rs index a366c40..44ddd89 100644 --- a/src/fragments/misc_token.rs +++ b/src/fragments/misc_token.rs @@ -21,20 +21,20 @@ pub struct Unstructured(pub String); #[derive(Debug, PartialEq, Default)] pub struct PhraseList(pub Vec); -impl<'a> TryFrom> for 
Unstructured { +impl<'a> TryFrom<&'a lazy::Unstructured<'a>> for Unstructured { type Error = IMFError<'a>; - fn try_from(input: lazy::Unstructured<'a>) -> Result { + fn try_from(input: &'a lazy::Unstructured<'a>) -> Result { unstructured(input.0) .map(|(_, v)| Unstructured(v)) .map_err(|e| IMFError::Unstructured(e)) } } -impl<'a> TryFrom> for PhraseList { +impl<'a> TryFrom<&'a lazy::PhraseList<'a>> for PhraseList { type Error = IMFError<'a>; - fn try_from(p: lazy::PhraseList<'a>) -> Result { + fn try_from(p: &'a lazy::PhraseList<'a>) -> Result { separated_list1(tag(","), phrase)(p.0) .map(|(_, q)| PhraseList(q)) .map_err(|e| IMFError::PhraseList(e)) diff --git a/src/fragments/section.rs b/src/fragments/section.rs index 7f241a8..cff2938 100644 --- a/src/fragments/section.rs +++ b/src/fragments/section.rs @@ -12,45 +12,45 @@ use crate::fragments::lazy; #[derive(Debug, PartialEq, Default)] pub struct Section<'a> { // 3.6.1. The Origination Date Field - pub date: Option>, + pub date: Option<&'a DateTime>, // 3.6.2. Originator Fields - pub from: Vec, - pub sender: Option, - pub reply_to: Vec, + pub from: Vec<&'a MailboxRef>, + pub sender: Option<&'a MailboxRef>, + pub reply_to: Vec<&'a AddressRef>, // 3.6.3. Destination Address Fields - pub to: Vec, - pub cc: Vec, - pub bcc: Vec, + pub to: Vec<&'a AddressRef>, + pub cc: Vec<&'a AddressRef>, + pub bcc: Vec<&'a AddressRef>, // 3.6.4. Identification Fields - pub msg_id: Option>, - pub in_reply_to: Vec>, - pub references: Vec>, + pub msg_id: Option<&'a MessageId<'a>>, + pub in_reply_to: Vec<&'a MessageId<'a>>, + pub references: Vec<&'a MessageId<'a>>, // 3.6.5. 
Informational Fields - pub subject: Option, - pub comments: Vec, - pub keywords: Vec, + pub subject: Option<&'a Unstructured>, + pub comments: Vec<&'a Unstructured>, + pub keywords: Vec<&'a PhraseList>, // 3.6.6 Not implemented // 3.6.7 Trace Fields - pub return_path: Vec, - pub received: Vec>, + pub return_path: Vec<&'a MailboxRef>, + pub received: Vec<&'a ReceivedLog<'a>>, // 3.6.8. Optional Fields - pub optional: HashMap<&'a str, Unstructured>, + pub optional: HashMap<&'a str, &'a Unstructured>, // Recovery - pub bad_fields: Vec>, + pub bad_fields: Vec<&'a lazy::Field<'a>>, pub unparsed: Vec<&'a str>, } //@FIXME min and max limits are not enforced, // it may result in missing data or silently overriden data. -impl<'a> FromIterator> for Section<'a> { - fn from_iter>>(iter: I) -> Self { +impl<'a> FromIterator<&'a Field<'a>> for Section<'a> { + fn from_iter>>(iter: I) -> Self { let mut section = Section::default(); for field in iter { match field { diff --git a/src/fragments/trace.rs b/src/fragments/trace.rs index 239da67..d15dfd9 100644 --- a/src/fragments/trace.rs +++ b/src/fragments/trace.rs @@ -14,10 +14,10 @@ use crate::error::IMFError; #[derive(Debug, PartialEq)] pub struct ReceivedLog<'a>(pub &'a str); -impl<'a> TryFrom> for ReceivedLog<'a> { +impl<'a> TryFrom<&'a lazy::ReceivedLog<'a>> for ReceivedLog<'a> { type Error = IMFError<'a>; - fn try_from(input: lazy::ReceivedLog<'a>) -> Result { + fn try_from(input: &'a lazy::ReceivedLog<'a>) -> Result { received_body(input.0) .map_err(|e| IMFError::ReceivedLog(e)) .map(|(_, v)| ReceivedLog(v)) diff --git a/src/multipass/extract_fields.rs b/src/multipass/extract_fields.rs index b34a51d..a413ef8 100644 --- a/src/multipass/extract_fields.rs +++ b/src/multipass/extract_fields.rs @@ -8,23 +8,26 @@ use nom::{ sequence::{pair, tuple}, }; -use crate::multipass::guess_charset::GuessCharset; use crate::error::IMFError; use crate::fragments::whitespace; +use crate::multipass::guess_charset; +use 
crate::multipass::field_lazy; #[derive(Debug, PartialEq)] -pub struct ExtractFields<'a> { +pub struct Parsed<'a> { pub fields: Vec<&'a str>, pub body: &'a [u8], } -impl<'a> TryFrom<&'a GuessCharset<'a>> for ExtractFields<'a> { - type Error = IMFError<'a>; +pub fn new<'a>(gcha: &'a guess_charset::Parsed<'a>) -> Result<Parsed<'a>, IMFError<'a>> { + all_consuming(many0(foldable_line))(&gcha.header) + .map_err(|e| IMFError::ExtractFields(e)) + .map(|(_, fields)| Parsed { fields, body: gcha.body }) +} - fn try_from(gcha: &'a GuessCharset<'a>) -> Result<Self, Self::Error> { - all_consuming(many0(foldable_line))(&gcha.header) - .map_err(|e| IMFError::ExtractFields(e)) - .map(|(_, fields)| ExtractFields { fields, body: gcha.body }) +impl<'a> Parsed<'a> { + pub fn names(&'a self) -> field_lazy::Parsed<'a> { + field_lazy::new(self) } } @@ -48,13 +51,13 @@ mod tests { #[test] fn test_extract() { assert_eq!( - ExtractFields::try_from(&GuessCharset { + new(&guess_charset::Parsed { header: "From: hello@world.com,\r\n\talice@wonderlands.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n".into(), encoding: encoding_rs::UTF_8, malformed: false, body: b"Hello world!", }), - Ok(ExtractFields { + Ok(Parsed { fields: vec![ "From: hello@world.com,\r\n\talice@wonderlands.com\r\n", "Date: 12 Mar 1997 07:33:25 Z\r\n", diff --git a/src/multipass/field_eager.rs b/src/multipass/field_eager.rs index 6cc8a3f..37f8a08 100644 --- a/src/multipass/field_eager.rs +++ b/src/multipass/field_eager.rs @@ -1,5 +1,6 @@ use crate::fragments::eager; use crate::multipass::field_lazy; +use crate::multipass::header_section; #[derive(Debug, PartialEq)] pub struct Parsed<'a> { @@ -7,12 +8,19 @@ pub struct Parsed<'a> { pub body: &'a [u8], } -impl<'a> From <field_lazy::Parsed<'a>> for Parsed<'a> { - fn from(p: field_lazy::Parsed<'a>) -> Self { - Parsed { - fields: p.fields.into_iter().filter_map(|entry| entry.try_into().ok()).collect(), - body: p.body, - } +pub fn new<'a>(p: &'a field_lazy::Parsed<'a>) -> Parsed<'a> { + Parsed { + fields: p.fields + .iter() + 
.filter_map(|entry| entry.try_into().ok()) + .collect(), + body: p.body, + } +} + +impl<'a> Parsed<'a> { + pub fn section(&'a self) -> header_section::Parsed<'a> { + header_section::new(self) } } @@ -25,7 +33,7 @@ mod tests { #[test] fn test_field_body() { - assert_eq!(Parsed::from(field_lazy::Parsed { + assert_eq!(new(field_lazy::Parsed { fields: vec![ lazy::Field::From(lazy::MailboxList("hello@world.com,\r\n\talice@wonderlands.com\r\n")), lazy::Field::Date(lazy::DateTime("12 Mar 1997 07:33:25 Z\r\n")), diff --git a/src/multipass/field_lazy.rs b/src/multipass/field_lazy.rs index ba58ddf..b68b581 100644 --- a/src/multipass/field_lazy.rs +++ b/src/multipass/field_lazy.rs @@ -1,5 +1,6 @@ use crate::fragments::lazy; -use crate::multipass::extract_fields::ExtractFields; +use crate::multipass::extract_fields; +use crate::multipass::field_eager; #[derive(Debug, PartialEq)] pub struct Parsed<'a> { @@ -7,12 +8,16 @@ pub struct Parsed<'a> { pub body: &'a [u8], } -impl<'a> From <ExtractFields<'a>> for Parsed<'a> { - fn from(ef: ExtractFields<'a>) -> Self { - Parsed { - fields: ef.fields.iter().map(|e| (*e).into()).collect(), - body: ef.body, - } +pub fn new<'a>(ef: &'a extract_fields::Parsed<'a>) -> Parsed<'a> { + Parsed { + fields: ef.fields.iter().map(|e| (*e).into()).collect(), + body: ef.body, + } +} + +impl<'a> Parsed<'a> { + pub fn body(&'a self) -> field_eager::Parsed<'a> { + field_eager::new(self) } } @@ -22,7 +27,7 @@ mod tests { #[test] fn test_field_name() { - assert_eq!(Parsed::from(ExtractFields { + assert_eq!(new(extract_fields::Parsed { fields: vec![ "From: hello@world.com,\r\n\talice@wonderlands.com\r\n", "Date: 12 Mar 1997 07:33:25 Z\r\n", diff --git a/src/multipass/guess_charset.rs b/src/multipass/guess_charset.rs index 4d1c237..465e682 100644 --- a/src/multipass/guess_charset.rs +++ b/src/multipass/guess_charset.rs @@ -2,10 +2,12 @@ use std::borrow::Cow; use chardetng::EncodingDetector; use encoding_rs::Encoding; -use crate::multipass::segment::Segment; +use 
crate::error::IMFError; +use crate::multipass::segment; +use crate::multipass::extract_fields; #[derive(Debug, PartialEq)] -pub struct GuessCharset<'a> { +pub struct Parsed<'a> { pub header: Cow<'a, str>, pub encoding: &'static Encoding, pub malformed: bool, @@ -16,17 +18,25 @@ const IS_LAST_BUFFER: bool = true; const ALLOW_UTF8: bool = true; const NO_TLD: Option<&[u8]> = None; -impl<'a> From<Segment<'a>> for GuessCharset<'a> { - fn from(seg: Segment<'a>) -> Self { - // Create detector - let mut detector = EncodingDetector::new(); - detector.feed(&seg.header, IS_LAST_BUFFER); +pub fn new<'a>(seg: &'a segment::Parsed<'a>) -> Parsed<'a> { + // Create detector + let mut detector = EncodingDetector::new(); + detector.feed(&seg.header, IS_LAST_BUFFER); - // Get encoding - let enc: &Encoding = detector.guess(NO_TLD, ALLOW_UTF8); - let (header, encoding, malformed) = enc.decode(&seg.header); + // Get encoding + let enc: &Encoding = detector.guess(NO_TLD, ALLOW_UTF8); + let (header, encoding, malformed) = enc.decode(&seg.header); + Parsed { + header, + encoding, + malformed, + body: seg.body + } +} - GuessCharset { header, encoding, malformed, body: seg.body } +impl<'a> Parsed<'a> { + pub fn fields(&'a self) -> Result<extract_fields::Parsed<'a>, IMFError<'a>> { + extract_fields::new(self) } } @@ -37,11 +47,12 @@ mod tests { #[test] fn test_charset() { assert_eq!( - GuessCharset::from(Segment { - body: b"Hello world!", - header: b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n", - }), - GuessCharset { + new(&segment::Parsed { + body: b"Hello world!", + header: b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n", + } + ), + Parsed { header: "From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n".into(), encoding: encoding_rs::UTF_8, malformed: false, diff --git a/src/multipass/header_section.rs b/src/multipass/header_section.rs index e76224d..7f716a3 100644 --- a/src/multipass/header_section.rs +++ b/src/multipass/header_section.rs @@ -7,12 +7,10 @@ pub struct Parsed<'a> { pub body: &'a 
[u8], } -impl<'a> From<field_eager::Parsed<'a>> for Parsed<'a> { - fn from(p: field_eager::Parsed<'a>) -> Self { - Parsed { - fields: Section::from_iter(p.fields.into_iter()), - body: p.body, - } +pub fn new<'a>(p: &'a field_eager::Parsed<'a>) -> Parsed<'a> { + Parsed { + fields: Section::from_iter(p.fields.iter()), + body: p.body, } } @@ -25,7 +23,7 @@ mod tests { #[test] fn test_section() { - assert_eq!(Parsed::from(field_eager::Parsed { + assert_eq!(new(&field_eager::Parsed { fields: vec![ eager::Field::From(vec![ model::MailboxRef { diff --git a/src/multipass/segment.rs b/src/multipass/segment.rs index 6478e8d..ef72777 100644 --- a/src/multipass/segment.rs +++ b/src/multipass/segment.rs @@ -8,40 +8,43 @@ use nom::{ multi::many0, }; +use crate::multipass::guess_charset; use crate::error::IMFError; #[derive(Debug, PartialEq)] -pub struct Segment<'a> { +pub struct Parsed<'a> { pub header: &'a [u8], pub body: &'a [u8], } -const cr: u8 = 0x0D; -const lf: u8 = 0x0A; -const crlf: &[u8] = &[cr, lf]; +const CR: u8 = 0x0D; +const LF: u8 = 0x0A; +const CRLF: &[u8] = &[CR, LF]; -impl<'a> TryFrom<&'a [u8]> for Segment<'a> { - type Error = IMFError<'a>; +pub fn new<'a>(buffer: &'a [u8]) -> Result<Parsed<'a>, IMFError<'a>> { + terminated( + recognize(many0(line)), + obs_crlf + )(buffer) + .map_err(|e| IMFError::Segment(e)) + .map(|(body, header)| Parsed { header, body }) +} - fn try_from(buffer: &'a [u8]) -> Result<Self, Self::Error> { - terminated( - recognize(many0(line)), - obs_crlf - )(buffer) - .map_err(|e| IMFError::Segment(e)) - .map(|(body, header)| Segment { header, body }) +impl<'a> Parsed<'a> { + pub fn charset(&'a self) -> guess_charset::Parsed<'a> { + guess_charset::new(self) } } fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> { pair( - is_not(crlf), + is_not(CRLF), obs_crlf, )(input) } fn obs_crlf(input: &[u8]) -> IResult<&[u8], &[u8]> { - alt((tag(crlf), tag(&[cr]), tag(&[lf])))(input) + alt((tag(CRLF), tag(&[CR]), tag(&[LF])))(input) } #[cfg(test)] @@ -51,10 +54,10 @@ mod tests { #[test] fn test_segment() 
{ assert_eq!( - Segment::try_from(&b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n\r\nHello world!"[..]), - Ok(Segment { - body: b"Hello world!", + new(&b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n\r\nHello world!"[..]), + Ok(Parsed { header: b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n", + body: b"Hello world!", }) ); } diff --git a/src/parse.rs b/src/parse.rs index 01f6bd7..c9d7031 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -1,7 +1,25 @@ -use imf_codec::multipass; +use imf_codec::multipass::{ + segment, + guess_charset, + field_lazy, + field_eager, + header_section +}; +use imf_codec::fragments::section::Section; use std::io; use std::io::Read; +fn parser<'a, F>(input: &'a [u8], func: F) -> () +where F: FnOnce(&Section) -> () { + let seg = segment::new(input).unwrap(); + let charset = seg.charset(); + let fields = charset.fields().unwrap(); + let field_names = fields.names(); + let field_body = field_names.body(); + let section = field_body.section(); + + func(&section.fields); +} fn main() { // Read full mail in memory @@ -9,21 +27,11 @@ fn main() { io::stdin().lock().read_to_end(&mut rawmail).unwrap(); // Parse it - let segment = multipass::segment::Segment::try_from(&rawmail[..]).unwrap(); - let charng = multipass::guess_charset::GuessCharset::from(segment); - let extr = multipass::extract_fields::ExtractFields::try_from(&charng).unwrap(); - let lazy = multipass::field_lazy::Parsed::from(extr); - let eager = multipass::field_eager::Parsed::from(lazy); - let section = multipass::header_section::Parsed::from(eager); - //let section: multipass::header_section::Parsed = rawmail.as_ref().into(); - //let (email, encoding, malformed) = header::from_bytes(&rawmail); - //println!("Encoding: {:?}, Malformed: {:?}", encoding, malformed); - - //let (input, hdrs) = header::section(&email).unwrap(); - - // Checks/debug - println!("{:?}", section); - //assert!(hdrs.date.is_some()); - //assert!(hdrs.from.len() > 0); - 
//assert!(hdrs.bad_fields.len() == 0); + parser(&rawmail[..], |section| { + // Checks/debug + println!("{:?}", section); + //assert!(hdrs.date.is_some()); + //assert!(hdrs.from.len() > 0); + //assert!(hdrs.bad_fields.len() == 0); + }); }