refactor parser!

This commit is contained in:
Quentin 2023-06-22 10:48:07 +02:00
parent 34334398d8
commit 9d14868491
Signed by: quentin
GPG key ID: E9602264D639FF68
14 changed files with 161 additions and 125 deletions

View file

@ -14,40 +14,40 @@ use crate::fragments::misc_token::phrase;
use crate::fragments::whitespace::{cfws};
use crate::error::IMFError;
// NOTE(review): rendered diff with the +/- markers stripped. In each pair of
// near-identical lines the by-value form looks like the removed line and the
// `&'a` form like its replacement; confirm against the commit.
// Conversion that runs the `mailbox` parser over the wrapped field `.0`.
impl<'a> TryFrom<lazy::Mailbox<'a>> for MailboxRef {
impl<'a> TryFrom<&'a lazy::Mailbox<'a>> for MailboxRef {
type Error = IMFError<'a>;
fn try_from(mx: lazy::Mailbox<'a>) -> Result<Self, Self::Error> {
fn try_from(mx: &'a lazy::Mailbox<'a>) -> Result<Self, Self::Error> {
mailbox(mx.0)
// Keep only the second element of the parser's output tuple.
.map(|(_, m)| m)
// Report a parser failure as `IMFError::Mailbox`.
.map_err(|e| IMFError::Mailbox(e))
}
}
// NOTE(review): rendered diff with the +/- markers stripped. In each pair of
// near-identical lines the by-value form looks like the removed line and the
// `&'a` form like its replacement; confirm against the commit.
// Conversion that runs the `mailbox_list` parser over the wrapped field `.0`.
impl<'a> TryFrom<lazy::MailboxList<'a>> for MailboxList {
impl<'a> TryFrom<&'a lazy::MailboxList<'a>> for MailboxList {
type Error = IMFError<'a>;
fn try_from(ml: lazy::MailboxList<'a>) -> Result<Self, Self::Error> {
fn try_from(ml: &'a lazy::MailboxList<'a>) -> Result<Self, Self::Error> {
mailbox_list(ml.0)
// Keep only the second element of the parser's output tuple.
.map(|(_, m)| m)
// Report a parser failure as `IMFError::MailboxList`.
.map_err(|e| IMFError::MailboxList(e))
}
}
// NOTE(review): rendered diff with the +/- markers stripped. In each pair of
// near-identical lines the by-value form looks like the removed line and the
// `&'a` form like its replacement; confirm against the commit.
// Conversion that runs the `address_list` parser over the wrapped field `.0`.
impl<'a> TryFrom<lazy::AddressList<'a>> for AddressList {
impl<'a> TryFrom<&'a lazy::AddressList<'a>> for AddressList {
type Error = IMFError<'a>;
fn try_from(al: lazy::AddressList<'a>) -> Result<Self, Self::Error> {
fn try_from(al: &'a lazy::AddressList<'a>) -> Result<Self, Self::Error> {
address_list(al.0)
// Keep only the second element of the parser's output tuple.
.map(|(_, a)| a)
// Report a parser failure as `IMFError::AddressList`.
.map_err(|e| IMFError::AddressList(e))
}
}
impl<'a> TryFrom<lazy::NullableAddressList<'a>> for AddressList {
impl<'a> TryFrom<&'a lazy::NullableAddressList<'a>> for AddressList {
type Error = IMFError<'a>;
fn try_from(nal: lazy::NullableAddressList<'a>) -> Result<Self, Self::Error> {
fn try_from(nal: &'a lazy::NullableAddressList<'a>) -> Result<Self, Self::Error> {
opt(alt((address_list, address_list_cfws)))(nal.0)
.map(|(_, a)| a.unwrap_or(vec![]))
.map_err(|e| IMFError::NullableAddressList(e))

View file

@ -18,10 +18,10 @@ use crate::error::IMFError;
// MIN is 60 and HOUR is 60 * MIN (3600). Presumably these are second counts
// used when building time-zone offsets — TODO confirm against the date parser.
const MIN: i32 = 60;
const HOUR: i32 = 60 * MIN;
impl<'a> TryFrom<lazy::DateTime<'a>> for DateTime<FixedOffset> {
impl<'a> TryFrom<&'a lazy::DateTime<'a>> for DateTime<FixedOffset> {
type Error = IMFError<'a>;
fn try_from(value: lazy::DateTime<'a>) -> Result<Self, Self::Error> {
fn try_from(value: &'a lazy::DateTime<'a>) -> Result<Self, Self::Error> {
match section(value.0) {
Ok((_, Some(dt))) => Ok(dt),
Err(e) => Err(IMFError::DateTimeParse(e)),

View file

@ -45,10 +45,10 @@ pub enum Field<'a> {
}
use Field::*;
impl<'a> TryFrom<Lazy<'a>> for Field<'a> {
impl<'a> TryFrom<&'a Lazy<'a>> for Field<'a> {
type Error = IMFError<'a>;
fn try_from(l: Lazy<'a>) -> Result<Self, Self::Error> {
fn try_from(l: &'a Lazy<'a>) -> Result<Self, Self::Error> {
match l {
Lazy::Date(v) => v.try_into().map(|v| Date(v)),
Lazy::From(v) => v.try_into().map(|v| From(v)),

View file

@ -15,20 +15,20 @@ use crate::fragments::mailbox::is_dtext;
use crate::fragments::model::{MessageId, MessageIdList};
use crate::error::IMFError;
// NOTE(review): rendered diff with the +/- markers stripped. In each pair of
// near-identical lines the by-value form looks like the removed line and the
// `&'a` form like its replacement; confirm against the commit.
// Conversion that runs the `msg_id` parser over the wrapped field `.0`.
impl<'a> TryFrom<lazy::Identifier<'a>> for MessageId<'a> {
impl<'a> TryFrom<&'a lazy::Identifier<'a>> for MessageId<'a> {
type Error = IMFError<'a>;
fn try_from(id: lazy::Identifier<'a>) -> Result<Self, Self::Error> {
fn try_from(id: &'a lazy::Identifier<'a>) -> Result<Self, Self::Error> {
msg_id(id.0)
// Keep only the second element of the parser's output tuple.
.map(|(_, i)| i)
// Report a parser failure as `IMFError::MessageID`.
.map_err(|e| IMFError::MessageID(e))
}
}
impl<'a> TryFrom<lazy::IdentifierList<'a>> for MessageIdList<'a> {
impl<'a> TryFrom<&'a lazy::IdentifierList<'a>> for MessageIdList<'a> {
type Error = IMFError<'a>;
fn try_from(id: lazy::IdentifierList<'a>) -> Result<Self, Self::Error> {
fn try_from(id: &'a lazy::IdentifierList<'a>) -> Result<Self, Self::Error> {
many1(msg_id)(id.0)
.map(|(_, i)| i)
.map_err(|e| IMFError::MessageIDList(e))

View file

@ -21,20 +21,20 @@ pub struct Unstructured(pub String);
// Newtype around an owned `Vec<String>`; its only (public) field is the list itself.
#[derive(Debug, PartialEq, Default)]
pub struct PhraseList(pub Vec<String>);
// NOTE(review): rendered diff with the +/- markers stripped. In each pair of
// near-identical lines the by-value form looks like the removed line and the
// `&'a` form like its replacement; confirm against the commit.
// Conversion that runs the `unstructured` parser over the wrapped field `.0`.
impl<'a> TryFrom<lazy::Unstructured<'a>> for Unstructured {
impl<'a> TryFrom<&'a lazy::Unstructured<'a>> for Unstructured {
type Error = IMFError<'a>;
fn try_from(input: lazy::Unstructured<'a>) -> Result<Self, Self::Error> {
fn try_from(input: &'a lazy::Unstructured<'a>) -> Result<Self, Self::Error> {
unstructured(input.0)
// Wrap the second element of the parser's output tuple in `Unstructured`.
.map(|(_, v)| Unstructured(v))
// Report a parser failure as `IMFError::Unstructured`.
.map_err(|e| IMFError::Unstructured(e))
}
}
impl<'a> TryFrom<lazy::PhraseList<'a>> for PhraseList {
impl<'a> TryFrom<&'a lazy::PhraseList<'a>> for PhraseList {
type Error = IMFError<'a>;
fn try_from(p: lazy::PhraseList<'a>) -> Result<Self, Self::Error> {
fn try_from(p: &'a lazy::PhraseList<'a>) -> Result<Self, Self::Error> {
separated_list1(tag(","), phrase)(p.0)
.map(|(_, q)| PhraseList(q))
.map_err(|e| IMFError::PhraseList(e))

View file

@ -12,45 +12,45 @@ use crate::fragments::lazy;
// Collected header fields of one message, grouped under the numbered headings
// quoted in the comments below (presumably RFC 5322 section numbers — confirm).
// Single-valued fields are `Option`s, repeatable ones are `Vec`s, and the
// optional fields are kept in a map keyed by `&'a str`.
// NOTE(review): rendered diff with the +/- markers stripped. Each group of
// fields appears twice: first holding owned values, then holding `&'a`
// references. The owned form looks like the removed version; confirm against
// the commit.
#[derive(Debug, PartialEq, Default)]
pub struct Section<'a> {
// 3.6.1. The Origination Date Field
pub date: Option<DateTime<FixedOffset>>,
pub date: Option<&'a DateTime<FixedOffset>>,
// 3.6.2. Originator Fields
pub from: Vec<MailboxRef>,
pub sender: Option<MailboxRef>,
pub reply_to: Vec<AddressRef>,
pub from: Vec<&'a MailboxRef>,
pub sender: Option<&'a MailboxRef>,
pub reply_to: Vec<&'a AddressRef>,
// 3.6.3. Destination Address Fields
pub to: Vec<AddressRef>,
pub cc: Vec<AddressRef>,
pub bcc: Vec<AddressRef>,
pub to: Vec<&'a AddressRef>,
pub cc: Vec<&'a AddressRef>,
pub bcc: Vec<&'a AddressRef>,
// 3.6.4. Identification Fields
pub msg_id: Option<MessageId<'a>>,
pub in_reply_to: Vec<MessageId<'a>>,
pub references: Vec<MessageId<'a>>,
pub msg_id: Option<&'a MessageId<'a>>,
pub in_reply_to: Vec<&'a MessageId<'a>>,
pub references: Vec<&'a MessageId<'a>>,
// 3.6.5. Informational Fields
pub subject: Option<Unstructured>,
pub comments: Vec<Unstructured>,
pub keywords: Vec<PhraseList>,
pub subject: Option<&'a Unstructured>,
pub comments: Vec<&'a Unstructured>,
pub keywords: Vec<&'a PhraseList>,
// 3.6.6 Not implemented
// 3.6.7 Trace Fields
pub return_path: Vec<MailboxRef>,
pub received: Vec<ReceivedLog<'a>>,
pub return_path: Vec<&'a MailboxRef>,
pub received: Vec<&'a ReceivedLog<'a>>,
// 3.6.8. Optional Fields
pub optional: HashMap<&'a str, Unstructured>,
pub optional: HashMap<&'a str, &'a Unstructured>,
// Recovery
pub bad_fields: Vec<lazy::Field<'a>>,
pub bad_fields: Vec<&'a lazy::Field<'a>>,
pub unparsed: Vec<&'a str>,
}
//@FIXME min and max limits are not enforced,
// which may result in missing data or silently overridden data.
impl<'a> FromIterator<Field<'a>> for Section<'a> {
fn from_iter<I: IntoIterator<Item=Field<'a>>>(iter: I) -> Self {
impl<'a> FromIterator<&'a Field<'a>> for Section<'a> {
fn from_iter<I: IntoIterator<Item=&'a Field<'a>>>(iter: I) -> Self {
let mut section = Section::default();
for field in iter {
match field {

View file

@ -14,10 +14,10 @@ use crate::error::IMFError;
// Newtype around a borrowed `&'a str`; its only (public) field is that slice.
#[derive(Debug, PartialEq)]
pub struct ReceivedLog<'a>(pub &'a str);
impl<'a> TryFrom<lazy::ReceivedLog<'a>> for ReceivedLog<'a> {
impl<'a> TryFrom<&'a lazy::ReceivedLog<'a>> for ReceivedLog<'a> {
type Error = IMFError<'a>;
fn try_from(input: lazy::ReceivedLog<'a>) -> Result<Self, Self::Error> {
fn try_from(input: &'a lazy::ReceivedLog<'a>) -> Result<Self, Self::Error> {
received_body(input.0)
.map_err(|e| IMFError::ReceivedLog(e))
.map(|(_, v)| ReceivedLog(v))

View file

@ -8,23 +8,26 @@ use nom::{
sequence::{pair, tuple},
};
use crate::multipass::guess_charset::GuessCharset;
use crate::error::IMFError;
use crate::fragments::whitespace;
use crate::multipass::guess_charset;
use crate::multipass::field_lazy;
// Result of the field-extraction pass: `fields` holds borrowed string slices
// (one per parsed line) and `body` the borrowed body bytes.
// NOTE(review): rendered diff with the +/- markers stripped. `ExtractFields`
// looks like the removed struct name and `Parsed` like its replacement;
// confirm against the commit.
#[derive(Debug, PartialEq)]
pub struct ExtractFields<'a> {
pub struct Parsed<'a> {
pub fields: Vec<&'a str>,
pub body: &'a [u8],
}
// NOTE(review): rendered diff with the +/- markers stripped. The
// `impl TryFrom` / `try_from` lines look like the removed form and
// `pub fn new` like its replacement; confirm against the commit.
// Applies `foldable_line` repeatedly to `gcha.header` and stores the results as
// `fields`; `body` is copied over from `gcha.body`.
// Any parser error is returned as `IMFError::ExtractFields`.
impl<'a> TryFrom<&'a GuessCharset<'a>> for ExtractFields<'a> {
type Error = IMFError<'a>;
fn try_from(gcha: &'a GuessCharset<'a>) -> Result<Self, Self::Error> {
pub fn new<'a>(gcha: &'a guess_charset::Parsed<'a>) -> Result<Parsed<'a>, IMFError<'a>> {
// `all_consuming` presumably rejects a header with trailing input that
// `foldable_line` cannot match (nom semantics — confirm).
all_consuming(many0(foldable_line))(&gcha.header)
.map_err(|e| IMFError::ExtractFields(e))
.map(|(_, fields)| ExtractFields { fields, body: gcha.body })
.map(|(_, fields)| Parsed { fields, body: gcha.body })
}
impl<'a> Parsed<'a> {
// Convenience method: delegates to `field_lazy::new(self)`.
pub fn names(&'a self) -> field_lazy::Parsed<'a> {
field_lazy::new(self)
}
}
@ -48,13 +51,13 @@ mod tests {
#[test]
fn test_extract() {
assert_eq!(
ExtractFields::try_from(&GuessCharset {
new(&guess_charset::Parsed {
header: "From: hello@world.com,\r\n\talice@wonderlands.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n".into(),
encoding: encoding_rs::UTF_8,
malformed: false,
body: b"Hello world!",
}),
Ok(ExtractFields {
Ok(Parsed {
fields: vec![
"From: hello@world.com,\r\n\talice@wonderlands.com\r\n",
"Date: 12 Mar 1997 07:33:25 Z\r\n",

View file

@ -1,5 +1,6 @@
use crate::fragments::eager;
use crate::multipass::field_lazy;
use crate::multipass::header_section;
#[derive(Debug, PartialEq)]
pub struct Parsed<'a> {
@ -7,12 +8,19 @@ pub struct Parsed<'a> {
pub body: &'a [u8],
}
// NOTE(review): rendered diff with the +/- markers stripped. The
// `impl From` / `fn from` lines and the one-line `into_iter()` chain look like
// the removed form; `pub fn new` and the multi-line `iter()` chain look like
// the replacement. Confirm against the commit.
// Converts every entry of `p.fields` with `try_into`. Entries whose conversion
// fails are dropped without being reported (`.ok()` inside `filter_map`).
// `body` is copied over from `p.body`.
impl<'a> From <field_lazy::Parsed<'a>> for Parsed<'a> {
fn from(p: field_lazy::Parsed<'a>) -> Self {
pub fn new<'a>(p: &'a field_lazy::Parsed<'a>) -> Parsed<'a> {
Parsed {
fields: p.fields.into_iter().filter_map(|entry| entry.try_into().ok()).collect(),
fields: p.fields
.iter()
.filter_map(|entry| entry.try_into().ok())
.collect(),
body: p.body,
}
}
impl<'a> Parsed<'a> {
// Convenience method: delegates to `header_section::new(self)`.
pub fn section(&'a self) -> header_section::Parsed<'a> {
header_section::new(self)
}
}
@ -25,7 +33,7 @@ mod tests {
#[test]
fn test_field_body() {
assert_eq!(Parsed::from(field_lazy::Parsed {
assert_eq!(new(field_lazy::Parsed {
fields: vec![
lazy::Field::From(lazy::MailboxList("hello@world.com,\r\n\talice@wonderlands.com\r\n")),
lazy::Field::Date(lazy::DateTime("12 Mar 1997 07:33:25 Z\r\n")),

View file

@ -1,5 +1,6 @@
use crate::fragments::lazy;
use crate::multipass::extract_fields::ExtractFields;
use crate::multipass::extract_fields;
use crate::multipass::field_eager;
#[derive(Debug, PartialEq)]
pub struct Parsed<'a> {
@ -7,12 +8,16 @@ pub struct Parsed<'a> {
pub body: &'a [u8],
}
// NOTE(review): rendered diff with the +/- markers stripped. The
// `impl From` / `fn from` lines look like the removed form and `pub fn new`
// like its replacement; confirm against the commit.
// Converts each string slice in `ef.fields` with `.into()` and collects the
// results as `fields`; `body` is copied over from `ef.body`.
impl<'a> From <ExtractFields<'a>> for Parsed<'a> {
fn from(ef: ExtractFields<'a>) -> Self {
pub fn new<'a>(ef: &'a extract_fields::Parsed<'a>) -> Parsed<'a> {
Parsed {
// `iter()` yields references to the stored slices, hence the `*e`.
fields: ef.fields.iter().map(|e| (*e).into()).collect(),
body: ef.body,
}
}
impl<'a> Parsed<'a> {
// Convenience method: delegates to `field_eager::new(self)`.
pub fn body(&'a self) -> field_eager::Parsed<'a> {
field_eager::new(self)
}
}
@ -22,7 +27,7 @@ mod tests {
#[test]
fn test_field_name() {
assert_eq!(Parsed::from(ExtractFields {
assert_eq!(new(extract_fields::Parsed {
fields: vec![
"From: hello@world.com,\r\n\talice@wonderlands.com\r\n",
"Date: 12 Mar 1997 07:33:25 Z\r\n",

View file

@ -2,10 +2,12 @@ use std::borrow::Cow;
use chardetng::EncodingDetector;
use encoding_rs::Encoding;
use crate::multipass::segment::Segment;
use crate::error::IMFError;
use crate::multipass::segment;
use crate::multipass::extract_fields;
#[derive(Debug, PartialEq)]
pub struct GuessCharset<'a> {
pub struct Parsed<'a> {
pub header: Cow<'a, str>,
pub encoding: &'static Encoding,
pub malformed: bool,
@ -16,8 +18,7 @@ const IS_LAST_BUFFER: bool = true;
// Named arguments for the `detector.guess(NO_TLD, ALLOW_UTF8)` call in this file.
const ALLOW_UTF8: bool = true;
const NO_TLD: Option<&[u8]> = None;
impl<'a> From<Segment<'a>> for GuessCharset<'a> {
fn from(seg: Segment<'a>) -> Self {
pub fn new<'a>(seg: &'a segment::Parsed<'a>) -> Parsed<'a> {
// Create detector
let mut detector = EncodingDetector::new();
detector.feed(&seg.header, IS_LAST_BUFFER);
@ -25,8 +26,17 @@ impl<'a> From<Segment<'a>> for GuessCharset<'a> {
// Get encoding
let enc: &Encoding = detector.guess(NO_TLD, ALLOW_UTF8);
let (header, encoding, malformed) = enc.decode(&seg.header);
Parsed {
header,
encoding,
malformed,
body: seg.body
}
}
GuessCharset { header, encoding, malformed, body: seg.body }
impl<'a> Parsed<'a> {
// Convenience method: delegates to `extract_fields::new(self)` and returns
// its `Result` unchanged.
pub fn fields(&'a self) -> Result<extract_fields::Parsed<'a>, IMFError<'a>> {
extract_fields::new(self)
}
}
@ -37,11 +47,12 @@ mod tests {
#[test]
fn test_charset() {
assert_eq!(
GuessCharset::from(Segment {
new(&segment::Parsed {
body: b"Hello world!",
header: b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n",
}),
GuessCharset {
}
),
Parsed {
header: "From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n".into(),
encoding: encoding_rs::UTF_8,
malformed: false,

View file

@ -7,13 +7,11 @@ pub struct Parsed<'a> {
pub body: &'a [u8],
}
// NOTE(review): rendered diff with the +/- markers stripped. The
// `impl From` / `fn from` / `into_iter()` lines and the final closing brace
// look like the removed form; `pub fn new` with `iter()` looks like the
// replacement. Confirm against the commit.
// Builds a `Section` from the entries of `p.fields` via `Section::from_iter`;
// `body` is copied over from `p.body`.
impl<'a> From<field_eager::Parsed<'a>> for Parsed<'a> {
fn from(p: field_eager::Parsed<'a>) -> Self {
pub fn new<'a>(p: &'a field_eager::Parsed<'a>) -> Parsed<'a> {
Parsed {
fields: Section::from_iter(p.fields.into_iter()),
fields: Section::from_iter(p.fields.iter()),
body: p.body,
}
}
}
#[cfg(test)]
@ -25,7 +23,7 @@ mod tests {
#[test]
fn test_section() {
assert_eq!(Parsed::from(field_eager::Parsed {
assert_eq!(new(&field_eager::Parsed {
fields: vec![
eager::Field::From(vec![
model::MailboxRef {

View file

@ -8,40 +8,43 @@ use nom::{
multi::many0,
};
use crate::multipass::guess_charset;
use crate::error::IMFError;
// A raw message buffer split into two borrowed byte slices: `header` and `body`.
// NOTE(review): rendered diff with the +/- markers stripped. `Segment` looks
// like the removed struct name and `Parsed` like its replacement; confirm
// against the commit.
#[derive(Debug, PartialEq)]
pub struct Segment<'a> {
pub struct Parsed<'a> {
pub header: &'a [u8],
pub body: &'a [u8],
}
// Line-terminator bytes: 0x0D (carriage return), 0x0A (line feed), and a slice
// holding the two bytes together.
// NOTE(review): rendered diff with the +/- markers stripped. The lower-case
// names look like the removed constants and the upper-case names like their
// replacements; confirm against the commit.
const cr: u8 = 0x0D;
const lf: u8 = 0x0A;
const crlf: &[u8] = &[cr, lf];
const CR: u8 = 0x0D;
const LF: u8 = 0x0A;
const CRLF: &[u8] = &[CR, LF];
// NOTE(review): rendered diff with the +/- markers stripped. The
// `impl TryFrom` / `try_from` lines look like the removed form and
// `pub fn new` like its replacement; confirm against the commit.
// Splits `buffer` into header and body: the header is the span recognized by
// zero or more `line` matches, which must be followed by one `obs_crlf`;
// whatever input remains after that terminator becomes the body.
// Any parser error is returned as `IMFError::Segment`.
impl<'a> TryFrom<&'a [u8]> for Segment<'a> {
type Error = IMFError<'a>;
fn try_from(buffer: &'a [u8]) -> Result<Self, Self::Error> {
pub fn new<'a>(buffer: &'a [u8]) -> Result<Parsed<'a>, IMFError<'a>> {
terminated(
recognize(many0(line)),
obs_crlf
)(buffer)
.map_err(|e| IMFError::Segment(e))
// The first tuple element is the remaining input, i.e. the body.
.map(|(body, header)| Segment { header, body })
.map(|(body, header)| Parsed { header, body })
}
impl<'a> Parsed<'a> {
// Convenience method: delegates to `guess_charset::new(self)`.
pub fn charset(&'a self) -> guess_charset::Parsed<'a> {
guess_charset::new(self)
}
}
// Parses one line: a run of bytes matched by `is_not` over the CR/LF set,
// followed by one `obs_crlf` terminator. Returns both parts as a pair.
// NOTE(review): rendered diff with the +/- markers stripped. The `crlf` line
// looks like the removed form and the `CRLF` line like its replacement;
// confirm against the commit.
fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
pair(
is_not(crlf),
is_not(CRLF),
obs_crlf,
)(input)
}
// Matches one line terminator, trying the alternatives in order: the two-byte
// CR LF sequence, then a lone CR, then a lone LF.
// NOTE(review): rendered diff with the +/- markers stripped. The lower-case
// line looks like the removed form and the upper-case line like its
// replacement; confirm against the commit.
fn obs_crlf(input: &[u8]) -> IResult<&[u8], &[u8]> {
alt((tag(crlf), tag(&[cr]), tag(&[lf])))(input)
alt((tag(CRLF), tag(&[CR]), tag(&[LF])))(input)
}
#[cfg(test)]
@ -51,10 +54,10 @@ mod tests {
// Checks that a buffer with two header lines, an empty line and a body is split
// so that `header` keeps both header lines (terminators included) and `body`
// is the text after the empty line.
// NOTE(review): rendered diff with the +/- markers stripped. The
// `Segment::try_from` / `Ok(Segment {` lines and the first `body:` line look
// like the removed form; confirm against the commit.
#[test]
fn test_segment() {
assert_eq!(
Segment::try_from(&b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n\r\nHello world!"[..]),
Ok(Segment {
body: b"Hello world!",
new(&b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n\r\nHello world!"[..]),
Ok(Parsed {
header: b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n",
body: b"Hello world!",
})
);
}

View file

@ -1,7 +1,25 @@
use imf_codec::multipass;
use imf_codec::multipass::{
segment,
guess_charset,
field_lazy,
field_eager,
header_section
};
use imf_codec::fragments::section::Section;
use std::io;
use std::io::Read;
/// Runs every parsing pass over `input` in order and hands the resulting
/// header section to `func`.
///
/// Each pass borrows from the value produced by the previous one, so every
/// intermediate result is kept in its own binding until `func` has returned.
///
/// # Panics
/// Panics if `segment::new` or the field-extraction pass returns an error.
fn parser<'a, F>(input: &'a [u8], func: F) -> ()
where
    F: FnOnce(&Section) -> (),
{
    // Pass 1: split the raw bytes into header and body.
    let raw_parts = segment::new(input).unwrap();
    // Pass 2: guess the charset and decode the header.
    let decoded = raw_parts.charset();
    // Pass 3: cut the decoded header into individual fields.
    let raw_fields = decoded.fields().unwrap();
    // Pass 4: recognize the field names.
    let named = raw_fields.names();
    // Pass 5: parse the field bodies.
    let typed = named.body();
    // Pass 6: assemble the header section.
    let header = typed.section();

    func(&header.fields);
}
fn main() {
// Read full mail in memory
@ -9,21 +27,11 @@ fn main() {
io::stdin().lock().read_to_end(&mut rawmail).unwrap();
// Parse it
let segment = multipass::segment::Segment::try_from(&rawmail[..]).unwrap();
let charng = multipass::guess_charset::GuessCharset::from(segment);
let extr = multipass::extract_fields::ExtractFields::try_from(&charng).unwrap();
let lazy = multipass::field_lazy::Parsed::from(extr);
let eager = multipass::field_eager::Parsed::from(lazy);
let section = multipass::header_section::Parsed::from(eager);
//let section: multipass::header_section::Parsed = rawmail.as_ref().into();
//let (email, encoding, malformed) = header::from_bytes(&rawmail);
//println!("Encoding: {:?}, Malformed: {:?}", encoding, malformed);
//let (input, hdrs) = header::section(&email).unwrap();
parser(&rawmail[..], |section| {
// Checks/debug
println!("{:?}", section);
//assert!(hdrs.date.is_some());
//assert!(hdrs.from.len() > 0);
//assert!(hdrs.bad_fields.len() == 0);
});
}