write an eager parser

This commit is contained in:
Quentin 2023-06-20 17:03:18 +02:00
parent e9c7da850e
commit 458d6517d1
Signed by: quentin
GPG key ID: E9602264D639FF68
12 changed files with 181 additions and 26 deletions

View file

@ -14,4 +14,5 @@ pub enum IMFError<'a> {
MessageIDList(nom::Err<nom::error::Error<&'a str>>), MessageIDList(nom::Err<nom::error::Error<&'a str>>),
Unstructured(nom::Err<nom::error::Error<&'a str>>), Unstructured(nom::Err<nom::error::Error<&'a str>>),
PhraseList(nom::Err<nom::error::Error<&'a str>>), PhraseList(nom::Err<nom::error::Error<&'a str>>),
ReceivedLog(nom::Err<nom::error::Error<&'a str>>),
} }

View file

@ -14,40 +14,40 @@ use crate::fragments::misc_token::phrase;
use crate::fragments::whitespace::{cfws}; use crate::fragments::whitespace::{cfws};
use crate::error::IMFError; use crate::error::IMFError;
impl<'a> TryFrom<lazy::Mailbox<'a>> for MailboxRef { impl<'a> TryFrom<&'a lazy::Mailbox<'a>> for MailboxRef {
type Error = IMFError<'a>; type Error = IMFError<'a>;
fn try_from(mx: lazy::Mailbox<'a>) -> Result<Self, Self::Error> { fn try_from(mx: &'a lazy::Mailbox<'a>) -> Result<Self, Self::Error> {
mailbox(mx.0) mailbox(mx.0)
.map(|(_, m)| m) .map(|(_, m)| m)
.map_err(|e| IMFError::Mailbox(e)) .map_err(|e| IMFError::Mailbox(e))
} }
} }
impl<'a> TryFrom<lazy::MailboxList<'a>> for MailboxList { impl<'a> TryFrom<&'a lazy::MailboxList<'a>> for MailboxList {
type Error = IMFError<'a>; type Error = IMFError<'a>;
fn try_from(ml: lazy::MailboxList<'a>) -> Result<Self, Self::Error> { fn try_from(ml: &'a lazy::MailboxList<'a>) -> Result<Self, Self::Error> {
mailbox_list(ml.0) mailbox_list(ml.0)
.map(|(_, m)| m) .map(|(_, m)| m)
.map_err(|e| IMFError::MailboxList(e)) .map_err(|e| IMFError::MailboxList(e))
} }
} }
impl<'a> TryFrom<lazy::AddressList<'a>> for AddressList { impl<'a> TryFrom<&'a lazy::AddressList<'a>> for AddressList {
type Error = IMFError<'a>; type Error = IMFError<'a>;
fn try_from(al: lazy::AddressList<'a>) -> Result<Self, Self::Error> { fn try_from(al: &'a lazy::AddressList<'a>) -> Result<Self, Self::Error> {
address_list(al.0) address_list(al.0)
.map(|(_, a)| a) .map(|(_, a)| a)
.map_err(|e| IMFError::AddressList(e)) .map_err(|e| IMFError::AddressList(e))
} }
} }
impl<'a> TryFrom<lazy::NullableAddressList<'a>> for AddressList { impl<'a> TryFrom<&'a lazy::NullableAddressList<'a>> for AddressList {
type Error = IMFError<'a>; type Error = IMFError<'a>;
fn try_from(nal: lazy::NullableAddressList<'a>) -> Result<Self, Self::Error> { fn try_from(nal: &'a lazy::NullableAddressList<'a>) -> Result<Self, Self::Error> {
opt(alt((address_list, address_list_cfws)))(nal.0) opt(alt((address_list, address_list_cfws)))(nal.0)
.map(|(_, a)| a.unwrap_or(vec![])) .map(|(_, a)| a.unwrap_or(vec![]))
.map_err(|e| IMFError::NullableAddressList(e)) .map_err(|e| IMFError::NullableAddressList(e))

View file

@ -18,10 +18,10 @@ use crate::error::IMFError;
const MIN: i32 = 60; const MIN: i32 = 60;
const HOUR: i32 = 60 * MIN; const HOUR: i32 = 60 * MIN;
impl<'a> TryFrom<lazy::DateTime<'a>> for DateTime<FixedOffset> { impl<'a> TryFrom<&'a lazy::DateTime<'a>> for DateTime<FixedOffset> {
type Error = IMFError<'a>; type Error = IMFError<'a>;
fn try_from(value: lazy::DateTime<'a>) -> Result<Self, Self::Error> { fn try_from(value: &'a lazy::DateTime<'a>) -> Result<Self, Self::Error> {
match section(value.0) { match section(value.0) {
Ok((_, Some(dt))) => Ok(dt), Ok((_, Some(dt))) => Ok(dt),
Err(e) => Err(IMFError::DateTimeParse(e)), Err(e) => Err(IMFError::DateTimeParse(e)),

72
src/fragments/eager.rs Normal file
View file

@ -0,0 +1,72 @@
use chrono::{DateTime, FixedOffset};
use crate::fragments::model::{
MailboxList, MailboxRef, AddressList,
MessageId, MessageIdList};
use crate::fragments::misc_token::{Unstructured, PhraseList};
use crate::fragments::trace::ReceivedLog;
use crate::fragments::lazy::Field as Lazy;
use crate::error::IMFError;
/// A header field whose value has been parsed into its final ("eager") form.
///
/// Every variant mirrors the same-named variant of the lazy field type
/// (`crate::fragments::lazy::Field`, imported in this module as `Lazy`) and is
/// produced from it by the `TryFrom<&Lazy>` implementation below.
///
/// The lifetime `'a` is the lifetime of the borrowed text that some variants
/// keep a reference to.
///
/// NOTE(review): the numbered section comments presumably refer to RFC 5322
/// section 3.6 — confirm.
#[derive(Debug, PartialEq)]
pub enum Field<'a> {
// 3.6.1. The Origination Date Field
Date(DateTime<FixedOffset>),
// 3.6.2. Originator Fields
From(MailboxList),
Sender(MailboxRef),
ReplyTo(AddressList),
// 3.6.3. Destination Address Fields
To(AddressList),
Cc(AddressList),
Bcc(AddressList),
// 3.6.4. Identification Fields
MessageID(MessageId<'a>),
InReplyTo(MessageIdList<'a>),
References(MessageIdList<'a>),
// 3.6.5. Informational Fields
Subject(Unstructured),
Comments(Unstructured),
Keywords(PhraseList),
// 3.6.6 Resent Fields (not implemented)
// 3.6.7 Trace Fields
Received(ReceivedLog<'a>),
ReturnPath(MailboxRef),
// 3.6.8. Optional Fields
// First element is carried over unchanged from the lazy field, second is the
// parsed unstructured value.
Optional(&'a str, Unstructured),
// None
// Carried over verbatim from `Lazy::Rescue`; no parsing is applied to it.
Rescue(&'a str),
}
use Field::*;
impl<'a> TryFrom<&'a Lazy<'a>> for Field<'a> {
type Error = IMFError<'a>;
fn try_from(l: &'a Lazy<'a>) -> Result<Self, Self::Error> {
match l {
Lazy::Date(v) => v.try_into().map(|v| Date(v)),
Lazy::From(v) => v.try_into().map(|v| From(v)),
Lazy::Sender(v) => v.try_into().map(|v| Sender(v)),
Lazy::ReplyTo(v) => v.try_into().map(|v| ReplyTo(v)),
Lazy::To(v) => v.try_into().map(|v| To(v)),
Lazy::Cc(v) => v.try_into().map(|v| Cc(v)),
Lazy::Bcc(v) => v.try_into().map(|v| Bcc(v)),
Lazy::MessageID(v) => v.try_into().map(|v| MessageID(v)),
Lazy::InReplyTo(v) => v.try_into().map(|v| InReplyTo(v)),
Lazy::References(v) => v.try_into().map(|v| References(v)),
Lazy::Subject(v) => v.try_into().map(|v| Subject(v)),
Lazy::Comments(v) => v.try_into().map(|v| Comments(v)),
Lazy::Keywords(v) => v.try_into().map(|v| Keywords(v)),
Lazy::Received(v) => v.try_into().map(|v| Received(v)),
Lazy::ReturnPath(v) => v.try_into().map(|v| ReturnPath(v)),
Lazy::Optional(k, v) => v.try_into().map(|v| Optional(k, v)),
Lazy::Rescue(v) => Ok(Rescue(*v)),
}
}
}

View file

@ -15,20 +15,20 @@ use crate::fragments::mailbox::is_dtext;
use crate::fragments::model::{MessageId, MessageIdList}; use crate::fragments::model::{MessageId, MessageIdList};
use crate::error::IMFError; use crate::error::IMFError;
impl<'a> TryFrom<lazy::Identifier<'a>> for MessageId<'a> { impl<'a> TryFrom<&'a lazy::Identifier<'a>> for MessageId<'a> {
type Error = IMFError<'a>; type Error = IMFError<'a>;
fn try_from(id: lazy::Identifier<'a>) -> Result<Self, Self::Error> { fn try_from(id: &'a lazy::Identifier<'a>) -> Result<Self, Self::Error> {
msg_id(id.0) msg_id(id.0)
.map(|(_, i)| i) .map(|(_, i)| i)
.map_err(|e| IMFError::MessageID(e)) .map_err(|e| IMFError::MessageID(e))
} }
} }
impl<'a> TryFrom<lazy::IdentifierList<'a>> for MessageIdList<'a> { impl<'a> TryFrom<&'a lazy::IdentifierList<'a>> for MessageIdList<'a> {
type Error = IMFError<'a>; type Error = IMFError<'a>;
fn try_from(id: lazy::IdentifierList<'a>) -> Result<Self, Self::Error> { fn try_from(id: &'a lazy::IdentifierList<'a>) -> Result<Self, Self::Error> {
many1(msg_id)(id.0) many1(msg_id)(id.0)
.map(|(_, i)| i) .map(|(_, i)| i)
.map_err(|e| IMFError::MessageIDList(e)) .map_err(|e| IMFError::MessageIDList(e))

View file

@ -71,7 +71,7 @@ pub enum Field<'a> {
ReturnPath(Mailbox<'a>), ReturnPath(Mailbox<'a>),
// 3.6.8. Optional Fields // 3.6.8. Optional Fields
Optional(&'a str, &'a str), Optional(&'a str, Unstructured<'a>),
// None // None
Rescue(&'a str), Rescue(&'a str),
@ -126,6 +126,6 @@ fn correct_field(input: &str) -> IResult<&str, Field> {
"return-path" => ReturnPath(Mailbox(rest)), "return-path" => ReturnPath(Mailbox(rest)),
"received" => Received(ReceivedLog(rest)), "received" => Received(ReceivedLog(rest)),
_ => Optional(name, rest), _ => Optional(name, Unstructured(rest)),
})) }))
} }

View file

@ -15,25 +15,28 @@ use crate::fragments::whitespace::{fws, is_obs_no_ws_ctl};
use crate::fragments::words::{atom, is_vchar}; use crate::fragments::words::{atom, is_vchar};
use crate::error::IMFError; use crate::error::IMFError;
type Unstructured = String; #[derive(Debug, PartialEq)]
type PhraseList = Vec<String>; pub struct Unstructured(pub String);
impl<'a> TryFrom<lazy::Unstructured<'a>> for Unstructured { #[derive(Debug, PartialEq)]
pub struct PhraseList(pub Vec<String>);
impl<'a> TryFrom<&'a lazy::Unstructured<'a>> for Unstructured {
type Error = IMFError<'a>; type Error = IMFError<'a>;
fn try_from(input: lazy::Unstructured<'a>) -> Result<Self, Self::Error> { fn try_from(input: &'a lazy::Unstructured<'a>) -> Result<Self, Self::Error> {
unstructured(input.0) unstructured(input.0)
.map(|(_, v)| v) .map(|(_, v)| Unstructured(v))
.map_err(|e| IMFError::Unstructured(e)) .map_err(|e| IMFError::Unstructured(e))
} }
} }
impl<'a> TryFrom<lazy::PhraseList<'a>> for PhraseList { impl<'a> TryFrom<&'a lazy::PhraseList<'a>> for PhraseList {
type Error = IMFError<'a>; type Error = IMFError<'a>;
fn try_from(p: lazy::PhraseList<'a>) -> Result<Self, Self::Error> { fn try_from(p: &'a lazy::PhraseList<'a>) -> Result<Self, Self::Error> {
separated_list1(tag(","), phrase)(p.0) separated_list1(tag(","), phrase)(p.0)
.map(|(_, q)| q) .map(|(_, q)| PhraseList(q))
.map_err(|e| IMFError::PhraseList(e)) .map_err(|e| IMFError::PhraseList(e))
} }
} }

View file

@ -14,6 +14,7 @@ mod identification;
mod trace; mod trace;
mod datetime; mod datetime;
pub mod lazy; pub mod lazy;
pub mod eager;
// Header blocks // Header blocks
pub mod header; pub mod header;

View file

@ -8,7 +8,21 @@ use nom::{
multi::many0, multi::many0,
sequence::{delimited, pair, tuple}, sequence::{delimited, pair, tuple},
}; };
use crate::fragments::{datetime, mailbox, model, misc_token, whitespace}; use crate::fragments::{datetime, mailbox, model, misc_token, whitespace, lazy};
use crate::error::IMFError;
/// Eager form of a `Received` field value: wraps the string slice that
/// `received_body` yields as its parsed output.
#[derive(Debug, PartialEq)]
pub struct ReceivedLog<'a>(pub &'a str);
/// Parses the raw text held by a lazy `ReceivedLog` with `received_body`.
impl<'a> TryFrom<&'a lazy::ReceivedLog<'a>> for ReceivedLog<'a> {
    type Error = IMFError<'a>;

    /// Runs `received_body` on the wrapped text and keeps its parsed output.
    /// Any input left over after the parser is discarded.
    ///
    /// # Errors
    ///
    /// Returns `IMFError::ReceivedLog` wrapping the parser error when
    /// `received_body` fails.
    fn try_from(input: &'a lazy::ReceivedLog<'a>) -> Result<Self, Self::Error> {
        match received_body(input.0) {
            Ok((_remaining, log)) => Ok(ReceivedLog(log)),
            Err(parse_err) => Err(IMFError::ReceivedLog(parse_err)),
        }
    }
}
pub fn received_body(input: &str) -> IResult<&str, &str> { pub fn received_body(input: &str) -> IResult<&str, &str> {
map( map(

View file

@ -0,0 +1,63 @@
use crate::fragments::eager;
use crate::multipass::field_lazy;
/// Output of the eager pass: a list of fully parsed fields plus the body bytes.
///
/// Built from a `field_lazy::Parsed` through the `From` implementation below.
#[derive(Debug, PartialEq)]
pub struct Parsed<'a> {
/// Fields that were successfully converted to their eager form, in the
/// order they appeared in the lazy input. Fields whose conversion failed
/// are not present.
pub fields: Vec<eager::Field<'a>>,
/// Body bytes, copied by reference from the lazy input without modification.
pub body: &'a [u8],
}
impl<'a> From <&'a field_lazy::Parsed<'a>> for Parsed<'a> {
fn from(p: &'a field_lazy::Parsed<'a>) -> Self {
Parsed {
fields: p.fields.iter().filter_map(|entry| entry.try_into().ok()).collect(),
body: p.body,
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::fragments::lazy;
    use crate::fragments::model;
    use chrono::{FixedOffset, TimeZone};

    /// Converts a lazy `Parsed` holding a `From` and a `Date` field and checks
    /// that both fields come out in their eager form, with the body unchanged.
    #[test]
    fn test_field_name() {
        // Lazy input: raw, still-unparsed field values.
        let lazy_input = field_lazy::Parsed {
            fields: vec![
                lazy::Field::From(lazy::MailboxList("hello@world.com,\r\n\talice@wonderlands.com\r\n")),
                lazy::Field::Date(lazy::DateTime("12 Mar 1997 07:33:25 Z\r\n")),
            ],
            body: b"Hello world!",
        };

        // Expected eager values.
        let hello = model::MailboxRef {
            name: None,
            addrspec: model::AddrSpec {
                local_part: "hello".into(),
                domain: "world.com".into(),
            },
        };
        let alice = model::MailboxRef {
            name: None,
            addrspec: model::AddrSpec {
                local_part: "alice".into(),
                domain: "wonderlands.com".into(),
            },
        };
        let sent_at = FixedOffset::east_opt(0)
            .unwrap()
            .with_ymd_and_hms(1997, 3, 12, 7, 33, 25)
            .unwrap();
        let expected = Parsed {
            fields: vec![
                eager::Field::From(vec![hello, alice]),
                eager::Field::Date(sent_at),
            ],
            body: b"Hello world!",
        };

        assert_eq!(Parsed::from(&lazy_input), expected);
    }
}

View file

@ -1,4 +1,5 @@
pub mod segment; pub mod segment;
pub mod guess_charset; pub mod guess_charset;
pub mod extract_fields; pub mod extract_fields;
pub mod parse_field_lazy; pub mod field_lazy;
pub mod field_eager;