refactor of lazy header parser

This commit is contained in:
Quentin 2023-06-20 15:56:06 +02:00
parent 972ce658eb
commit e9c7da850e
Signed by: quentin
GPG key ID: E9602264D639FF68
12 changed files with 263 additions and 113 deletions

View file

@ -4,4 +4,14 @@ use nom;
pub enum IMFError<'a> { pub enum IMFError<'a> {
Segment(nom::Err<nom::error::Error<&'a [u8]>>), Segment(nom::Err<nom::error::Error<&'a [u8]>>),
ExtractFields(nom::Err<nom::error::Error<&'a str>>), ExtractFields(nom::Err<nom::error::Error<&'a str>>),
DateTimeParse(nom::Err<nom::error::Error<&'a str>>),
DateTimeLogic,
Mailbox(nom::Err<nom::error::Error<&'a str>>),
MailboxList(nom::Err<nom::error::Error<&'a str>>),
AddressList(nom::Err<nom::error::Error<&'a str>>),
NullableAddressList(nom::Err<nom::error::Error<&'a str>>),
MessageID(nom::Err<nom::error::Error<&'a str>>),
MessageIDList(nom::Err<nom::error::Error<&'a str>>),
Unstructured(nom::Err<nom::error::Error<&'a str>>),
PhraseList(nom::Err<nom::error::Error<&'a str>>),
} }

View file

@ -7,10 +7,52 @@ use nom::{
sequence::tuple, sequence::tuple,
}; };
use crate::fragments::model::{GroupRef, AddressRef, MailboxRef}; use crate::fragments::lazy;
use crate::fragments::model::{GroupRef, AddressRef, MailboxRef, MailboxList, AddressList};
use crate::fragments::mailbox::{addr_spec, mailbox}; use crate::fragments::mailbox::{addr_spec, mailbox};
use crate::fragments::misc_token::phrase; use crate::fragments::misc_token::phrase;
use crate::fragments::whitespace::{cfws}; use crate::fragments::whitespace::{cfws};
use crate::error::IMFError;
impl<'a> TryFrom<lazy::Mailbox<'a>> for MailboxRef {
    type Error = IMFError<'a>;

    /// Runs the `mailbox` parser over the raw text held by the lazy wrapper.
    ///
    /// Whatever input the parser leaves unconsumed is discarded.
    ///
    /// # Errors
    /// Returns `IMFError::Mailbox` wrapping the parser error on failure.
    fn try_from(mx: lazy::Mailbox<'a>) -> Result<Self, Self::Error> {
        match mailbox(mx.0) {
            Ok((_unparsed, parsed)) => Ok(parsed),
            Err(cause) => Err(IMFError::Mailbox(cause)),
        }
    }
}
impl<'a> TryFrom<lazy::MailboxList<'a>> for MailboxList {
    type Error = IMFError<'a>;

    /// Runs the `mailbox_list` parser over the raw text held by the lazy wrapper.
    ///
    /// Whatever input the parser leaves unconsumed is discarded.
    ///
    /// # Errors
    /// Returns `IMFError::MailboxList` wrapping the parser error on failure.
    fn try_from(ml: lazy::MailboxList<'a>) -> Result<Self, Self::Error> {
        match mailbox_list(ml.0) {
            Ok((_unparsed, mailboxes)) => Ok(mailboxes),
            Err(cause) => Err(IMFError::MailboxList(cause)),
        }
    }
}
impl<'a> TryFrom<lazy::AddressList<'a>> for AddressList {
    type Error = IMFError<'a>;

    /// Runs the `address_list` parser over the raw text held by the lazy wrapper.
    ///
    /// Whatever input the parser leaves unconsumed is discarded.
    ///
    /// # Errors
    /// Returns `IMFError::AddressList` wrapping the parser error on failure.
    fn try_from(al: lazy::AddressList<'a>) -> Result<Self, Self::Error> {
        match address_list(al.0) {
            Ok((_unparsed, addresses)) => Ok(addresses),
            Err(cause) => Err(IMFError::AddressList(cause)),
        }
    }
}
impl<'a> TryFrom<lazy::NullableAddressList<'a>> for AddressList {
    type Error = IMFError<'a>;

    /// Parses an address list that is allowed to be absent.
    ///
    /// Tries `address_list`, then `address_list_cfws`; when the optional parser
    /// yields nothing, an empty list is returned instead of an error.
    /// Unconsumed input is discarded.
    ///
    /// # Errors
    /// Returns `IMFError::NullableAddressList` wrapping any error the optional
    /// parser still reports.
    fn try_from(nal: lazy::NullableAddressList<'a>) -> Result<Self, Self::Error> {
        let mut maybe_addresses = opt(alt((address_list, address_list_cfws)));
        match maybe_addresses(nal.0) {
            Ok((_unparsed, found)) => Ok(found.unwrap_or_default()),
            Err(cause) => Err(IMFError::NullableAddressList(cause)),
        }
    }
}
/// Address (section 3.4 of RFC5322) /// Address (section 3.4 of RFC5322)
/// ///

View file

@ -11,11 +11,25 @@ use nom::{
sequence::{preceded, terminated, tuple, delimited }, sequence::{preceded, terminated, tuple, delimited },
}; };
use crate::fragments::misc_token; use crate::fragments::misc_token;
use crate::fragments::lazy;
use crate::fragments::whitespace::{fws, cfws}; use crate::fragments::whitespace::{fws, cfws};
use crate::error::IMFError;
const MIN: i32 = 60; const MIN: i32 = 60;
const HOUR: i32 = 60 * MIN; const HOUR: i32 = 60 * MIN;
impl<'a> TryFrom<lazy::DateTime<'a>> for DateTime<FixedOffset> {
    type Error = IMFError<'a>;

    /// Runs the `section` parser over the raw date text held by the lazy wrapper.
    ///
    /// # Errors
    /// * `IMFError::DateTimeParse` when the parser itself fails.
    /// * `IMFError::DateTimeLogic` when parsing succeeds but yields no date
    ///   (presumably the components do not form a valid date; confirm against `section`).
    fn try_from(value: lazy::DateTime<'a>) -> Result<Self, Self::Error> {
        let (_unparsed, maybe_date) = section(value.0).map_err(IMFError::DateTimeParse)?;
        maybe_date.ok_or(IMFError::DateTimeLogic)
    }
}
/// Read datetime /// Read datetime
/// ///
/// ```abnf /// ```abnf

View file

@ -1,98 +0,0 @@
use std::convert::From;
use nom::{
IResult,
bytes::complete::{take_while1, tag},
character::complete::space0,
sequence::{terminated, tuple},
};
/// A header field whose name has been recognized; the body is kept as the raw,
/// unparsed text that follows the `name:` prefix.
#[derive(Debug, PartialEq)]
pub enum Field<'a> {
// 3.6.1. The Origination Date Field
/// Body of a `Date` field.
Date(&'a str),
// 3.6.2. Originator Fields
/// Body of a `From` field.
From(&'a str),
/// Body of a `Sender` field.
Sender(&'a str),
/// Body of a `Reply-To` field.
ReplyTo(&'a str),
// 3.6.3. Destination Address Fields
/// Body of a `To` field.
To(&'a str),
/// Body of a `Cc` field.
Cc(&'a str),
/// Body of a `Bcc` field.
Bcc(&'a str),
// 3.6.4. Identification Fields
/// Body of a `Message-ID` field.
MessageID(&'a str),
/// Body of an `In-Reply-To` field.
InReplyTo(&'a str),
/// Body of a `References` field.
References(&'a str),
// 3.6.5. Informational Fields
/// Body of a `Subject` field.
Subject(&'a str),
/// Body of a `Comments` field.
Comments(&'a str),
/// Body of a `Keywords` field.
Keywords(&'a str),
// 3.6.6 Resent Fields (not implemented)
// 3.6.7 Trace Fields
/// Body of a `Received` field.
Received(&'a str),
/// Body of a `Return-Path` field.
ReturnPath(&'a str),
// 3.6.8. Optional Fields
/// Any other field: `(name as written, body)`.
Optional(&'a str, &'a str),
// None
/// The whole input, kept untouched because no field name could be parsed.
Rescue(&'a str),
}
use Field::*;
impl<'a> From<&'a str> for Field<'a> {
    /// Classifies one raw header line by its field name.
    ///
    /// This conversion never fails: when no field name can be parsed, the
    /// whole input is preserved in `Field::Rescue`.
    fn from(input: &'a str) -> Self {
        correct_field(input).map_or_else(|_| Self::Rescue(input), |(_, field)| field)
    }
}
/// Parses the field name that starts a header line.
///
/// Also consumes the `:` separator together with any whitespace accepted by
/// `space0` on either side of it, so the remaining input is the field body.
///
/// ```abnf
/// field           =   field-name ":" unstructured CRLF
/// field-name      =   1*ftext
/// ftext           =   %d33-57 /          ; Printable US-ASCII
///                     %d59-126           ;  characters not including
///                                        ;  ":".
/// ```
fn field_name(input: &str) -> IResult<&str, &str> {
    // ftext: printable US-ASCII (0x21..=0x7E) with the colon (0x3A) excluded.
    let is_ftext = |c: char| c != '\x3A' && ('\x21'..='\x7E').contains(&c);
    let separator = tuple((space0, tag(":"), space0));
    terminated(take_while1(is_ftext), separator)(input)
}
fn correct_field(input: &str) -> IResult<&str, Field> {
field_name(input)
.map(|(rest, name)| ("", match name.to_lowercase().as_ref() {
"date" => Date(rest),
"from" => From(rest),
"sender" => Sender(rest),
"reply-to" => ReplyTo(rest),
"to" => To(rest),
"cc" => Cc(rest),
"bcc" => Bcc(rest),
"message-id" => MessageID(rest),
"in-reply-to" => InReplyTo(rest),
"references" => References(rest),
"subject" => Subject(rest),
"comments" => Comments(rest),
"keywords" => Keywords(rest),
"return-path" => ReturnPath(rest),
"received" => Received(rest),
_ => Optional(name, rest),
}))
}

View file

@ -4,13 +4,36 @@ use nom::{
branch::alt, branch::alt,
bytes::complete::{take_while, tag}, bytes::complete::{take_while, tag},
combinator::opt, combinator::opt,
multi::many1,
sequence::{delimited, pair, tuple}, sequence::{delimited, pair, tuple},
}; };
use crate::fragments::lazy;
use crate::fragments::whitespace::cfws; use crate::fragments::whitespace::cfws;
use crate::fragments::words::dot_atom_text; use crate::fragments::words::dot_atom_text;
use crate::fragments::mailbox::is_dtext; use crate::fragments::mailbox::is_dtext;
use crate::fragments::model::MessageId; use crate::fragments::model::{MessageId, MessageIdList};
use crate::error::IMFError;
impl<'a> TryFrom<lazy::Identifier<'a>> for MessageId<'a> {
    type Error = IMFError<'a>;

    /// Runs the `msg_id` parser over the raw text held by the lazy wrapper.
    ///
    /// Whatever input the parser leaves unconsumed is discarded.
    ///
    /// # Errors
    /// Returns `IMFError::MessageID` wrapping the parser error on failure.
    fn try_from(id: lazy::Identifier<'a>) -> Result<Self, Self::Error> {
        match msg_id(id.0) {
            Ok((_unparsed, message_id)) => Ok(message_id),
            Err(cause) => Err(IMFError::MessageID(cause)),
        }
    }
}
impl<'a> TryFrom<lazy::IdentifierList<'a>> for MessageIdList<'a> {
    type Error = IMFError<'a>;

    /// Parses one or more consecutive message identifiers with `many1(msg_id)`.
    ///
    /// Parsing stops at the first position where `msg_id` no longer matches;
    /// the text from there on is discarded.
    ///
    /// # Errors
    /// Returns `IMFError::MessageIDList` wrapping the parser error, e.g. when
    /// not even one identifier can be parsed.
    fn try_from(id: lazy::IdentifierList<'a>) -> Result<Self, Self::Error> {
        let mut identifiers = many1(msg_id);
        match identifiers(id.0) {
            Ok((_unparsed, list)) => Ok(list),
            Err(cause) => Err(IMFError::MessageIDList(cause)),
        }
    }
}
/// Message identifier /// Message identifier
/// ///

131
src/fragments/lazy.rs Normal file
View file

@ -0,0 +1,131 @@
use std::convert::From;
use nom::{
IResult,
bytes::complete::{take_while1, tag},
character::complete::space0,
sequence::{terminated, tuple},
};
// Each wrapper below only borrows the raw, still-unparsed body of a header
// field and records, through its type, which syntax that body is expected to
// follow. They are `Copy` (a `&str` is all they hold) so a field can be handed
// by value to a `TryFrom` conversion without giving up the stored original.

/// Raw text expected to hold a date-time (used for `Date`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DateTime<'a>(pub &'a str);

/// Raw text expected to hold a list of mailboxes (used for `From`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MailboxList<'a>(pub &'a str);

/// Raw text expected to hold a single mailbox (used for `Sender` and `Return-Path`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Mailbox<'a>(pub &'a str);

/// Raw text expected to hold a list of addresses (used for `Reply-To`, `To`, `Cc`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AddressList<'a>(pub &'a str);

/// Raw text expected to hold a possibly empty list of addresses (used for `Bcc`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct NullableAddressList<'a>(pub &'a str);

/// Raw text expected to hold one message identifier (used for `Message-ID`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Identifier<'a>(pub &'a str);

/// Raw text expected to hold several message identifiers
/// (used for `In-Reply-To` and `References`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct IdentifierList<'a>(pub &'a str);

/// Raw unstructured text (used for `Subject` and `Comments`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Unstructured<'a>(pub &'a str);

/// Raw text expected to hold a list of phrases (used for `Keywords`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PhraseList<'a>(pub &'a str);

/// Raw text of a `Received` trace entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ReceivedLog<'a>(pub &'a str);

/// Raw text of a path.
// NOTE(review): no `Field` variant in this file uses `Path` yet
// (`ReturnPath` carries a `Mailbox`) — confirm the intended use.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Path<'a>(pub &'a str);
/// A header field whose name has been recognized; the body is not parsed yet
/// and is carried as raw text inside a wrapper type naming its expected syntax.
#[derive(Debug, PartialEq)]
pub enum Field<'a> {
// 3.6.1. The Origination Date Field
/// Body of a `Date` field.
Date(DateTime<'a>),
// 3.6.2. Originator Fields
/// Body of a `From` field.
From(MailboxList<'a>),
/// Body of a `Sender` field.
Sender(Mailbox<'a>),
/// Body of a `Reply-To` field.
ReplyTo(AddressList<'a>),
// 3.6.3. Destination Address Fields
/// Body of a `To` field.
To(AddressList<'a>),
/// Body of a `Cc` field.
Cc(AddressList<'a>),
/// Body of a `Bcc` field; its address list may be empty.
Bcc(NullableAddressList<'a>),
// 3.6.4. Identification Fields
/// Body of a `Message-ID` field.
MessageID(Identifier<'a>),
/// Body of an `In-Reply-To` field.
InReplyTo(IdentifierList<'a>),
/// Body of a `References` field.
References(IdentifierList<'a>),
// 3.6.5. Informational Fields
/// Body of a `Subject` field.
Subject(Unstructured<'a>),
/// Body of a `Comments` field.
Comments(Unstructured<'a>),
/// Body of a `Keywords` field.
Keywords(PhraseList<'a>),
// 3.6.6 Resent Fields (not implemented)
// 3.6.7 Trace Fields
/// Body of a `Received` field.
Received(ReceivedLog<'a>),
/// Body of a `Return-Path` field.
ReturnPath(Mailbox<'a>),
// 3.6.8. Optional Fields
/// Any other field: `(name as written, body)`.
Optional(&'a str, &'a str),
// None
/// The whole input, kept untouched because no field name could be parsed.
Rescue(&'a str),
}
use Field::*;
impl<'a> From<&'a str> for Field<'a> {
    /// Classifies one raw header line by its field name, leaving the body unparsed.
    ///
    /// This conversion never fails: when no field name can be parsed, the
    /// whole input is preserved in `Field::Rescue`.
    fn from(input: &'a str) -> Self {
        correct_field(input).map_or_else(|_| Self::Rescue(input), |(_, field)| field)
    }
}
/// Parses the field name that starts a header line.
///
/// Also consumes the `:` separator together with any whitespace accepted by
/// `space0` on either side of it, so the remaining input is the field body.
///
/// ```abnf
/// field           =   field-name ":" unstructured CRLF
/// field-name      =   1*ftext
/// ftext           =   %d33-57 /          ; Printable US-ASCII
///                     %d59-126           ;  characters not including
///                                        ;  ":".
/// ```
fn field_name(input: &str) -> IResult<&str, &str> {
    // ftext: printable US-ASCII (0x21..=0x7E) with the colon (0x3A) excluded.
    let is_ftext = |c: char| c != '\x3A' && ('\x21'..='\x7E').contains(&c);
    let separator = tuple((space0, tag(":"), space0));
    terminated(take_while1(is_ftext), separator)(input)
}
fn correct_field(input: &str) -> IResult<&str, Field> {
field_name(input)
.map(|(rest, name)| ("", match name.to_lowercase().as_ref() {
"date" => Date(DateTime(rest)),
"from" => From(MailboxList(rest)),
"sender" => Sender(Mailbox(rest)),
"reply-to" => ReplyTo(AddressList(rest)),
"to" => To(AddressList(rest)),
"cc" => Cc(AddressList(rest)),
"bcc" => Bcc(NullableAddressList(rest)),
"message-id" => MessageID(Identifier(rest)),
"in-reply-to" => InReplyTo(IdentifierList(rest)),
"references" => References(IdentifierList(rest)),
"subject" => Subject(Unstructured(rest)),
"comments" => Comments(Unstructured(rest)),
"keywords" => Keywords(PhraseList(rest)),
"return-path" => ReturnPath(Mailbox(rest)),
"received" => Received(ReceivedLog(rest)),
_ => Optional(name, rest),
}))
}

View file

@ -2,16 +2,41 @@ use std::borrow::Cow;
use nom::{ use nom::{
IResult, IResult,
branch::alt, branch::alt,
bytes::complete::take_while1, bytes::complete::{take_while1, tag},
character::complete::space0, character::complete::space0,
combinator::{into, opt}, combinator::{into, opt},
multi::{many0, many1}, multi::{many0, many1, separated_list1},
sequence::{pair, tuple}, sequence::{pair, tuple},
}; };
use crate::fragments::lazy;
use crate::fragments::quoted::quoted_string; use crate::fragments::quoted::quoted_string;
use crate::fragments::whitespace::{fws, is_obs_no_ws_ctl}; use crate::fragments::whitespace::{fws, is_obs_no_ws_ctl};
use crate::fragments::words::{atom, is_vchar}; use crate::fragments::words::{atom, is_vchar};
use crate::error::IMFError;
/// Parsed form of an unstructured field body: an owned string.
type Unstructured = String;
/// Parsed form of a phrase list: one owned string per phrase.
type PhraseList = Vec<String>;
impl<'a> TryFrom<lazy::Unstructured<'a>> for Unstructured {
    type Error = IMFError<'a>;

    /// Runs the `unstructured` parser over the raw text held by the lazy wrapper.
    ///
    /// Whatever input the parser leaves unconsumed is discarded.
    ///
    /// # Errors
    /// Returns `IMFError::Unstructured` wrapping the parser error on failure.
    fn try_from(input: lazy::Unstructured<'a>) -> Result<Self, Self::Error> {
        match unstructured(input.0) {
            Ok((_unparsed, text)) => Ok(text),
            Err(cause) => Err(IMFError::Unstructured(cause)),
        }
    }
}
impl<'a> TryFrom<lazy::PhraseList<'a>> for PhraseList {
    type Error = IMFError<'a>;

    /// Parses one or more phrases separated by a literal `,`.
    ///
    /// Parsing stops where the `phrase , phrase` pattern no longer matches;
    /// the text from there on is discarded.
    ///
    /// # Errors
    /// Returns `IMFError::PhraseList` wrapping the parser error, e.g. when
    /// not even one phrase can be parsed.
    fn try_from(p: lazy::PhraseList<'a>) -> Result<Self, Self::Error> {
        let mut phrases = separated_list1(tag(","), phrase);
        match phrases(p.0) {
            Ok((_unparsed, list)) => Ok(list),
            Err(cause) => Err(IMFError::PhraseList(cause)),
        }
    }
}
/// Word /// Word
/// ///

View file

@ -13,7 +13,7 @@ mod address;
mod identification; mod identification;
mod trace; mod trace;
mod datetime; mod datetime;
pub mod field_raw; pub mod lazy;
// Header blocks // Header blocks
pub mod header; pub mod header;

View file

@ -26,6 +26,7 @@ impl From<AddrSpec> for MailboxRef {
} }
} }
} }
/// A parsed list of mailboxes.
pub type MailboxList = Vec<MailboxRef>;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct GroupRef { pub struct GroupRef {
@ -48,12 +49,14 @@ impl From<GroupRef> for AddressRef {
AddressRef::Many(grp) AddressRef::Many(grp)
} }
} }
/// A parsed list of addresses.
pub type AddressList = Vec<AddressRef>;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct MessageId<'a> { pub struct MessageId<'a> {
pub left: &'a str, pub left: &'a str,
pub right: &'a str, pub right: &'a str,
} }
/// A parsed list of message identifiers borrowing from the input text.
pub type MessageIdList<'a> = Vec<MessageId<'a>>;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum FieldBody<'a, T> { pub enum FieldBody<'a, T> {

View file

@ -1,4 +1,4 @@
pub mod segment; pub mod segment;
pub mod guess_charset; pub mod guess_charset;
pub mod extract_fields; pub mod extract_fields;
pub mod parse_field_names; pub mod parse_field_lazy;

View file

@ -1,15 +1,15 @@
use crate::fragments::field_raw; use crate::fragments::lazy;
use crate::multipass::extract_fields::ExtractFields; use crate::multipass::extract_fields::ExtractFields;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct ParseFieldName<'a> { pub struct Parsed<'a> {
pub fields: Vec<field_raw::Field<'a>>, pub fields: Vec<lazy::Field<'a>>,
pub body: &'a [u8], pub body: &'a [u8],
} }
impl<'a> From <&'a ExtractFields<'a>> for ParseFieldName<'a> { impl<'a> From <&'a ExtractFields<'a>> for Parsed<'a> {
fn from(ef: &'a ExtractFields<'a>) -> Self { fn from(ef: &'a ExtractFields<'a>) -> Self {
ParseFieldName { Parsed {
fields: ef.fields.iter().map(|e| (*e).into()).collect(), fields: ef.fields.iter().map(|e| (*e).into()).collect(),
body: ef.body, body: ef.body,
} }
@ -22,17 +22,17 @@ mod tests {
#[test] #[test]
fn test_field_name() { fn test_field_name() {
assert_eq!(ParseFieldName::from(&ExtractFields { assert_eq!(Parsed::from(&ExtractFields {
fields: vec![ fields: vec![
"From: hello@world.com,\r\n\talice@wonderlands.com\r\n", "From: hello@world.com,\r\n\talice@wonderlands.com\r\n",
"Date: 12 Mar 1997 07:33:25 Z\r\n", "Date: 12 Mar 1997 07:33:25 Z\r\n",
], ],
body: b"Hello world!", body: b"Hello world!",
}), }),
ParseFieldName { Parsed {
fields: vec![ fields: vec![
field_raw::Field::From("hello@world.com,\r\n\talice@wonderlands.com\r\n"), lazy::Field::From(lazy::MailboxList("hello@world.com,\r\n\talice@wonderlands.com\r\n")),
field_raw::Field::Date("12 Mar 1997 07:33:25 Z\r\n"), lazy::Field::Date(lazy::DateTime("12 Mar 1997 07:33:25 Z\r\n")),
], ],
body: b"Hello world!", body: b"Hello world!",
}); });