refactor of lazy header parser
This commit is contained in:
parent
972ce658eb
commit
e9c7da850e
12 changed files with 263 additions and 113 deletions
10
src/error.rs
10
src/error.rs
|
@ -4,4 +4,14 @@ use nom;
|
|||
pub enum IMFError<'a> {
|
||||
Segment(nom::Err<nom::error::Error<&'a [u8]>>),
|
||||
ExtractFields(nom::Err<nom::error::Error<&'a str>>),
|
||||
DateTimeParse(nom::Err<nom::error::Error<&'a str>>),
|
||||
DateTimeLogic,
|
||||
Mailbox(nom::Err<nom::error::Error<&'a str>>),
|
||||
MailboxList(nom::Err<nom::error::Error<&'a str>>),
|
||||
AddressList(nom::Err<nom::error::Error<&'a str>>),
|
||||
NullableAddressList(nom::Err<nom::error::Error<&'a str>>),
|
||||
MessageID(nom::Err<nom::error::Error<&'a str>>),
|
||||
MessageIDList(nom::Err<nom::error::Error<&'a str>>),
|
||||
Unstructured(nom::Err<nom::error::Error<&'a str>>),
|
||||
PhraseList(nom::Err<nom::error::Error<&'a str>>),
|
||||
}
|
||||
|
|
|
@ -7,10 +7,52 @@ use nom::{
|
|||
sequence::tuple,
|
||||
};
|
||||
|
||||
use crate::fragments::model::{GroupRef, AddressRef, MailboxRef};
|
||||
use crate::fragments::lazy;
|
||||
use crate::fragments::model::{GroupRef, AddressRef, MailboxRef, MailboxList, AddressList};
|
||||
use crate::fragments::mailbox::{addr_spec, mailbox};
|
||||
use crate::fragments::misc_token::phrase;
|
||||
use crate::fragments::whitespace::{cfws};
|
||||
use crate::error::IMFError;
|
||||
|
||||
impl<'a> TryFrom<lazy::Mailbox<'a>> for MailboxRef {
|
||||
type Error = IMFError<'a>;
|
||||
|
||||
fn try_from(mx: lazy::Mailbox<'a>) -> Result<Self, Self::Error> {
|
||||
mailbox(mx.0)
|
||||
.map(|(_, m)| m)
|
||||
.map_err(|e| IMFError::Mailbox(e))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> TryFrom<lazy::MailboxList<'a>> for MailboxList {
|
||||
type Error = IMFError<'a>;
|
||||
|
||||
fn try_from(ml: lazy::MailboxList<'a>) -> Result<Self, Self::Error> {
|
||||
mailbox_list(ml.0)
|
||||
.map(|(_, m)| m)
|
||||
.map_err(|e| IMFError::MailboxList(e))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> TryFrom<lazy::AddressList<'a>> for AddressList {
|
||||
type Error = IMFError<'a>;
|
||||
|
||||
fn try_from(al: lazy::AddressList<'a>) -> Result<Self, Self::Error> {
|
||||
address_list(al.0)
|
||||
.map(|(_, a)| a)
|
||||
.map_err(|e| IMFError::AddressList(e))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> TryFrom<lazy::NullableAddressList<'a>> for AddressList {
|
||||
type Error = IMFError<'a>;
|
||||
|
||||
fn try_from(nal: lazy::NullableAddressList<'a>) -> Result<Self, Self::Error> {
|
||||
opt(alt((address_list, address_list_cfws)))(nal.0)
|
||||
.map(|(_, a)| a.unwrap_or(vec![]))
|
||||
.map_err(|e| IMFError::NullableAddressList(e))
|
||||
}
|
||||
}
|
||||
|
||||
/// Address (section 3.4 of RFC5322)
|
||||
///
|
||||
|
|
|
@ -11,11 +11,25 @@ use nom::{
|
|||
sequence::{preceded, terminated, tuple, delimited },
|
||||
};
|
||||
use crate::fragments::misc_token;
|
||||
use crate::fragments::lazy;
|
||||
use crate::fragments::whitespace::{fws, cfws};
|
||||
use crate::error::IMFError;
|
||||
|
||||
// NOTE(review): presumably the number of seconds in a minute — confirm against the offset code.
const MIN: i32 = 60;
|
||||
// Sixty `MIN` units, i.e. one hour expressed in the same unit as `MIN`.
const HOUR: i32 = 60 * MIN;
|
||||
|
||||
impl<'a> TryFrom<lazy::DateTime<'a>> for DateTime<FixedOffset> {
|
||||
type Error = IMFError<'a>;
|
||||
|
||||
fn try_from(value: lazy::DateTime<'a>) -> Result<Self, Self::Error> {
|
||||
match section(value.0) {
|
||||
Ok((_, Some(dt))) => Ok(dt),
|
||||
Err(e) => Err(IMFError::DateTimeParse(e)),
|
||||
_ => Err(IMFError::DateTimeLogic),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Read datetime
|
||||
///
|
||||
/// ```abnf
|
||||
|
|
|
@ -1,98 +0,0 @@
|
|||
use std::convert::From;
|
||||
|
||||
use nom::{
|
||||
IResult,
|
||||
bytes::complete::{take_while1, tag},
|
||||
character::complete::space0,
|
||||
sequence::{terminated, tuple},
|
||||
};
|
||||
|
||||
/// A header field whose name has been recognised but whose value is still the
/// raw text that followed the colon.
///
/// Section numbers refer to RFC 5322.
#[derive(Debug, PartialEq)]
pub enum Field<'a> {
    // 3.6.1. The Origination Date Field
    Date(&'a str),

    // 3.6.2. Originator Fields
    From(&'a str),
    Sender(&'a str),
    ReplyTo(&'a str),

    // 3.6.3. Destination Address Fields
    To(&'a str),
    Cc(&'a str),
    Bcc(&'a str),

    // 3.6.4. Identification Fields
    MessageID(&'a str),
    InReplyTo(&'a str),
    References(&'a str),

    // 3.6.5. Informational Fields
    Subject(&'a str),
    Comments(&'a str),
    Keywords(&'a str),

    // 3.6.6 Resent Fields (not implemented)
    // 3.6.7 Trace Fields
    Received(&'a str),
    ReturnPath(&'a str),

    // 3.6.8. Optional Fields
    /// Any other field: `(name as written, raw value)`.
    Optional(&'a str, &'a str),

    // None
    /// A line that could not be split into a field name and a value.
    Rescue(&'a str),
}
|
||||
use Field::*;
|
||||
|
||||
impl<'a> From<&'a str> for Field<'a> {
|
||||
fn from(input: &'a str) -> Self {
|
||||
match correct_field(input) {
|
||||
Ok((_, field)) => field,
|
||||
Err(_) => Rescue(input),
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Optional field
|
||||
///
|
||||
/// ```abnf
|
||||
/// field = field-name ":" unstructured CRLF
|
||||
/// field-name = 1*ftext
|
||||
/// ftext = %d33-57 / ; Printable US-ASCII
|
||||
/// %d59-126 ; characters not including
|
||||
/// ; ":".
|
||||
/// ```
|
||||
fn field_name(input: &str) -> IResult<&str, &str> {
|
||||
terminated(
|
||||
take_while1(|c| c >= '\x21' && c <= '\x7E' && c != '\x3A'),
|
||||
tuple((space0, tag(":"), space0))
|
||||
)(input)
|
||||
}
|
||||
|
||||
fn correct_field(input: &str) -> IResult<&str, Field> {
|
||||
field_name(input)
|
||||
.map(|(rest, name)| ("", match name.to_lowercase().as_ref() {
|
||||
"date" => Date(rest),
|
||||
|
||||
"from" => From(rest),
|
||||
"sender" => Sender(rest),
|
||||
"reply-to" => ReplyTo(rest),
|
||||
|
||||
"to" => To(rest),
|
||||
"cc" => Cc(rest),
|
||||
"bcc" => Bcc(rest),
|
||||
|
||||
"message-id" => MessageID(rest),
|
||||
"in-reply-to" => InReplyTo(rest),
|
||||
"references" => References(rest),
|
||||
|
||||
"subject" => Subject(rest),
|
||||
"comments" => Comments(rest),
|
||||
"keywords" => Keywords(rest),
|
||||
|
||||
"return-path" => ReturnPath(rest),
|
||||
"received" => Received(rest),
|
||||
|
||||
_ => Optional(name, rest),
|
||||
}))
|
||||
}
|
|
@ -4,13 +4,36 @@ use nom::{
|
|||
branch::alt,
|
||||
bytes::complete::{take_while, tag},
|
||||
combinator::opt,
|
||||
multi::many1,
|
||||
sequence::{delimited, pair, tuple},
|
||||
};
|
||||
|
||||
use crate::fragments::lazy;
|
||||
use crate::fragments::whitespace::cfws;
|
||||
use crate::fragments::words::dot_atom_text;
|
||||
use crate::fragments::mailbox::is_dtext;
|
||||
use crate::fragments::model::MessageId;
|
||||
use crate::fragments::model::{MessageId, MessageIdList};
|
||||
use crate::error::IMFError;
|
||||
|
||||
impl<'a> TryFrom<lazy::Identifier<'a>> for MessageId<'a> {
|
||||
type Error = IMFError<'a>;
|
||||
|
||||
fn try_from(id: lazy::Identifier<'a>) -> Result<Self, Self::Error> {
|
||||
msg_id(id.0)
|
||||
.map(|(_, i)| i)
|
||||
.map_err(|e| IMFError::MessageID(e))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> TryFrom<lazy::IdentifierList<'a>> for MessageIdList<'a> {
|
||||
type Error = IMFError<'a>;
|
||||
|
||||
fn try_from(id: lazy::IdentifierList<'a>) -> Result<Self, Self::Error> {
|
||||
many1(msg_id)(id.0)
|
||||
.map(|(_, i)| i)
|
||||
.map_err(|e| IMFError::MessageIDList(e))
|
||||
}
|
||||
}
|
||||
|
||||
/// Message identifier
|
||||
///
|
||||
|
|
131
src/fragments/lazy.rs
Normal file
131
src/fragments/lazy.rs
Normal file
|
@ -0,0 +1,131 @@
|
|||
use std::convert::From;
|
||||
|
||||
use nom::{
|
||||
IResult,
|
||||
bytes::complete::{take_while1, tag},
|
||||
character::complete::space0,
|
||||
sequence::{terminated, tuple},
|
||||
};
|
||||
|
||||
// Lazy value wrappers.
//
// Each wrapper holds the raw, still unparsed text of a header value and only
// records which grammar the text is expected to follow. They are thin views
// over a `&str`, so they are `Copy`: a value stored inside a borrowed field can
// be handed to a by-value conversion without moving it out of its owner.

/// Raw text expected to hold a date-time.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DateTime<'a>(pub &'a str);

/// Raw text expected to hold a list of mailboxes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MailboxList<'a>(pub &'a str);

/// Raw text expected to hold a single mailbox.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Mailbox<'a>(pub &'a str);

/// Raw text expected to hold a list of addresses.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AddressList<'a>(pub &'a str);

/// Raw text expected to hold a list of addresses that may be empty.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct NullableAddressList<'a>(pub &'a str);

/// Raw text expected to hold a single message identifier.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Identifier<'a>(pub &'a str);

/// Raw text expected to hold a list of message identifiers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct IdentifierList<'a>(pub &'a str);

/// Raw unstructured text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Unstructured<'a>(pub &'a str);

/// Raw text expected to hold a list of phrases.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PhraseList<'a>(pub &'a str);

/// Raw text of a `Received` trace value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ReceivedLog<'a>(pub &'a str);

/// Raw text of a path value.
// NOTE(review): presumably intended for `Return-Path` — confirm against callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Path<'a>(pub &'a str);
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum Field<'a> {
|
||||
// 3.6.1. The Origination Date Field
|
||||
Date(DateTime<'a>),
|
||||
|
||||
// 3.6.2. Originator Fields
|
||||
From(MailboxList<'a>),
|
||||
Sender(Mailbox<'a>),
|
||||
ReplyTo(AddressList<'a>),
|
||||
|
||||
// 3.6.3. Destination Address Fields
|
||||
To(AddressList<'a>),
|
||||
Cc(AddressList<'a>),
|
||||
Bcc(NullableAddressList<'a>),
|
||||
|
||||
// 3.6.4. Identification Fields
|
||||
MessageID(Identifier<'a>),
|
||||
InReplyTo(IdentifierList<'a>),
|
||||
References(IdentifierList<'a>),
|
||||
|
||||
// 3.6.5. Informational Fields
|
||||
Subject(Unstructured<'a>),
|
||||
Comments(Unstructured<'a>),
|
||||
Keywords(PhraseList<'a>),
|
||||
|
||||
// 3.6.6 Resent Fields (not implemented)
|
||||
// 3.6.7 Trace Fields
|
||||
Received(ReceivedLog<'a>),
|
||||
ReturnPath(Mailbox<'a>),
|
||||
|
||||
// 3.6.8. Optional Fields
|
||||
Optional(&'a str, &'a str),
|
||||
|
||||
// None
|
||||
Rescue(&'a str),
|
||||
}
|
||||
use Field::*;
|
||||
|
||||
impl<'a> From<&'a str> for Field<'a> {
|
||||
fn from(input: &'a str) -> Self {
|
||||
match correct_field(input) {
|
||||
Ok((_, field)) => field,
|
||||
Err(_) => Rescue(input),
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Optional field
|
||||
///
|
||||
/// ```abnf
|
||||
/// field = field-name ":" unstructured CRLF
|
||||
/// field-name = 1*ftext
|
||||
/// ftext = %d33-57 / ; Printable US-ASCII
|
||||
/// %d59-126 ; characters not including
|
||||
/// ; ":".
|
||||
/// ```
|
||||
fn field_name(input: &str) -> IResult<&str, &str> {
|
||||
terminated(
|
||||
take_while1(|c| c >= '\x21' && c <= '\x7E' && c != '\x3A'),
|
||||
tuple((space0, tag(":"), space0))
|
||||
)(input)
|
||||
}
|
||||
|
||||
fn correct_field(input: &str) -> IResult<&str, Field> {
|
||||
field_name(input)
|
||||
.map(|(rest, name)| ("", match name.to_lowercase().as_ref() {
|
||||
"date" => Date(DateTime(rest)),
|
||||
|
||||
"from" => From(MailboxList(rest)),
|
||||
"sender" => Sender(Mailbox(rest)),
|
||||
"reply-to" => ReplyTo(AddressList(rest)),
|
||||
|
||||
"to" => To(AddressList(rest)),
|
||||
"cc" => Cc(AddressList(rest)),
|
||||
"bcc" => Bcc(NullableAddressList(rest)),
|
||||
|
||||
"message-id" => MessageID(Identifier(rest)),
|
||||
"in-reply-to" => InReplyTo(IdentifierList(rest)),
|
||||
"references" => References(IdentifierList(rest)),
|
||||
|
||||
"subject" => Subject(Unstructured(rest)),
|
||||
"comments" => Comments(Unstructured(rest)),
|
||||
"keywords" => Keywords(PhraseList(rest)),
|
||||
|
||||
"return-path" => ReturnPath(Mailbox(rest)),
|
||||
"received" => Received(ReceivedLog(rest)),
|
||||
|
||||
_ => Optional(name, rest),
|
||||
}))
|
||||
}
|
|
@ -2,16 +2,41 @@ use std::borrow::Cow;
|
|||
use nom::{
|
||||
IResult,
|
||||
branch::alt,
|
||||
bytes::complete::take_while1,
|
||||
bytes::complete::{take_while1, tag},
|
||||
character::complete::space0,
|
||||
combinator::{into, opt},
|
||||
multi::{many0, many1},
|
||||
multi::{many0, many1, separated_list1},
|
||||
sequence::{pair, tuple},
|
||||
};
|
||||
|
||||
use crate::fragments::lazy;
|
||||
use crate::fragments::quoted::quoted_string;
|
||||
use crate::fragments::whitespace::{fws, is_obs_no_ws_ctl};
|
||||
use crate::fragments::words::{atom, is_vchar};
|
||||
use crate::error::IMFError;
|
||||
|
||||
// Owned result type for an unstructured header value.
type Unstructured = String;
|
||||
// Owned result type for a list of phrases, one `String` per phrase.
type PhraseList = Vec<String>;
|
||||
|
||||
impl<'a> TryFrom<lazy::Unstructured<'a>> for Unstructured {
|
||||
type Error = IMFError<'a>;
|
||||
|
||||
fn try_from(input: lazy::Unstructured<'a>) -> Result<Self, Self::Error> {
|
||||
unstructured(input.0)
|
||||
.map(|(_, v)| v)
|
||||
.map_err(|e| IMFError::Unstructured(e))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> TryFrom<lazy::PhraseList<'a>> for PhraseList {
|
||||
type Error = IMFError<'a>;
|
||||
|
||||
fn try_from(p: lazy::PhraseList<'a>) -> Result<Self, Self::Error> {
|
||||
separated_list1(tag(","), phrase)(p.0)
|
||||
.map(|(_, q)| q)
|
||||
.map_err(|e| IMFError::PhraseList(e))
|
||||
}
|
||||
}
|
||||
|
||||
/// Word
|
||||
///
|
||||
|
|
|
@ -13,7 +13,7 @@ mod address;
|
|||
mod identification;
|
||||
mod trace;
|
||||
mod datetime;
|
||||
pub mod field_raw;
|
||||
pub mod lazy;
|
||||
|
||||
// Header blocks
|
||||
pub mod header;
|
||||
|
|
|
@ -26,6 +26,7 @@ impl From<AddrSpec> for MailboxRef {
|
|||
}
|
||||
}
|
||||
}
|
||||
/// A parsed list of mailboxes.
pub type MailboxList = Vec<MailboxRef>;
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct GroupRef {
|
||||
|
@ -48,12 +49,14 @@ impl From<GroupRef> for AddressRef {
|
|||
AddressRef::Many(grp)
|
||||
}
|
||||
}
|
||||
/// A parsed list of addresses.
pub type AddressList = Vec<AddressRef>;
|
||||
|
||||
/// A message identifier split into its two halves, both borrowed from the
/// parsed input.
#[derive(Debug, PartialEq)]
pub struct MessageId<'a> {
    /// Left-hand part of the identifier.
    // NOTE(review): presumably the text before `@` — confirm against `msg_id`.
    pub left: &'a str,
    /// Right-hand part of the identifier.
    // NOTE(review): presumably the text after `@` — confirm against `msg_id`.
    pub right: &'a str,
}
|
||||
/// A parsed list of message identifiers borrowing from the same input.
pub type MessageIdList<'a> = Vec<MessageId<'a>>;
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum FieldBody<'a, T> {
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
pub mod segment;
|
||||
pub mod guess_charset;
|
||||
pub mod extract_fields;
|
||||
pub mod parse_field_names;
|
||||
pub mod parse_field_lazy;
|
||||
|
|
|
@ -1,15 +1,15 @@
|
|||
use crate::fragments::field_raw;
|
||||
use crate::fragments::lazy;
|
||||
use crate::multipass::extract_fields::ExtractFields;
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct ParseFieldName<'a> {
|
||||
pub fields: Vec<field_raw::Field<'a>>,
|
||||
pub struct Parsed<'a> {
|
||||
pub fields: Vec<lazy::Field<'a>>,
|
||||
pub body: &'a [u8],
|
||||
}
|
||||
|
||||
impl<'a> From <&'a ExtractFields<'a>> for ParseFieldName<'a> {
|
||||
impl<'a> From <&'a ExtractFields<'a>> for Parsed<'a> {
|
||||
fn from(ef: &'a ExtractFields<'a>) -> Self {
|
||||
ParseFieldName {
|
||||
Parsed {
|
||||
fields: ef.fields.iter().map(|e| (*e).into()).collect(),
|
||||
body: ef.body,
|
||||
}
|
||||
|
@ -22,17 +22,17 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_field_name() {
|
||||
assert_eq!(ParseFieldName::from(&ExtractFields {
|
||||
assert_eq!(Parsed::from(&ExtractFields {
|
||||
fields: vec![
|
||||
"From: hello@world.com,\r\n\talice@wonderlands.com\r\n",
|
||||
"Date: 12 Mar 1997 07:33:25 Z\r\n",
|
||||
],
|
||||
body: b"Hello world!",
|
||||
}),
|
||||
ParseFieldName {
|
||||
Parsed {
|
||||
fields: vec![
|
||||
field_raw::Field::From("hello@world.com,\r\n\talice@wonderlands.com\r\n"),
|
||||
field_raw::Field::Date("12 Mar 1997 07:33:25 Z\r\n"),
|
||||
lazy::Field::From(lazy::MailboxList("hello@world.com,\r\n\talice@wonderlands.com\r\n")),
|
||||
lazy::Field::Date(lazy::DateTime("12 Mar 1997 07:33:25 Z\r\n")),
|
||||
],
|
||||
body: b"Hello world!",
|
||||
});
|
Loading…
Reference in a new issue