working field parsing

This commit is contained in:
Quentin 2023-07-20 09:41:10 +02:00
parent cd5289c8c1
commit b7ad47b41e
Signed by: quentin
GPG key ID: E9602264D639FF68
5 changed files with 72 additions and 74 deletions

View file

@ -1,7 +1,7 @@
use nom::{ use nom::{
branch::alt, branch::alt,
bytes::complete::tag, bytes::complete::tag,
combinator::{into, opt}, combinator::{into, map, opt},
multi::separated_list1, multi::separated_list1,
sequence::tuple, sequence::tuple,
IResult, IResult,
@ -140,7 +140,7 @@ pub fn address_list_cfws(input: &[u8]) -> IResult<&[u8], Vec<AddressRef>> {
Ok((input, vec![])) Ok((input, vec![]))
} }
pub fn nullable_address_list(input: &[u8]) -> IResult<&[u8], Vec<>> { pub fn nullable_address_list(input: &[u8]) -> IResult<&[u8], Vec<AddressRef>> {
map( map(
opt(alt((address_list, address_list_cfws))), opt(alt((address_list, address_list_cfws))),
|v| v.unwrap_or(vec![]), |v| v.unwrap_or(vec![]),

View file

@ -1,32 +1,40 @@
use chrono::{DateTime, FixedOffset};
use nom::{ use nom::{
IResult, IResult,
branch::alt,
bytes::complete::{tag, tag_no_case, take_while1},
character::complete::space0,
combinator::map,
sequence::{pair, preceded, terminated, tuple},
}; };
use crate::rfc5322::address::{MailboxList, AddressList}; use crate::rfc5322::address::{AddressList, address_list, nullable_address_list, mailbox_list};
use crate::rfc5322::mailbox::MailboxRef; use crate::rfc5322::datetime::section as date;
use crate::rfc5322::identification::{MessageId, MessageIdList}; use crate::rfc5322::mailbox::{MailboxRef, MailboxList, AddrSpec, mailbox};
use crate::rfc5322::trace::ReceivedLog; use crate::rfc5322::identification::{MessageID, MessageIDList, msg_id, msg_list};
use crate::text::misc_token::{Unstructured, PhraseList}; use crate::rfc5322::trace::{ReceivedLog, return_path, received_log};
use crate::rfc5322::mime::{Version, version};
use crate::text::misc_token::{Unstructured, PhraseList, unstructured, phrase_list};
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum Field<'a> { pub enum Field<'a> {
// 3.6.1. The Origination Date Field // 3.6.1. The Origination Date Field
Date(DateTime<'a>), Date(Option<DateTime<FixedOffset>>),
// 3.6.2. Originator Fields // 3.6.2. Originator Fields
From(MailboxList<'a>), From(MailboxList<'a>),
Sender(Mailbox<'a>), Sender(MailboxRef<'a>),
ReplyTo(AddressList<'a>), ReplyTo(AddressList<'a>),
// 3.6.3. Destination Address Fields // 3.6.3. Destination Address Fields
To(AddressList<'a>), To(AddressList<'a>),
Cc(AddressList<'a>), Cc(AddressList<'a>),
Bcc(NullableAddressList<'a>), Bcc(AddressList<'a>),
// 3.6.4. Identification Fields // 3.6.4. Identification Fields
MessageID(Identifier<'a>), MessageID(MessageID<'a>),
InReplyTo(IdentifierList<'a>), InReplyTo(MessageIDList<'a>),
References(IdentifierList<'a>), References(MessageIDList<'a>),
// 3.6.5. Informational Fields // 3.6.5. Informational Fields
Subject(Unstructured<'a>), Subject(Unstructured<'a>),
@ -38,39 +46,46 @@ pub enum Field<'a> {
Received(ReceivedLog<'a>), Received(ReceivedLog<'a>),
ReturnPath(Option<AddrSpec<'a>>), ReturnPath(Option<AddrSpec<'a>>),
MIMEVersion(Version<'a>), MIMEVersion(Version),
Optional(&'a [u8], Unstructured<'a>),
} }
pub fn field(input: &[u8]) -> IResult<&[u8], Field<'a>> { pub fn field(input: &[u8]) -> IResult<&[u8], Field> {
let (name, rest) = field_name(input)?; alt((
match name.to_lowercase().as_ref() { preceded(field_name(b"date"), map(date, Field::Date)),
"date" => datetime::section(rest).map(Field::Date),
"from" => mailbox_list(rest).map(Field::From),
"sender" => mailbox(rest).map(Field::Sender),
"reply-to" => address_list(rest).map(Field::ReplyTo),
"to" => address_list(rest).map(Field::To), preceded(field_name(b"from"), map(mailbox_list, Field::From)),
"cc" => address_list(rest).map(Field::Cc), preceded(field_name(b"sender"), map(mailbox, Field::Sender)),
"bcc" => nullable_address_list(rest).map(Field::Bcc), preceded(field_name(b"reply-to"), map(address_list, Field::ReplyTo)),
"message-id" => msg_id(rest).map(Field::MessageID), preceded(field_name(b"to"), map(address_list, Field::To)),
"in-reply-to" => msg_list(rest).map(Field::InReplyTo), preceded(field_name(b"cc"), map(address_list, Field::Cc)),
"references" => msg_list(rest).map(Field::References), preceded(field_name(b"bcc"), map(nullable_address_list, Field::Bcc)),
"subject" => unstructured(rest).map(Field::Subject), preceded(field_name(b"message-id"), map(msg_id, Field::MessageID)),
"comments" => unstructured(rest).map(Field::Comments), preceded(field_name(b"in-reply-to"), map(msg_list, Field::InReplyTo)),
"keywords" => phrase_list(rest).map(Field::Keywords), preceded(field_name(b"references"), map(msg_list, Field::References)),
"return-path" => return_path(rest).map(Field::ReturnPath), preceded(field_name(b"subject"), map(unstructured, Field::Subject)),
"received" => received_log(rest).map(Field::ReceivedLog), preceded(field_name(b"comments"), map(unstructured, Field::Comments)),
preceded(field_name(b"keywords"), map(phrase_list, Field::Keywords)),
"mime-version" => version(rest).map(Field::MIMEVersion), preceded(field_name(b"return-path"), map(return_path, Field::ReturnPath)),
_ => unstructured(rest).map(|v| Field::Optional(name, v)), preceded(field_name(b"received"), map(received_log, Field::Received)),
}
preceded(field_name(b"mime-version"), map(version, Field::MIMEVersion)),
))(input)
} }
/// Builds a parser for one specific header name and its trailing separator.
///
/// The returned closure matches `name` case-insensitively, then consumes
/// optional spaces, a literal `:`, and optional spaces again. On success it
/// yields the bytes that matched `name`; the separator is discarded.
fn field_name<'a>(name: &'static [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
    move |input| {
        // The combinator is assembled inside the closure on every call.
        let separator = tuple((space0, tag(b":"), space0));
        let mut header = terminated(tag_no_case(name), separator);
        header(input)
    }
}
/// Optional field /// Optional field
/// ///
/// ```abnf /// ```abnf
@ -80,9 +95,12 @@ pub fn field(input: &[u8]) -> IResult<&[u8], Field<'a>> {
/// %d59-126 ; characters not including /// %d59-126 ; characters not including
/// ; ":". /// ; ":".
/// ``` /// ```
fn field_name(input: &[u8]) -> IResult<&[u8], &[u8]> { fn opt_field(input: &[u8]) -> IResult<&[u8], (&[u8], Unstructured)> {
pair(
terminated( terminated(
take_while1(|c| c >= 0x21 && c <= 0x7E && c != 0x3A), take_while1(|c| c >= 0x21 && c <= 0x7E && c != 0x3A),
tuple((space0, tag(b":"), space0)), tuple((space0, tag(b":"), space0)),
),
unstructured,
)(input) )(input)
} }

View file

@ -2,6 +2,7 @@ use nom::{
branch::alt, branch::alt,
bytes::complete::{tag, take_while}, bytes::complete::{tag, take_while},
combinator::opt, combinator::opt,
multi::many1,
sequence::{delimited, pair, tuple}, sequence::{delimited, pair, tuple},
IResult, IResult,
}; };
@ -12,27 +13,27 @@ use crate::text::words::dot_atom_text;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct MessageId<'a> { pub struct MessageID<'a> {
pub left: &'a [u8], pub left: &'a [u8],
pub right: &'a [u8], pub right: &'a [u8],
} }
pub type MessageIdList<'a> = Vec<MessageId<'a>>; pub type MessageIDList<'a> = Vec<MessageID<'a>>;
/// Message identifier /// Message identifier
/// ///
/// ```abnf /// ```abnf
/// msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS] /// msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS]
/// ``` /// ```
pub fn msg_id(input: &[u8]) -> IResult<&[u8], MessageId> { pub fn msg_id(input: &[u8]) -> IResult<&[u8], MessageID> {
let (input, (left, _, right)) = delimited( let (input, (left, _, right)) = delimited(
pair(opt(cfws), tag("<")), pair(opt(cfws), tag("<")),
tuple((id_left, tag("@"), id_right)), tuple((id_left, tag("@"), id_right)),
pair(tag(">"), opt(cfws)), pair(tag(">"), opt(cfws)),
)(input)?; )(input)?;
Ok((input, MessageId { left, right })) Ok((input, MessageID { left, right }))
} }
pub fn msg_list(input: &[u8]) -> IResult<&[u8], MessageIdList> { pub fn msg_list(input: &[u8]) -> IResult<&[u8], MessageIDList> {
many1(msg_id)(input) many1(msg_id)(input)
} }
@ -60,7 +61,7 @@ mod tests {
msg_id(b"<5678.21-Nov-1997@example.com>"), msg_id(b"<5678.21-Nov-1997@example.com>"),
Ok(( Ok((
&b""[..], &b""[..],
MessageId { MessageID {
left: &b"5678.21-Nov-1997"[..], left: &b"5678.21-Nov-1997"[..],
right: &b"example.com"[..], right: &b"example.com"[..],
} }

View file

@ -85,7 +85,7 @@ mod tests {
for <me@example.com>; Tue, 13 Jun 2023 19:01:08 +0000"#.as_bytes(); for <me@example.com>; Tue, 13 Jun 2023 19:01:08 +0000"#.as_bytes();
assert_eq!( assert_eq!(
received_body(hdrs), received_log(hdrs),
Ok(( Ok((
&b""[..], &b""[..],
ReceivedLog { ReceivedLog {

View file

@ -1,9 +1,9 @@
use nom::{ use nom::{
branch::alt, branch::alt,
bytes::complete::take_while1, bytes::complete::{tag, take_while1},
character::complete::space0, character::complete::space0,
combinator::{map, opt}, combinator::{map, opt},
multi::{many0, many1}, multi::{many0, many1, separated_list1},
sequence::{preceded}, sequence::{preceded},
IResult, IResult,
}; };
@ -17,32 +17,11 @@ use crate::text::{
}; };
#[derive(Debug, PartialEq, Default)] #[derive(Debug, PartialEq, Default)]
pub struct PhraseList(pub Vec<String>); pub struct PhraseList<'a>(pub Vec<Phrase<'a>>);
pub fn phrase_list(input: &'a [u8]) -> IResult<&[u8], PhraseList> { pub fn phrase_list(input: &[u8]) -> IResult<&[u8], PhraseList> {
separated_list1(tag(","), phrase)(input) map(separated_list1(tag(","), phrase), PhraseList)(input)
} }
/*
impl<'a> TryFrom<&'a lazy::Unstructured<'a>> for Unstructured {
type Error = IMFError<'a>;
fn try_from(input: &'a lazy::Unstructured<'a>) -> Result<Self, Self::Error> {
unstructured(input.0)
.map(|(_, v)| Unstructured(v))
.map_err(|e| IMFError::Unstructured(e))
}
}
impl<'a> TryFrom<&'a lazy::PhraseList<'a>> for PhraseList {
type Error = IMFError<'a>;
fn try_from(p: &'a lazy::PhraseList<'a>) -> Result<Self, Self::Error> {
separated_list1(tag(","), phrase)(p.0)
.map(|(_, q)| PhraseList(q))
.map_err(|e| IMFError::PhraseList(e))
}
}*/
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum Word<'a> { pub enum Word<'a> {
Quoted(QuotedString<'a>), Quoted(QuotedString<'a>),