extract common field logic to header

This commit is contained in:
Quentin 2023-07-20 16:26:59 +02:00
parent e2d9d03ef2
commit ec937bf11d
Signed by: quentin
GPG key ID: E9602264D639FF68
8 changed files with 84 additions and 371 deletions

55
src/header.rs Normal file
View file

@ -0,0 +1,55 @@
use nom::{
IResult,
bytes::complete::{tag_no_case, tag, take_while1},
character::complete::space0,
sequence::{pair, terminated, tuple},
};
use crate::text::misc_token::{Unstructured, unstructured};
/// One entry of a parsed header, generic over the type `T` used for
/// fields that were recognised.
#[derive(Debug, PartialEq)]
pub enum CompField<'a, T> {
/// A field that was parsed into its typed representation `T`.
Known(T),
/// A field kept as a raw name (byte slice) plus an unstructured value.
Unknown(&'a [u8], Unstructured<'a>),
/// Raw bytes kept untouched.
/// NOTE(review): presumably input that could not be parsed as a field at
/// all — confirm against the parser that builds this variant.
Bad(&'a [u8]),
}
/// Newtype around a `Vec` of [`CompField`] entries.
#[derive(Debug, PartialEq)]
pub struct CompFieldList<'a, T>(pub Vec<CompField<'a, T>>);
impl<'a, T> CompFieldList<'a, T> {
    /// Consumes the list and returns the payloads of its `Known` entries.
    ///
    /// `Unknown` and `Bad` entries are discarded; the relative order of the
    /// remaining values is the order they had in the list.
    pub fn known(self) -> Vec<T> {
        self.0
            .into_iter()
            // `filter_map` keeps and unwraps in a single pass instead of
            // building an `Option` stream and flattening it afterwards.
            .filter_map(|entry| match entry {
                CompField::Known(field) => Some(field),
                CompField::Unknown(..) | CompField::Bad(_) => None,
            })
            .collect()
    }
}
/// Builds a parser for one specific header field name.
///
/// The returned parser matches `name` case-insensitively, then consumes
/// optional spaces/tabs, a `:` and optional spaces/tabs again. On success it
/// yields the matched name bytes; the remaining input starts right after the
/// separator.
pub fn field_name<'a>(name: &'static [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
    move |input| {
        // `name` ":" with optional whitespace on both sides of the colon.
        let separator = tuple((space0, tag(b":"), space0));
        let mut parser = terminated(tag_no_case(name), separator);
        parser(input)
    }
}
/// Optional field
///
/// Parses any field name made of `ftext` bytes, the `:` separator (with
/// optional spaces/tabs around it) and an unstructured value. Yields the pair
/// `(name, value)`.
///
/// ```abnf
/// field       =   field-name ":" unstructured CRLF
/// field-name  =   1*ftext
/// ftext       =   %d33-57 /          ; Printable US-ASCII
///                 %d59-126           ;  characters not including
///                                    ;  ":".
/// ```
pub fn opt_field(input: &[u8]) -> IResult<&[u8], (&[u8], Unstructured)> {
    // Printable US-ASCII (0x21..=0x7E) with ':' (0x3A) carved out.
    let is_ftext = |c: u8| matches!(c, 0x21..=0x39 | 0x3B..=0x7E);
    let separator = tuple((space0, tag(b":"), space0));
    let name = terminated(take_while1(is_ftext), separator);
    let mut parser = pair(name, unstructured);
    parser(input)
}

View file

@ -1,27 +0,0 @@
use nom::{
self,
combinator::{all_consuming, recognize},
multi::many0,
sequence::terminated,
IResult,
};
use crate::text::whitespace::{foldable_line, line, obs_crlf};
/// Splits a message into its header lines and its body.
///
/// The header segment is isolated first with `segment`, then cut into
/// individual fields with `fields`. Returns the body as the remaining input
/// together with the list of raw fields.
pub fn headers(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
    segment(input).and_then(|(body, raw_headers)| {
        // What `fields` leaves over is dropped; only `body` is returned
        // as the remaining input.
        fields(raw_headers).map(|(_, lines)| (body, lines))
    })
}
// -- part 1, segment

/// Recognises the header segment: zero or more `line`s followed by
/// `obs_crlf`. Returns the bytes spanned by the lines; the terminating
/// `obs_crlf` is consumed but not included.
fn segment(input: &[u8]) -> IResult<&[u8], &[u8]> {
    let header_lines = recognize(many0(line));
    let mut header_segment = terminated(header_lines, obs_crlf);
    header_segment(input)
}
// -- part 2, isolate fields

/// Cuts a header segment into its `foldable_line`s.
///
/// `all_consuming` makes the parse fail unless the whole segment is covered
/// by the repeated `foldable_line` parser.
fn fields(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
    all_consuming(many0(foldable_line))(input)
}

View file

@ -1,5 +1,5 @@
pub mod error; pub mod error;
//pub mod mime;
pub mod headers;
pub mod text; pub mod text;
pub mod header;
pub mod rfc5322; pub mod rfc5322;
//pub mod mime;

23
src/mime/field.rs Normal file
View file

@ -0,0 +1,23 @@
/// MIME header fields, one variant per recognised `Content-*` field.
///
/// NOTE(review): `Unstructured` is written without a lifetime argument here,
/// unlike the other payloads — confirm which `Unstructured` type is meant and
/// whether it needs `<'a>`.
#[derive(Debug, PartialEq)]
pub enum Field<'a> {
/// Payload of a `content-type` field.
ContentType(Type<'a>),
/// Payload of a `content-transfer-encoding` field.
ContentTransferEncoding(Mechanism<'a>),
/// Payload of a `content-id` field.
ContentID(MessageId<'a>),
/// Payload of a `content-description` field.
ContentDescription(Unstructured),
}
fn correct_mime_field(input: &str) -> IResult<&str, MIMEField> {
use MIMEField::*;
field_name(input).map(|(rest, name)| {
(
"",
match name.to_lowercase().as_ref() {
"content-type" => ContentType(Type(rest)),
"content-transfer-encoding" => ContentTransferEncoding(Mechanism(rest)),
"content-id" => ContentID(Identifier(rest)),
"content-description" => ContentDescription(Unstructured(rest)),
_ => Optional(name, Unstructured(rest)),
}
)
})
}

1
src/mime/mod.rs Normal file
View file

@ -0,0 +1 @@
pub mod field;

View file

@ -1,103 +0,0 @@
use crate::error::IMFError;
use crate::fragments::lazy::{Field as Lazy, MIMEField as LazyMIME};
use crate::fragments::mime::{Mechanism, Type, Version};
use crate::fragments::misc_token::{PhraseList, Unstructured};
use crate::fragments::model::{AddressList, MailboxList, MailboxRef, MessageId, MessageIdList};
use crate::fragments::trace::ReceivedLog;
use chrono::{DateTime, FixedOffset};
/// A header field with a typed payload.
///
/// Each variant is produced from the `Lazy` variant of the same name by the
/// `TryFrom<&Lazy>` implementation. The numbered comments below look like
/// RFC 5322 section references — presumably; verify.
#[derive(Debug, PartialEq)]
pub enum Field<'a> {
// 3.6.1. The Origination Date Field
Date(DateTime<FixedOffset>),
// 3.6.2. Originator Fields
From(MailboxList),
Sender(MailboxRef),
ReplyTo(AddressList),
// 3.6.3. Destination Address Fields
To(AddressList),
Cc(AddressList),
Bcc(AddressList),
// 3.6.4. Identification Fields
MessageID(MessageId<'a>),
InReplyTo(MessageIdList<'a>),
References(MessageIdList<'a>),
// 3.6.5. Informational Fields
Subject(Unstructured),
Comments(Unstructured),
Keywords(PhraseList),
// 3.6.6 Resent Fields (not implemented)
// 3.6.7 Trace Fields
Received(ReceivedLog<'a>),
ReturnPath(MailboxRef),
// MIME RFC2045
MIMEVersion(Version),
MIME(MIMEField<'a>),
// 3.6.8. Optional Fields
// Field name plus its unstructured value.
Optional(&'a str, Unstructured),
// None
// Raw text carried over unchanged from `Lazy::Rescue`.
Rescue(&'a str),
}
/// A MIME header field with a typed payload, produced from the `LazyMIME`
/// variant of the same name by the `TryFrom<&LazyMIME>` implementation.
#[derive(Debug, PartialEq)]
pub enum MIMEField<'a> {
ContentType(Type<'a>),
ContentTransferEncoding(Mechanism<'a>),
ContentID(MessageId<'a>),
ContentDescription(Unstructured),
// Field name plus its unstructured value.
Optional(&'a str, Unstructured),
// Raw text carried over unchanged from `LazyMIME::Rescue`.
Rescue(&'a str),
}
impl<'a> TryFrom<&'a Lazy<'a>> for Field<'a> {
type Error = IMFError<'a>;
fn try_from(l: &'a Lazy<'a>) -> Result<Self, Self::Error> {
use Field::*;
match l {
Lazy::Date(v) => v.try_into().map(|v| Date(v)),
Lazy::From(v) => v.try_into().map(|v| From(v)),
Lazy::Sender(v) => v.try_into().map(|v| Sender(v)),
Lazy::ReplyTo(v) => v.try_into().map(|v| ReplyTo(v)),
Lazy::To(v) => v.try_into().map(|v| To(v)),
Lazy::Cc(v) => v.try_into().map(|v| Cc(v)),
Lazy::Bcc(v) => v.try_into().map(|v| Bcc(v)),
Lazy::MessageID(v) => v.try_into().map(|v| MessageID(v)),
Lazy::InReplyTo(v) => v.try_into().map(|v| InReplyTo(v)),
Lazy::References(v) => v.try_into().map(|v| References(v)),
Lazy::Subject(v) => v.try_into().map(|v| Subject(v)),
Lazy::Comments(v) => v.try_into().map(|v| Comments(v)),
Lazy::Keywords(v) => v.try_into().map(|v| Keywords(v)),
Lazy::Received(v) => v.try_into().map(|v| Received(v)),
Lazy::ReturnPath(v) => v.try_into().map(|v| ReturnPath(v)),
Lazy::MIMEVersion(v) => v.try_into().map(|v| MIMEVersion(v)),
Lazy::MIME(v) => v.try_into().map(|v| MIME(v)),
Lazy::Optional(k, v) => v.try_into().map(|v| Optional(k, v)),
Lazy::Rescue(v) => Ok(Rescue(v)),
}
}
}
impl<'a> TryFrom<&'a LazyMIME<'a>> for MIMEField<'a> {
type Error = IMFError<'a>;
fn try_from(l: &'a LazyMIME<'a>) -> Result<Self, Self::Error> {
use MIMEField::*;
match l {
LazyMIME::ContentType(v) => v.try_into().map(|v| ContentType(v)),
LazyMIME::ContentTransferEncoding(v) => v.try_into().map(|v| ContentTransferEncoding(v)),
LazyMIME::ContentID(v) => v.try_into().map(|v| ContentID(v)),
LazyMIME::ContentDescription(v) => v.try_into().map(|v| ContentDescription(v)),
LazyMIME::Optional(k, v) => v.try_into().map(|v| Optional(k, v)),
LazyMIME::Rescue(v) => Ok(Rescue(v)),
}
}
}

View file

@ -17,6 +17,7 @@ use crate::rfc5322::identification::{MessageID, MessageIDList, msg_id, msg_list}
use crate::rfc5322::trace::{ReceivedLog, return_path, received_log}; use crate::rfc5322::trace::{ReceivedLog, return_path, received_log};
use crate::rfc5322::mime::{Version, version}; use crate::rfc5322::mime::{Version, version};
use crate::rfc5322::message::Message; use crate::rfc5322::message::Message;
use crate::header::*;
use crate::text::misc_token::{Unstructured, PhraseList, unstructured, phrase_list}; use crate::text::misc_token::{Unstructured, PhraseList, unstructured, phrase_list};
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
@ -61,24 +62,6 @@ impl<'a> FieldList<'a> {
} }
} }
/// One entry of a parsed header.
#[derive(Debug, PartialEq)]
pub enum CompField<'a> {
/// A field that was parsed into a typed `Field`.
Known(Field<'a>),
/// A field kept as a raw name (byte slice) plus an unstructured value.
Unknown(&'a [u8], Unstructured<'a>),
/// Raw bytes kept untouched.
/// NOTE(review): presumably input that could not be parsed as a field at
/// all — confirm against the `header` parser.
Bad(&'a [u8]),
}
/// Newtype around a `Vec` of [`CompField`] entries.
#[derive(Debug, PartialEq)]
pub struct CompFieldList<'a>(pub Vec<CompField<'a>>);
impl<'a> CompFieldList<'a> {
    /// Consumes the list and builds a `Message` from its `Known` fields.
    ///
    /// `Unknown` and `Bad` entries are discarded; the remaining fields are
    /// fed to `Message::from_iter` in their original order.
    pub fn message(self) -> Message<'a> {
        // `filter_map` keeps and unwraps in a single pass instead of
        // building an `Option` stream and flattening it afterwards.
        Message::from_iter(self.0.into_iter().filter_map(|entry| match entry {
            CompField::Known(field) => Some(field),
            CompField::Unknown(..) | CompField::Bad(_) => None,
        }))
    }
}
pub fn field(input: &[u8]) -> IResult<&[u8], Field> { pub fn field(input: &[u8]) -> IResult<&[u8], Field> {
terminated(alt(( terminated(alt((
preceded(field_name(b"date"), map(date, Field::Date)), preceded(field_name(b"date"), map(date, Field::Date)),
@ -106,36 +89,7 @@ pub fn field(input: &[u8]) -> IResult<&[u8], Field> {
)), obs_crlf)(input) )), obs_crlf)(input)
} }
pub fn header(input: &[u8]) -> IResult<&[u8], CompFieldList<Field>> {
/// Builds a parser for one specific header field name.
///
/// The returned parser matches `name` case-insensitively, then consumes
/// optional spaces/tabs, a `:` and optional spaces/tabs again. On success it
/// yields the matched name bytes.
fn field_name<'a>(name: &'static [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
    move |input| {
        // `name` ":" with optional whitespace on both sides of the colon.
        let separator = tuple((space0, tag(b":"), space0));
        let mut parser = terminated(tag_no_case(name), separator);
        parser(input)
    }
}
/// Optional field
///
/// Parses any field name made of `ftext` bytes, the `:` separator (with
/// optional spaces/tabs around it) and an unstructured value. Yields the pair
/// `(name, value)`.
///
/// ```abnf
/// field       =   field-name ":" unstructured CRLF
/// field-name  =   1*ftext
/// ftext       =   %d33-57 /          ; Printable US-ASCII
///                 %d59-126           ;  characters not including
///                                    ;  ":".
/// ```
fn opt_field(input: &[u8]) -> IResult<&[u8], (&[u8], Unstructured)> {
    // Printable US-ASCII (0x21..=0x7E) with ':' (0x3A) carved out.
    let is_ftext = |c: u8| matches!(c, 0x21..=0x39 | 0x3B..=0x7E);
    let separator = tuple((space0, tag(b":"), space0));
    let name = terminated(take_while1(is_ftext), separator);
    let mut parser = pair(name, unstructured);
    parser(input)
}
pub fn header(input: &[u8]) -> IResult<&[u8], CompFieldList> {
map(terminated(many0(alt(( map(terminated(many0(alt((
map(field, CompField::Known), map(field, CompField::Known),
map(opt_field, |(k,v)| CompField::Unknown(k,v)), map(opt_field, |(k,v)| CompField::Unknown(k,v)),
@ -163,7 +117,7 @@ This is the plain text body of the message. Note the blank line
between the header information and the body of the message."; between the header information and the body of the message.";
assert_eq!( assert_eq!(
map(header, |v| v.message())(fullmail), map(header, |v| FieldList(v.known()).message())(fullmail),
Ok(( Ok((
&b"This is the plain text body of the message. Note the blank line\nbetween the header information and the body of the message."[..], &b"This is the plain text body of the message. Note the blank line\nbetween the header information and the body of the message."[..],
Message { Message {

View file

@ -1,190 +0,0 @@
use std::convert::From;
use nom::{
bytes::complete::{tag, take_while1},
character::complete::space0,
sequence::{terminated, tuple},
IResult,
};
// Newtype wrappers around a borrowed `&str`. `correct_field` and
// `correct_mime_field` fill them with the text that follows the field-name
// separator, without parsing it any further.

/// Raw value of a `date` field.
#[derive(Debug, PartialEq)]
pub struct DateTime<'a>(pub &'a str);
/// Raw value of a `from` field.
#[derive(Debug, PartialEq)]
pub struct MailboxList<'a>(pub &'a str);
/// Raw value of a `sender` or `return-path` field.
#[derive(Debug, PartialEq)]
pub struct Mailbox<'a>(pub &'a str);
/// Raw value of a `reply-to`, `to` or `cc` field.
#[derive(Debug, PartialEq)]
pub struct AddressList<'a>(pub &'a str);
/// Raw value of a `bcc` field.
#[derive(Debug, PartialEq)]
pub struct NullableAddressList<'a>(pub &'a str);
/// Raw value of a `message-id` or `content-id` field.
#[derive(Debug, PartialEq)]
pub struct Identifier<'a>(pub &'a str);
/// Raw value of an `in-reply-to` or `references` field.
#[derive(Debug, PartialEq)]
pub struct IdentifierList<'a>(pub &'a str);
/// Raw value of a `subject`, `comments`, `content-description` or
/// unrecognised field.
#[derive(Debug, PartialEq)]
pub struct Unstructured<'a>(pub &'a str);
/// Raw value of a `keywords` field.
#[derive(Debug, PartialEq)]
pub struct PhraseList<'a>(pub &'a str);
/// Raw value of a `received` field.
#[derive(Debug, PartialEq)]
pub struct ReceivedLog<'a>(pub &'a str);
/// Borrowed raw text; not constructed anywhere in the visible code.
#[derive(Debug, PartialEq)]
pub struct Path<'a>(pub &'a str);
/// Raw value of a `mime-version` field.
#[derive(Debug, PartialEq)]
pub struct Version<'a>(pub &'a str);
/// Raw value of a `content-type` field.
#[derive(Debug, PartialEq)]
pub struct Type<'a>(pub &'a str);
/// Raw value of a `content-transfer-encoding` field.
#[derive(Debug, PartialEq)]
pub struct Mechanism<'a>(pub &'a str);
/// A header field identified by name only: the payload is the still-unparsed
/// text following the field-name separator.
///
/// Built from a raw line through `From<&str>`, which falls back to `Rescue`
/// when `correct_field` fails.
#[derive(Debug, PartialEq)]
pub enum Field<'a> {
// 3.6.1. The Origination Date Field
Date(DateTime<'a>),
// 3.6.2. Originator Fields
From(MailboxList<'a>),
Sender(Mailbox<'a>),
ReplyTo(AddressList<'a>),
// 3.6.3. Destination Address Fields
To(AddressList<'a>),
Cc(AddressList<'a>),
Bcc(NullableAddressList<'a>),
// 3.6.4. Identification Fields
MessageID(Identifier<'a>),
InReplyTo(IdentifierList<'a>),
References(IdentifierList<'a>),
// 3.6.5. Informational Fields
Subject(Unstructured<'a>),
Comments(Unstructured<'a>),
Keywords(PhraseList<'a>),
// 3.6.6 Resent Fields (not implemented)
// 3.6.7 Trace Fields
Received(ReceivedLog<'a>),
ReturnPath(Mailbox<'a>),
// MIME RFC 2045
MIMEVersion(Version<'a>),
MIME(MIMEField<'a>),
// 3.6.8. Optional Fields
// Field name as written in the input, plus its raw value.
Optional(&'a str, Unstructured<'a>),
// None
// The whole input line, kept when no field name could be parsed.
Rescue(&'a str),
}
impl<'a> From<&'a str> for Field<'a> {
    /// Classifies a raw header line; never fails.
    ///
    /// When `correct_field` cannot parse the line, the whole input is kept
    /// in `Field::Rescue`.
    fn from(input: &'a str) -> Self {
        correct_field(input).map_or_else(|_| Field::Rescue(input), |(_, field)| field)
    }
}
/// A MIME header field identified by name only: the payload is the
/// still-unparsed text following the field-name separator.
///
/// Built from a raw line through `From<&str>`, which falls back to `Rescue`
/// when `correct_mime_field` fails.
#[derive(Debug, PartialEq)]
pub enum MIMEField<'a> {
ContentType(Type<'a>),
ContentTransferEncoding(Mechanism<'a>),
ContentID(Identifier<'a>),
ContentDescription(Unstructured<'a>),
// Field name as written in the input, plus its raw value.
Optional(&'a str, Unstructured<'a>),
// The whole input line, kept when no field name could be parsed.
Rescue(&'a str),
}
impl<'a> From<&'a str> for MIMEField<'a> {
    /// Classifies a raw MIME header line; never fails.
    ///
    /// When `correct_mime_field` cannot parse the line, the whole input is
    /// kept in `MIMEField::Rescue`.
    fn from(input: &'a str) -> Self {
        correct_mime_field(input).map_or_else(|_| MIMEField::Rescue(input), |(_, field)| field)
    }
}
/// Field name of an optional field, including its `:` separator.
///
/// Consumes one or more `ftext` characters, then optional spaces/tabs, a `:`
/// and optional spaces/tabs. Yields the name; the remaining input is the
/// field value.
///
/// ```abnf
/// field       =   field-name ":" unstructured CRLF
/// field-name  =   1*ftext
/// ftext       =   %d33-57 /          ; Printable US-ASCII
///                 %d59-126           ;  characters not including
///                                    ;  ":".
/// ```
fn field_name(input: &str) -> IResult<&str, &str> {
    // Printable US-ASCII ('\x21'..='\x7E') with ':' ('\x3A') carved out.
    let is_ftext = |c: char| matches!(c, '\x21'..='\x39' | '\x3B'..='\x7E');
    let separator = tuple((space0, tag(":"), space0));
    let mut name = terminated(take_while1(is_ftext), separator);
    name(input)
}
fn correct_field(input: &str) -> IResult<&str, Field> {
use Field::*;
field_name(input).map(|(rest, name)| {
(
"",
match name.to_lowercase().as_ref() {
"date" => Date(DateTime(rest)),
"from" => From(MailboxList(rest)),
"sender" => Sender(Mailbox(rest)),
"reply-to" => ReplyTo(AddressList(rest)),
"to" => To(AddressList(rest)),
"cc" => Cc(AddressList(rest)),
"bcc" => Bcc(NullableAddressList(rest)),
"message-id" => MessageID(Identifier(rest)),
"in-reply-to" => InReplyTo(IdentifierList(rest)),
"references" => References(IdentifierList(rest)),
"subject" => Subject(Unstructured(rest)),
"comments" => Comments(Unstructured(rest)),
"keywords" => Keywords(PhraseList(rest)),
"return-path" => ReturnPath(Mailbox(rest)),
"received" => Received(ReceivedLog(rest)),
"content-type" => MIME(MIMEField::ContentType(Type(rest))),
"content-transfer-encoding" => MIME(MIMEField::ContentTransferEncoding(Mechanism(rest))),
"content-id" => MIME(MIMEField::ContentID(Identifier(rest))),
"content-description" => MIME(MIMEField::ContentDescription(Unstructured(rest))),
"mime-version" => MIMEVersion(Version(rest)),
_ => Optional(name, Unstructured(rest)),
},
)
})
}
fn correct_mime_field(input: &str) -> IResult<&str, MIMEField> {
use MIMEField::*;
field_name(input).map(|(rest, name)| {
(
"",
match name.to_lowercase().as_ref() {
"content-type" => ContentType(Type(rest)),
"content-transfer-encoding" => ContentTransferEncoding(Mechanism(rest)),
"content-id" => ContentID(Identifier(rest)),
"content-description" => ContentDescription(Unstructured(rest)),
_ => Optional(name, Unstructured(rest)),
}
)
})
}