refactor common fields

This commit is contained in:
Quentin 2023-06-13 19:35:41 +02:00
parent 09a6c6acdf
commit f22da50c9b
Signed by: quentin
GPG key ID: E9602264D639FF68
5 changed files with 142 additions and 157 deletions

View file

@ -7,13 +7,13 @@ use nom::{
character::complete::space0, character::complete::space0,
combinator::opt, combinator::opt,
multi::{many0, many1, fold_many0, separated_list1}, multi::{many0, many1, fold_many0, separated_list1},
sequence::tuple, sequence::{terminated, preceded, pair, tuple},
}; };
use crate::whitespace::{fws, perm_crlf}; use crate::whitespace::{fws, perm_crlf};
use crate::words::vchar_seq; use crate::words::vchar_seq;
use crate::misc_token::{phrase, unstructured}; use crate::misc_token::{phrase, unstructured};
use crate::model::{PermissiveHeaderSection, HeaderDate, MailboxRef, AddressRef}; use crate::model::{CommonFields, HeaderDate, MailboxRef, AddressRef};
use crate::mailbox::mailbox; use crate::mailbox::mailbox;
use crate::address::{mailbox_list, address_list, address_list_cfws}; use crate::address::{mailbox_list, address_list, address_list_cfws};
use crate::identification::msg_id; use crate::identification::msg_id;
@ -24,10 +24,10 @@ use crate::model;
/// Header section /// Header section
/// ///
/// See: https://www.rfc-editor.org/rfc/rfc5322.html#section-2.2 /// See: https://www.rfc-editor.org/rfc/rfc5322.html#section-2.2
pub fn section(input: &str) -> IResult<&str, PermissiveHeaderSection> { pub fn section(input: &str) -> IResult<&str, CommonFields> {
let (input, headers) = fold_many0( let (input, headers) = fold_many0(
header_field, alt((header_field, unknown_field)),
PermissiveHeaderSection::default, CommonFields::default,
|mut section, head| { |mut section, head| {
match head { match head {
//@FIXME min and max limits are not enforced, //@FIXME min and max limits are not enforced,
@ -99,7 +99,6 @@ pub fn section(input: &str) -> IResult<&str, PermissiveHeaderSection> {
HeaderField::Optional(name, body) => { HeaderField::Optional(name, body) => {
section.optional.insert(name, body); section.optional.insert(name, body);
} }
_ => unimplemented!(),
}; };
section section
} }
@ -110,7 +109,7 @@ pub fn section(input: &str) -> IResult<&str, PermissiveHeaderSection> {
} }
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
enum HeaderField<'a> { pub enum HeaderField<'a> {
// 3.6.1. The Origination Date Field // 3.6.1. The Origination Date Field
Date(HeaderDate), Date(HeaderDate),
@ -138,11 +137,107 @@ enum HeaderField<'a> {
Optional(&'a str, String) Optional(&'a str, String)
} }
/// Parse one header field pub fn field_name(input: &str) -> IResult<&str, &str> {
terminated(
take_while1(|c| c >= '\x21' && c <= '\x7E' && c != '\x3A'),
pair(tag(":"), space0)
)(input)
}
/// Parse one known header field
/// ///
/// RFC5322 optional-field seems to be a generalization of the field terminology. /// RFC5322 optional-field seems to be a generalization of the field terminology.
/// We use it to parse all header names: /// We use it to parse all header names:
/// pub fn header_field(input: &str) -> IResult<&str, HeaderField> {
terminated(
alt((
// 3.6.1. The Origination Date Field
date,
// 3.6.2. Originator Fields
alt((from, sender, reply_to)),
// 3.6.3. Destination Address Fields
alt((to, cc, bcc)),
// 3.6.4. Identification Fields
alt((msg_id_field, in_reply_to, references)),
// 3.6.5. Informational Fields
alt((subject, comments, keywords)),
)),
perm_crlf,
)(input)
}
// 3.6.1. The Origination Date Field
/// Parse a `Date:` header field (RFC 5322 §3.6.1).
///
/// Matches the literal `Date:` prefix followed by optional whitespace
/// (`space0`), then hands the rest to `datetime`. The parsed value is
/// wrapped in `HeaderField::Date`; any parser error is returned unchanged.
fn date(input: &str) -> IResult<&str, HeaderField> {
    preceded(pair(tag("Date:"), space0), datetime)(input)
        .map(|(rest, parsed)| (rest, HeaderField::Date(parsed)))
}
// 3.6.2. Originator Fields
/// Parse a `From:` header field (RFC 5322 §3.6.2).
///
/// Matches the literal `From:` prefix and optional whitespace, then parses
/// the body with `mailbox_list` and wraps it in `HeaderField::From`.
/// Parser errors are returned unchanged.
fn from(input: &str) -> IResult<&str, HeaderField> {
    preceded(pair(tag("From:"), space0), mailbox_list)(input)
        .map(|(rest, mailboxes)| (rest, HeaderField::From(mailboxes)))
}
/// Parse a `Sender:` header field (RFC 5322 §3.6.2).
///
/// Matches the literal `Sender:` prefix and optional whitespace, then parses
/// a single `mailbox` and wraps it in `HeaderField::Sender`.
/// Parser errors are returned unchanged.
fn sender(input: &str) -> IResult<&str, HeaderField> {
    preceded(pair(tag("Sender:"), space0), mailbox)(input)
        .map(|(rest, who)| (rest, HeaderField::Sender(who)))
}
/// Parse a `Reply-To:` header field (RFC 5322 §3.6.2).
///
/// Matches the literal `Reply-To:` prefix and optional whitespace, then
/// parses the body with `address_list` and wraps it in
/// `HeaderField::ReplyTo`. Parser errors are returned unchanged.
fn reply_to(input: &str) -> IResult<&str, HeaderField> {
    preceded(pair(tag("Reply-To:"), space0), address_list)(input)
        .map(|(rest, addresses)| (rest, HeaderField::ReplyTo(addresses)))
}
// 3.6.3. Destination Address Fields
/// Parse a `To:` header field (RFC 5322 §3.6.3).
///
/// Matches the literal `To:` prefix and optional whitespace, then parses
/// the body with `address_list` and wraps it in `HeaderField::To`.
/// Parser errors are returned unchanged.
fn to(input: &str) -> IResult<&str, HeaderField> {
    preceded(pair(tag("To:"), space0), address_list)(input)
        .map(|(rest, addresses)| (rest, HeaderField::To(addresses)))
}
/// Parse a `Cc:` header field (RFC 5322 §3.6.3).
///
/// Matches the literal `Cc:` prefix and optional whitespace, then parses
/// the body with `address_list` and wraps it in `HeaderField::Cc`.
/// Parser errors are returned unchanged.
fn cc(input: &str) -> IResult<&str, HeaderField> {
    preceded(pair(tag("Cc:"), space0), address_list)(input)
        .map(|(rest, addresses)| (rest, HeaderField::Cc(addresses)))
}
/// Parse a `Bcc:` header field (RFC 5322 §3.6.3).
///
/// Matches the literal `Bcc:` prefix and optional whitespace. The body is
/// optional: it is tried first as `address_list`, then as
/// `address_list_cfws`; when neither matches, the field is recorded with an
/// empty address list. The result is wrapped in `HeaderField::Bcc`.
fn bcc(input: &str) -> IResult<&str, HeaderField> {
    let prefix = pair(tag("Bcc:"), space0);
    let recipients = opt(alt((address_list, address_list_cfws)));

    preceded(prefix, recipients)(input)
        .map(|(rest, maybe_list)| (rest, HeaderField::Bcc(maybe_list.unwrap_or_default())))
}
// 3.6.4. Identification Fields
/// Parse a `Message-ID:` header field (RFC 5322 §3.6.4).
///
/// Matches the literal `Message-ID:` prefix and optional whitespace, then
/// parses a single `msg_id` and wraps it in `HeaderField::MessageID`.
/// Parser errors are returned unchanged.
fn msg_id_field(input: &str) -> IResult<&str, HeaderField> {
    preceded(pair(tag("Message-ID:"), space0), msg_id)(input)
        .map(|(rest, id)| (rest, HeaderField::MessageID(id)))
}
/// Parse an `In-Reply-To:` header field (RFC 5322 §3.6.4).
///
/// Matches the literal `In-Reply-To:` prefix and optional whitespace, then
/// parses one or more `msg_id`s (`many1`) and wraps them in
/// `HeaderField::InReplyTo`. Parser errors are returned unchanged.
fn in_reply_to(input: &str) -> IResult<&str, HeaderField> {
    preceded(pair(tag("In-Reply-To:"), space0), many1(msg_id))(input)
        .map(|(rest, ids)| (rest, HeaderField::InReplyTo(ids)))
}
/// Parse a `References:` header field (RFC 5322 §3.6.4).
///
/// Matches the literal `References:` prefix and optional whitespace, then
/// parses one or more `msg_id`s (`many1`) and wraps them in
/// `HeaderField::References`. Parser errors are returned unchanged.
fn references(input: &str) -> IResult<&str, HeaderField> {
    preceded(pair(tag("References:"), space0), many1(msg_id))(input)
        .map(|(rest, ids)| (rest, HeaderField::References(ids)))
}
// 3.6.5. Informational Fields
/// Parse a `Subject:` header field (RFC 5322 §3.6.5).
///
/// Matches the literal `Subject:` prefix and optional whitespace, then
/// parses the body with `unstructured` and wraps it in
/// `HeaderField::Subject`. Parser errors are returned unchanged.
fn subject(input: &str) -> IResult<&str, HeaderField> {
    preceded(pair(tag("Subject:"), space0), unstructured)(input)
        .map(|(rest, text)| (rest, HeaderField::Subject(text)))
}
/// Parse a `Comments:` header field (RFC 5322 §3.6.5).
///
/// Matches the literal `Comments:` prefix and optional whitespace, then
/// parses the body with `unstructured` and wraps it in
/// `HeaderField::Comments`. Parser errors are returned unchanged.
fn comments(input: &str) -> IResult<&str, HeaderField> {
    preceded(pair(tag("Comments:"), space0), unstructured)(input)
        .map(|(rest, text)| (rest, HeaderField::Comments(text)))
}
/// Parse a `Keywords:` header field (RFC 5322 §3.6.5).
///
/// Matches the literal `Keywords:` prefix and optional whitespace, then
/// parses one or more `phrase`s separated by `,` (`separated_list1`) and
/// wraps them in `HeaderField::Keywords`. Parser errors are returned
/// unchanged.
fn keywords(input: &str) -> IResult<&str, HeaderField> {
    let prefix = pair(tag("Keywords:"), space0);
    let phrases = separated_list1(tag(","), phrase);

    preceded(prefix, phrases)(input)
        .map(|(rest, words)| (rest, HeaderField::Keywords(words)))
}
/// Optional field
///
/// ```abnf /// ```abnf
/// field = field-name ":" unstructured CRLF /// field = field-name ":" unstructured CRLF
/// field-name = 1*ftext /// field-name = 1*ftext
@ -150,111 +245,12 @@ enum HeaderField<'a> {
/// %d59-126 ; characters not including /// %d59-126 ; characters not including
/// ; ":". /// ; ":".
/// ``` /// ```
fn header_field(input: &str) -> IResult<&str, HeaderField> { fn unknown_field(input: &str) -> IResult<&str, HeaderField> {
// Extract field name // Extract field name
let (input, field_name) = take_while1(|c| c >= '\x21' && c <= '\x7E' && c != '\x3A')(input)?; let (input, field_name) = take_while1(|c| c >= '\x21' && c <= '\x7E' && c != '\x3A')(input)?;
let (input, _) = tuple((tag(":"), space0))(input)?; let (input, _) = tuple((tag(":"), space0))(input)?;
let (input, body) = unstructured(input)?;
// Extract field body Ok((input, HeaderField::Optional(field_name, body)))
let (input, hfield) = match field_name {
// 3.6.1. The Origination Date Field
"Date" => {
let (input, body) = datetime(input)?;
Ok((input, HeaderField::Date(body)))
}
// 3.6.2. Originator Fields
"From" => {
let (input, body) = mailbox_list(input)?;
(input, HeaderField::From(body))
},
"Sender" => {
let (input, body) = mailbox(input)?;
(input, HeaderField::Sender(body))
},
"Reply-To" => {
let (input, body) = address_list(input)?;
(input, HeaderField::ReplyTo(body))
}
// 3.6.3. Destination Address Fields
"To" => {
let (input, body) = address_list(input)?;
(input, HeaderField::To(body))
},
"Cc" => {
let (input, body) = address_list(input)?;
(input, HeaderField::Cc(body))
},
"Bcc" => {
let (input, body) = opt(alt((address_list, address_list_cfws)))(input)?;
(input, HeaderField::Bcc(body.unwrap_or(vec![])))
},
// 3.6.4. Identification Fields
"Message-ID" => {
let (input, body) = msg_id(input)?;
(input, HeaderField::MessageID(body))
},
"In-Reply-To" => {
let (input, body) = many1(msg_id)(input)?;
(input, HeaderField::InReplyTo(body))
},
"References" => {
let (input, body) = many1(msg_id)(input)?;
(input, HeaderField::References(body))
},
// 3.6.5. Informational Fields
"Subject" => {
let (input, body) = unstructured(input)?;
(input, HeaderField::Subject(body))
},
"Comments" => {
let (input, body) = unstructured(input)?;
(input, HeaderField::Comments(body))
}
"Keywords" => {
let (input, body) = separated_list1(tag(","), phrase)(input)?;
(input, HeaderField::Keywords(body))
}
// 3.6.6. Resent Fields
"Resent-Date" => {
let (input, body) = datetime(input)?;
Ok((input, HeaderField::ResentDate(body)))
}
"Resent-From" => {
unimplemented!();
}
"Resent-Sender" => {
unimplemented!();
}
"Resent-To" => {
unimplemented!();
}
"Resent-Cc" => {
unimplemented!();
}
"Resent-Bcc" => {
unimplemented!();
}
"Resent-Message-ID" => {
unimplemented!();
}
// 3.6.7. Trace Fields
// 3.6.8. Optional Fields
_ => {
let (input, body) = unstructured(input)?;
(input, HeaderField::Optional(field_name, body))
}
};
// Drop EOL
let (input, _) = perm_crlf(input)?;
return Ok((input, hfield));
} }
fn datetime(input: &str) -> IResult<&str, HeaderDate> { fn datetime(input: &str) -> IResult<&str, HeaderDate> {
@ -262,8 +258,8 @@ fn datetime(input: &str) -> IResult<&str, HeaderDate> {
// to better handle obsolete/bad cases instead of returning raw text. // to better handle obsolete/bad cases instead of returning raw text.
let (input, raw_date) = unstructured(input)?; let (input, raw_date) = unstructured(input)?;
match DateTime::parse_from_rfc2822(&raw_date) { match DateTime::parse_from_rfc2822(&raw_date) {
Ok(chronodt) => HeaderDate::Parsed(chronodt), Ok(chronodt) => Ok((input, HeaderDate::Parsed(chronodt))),
Err(e) => HeaderDate::Unknown(raw_date, e), Err(e) => Ok((input, HeaderDate::Unknown(raw_date, e))),
} }
} }
@ -275,8 +271,8 @@ mod tests {
// 3.6.1. The Origination Date Field // 3.6.1. The Origination Date Field
#[test] #[test]
fn test_datetime() { fn test_datetime() {
let datefield = "Thu,\r\n 13\r\n Feb\r\n 1969\r\n 23:32\r\n -0330 (Newfoundland Time)"; let datefield = "Date: Thu,\r\n 13\r\n Feb\r\n 1969\r\n 23:32\r\n -0330 (Newfoundland Time)\r\n";
let (input, v) = datetime(datefield).unwrap(); let (input, v) = header_field(datefield).unwrap();
assert_eq!(input, ""); assert_eq!(input, "");
match v { match v {
HeaderField::Date(HeaderDate::Parsed(_)) => (), HeaderField::Date(HeaderDate::Parsed(_)) => (),
@ -402,24 +398,33 @@ mod tests {
} }
// 3.6.5. Informational Fields // 3.6.5. Informational Fields
#[test]
fn test_subject() { fn test_subject() {
assert_eq!( assert_eq!(
header_field("Subject: Aérogramme\r\n"), header_field("Subject: Aérogramme\r\n"),
Ok(("", HeaderField::Subject("Aérogramme".into()))) Ok(("", HeaderField::Subject("Aérogramme".into())))
); );
} }
#[test]
fn test_comments() { fn test_comments() {
assert_eq!( assert_eq!(
header_field("Comments: 😛 easter egg!\r\n"), header_field("Comments: 😛 easter egg!\r\n"),
Ok(("", HeaderField::Comments("😛 easter egg!".into()))) Ok(("", HeaderField::Comments("😛 easter egg!".into())))
); );
} }
#[test]
fn test_keywords() { fn test_keywords() {
assert_eq!( assert_eq!(
header_field("Keywords: fantasque, farfelu, fanfreluche\r\n"), header_field("Keywords: fantasque, farfelu, fanfreluche\r\n"),
Ok(("", HeaderField::Keywords(vec!["fantasque".into(), "farfelu".into(), "fanfreluche".into()]))) Ok(("", HeaderField::Keywords(vec!["fantasque".into(), "farfelu".into(), "fanfreluche".into()])))
); );
} }
// Test invalid field name
#[test]
fn test_invalid_field_name() {
assert!(header_field("Unknown: unknown\r\n").is_err());
}
} }

View file

@ -1,16 +1,16 @@
use nom::{ use nom::{
IResult, IResult,
multi::many0, multi::many0,
} };
use crate::{common_fields, trace, whitespace}; use crate::{common_fields, trace, whitespace, model};
pub fn section(input: &str) -> IResult(&str, HeaderSection) { pub fn section(input: &str) -> IResult<&str, model::HeaderSection> {
let (input, traces) = many0(trace::section)(input)?; let (input, traces) = many0(trace::section)(input)?;
let (input, common) = common_fields::section(input)?; let (input, common) = common_fields::section(input)?;
let (input, _) = whitespace::perm_crlf(input)?; let (input, _) = whitespace::perm_crlf(input)?;
Ok((input, HeaderSection { traces, common })) Ok((input, model::HeaderSection { traces, common }))
} }

View file

@ -64,7 +64,7 @@ pub struct MessageId<'a> {
} }
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct Trace { pub struct Trace<'a> {
// 3.6.7 Traces // 3.6.7 Traces
pub received: Vec<String>, pub received: Vec<String>,
pub return_path: Option<String>, pub return_path: Option<String>,
@ -123,31 +123,8 @@ pub struct CommonFields<'a> {
pub struct HeaderSection<'a> { pub struct HeaderSection<'a> {
// 3.6.7 Traces // 3.6.7 Traces
pub traces: Vec<Trace>, pub traces: Vec<Trace<'a>>,
// 3.6.x // 3.6.x
pub common: CommonFields, pub common: CommonFields<'a>,
}
enum InvalidEmailErr {
NoUsableDate,
}
impl<'a> PermissiveHeaderSection<'a> {
/// Check validity of the email
///
/// Especially check that there is no missing fields,
/// or no unique fields declared multiple times.
///
/// See: https://www.rfc-editor.org/rfc/rfc5322#section-3.6
//@FIXME could be changed to a to_StrictHeaderSection call. All fixed errors would be returned in
// a vec of errors.
fn is_valid(&self) -> Result<(), InvalidEmailErr> {
match self.date {
HeaderDate::Parsed(_) => (),
_ => return Err(InvalidEmailErr::NoUsableDate),
};
Ok(())
}
} }

View file

@ -1,4 +1,4 @@
use imf_codec::common_headers; use imf_codec::common_fields;
fn main() { fn main() {
let header = r#"Date: Fri, 21 Nov 1997 10:01:10 -0600 let header = r#"Date: Fri, 21 Nov 1997 10:01:10 -0600
@ -21,5 +21,5 @@ References: <1234@local.machine.example>
This is a reply to your hello. This is a reply to your hello.
"#; "#;
println!("{:?}", common_headers::section(header)); println!("{:?}", common_fields::section(header));
} }

View file

@ -1,24 +1,24 @@
use nom::{ use nom::{
IResult, IResult,
} };
use crate::model; use crate::model;
enum RestField<'a> { enum RestField<'a> {
// 3.6.6. Resent Fields // 3.6.6. Resent Fields
ResentDate(HeaderDate), ResentDate(model::HeaderDate),
ResentFrom(Vec<MailboxRef>), ResentFrom(Vec<model::MailboxRef>),
ResentSender(MailboxRef), ResentSender(model::MailboxRef),
ResentTo(Vec<AddressRef>), ResentTo(Vec<model::AddressRef>),
ResentCc(Vec<AddressRef>), ResentCc(Vec<model::AddressRef>),
ResentBcc(Vec<AddressRef>), ResentBcc(Vec<model::AddressRef>),
ResentMessageID(model::MessageId<'a>), ResentMessageID(model::MessageId<'a>),
// 3.6.8. Optional fields // 3.6.8. Optional fields
Optional(&'a str, String), Optional(&'a str, String),
} }
enum PreludeField<'a> { enum PreludeField {
// 3.6.7. Trace Fields // 3.6.7. Trace Fields
ReturnPath(String), ReturnPath(String),
Received(Vec<String>), Received(Vec<String>),
@ -41,15 +41,16 @@ enum PreludeField<'a> {
/// ``` /// ```
pub fn section(input: &str) -> IResult<&str, model::Trace> { pub fn section(input: &str) -> IResult<&str, model::Trace> {
let (input, mut prelude_trace) = prelude(input)?; let (input, mut prelude_trace) = prelude(input)?;
let (input, full_trace) = fold_many0( /*let (input, full_trace) = fold_many0(
rest_field, rest_field,
prelude_trace, prelude_trace,
|mut trace, field| { |mut trace, field| {
match field { match field {
} }
} }*/
unimplemented!();
} }
/// Trace prelude /// Trace prelude
@ -63,8 +64,10 @@ pub fn section(input: &str) -> IResult<&str, model::Trace> {
/// received-token = word / angle-addr / addr-spec / domain /// received-token = word / angle-addr / addr-spec / domain
/// ``` /// ```
fn prelude(input: &str) -> IResult<&str, model::Trace> { fn prelude(input: &str) -> IResult<&str, model::Trace> {
unimplemented!();
} }
fn rest_field(input: &str) -> IResult<&str, RestField> { fn rest_field(input: &str) -> IResult<&str, RestField> {
unimplemented!();
// Ensure this is not a new prelude // Ensure this is not a new prelude
} }