parse rfc5322 headers!

This commit is contained in:
Quentin 2023-07-20 15:51:13 +02:00
parent 46dacb62c3
commit e2d9d03ef2
Signed by: quentin
GPG key ID: E9602264D639FF68
2 changed files with 112 additions and 23 deletions

View file

@ -5,15 +5,18 @@ use nom::{
bytes::complete::{tag, tag_no_case, take_while1}, bytes::complete::{tag, tag_no_case, take_while1},
character::complete::space0, character::complete::space0,
combinator::map, combinator::map,
multi::many0,
sequence::{pair, preceded, terminated, tuple}, sequence::{pair, preceded, terminated, tuple},
}; };
use crate::text::whitespace::{obs_crlf, foldable_line};
use crate::rfc5322::address::{AddressList, address_list, nullable_address_list, mailbox_list}; use crate::rfc5322::address::{AddressList, address_list, nullable_address_list, mailbox_list};
use crate::rfc5322::datetime::section as date; use crate::rfc5322::datetime::section as date;
use crate::rfc5322::mailbox::{MailboxRef, MailboxList, AddrSpec, mailbox}; use crate::rfc5322::mailbox::{MailboxRef, MailboxList, AddrSpec, mailbox};
use crate::rfc5322::identification::{MessageID, MessageIDList, msg_id, msg_list}; use crate::rfc5322::identification::{MessageID, MessageIDList, msg_id, msg_list};
use crate::rfc5322::trace::{ReceivedLog, return_path, received_log}; use crate::rfc5322::trace::{ReceivedLog, return_path, received_log};
use crate::rfc5322::mime::{Version, version}; use crate::rfc5322::mime::{Version, version};
use crate::rfc5322::message::Message;
use crate::text::misc_token::{Unstructured, PhraseList, unstructured, phrase_list}; use crate::text::misc_token::{Unstructured, PhraseList, unstructured, phrase_list};
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
@ -49,8 +52,35 @@ pub enum Field<'a> {
MIMEVersion(Version), MIMEVersion(Version),
} }
/// A list of header [`Field`] values, wrapped in a newtype so that it can
/// be converted into a [`Message`].
#[derive(Debug, PartialEq)]
pub struct FieldList<'a>(pub Vec<Field<'a>>);
impl<'a> FieldList<'a> {
pub fn message(self) -> Message<'a> {
Message::from_iter(self.0)
}
}
/// One entry of a header section as produced by `header`: either a field
/// this module knows how to parse, or a fallback for anything else.
#[derive(Debug, PartialEq)]
pub enum CompField<'a> {
/// A field recognized and parsed by `field`.
Known(Field<'a>),
/// The pair returned by `opt_field`: raw bytes plus an unstructured value.
/// NOTE(review): the byte slice is presumably the field name — confirm
/// against `opt_field`.
Unknown(&'a [u8], Unstructured<'a>),
/// The raw bytes matched by `foldable_line` when neither `field` nor
/// `opt_field` accepted the input.
Bad(&'a [u8]),
}
/// A list of [`CompField`] entries, as returned by `header`; it may mix
/// known fields with unknown or unparsable ones.
#[derive(Debug, PartialEq)]
pub struct CompFieldList<'a>(pub Vec<CompField<'a>>);
impl<'a> CompFieldList<'a> {
    /// Consumes the list and builds a [`Message`] from its recognized fields.
    ///
    /// Only [`CompField::Known`] entries contribute to the result;
    /// [`CompField::Unknown`] and [`CompField::Bad`] entries are dropped.
    pub fn message(self) -> Message<'a> {
        self.0
            .into_iter()
            .filter_map(|entry| match entry {
                CompField::Known(field) => Some(field),
                // Spelled out (no `_` arm) so that adding a variant to
                // `CompField` forces an explicit decision here.
                CompField::Unknown(..) | CompField::Bad(_) => None,
            })
            .collect()
    }
}
pub fn field(input: &[u8]) -> IResult<&[u8], Field> { pub fn field(input: &[u8]) -> IResult<&[u8], Field> {
alt(( terminated(alt((
preceded(field_name(b"date"), map(date, Field::Date)), preceded(field_name(b"date"), map(date, Field::Date)),
preceded(field_name(b"from"), map(mailbox_list, Field::From)), preceded(field_name(b"from"), map(mailbox_list, Field::From)),
@ -73,7 +103,7 @@ pub fn field(input: &[u8]) -> IResult<&[u8], Field> {
preceded(field_name(b"received"), map(received_log, Field::Received)), preceded(field_name(b"received"), map(received_log, Field::Received)),
preceded(field_name(b"mime-version"), map(version, Field::MIMEVersion)), preceded(field_name(b"mime-version"), map(version, Field::MIMEVersion)),
))(input) )), obs_crlf)(input)
} }
@ -105,4 +135,63 @@ fn opt_field(input: &[u8]) -> IResult<&[u8], (&[u8], Unstructured)> {
)(input) )(input)
} }
// @TODO write a parse header function pub fn header(input: &[u8]) -> IResult<&[u8], CompFieldList> {
map(terminated(many0(alt((
map(field, CompField::Known),
map(opt_field, |(k,v)| CompField::Unknown(k,v)),
map(foldable_line, CompField::Bad),
))), obs_crlf), CompFieldList)(input)
}
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::{DateTime, FixedOffset, TimeZone};
    use crate::rfc5322::mailbox::*;
    use crate::rfc5322::address::*;
    use crate::text::misc_token::*;

    /// Parses a small but complete message and checks both the fields
    /// collected into the `Message` and the body left unparsed.
    #[test]
    fn test_header() {
        // Line breaks are written as explicit `\n` escapes (with `\` line
        // continuations) so that the empty line separating the header
        // section from the body cannot be lost to whitespace handling.
        let fullmail = b"Date: 7 Mar 2023 08:00:00 +0200\n\
            From: someone@example.com\n\
            To: someone_else@example.com\n\
            Subject: An RFC 822 formatted message\n\
            \n\
            This is the plain text body of the message. Note the blank line\n\
            between the header information and the body of the message.";

        assert_eq!(
            map(header, |v| v.message())(fullmail),
            Ok((
                &b"This is the plain text body of the message. Note the blank line\nbetween the header information and the body of the message."[..],
                Message {
                    date: Some(FixedOffset::east_opt(2 * 3600).unwrap().with_ymd_and_hms(2023, 3, 7, 8, 0, 0).unwrap()),
                    from: vec![MailboxRef {
                        name: None,
                        addrspec: AddrSpec {
                            local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"someone"[..]))]),
                            domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]),
                        }
                    }],
                    to: vec![AddressRef::Single(MailboxRef {
                        name: None,
                        addrspec: AddrSpec {
                            local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"someone_else"[..]))]),
                            domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]),
                        }
                    })],
                    subject: Some(Unstructured(vec![
                        UnstrToken::Plain(&b"An"[..]),
                        UnstrToken::Plain(&b"RFC"[..]),
                        UnstrToken::Plain(&b"822"[..]),
                        UnstrToken::Plain(&b"formatted"[..]),
                        UnstrToken::Plain(&b"message"[..]),
                    ])),
                    ..Message::default()
                }
            )),
        )
    }
}

View file

@ -2,7 +2,7 @@ use crate::text::misc_token::{PhraseList, Unstructured};
use crate::rfc5322::mime::Version; use crate::rfc5322::mime::Version;
use crate::rfc5322::mailbox::{AddrSpec, MailboxRef}; use crate::rfc5322::mailbox::{AddrSpec, MailboxRef};
use crate::rfc5322::address::{AddressRef}; use crate::rfc5322::address::{AddressRef};
use crate::rfc5322::identification::{MessageID, MessageIDList}; use crate::rfc5322::identification::{MessageID};
use crate::rfc5322::field::Field; use crate::rfc5322::field::Field;
use crate::rfc5322::trace::ReceivedLog; use crate::rfc5322::trace::ReceivedLog;
use chrono::{DateTime, FixedOffset}; use chrono::{DateTime, FixedOffset};
@ -13,43 +13,43 @@ pub struct Message<'a> {
pub date: Option<DateTime<FixedOffset>>, pub date: Option<DateTime<FixedOffset>>,
// 3.6.2. Originator Fields // 3.6.2. Originator Fields
pub from: Vec<&'a MailboxRef<'a>>, pub from: Vec<MailboxRef<'a>>,
pub sender: Option<&'a MailboxRef<'a>>, pub sender: Option<MailboxRef<'a>>,
pub reply_to: Vec<&'a AddressRef<'a>>, pub reply_to: Vec<AddressRef<'a>>,
// 3.6.3. Destination Address Fields // 3.6.3. Destination Address Fields
pub to: Vec<&'a AddressRef<'a>>, pub to: Vec<AddressRef<'a>>,
pub cc: Vec<&'a AddressRef<'a>>, pub cc: Vec<AddressRef<'a>>,
pub bcc: Vec<&'a AddressRef<'a>>, pub bcc: Vec<AddressRef<'a>>,
// 3.6.4. Identification Fields // 3.6.4. Identification Fields
pub msg_id: Option<&'a MessageID<'a>>, pub msg_id: Option<MessageID<'a>>,
pub in_reply_to: Vec<&'a MessageID<'a>>, pub in_reply_to: Vec<MessageID<'a>>,
pub references: Vec<&'a MessageID<'a>>, pub references: Vec<MessageID<'a>>,
// 3.6.5. Informational Fields // 3.6.5. Informational Fields
pub subject: Option<&'a Unstructured<'a>>, pub subject: Option<Unstructured<'a>>,
pub comments: Vec<&'a Unstructured<'a>>, pub comments: Vec<Unstructured<'a>>,
pub keywords: Vec<&'a PhraseList<'a>>, pub keywords: Vec<PhraseList<'a>>,
// 3.6.6 Not implemented // 3.6.6 Not implemented
// 3.6.7 Trace Fields // 3.6.7 Trace Fields
pub return_path: Vec<&'a AddrSpec<'a>>, pub return_path: Vec<AddrSpec<'a>>,
pub received: Vec<&'a ReceivedLog<'a>>, pub received: Vec<ReceivedLog<'a>>,
// MIME // MIME
pub mime_version: Option<&'a Version>, pub mime_version: Option<Version>,
} }
//@FIXME min and max limits are not enforced, //@FIXME min and max limits are not enforced,
// it may result in missing data or silently overriden data. // it may result in missing data or silently overriden data.
impl<'a> FromIterator<&'a Field<'a>> for Message<'a> { impl<'a> FromIterator<Field<'a>> for Message<'a> {
fn from_iter<I: IntoIterator<Item = &'a Field<'a>>>(iter: I) -> Self { fn from_iter<I: IntoIterator<Item = Field<'a>>>(iter: I) -> Self {
iter.into_iter().fold( iter.into_iter().fold(
Message::default(), Message::default(),
|mut section, field| { |mut section, field| {
match field { match field {
Field::Date(v) => section.date = *v, Field::Date(v) => section.date = v,
Field::From(v) => section.from.extend(v), Field::From(v) => section.from.extend(v),
Field::Sender(v) => section.sender = Some(v), Field::Sender(v) => section.sender = Some(v),
Field::ReplyTo(v) => section.reply_to.extend(v), Field::ReplyTo(v) => section.reply_to.extend(v),
@ -62,7 +62,7 @@ impl<'a> FromIterator<&'a Field<'a>> for Message<'a> {
Field::Subject(v) => section.subject = Some(v), Field::Subject(v) => section.subject = Some(v),
Field::Comments(v) => section.comments.push(v), Field::Comments(v) => section.comments.push(v),
Field::Keywords(v) => section.keywords.push(v), Field::Keywords(v) => section.keywords.push(v),
Field::ReturnPath(v) => v.as_ref().map(|x| section.return_path.push(x)).unwrap_or(()), Field::ReturnPath(v) => v.map(|x| section.return_path.push(x)).unwrap_or(()),
Field::Received(v) => section.received.push(v), Field::Received(v) => section.received.push(v),
Field::MIMEVersion(v) => section.mime_version = Some(v), Field::MIMEVersion(v) => section.mime_version = Some(v),
}; };