refactor, mod rfc5322 to imf

This commit is contained in:
Quentin 2023-07-24 11:02:49 +02:00
parent 28873ce073
commit 63892af012
Signed by: quentin
GPG key ID: E9602264D639FF68
16 changed files with 46 additions and 44 deletions

View file

@ -3,6 +3,24 @@
**⚠️ Work in progress, do not use in production**
**⚠️ This is currently only a decoder (parser), encoding is not yet implemented.**
## Example
```rust
let input = br#"
Date: 7 Mar 2023 08:00:00 +0200
From: deuxfleurs@example.com
To: someone_else@example.com
Subject: An RFC 822 formatted message
MIME-Version: 1.0
Content-Type: text/plain; charset=us-ascii
This is the plain text body of the message. Note the blank line
between the header information and the body of the message."#;
let email = eml_codec::email(input).unwrap();
println!("{} just sent you an email with subject \"{}\", email.1.
```
## About the name
This library does not aim to implement a specific RFC, but to be a Swiss Army knife to decode and encode ("codec") what is generally considered an email (generally abbreviated "eml"), hence the name: **eml-codec**.
@ -18,8 +36,7 @@ This library does not aim at implementing a specific RFC, but to be a swiss-army
- Parsing optimizations that would make the logic more complicated to understand.
- Optimization for a specific use case, to the detriment of other use cases.
- Pipelining/streaming/buffering as the parser can arbitrarily backtrack + our result contains reference to the whole buffer, imf-codec must keep the whole buffer in memory. Avoiding the sequential approach would certainly speed-up a little bit the parsing, but it's too much work to implement currently.
- Zerocopy. It might be implementable in the future, but to quickly bootstrap this project, I avoided it for now.
- Pipelining/streaming/buffering: as the parser can arbitrarily backtrack and our result contains references to the whole buffer, eml-codec must keep the whole buffer in memory. Avoiding the sequential approach would certainly speed up the parsing a little bit, but it's too much work to implement currently.
## Missing / known bugs
@ -37,7 +54,7 @@ Speak about parser combinators.
## Testing strategy
imf-codec aims to be as much tested as possible against reald
eml-codec aims to be tested as much as possible against real-world data.
### Unit testing: parser combinator independently (done)

View file

@ -1,8 +0,0 @@
Test enron files:
```
cd resources/enron
./restore.sh
cd -
(set -euo pipefail; find resources/enron/maildir/ -type f | while read f; do echo $f; ./target/debug/imf_parse < $f > /dev/null; done)
```

View file

@ -1,7 +0,0 @@
Date: 7 Mar 2023 08:00:00 +0200
From: someone@example.com
To: someone_else@example.com
Subject: An RFC 822 formatted message
This is the plain text body of the message. Note the blank line
between the header information and the body of the message.

View file

@ -8,7 +8,7 @@ use nom::{
};
//use crate::error::IMFError;
use crate::rfc5322::mailbox::{mailbox, MailboxRef};
use crate::imf::mailbox::{mailbox, MailboxRef};
use crate::text::misc_token::{phrase, Phrase};
use crate::text::whitespace::cfws;
@ -109,7 +109,7 @@ pub fn nullable_address_list(input: &[u8]) -> IResult<&[u8], Vec<AddressRef>> {
#[cfg(test)]
mod tests {
use super::*;
use crate::rfc5322::mailbox::{AddrSpec, Domain, LocalPart, LocalPartToken};
use crate::imf::mailbox::{AddrSpec, Domain, LocalPart, LocalPartToken};
use crate::text::misc_token::{Phrase, Word};
#[test]

View file

@ -7,13 +7,13 @@ use nom::{
};
use crate::header::{field_name, header};
use crate::rfc5322::address::{address_list, mailbox_list, nullable_address_list, AddressList};
use crate::rfc5322::datetime::section as date;
use crate::rfc5322::identification::{msg_id, msg_list, MessageID, MessageIDList};
use crate::rfc5322::mailbox::{mailbox, AddrSpec, MailboxList, MailboxRef};
use crate::rfc5322::message::Message;
use crate::rfc5322::mime::{version, Version};
use crate::rfc5322::trace::{received_log, return_path, ReceivedLog};
use crate::imf::address::{address_list, mailbox_list, nullable_address_list, AddressList};
use crate::imf::datetime::section as date;
use crate::imf::identification::{msg_id, msg_list, MessageID, MessageIDList};
use crate::imf::mailbox::{mailbox, AddrSpec, MailboxList, MailboxRef};
use crate::imf::message::Message;
use crate::imf::mime::{version, Version};
use crate::imf::trace::{received_log, return_path, ReceivedLog};
use crate::text::misc_token::{phrase_list, unstructured, PhraseList, Unstructured};
use crate::text::whitespace::obs_crlf;
@ -95,8 +95,8 @@ pub fn message(input: &[u8]) -> IResult<&[u8], Message> {
#[cfg(test)]
mod tests {
use super::*;
use crate::rfc5322::address::*;
use crate::rfc5322::mailbox::*;
use crate::imf::address::*;
use crate::imf::mailbox::*;
use crate::text::misc_token::*;
use chrono::{FixedOffset, TimeZone};

View file

@ -8,7 +8,7 @@ use nom::{
IResult,
};
use crate::rfc5322::mailbox::is_dtext;
use crate::imf::mailbox::is_dtext;
use crate::text::whitespace::cfws;
use crate::text::words::dot_atom_text;

View file

@ -1,9 +1,9 @@
use crate::rfc5322::address::AddressRef;
use crate::rfc5322::field::Field;
use crate::rfc5322::identification::MessageID;
use crate::rfc5322::mailbox::{AddrSpec, MailboxRef};
use crate::rfc5322::mime::Version;
use crate::rfc5322::trace::ReceivedLog;
use crate::imf::address::AddressRef;
use crate::imf::field::Field;
use crate::imf::identification::MessageID;
use crate::imf::mailbox::{AddrSpec, MailboxRef};
use crate::imf::mime::Version;
use crate::imf::trace::ReceivedLog;
use crate::text::misc_token::{PhraseList, Unstructured};
use chrono::{DateTime, FixedOffset};

View file

@ -8,7 +8,7 @@ use nom::{
IResult,
};
use crate::rfc5322::{datetime, mailbox};
use crate::imf::{datetime, mailbox};
use crate::text::{ascii, misc_token, whitespace};
#[derive(Debug, PartialEq)]
@ -75,7 +75,7 @@ fn received_tokens(input: &[u8]) -> IResult<&[u8], ReceivedLogToken> {
#[cfg(test)]
mod tests {
use super::*;
use crate::rfc5322::trace::misc_token::Word;
use crate::imf::trace::misc_token::Word;
use chrono::TimeZone;
#[test]

View file

@ -2,7 +2,7 @@ mod error;
mod header;
mod mime;
mod part;
mod rfc5322;
mod imf;
mod text;
pub fn email(input: &[u8]) -> Result<part::part::Message, error::EMLError> {
@ -11,8 +11,8 @@ pub fn email(input: &[u8]) -> Result<part::part::Message, error::EMLError> {
.map_err(error::EMLError::ParseError)
}
pub fn imf(input: &[u8]) -> Result<rfc5322::message::Message, error::EMLError> {
rfc5322::field::message(input)
pub fn imf(input: &[u8]) -> Result<imf::message::Message, error::EMLError> {
imf::field::message(input)
.map(|(_, v)| v)
.map_err(error::EMLError::ParseError)
}

View file

@ -9,7 +9,7 @@ use crate::header::{field_name, CompFieldList};
use crate::mime::mechanism::{mechanism, Mechanism};
use crate::mime::mime::AnyMIME;
use crate::mime::r#type::{naive_type, NaiveType};
use crate::rfc5322::identification::{msg_id, MessageID};
use crate::imf::identification::{msg_id, MessageID};
use crate::text::misc_token::{unstructured, Unstructured};
use crate::text::whitespace::obs_crlf;

View file

@ -1,7 +1,7 @@
use crate::mime::field::Content;
use crate::mime::mechanism::Mechanism;
use crate::mime::r#type::{self as ctype, AnyType};
use crate::rfc5322::identification::MessageID;
use crate::imf::identification::MessageID;
use crate::text::misc_token::Unstructured; //Multipart, Message, Text, Binary};
#[derive(Debug, PartialEq, Clone)]

View file

@ -11,7 +11,7 @@ use nom::{
use crate::header::{header, CompFieldList};
use crate::mime;
use crate::mime::mime::AnyMIME;
use crate::rfc5322::{self as imf};
use crate::imf::{self as imf};
use crate::text::ascii::CRLF;
use crate::text::boundary::{boundary, Delimiter};
use crate::text::whitespace::obs_crlf;