refactor, mod rfc5322 to imf
This commit is contained in:
parent
28873ce073
commit
63892af012
16 changed files with 46 additions and 44 deletions
23
README.md
23
README.md
|
@ -3,6 +3,24 @@
|
|||
**⚠️ Work in progress, do not use in production**
|
||||
**⚠️ This is currently only a decoder (parser), encoding is not yet implemented.**
|
||||
|
||||
## Example
|
||||
|
||||
```rust
|
||||
let input = br#"
|
||||
Date: 7 Mar 2023 08:00:00 +0200
|
||||
From: deuxfleurs@example.com
|
||||
To: someone_else@example.com
|
||||
Subject: An RFC 822 formatted message
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=us-ascii
|
||||
|
||||
This is the plain text body of the message. Note the blank line
|
||||
between the header information and the body of the message."#;
|
||||
|
||||
let email = eml_codec::email(input).unwrap();
|
||||
println!("{} just sent you an email with subject \"{}\", email.1.
|
||||
```
|
||||
|
||||
## About the name
|
||||
|
||||
This library does not aim to implement a specific RFC, but to be a swiss-army knife to decode and encode ("codec") what is generally considered an email (generally abbreviated "eml"), hence the name: **eml-codec**.
|
||||
|
@ -18,8 +36,7 @@ This library does not aim at implementing a specific RFC, but to be a swiss-army
|
|||
|
||||
- Parsing optimizations that would make the logic more complicated to understand.
|
||||
- Optimization for a specific use case, to the detriment of other use cases.
|
||||
- Pipelining/streaming/buffering as the parser can arbitrarily backtrack + our result contains reference to the whole buffer, imf-codec must keep the whole buffer in memory. Avoiding the sequential approach would certainly speed-up a little bit the parsing, but it's too much work to implement currently.
|
||||
- Zerocopy. It might be implementable in the future, but to quickly bootstrap this project, I avoided it for now.
|
||||
- Pipelining/streaming/buffering: as the parser can arbitrarily backtrack and our result contains references to the whole buffer, eml-codec must keep the whole buffer in memory. Avoiding the sequential approach would certainly speed up the parsing a little bit, but it's too much work to implement currently.
|
||||
|
||||
## Missing / known bugs
|
||||
|
||||
|
@ -37,7 +54,7 @@ Speak about parser combinators.
|
|||
|
||||
## Testing strategy
|
||||
|
||||
imf-codec aims to be as much tested as possible against reald
|
||||
eml-codec aims to be tested as much as possible against real-world data.
|
||||
|
||||
### Unit testing: parser combinator independently (done)
|
||||
|
||||
|
|
8
notes.md
8
notes.md
|
@ -1,8 +0,0 @@
|
|||
Test enron files:
|
||||
|
||||
```
|
||||
cd resources/enron
|
||||
./restore.sh
|
||||
cd -
|
||||
(set -euo pipefail; find resources/enron/maildir/ -type f | while read f; do echo $f; ./target/debug/imf_parse < $f > /dev/null; done)
|
||||
```
|
|
@ -1,7 +0,0 @@
|
|||
Date: 7 Mar 2023 08:00:00 +0200
|
||||
From: someone@example.com
|
||||
To: someone_else@example.com
|
||||
Subject: An RFC 822 formatted message
|
||||
|
||||
This is the plain text body of the message. Note the blank line
|
||||
between the header information and the body of the message.
|
|
@ -8,7 +8,7 @@ use nom::{
|
|||
};
|
||||
|
||||
//use crate::error::IMFError;
|
||||
use crate::rfc5322::mailbox::{mailbox, MailboxRef};
|
||||
use crate::imf::mailbox::{mailbox, MailboxRef};
|
||||
use crate::text::misc_token::{phrase, Phrase};
|
||||
use crate::text::whitespace::cfws;
|
||||
|
||||
|
@ -109,7 +109,7 @@ pub fn nullable_address_list(input: &[u8]) -> IResult<&[u8], Vec<AddressRef>> {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::rfc5322::mailbox::{AddrSpec, Domain, LocalPart, LocalPartToken};
|
||||
use crate::imf::mailbox::{AddrSpec, Domain, LocalPart, LocalPartToken};
|
||||
use crate::text::misc_token::{Phrase, Word};
|
||||
|
||||
#[test]
|
|
@ -7,13 +7,13 @@ use nom::{
|
|||
};
|
||||
|
||||
use crate::header::{field_name, header};
|
||||
use crate::rfc5322::address::{address_list, mailbox_list, nullable_address_list, AddressList};
|
||||
use crate::rfc5322::datetime::section as date;
|
||||
use crate::rfc5322::identification::{msg_id, msg_list, MessageID, MessageIDList};
|
||||
use crate::rfc5322::mailbox::{mailbox, AddrSpec, MailboxList, MailboxRef};
|
||||
use crate::rfc5322::message::Message;
|
||||
use crate::rfc5322::mime::{version, Version};
|
||||
use crate::rfc5322::trace::{received_log, return_path, ReceivedLog};
|
||||
use crate::imf::address::{address_list, mailbox_list, nullable_address_list, AddressList};
|
||||
use crate::imf::datetime::section as date;
|
||||
use crate::imf::identification::{msg_id, msg_list, MessageID, MessageIDList};
|
||||
use crate::imf::mailbox::{mailbox, AddrSpec, MailboxList, MailboxRef};
|
||||
use crate::imf::message::Message;
|
||||
use crate::imf::mime::{version, Version};
|
||||
use crate::imf::trace::{received_log, return_path, ReceivedLog};
|
||||
use crate::text::misc_token::{phrase_list, unstructured, PhraseList, Unstructured};
|
||||
use crate::text::whitespace::obs_crlf;
|
||||
|
||||
|
@ -95,8 +95,8 @@ pub fn message(input: &[u8]) -> IResult<&[u8], Message> {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::rfc5322::address::*;
|
||||
use crate::rfc5322::mailbox::*;
|
||||
use crate::imf::address::*;
|
||||
use crate::imf::mailbox::*;
|
||||
use crate::text::misc_token::*;
|
||||
use chrono::{FixedOffset, TimeZone};
|
||||
|
|
@ -8,7 +8,7 @@ use nom::{
|
|||
IResult,
|
||||
};
|
||||
|
||||
use crate::rfc5322::mailbox::is_dtext;
|
||||
use crate::imf::mailbox::is_dtext;
|
||||
use crate::text::whitespace::cfws;
|
||||
use crate::text::words::dot_atom_text;
|
||||
|
|
@ -1,9 +1,9 @@
|
|||
use crate::rfc5322::address::AddressRef;
|
||||
use crate::rfc5322::field::Field;
|
||||
use crate::rfc5322::identification::MessageID;
|
||||
use crate::rfc5322::mailbox::{AddrSpec, MailboxRef};
|
||||
use crate::rfc5322::mime::Version;
|
||||
use crate::rfc5322::trace::ReceivedLog;
|
||||
use crate::imf::address::AddressRef;
|
||||
use crate::imf::field::Field;
|
||||
use crate::imf::identification::MessageID;
|
||||
use crate::imf::mailbox::{AddrSpec, MailboxRef};
|
||||
use crate::imf::mime::Version;
|
||||
use crate::imf::trace::ReceivedLog;
|
||||
use crate::text::misc_token::{PhraseList, Unstructured};
|
||||
use chrono::{DateTime, FixedOffset};
|
||||
|
|
@ -8,7 +8,7 @@ use nom::{
|
|||
IResult,
|
||||
};
|
||||
|
||||
use crate::rfc5322::{datetime, mailbox};
|
||||
use crate::imf::{datetime, mailbox};
|
||||
use crate::text::{ascii, misc_token, whitespace};
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
|
@ -75,7 +75,7 @@ fn received_tokens(input: &[u8]) -> IResult<&[u8], ReceivedLogToken> {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::rfc5322::trace::misc_token::Word;
|
||||
use crate::imf::trace::misc_token::Word;
|
||||
use chrono::TimeZone;
|
||||
|
||||
#[test]
|
|
@ -2,7 +2,7 @@ mod error;
|
|||
mod header;
|
||||
mod mime;
|
||||
mod part;
|
||||
mod rfc5322;
|
||||
mod imf;
|
||||
mod text;
|
||||
|
||||
pub fn email(input: &[u8]) -> Result<part::part::Message, error::EMLError> {
|
||||
|
@ -11,8 +11,8 @@ pub fn email(input: &[u8]) -> Result<part::part::Message, error::EMLError> {
|
|||
.map_err(error::EMLError::ParseError)
|
||||
}
|
||||
|
||||
pub fn imf(input: &[u8]) -> Result<rfc5322::message::Message, error::EMLError> {
|
||||
rfc5322::field::message(input)
|
||||
pub fn imf(input: &[u8]) -> Result<imf::message::Message, error::EMLError> {
|
||||
imf::field::message(input)
|
||||
.map(|(_, v)| v)
|
||||
.map_err(error::EMLError::ParseError)
|
||||
}
|
||||
|
|
|
@ -9,7 +9,7 @@ use crate::header::{field_name, CompFieldList};
|
|||
use crate::mime::mechanism::{mechanism, Mechanism};
|
||||
use crate::mime::mime::AnyMIME;
|
||||
use crate::mime::r#type::{naive_type, NaiveType};
|
||||
use crate::rfc5322::identification::{msg_id, MessageID};
|
||||
use crate::imf::identification::{msg_id, MessageID};
|
||||
use crate::text::misc_token::{unstructured, Unstructured};
|
||||
use crate::text::whitespace::obs_crlf;
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use crate::mime::field::Content;
|
||||
use crate::mime::mechanism::Mechanism;
|
||||
use crate::mime::r#type::{self as ctype, AnyType};
|
||||
use crate::rfc5322::identification::MessageID;
|
||||
use crate::imf::identification::MessageID;
|
||||
use crate::text::misc_token::Unstructured; //Multipart, Message, Text, Binary};
|
||||
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
|
|
|
@ -11,7 +11,7 @@ use nom::{
|
|||
use crate::header::{header, CompFieldList};
|
||||
use crate::mime;
|
||||
use crate::mime::mime::AnyMIME;
|
||||
use crate::rfc5322::{self as imf};
|
||||
use crate::imf::{self as imf};
|
||||
use crate::text::ascii::CRLF;
|
||||
use crate::text::boundary::{boundary, Delimiter};
|
||||
use crate::text::whitespace::obs_crlf;
|
||||
|
|
Loading…
Reference in a new issue