add some implementation indicators

This commit is contained in:
Quentin 2023-06-19 12:15:05 +02:00
parent 865305a793
commit fc2f0943cf
Signed by: quentin
GPG key ID: E9602264D639FF68
2 changed files with 27 additions and 23 deletions

View file

@ -51,7 +51,7 @@ Early development. Not ready.
Do not use it in production or any software at all. Do not use it in production or any software at all.
Todo: Todo:
- [ ] test over enron dataset - [ ] test over the enron dataset
- [ ] convert to multipass parser - [ ] convert to multipass parser
- [ ] implement mime part 3 (encoded headers) - [ ] implement mime part 3 (encoded headers)
- [ ] implement mime part 1 (new headers) - [ ] implement mime part 1 (new headers)
@ -60,21 +60,22 @@ Todo:
- [ ] implement fuzzing through cargo fuzz - [ ] implement fuzzing through cargo fuzz
- [ ] test over other datasets (jpbush, ml, my inbox) - [ ] test over other datasets (jpbush, ml, my inbox)
- [ ] backport to aerogramme - [ ] backport to aerogramme
- [ ] fix warnings, add examples, document the public API a little bit
## Targeted RFC ## Targeted RFC
| # | Name | | 🚩 | # | Name |
|---|------| |----|---|------|
|822 | ARPA INTERNET TEXT MESSAGES| | 🟩 |822 | ARPA INTERNET TEXT MESSAGES|
|2822 | Internet Message Format (2001) | | 🟩 |2822 | Internet Message Format (2001) |
|5322 | Internet Message Format (2008) | | 🟩 |5322 | Internet Message Format (2008) |
|2045 | ↳ Multipurpose Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies | | 🔴 |2045 | ↳ Multipurpose Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies |
|2046 | ↳ Multipurpose Internet Mail Extensions (MIME) Part Two: Media Types | | 🔴 |2046 | ↳ Multipurpose Internet Mail Extensions (MIME) Part Two: Media Types |
|2047 | ↳ MIME (Multipurpose Internet Mail Extensions) Part Three: Message Header Extensions for Non-ASCII Text | | 🔴 |2047 | ↳ MIME (Multipurpose Internet Mail Extensions) Part Three: Message Header Extensions for Non-ASCII Text |
|2048 | ↳ Multipurpose Internet Mail Extensions (MIME) Part Four: Registration Procedures | | 🔴 |2048 | ↳ Multipurpose Internet Mail Extensions (MIME) Part Four: Registration Procedures |
|2049 | ↳ Multipurpose Internet Mail Extensions (MIME) Part Five: Conformance Criteria and Examples | | 🔴 |2049 | ↳ Multipurpose Internet Mail Extensions (MIME) Part Five: Conformance Criteria and Examples |
|6532 | Internationalized Email Headers | | 🟩 |6532 | Internationalized Email Headers |
|9228 | Delivered-To Email Header Field | | 🔴 |9228 | Delivered-To Email Header Field |
## Alternatives ## Alternatives

View file

@ -1,3 +1,4 @@
use std::collections::HashSet;
use std::path::PathBuf; use std::path::PathBuf;
use std::fs::File; use std::fs::File;
use std::io::Read; use std::io::Read;
@ -10,8 +11,9 @@ use walkdir::WalkDir;
fn test_enron500k() { fn test_enron500k() {
let mut d = PathBuf::from(env!("CARGO_MANIFEST_DIR")); let mut d = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
d.push("resources/enron/maildir/"); d.push("resources/enron/maildir/");
let prefix_sz = d.as_path().to_str().unwrap().len();
let known_bad_fields = [ let known_bad_fields = HashSet::from([
"white-s/calendar/113.", // To: east <7..> "white-s/calendar/113.", // To: east <7..>
"skilling-j/inbox/223.", // From: pep <performance.> "skilling-j/inbox/223.", // From: pep <performance.>
@ -58,20 +60,21 @@ fn test_enron500k() {
"kaminski-v/notes_inbox/95.", // To + CC failed: cats <breaktkhrough.>, risk <breakthrough.>, leaders <breaktkhrough.> "kaminski-v/notes_inbox/95.", // To + CC failed: cats <breaktkhrough.>, risk <breakthrough.>, leaders <breaktkhrough.>
]; ]);
let known_bad_from = [ let known_bad_from = HashSet::from([
"maildir/skilling-j/inbox/223.", // From: pep <performance.> "skilling-j/inbox/223.", // From: pep <performance.>
]; ]);
let mut i = 0; let mut i = 0;
for entry in WalkDir::new(d.as_path()).into_iter().filter_map(|file| file.ok()) { for entry in WalkDir::new(d.as_path()).into_iter().filter_map(|file| file.ok()) {
if entry.metadata().unwrap().is_file() { if entry.metadata().unwrap().is_file() {
//@TODO check list let mail_path = entry.path();
let suffix = &mail_path.to_str().unwrap()[prefix_sz..];
// read file // read file
let mut raw = Vec::new(); let mut raw = Vec::new();
let mut f = File::open(entry.path()).unwrap(); let mut f = File::open(mail_path).unwrap();
f.read_to_end(&mut raw).unwrap(); f.read_to_end(&mut raw).unwrap();
// parse // parse
@ -86,16 +89,16 @@ fn test_enron500k() {
let p = entry.path(); let p = entry.path();
if !ok_date || !ok_from || !ok_fields { if !ok_date || !ok_from || !ok_fields {
println!("Issue with: {}", p.display()); println!("Issue with: {}", suffix);
} }
assert!(ok_date); assert!(ok_date);
if !known_bad_from.iter().any(|&s| p.ends_with(s)) { if !known_bad_from.contains(suffix) {
assert!(ok_from); assert!(ok_from);
} }
if !known_bad_fields.iter().any(|&s| p.ends_with(s)) { if !known_bad_fields.contains(suffix) {
assert!(ok_fields); assert!(ok_fields);
} }