add some implementation indicators
This commit is contained in:
parent
865305a793
commit
fc2f0943cf
2 changed files with 27 additions and 23 deletions
27
README.md
27
README.md
|
@ -51,7 +51,7 @@ Early development. Not ready.
|
||||||
Do not use it in production or any software at all.
|
Do not use it in production or any software at all.
|
||||||
|
|
||||||
Todo:
|
Todo:
|
||||||
- [ ] test over enron dataset
|
- [ ] test over the enron dataset
|
||||||
- [ ] convert to multipass parser
|
- [ ] convert to multipass parser
|
||||||
- [ ] implement mime part 3 (encoded headers)
|
- [ ] implement mime part 3 (encoded headers)
|
||||||
- [ ] implement mime part 1 (new headers)
|
- [ ] implement mime part 1 (new headers)
|
||||||
|
@ -60,21 +60,22 @@ Todo:
|
||||||
- [ ] implement fuzzing through cargo fuzz
|
- [ ] implement fuzzing through cargo fuzz
|
||||||
- [ ] test over other datasets (jpbush, ml, my inbox)
|
- [ ] test over other datasets (jpbush, ml, my inbox)
|
||||||
- [ ] backport to aerogramme
|
- [ ] backport to aerogramme
|
||||||
|
- [ ] fix warnings, add examples, document the public API a little bit
|
||||||
|
|
||||||
## Targeted RFC
|
## Targeted RFC
|
||||||
|
|
||||||
| # | Name |
|
| 🚩 | # | Name |
|
||||||
|---|------|
|
|----|---|------|
|
||||||
|822 | ARPA INTERNET TEXT MESSAGES|
|
| 🟩 |822 | ARPA INTERNET TEXT MESSAGES|
|
||||||
|2822 | Internet Message Format (2001) |
|
| 🟩 |2822 | Internet Message Format (2001) |
|
||||||
|5322 | Internet Message Format (2008) |
|
| 🟩 |5322 | Internet Message Format (2008) |
|
||||||
|2045 | ↳ Multipurpose Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies |
|
| 🔴 |2045 | ↳ Multipurpose Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies |
|
||||||
|2046 | ↳ Multipurpose Internet Mail Extensions (MIME) Part Two: Media Types |
|
| 🔴 |2046 | ↳ Multipurpose Internet Mail Extensions (MIME) Part Two: Media Types |
|
||||||
|2047 | ↳ MIME (Multipurpose Internet Mail Extensions) Part Three: Message Header Extensions for Non-ASCII Text |
|
| 🔴 |2047 | ↳ MIME (Multipurpose Internet Mail Extensions) Part Three: Message Header Extensions for Non-ASCII Text |
|
||||||
|2048 | ↳ Multipurpose Internet Mail Extensions (MIME) Part Four: Registration Procedures |
|
| 🔴 |2048 | ↳ Multipurpose Internet Mail Extensions (MIME) Part Four: Registration Procedures |
|
||||||
|2049 | ↳ Multipurpose Internet Mail Extensions (MIME) Part Five: Conformance Criteria and Examples |
|
| 🔴 |2049 | ↳ Multipurpose Internet Mail Extensions (MIME) Part Five: Conformance Criteria and Examples |
|
||||||
|6532 | Internationalized Email Headers |
|
| 🟩 |6532 | Internationalized Email Headers |
|
||||||
|9228 | Delivered-To Email Header Field |
|
| 🔴 |9228 | Delivered-To Email Header Field |
|
||||||
|
|
||||||
## Alternatives
|
## Alternatives
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
use std::collections::HashSet;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::Read;
|
use std::io::Read;
|
||||||
|
@ -10,8 +11,9 @@ use walkdir::WalkDir;
|
||||||
fn test_enron500k() {
|
fn test_enron500k() {
|
||||||
let mut d = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
let mut d = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
||||||
d.push("resources/enron/maildir/");
|
d.push("resources/enron/maildir/");
|
||||||
|
let prefix_sz = d.as_path().to_str().unwrap().len();
|
||||||
|
|
||||||
let known_bad_fields = [
|
let known_bad_fields = HashSet::from([
|
||||||
"white-s/calendar/113.", // To: east <7..>
|
"white-s/calendar/113.", // To: east <7..>
|
||||||
|
|
||||||
"skilling-j/inbox/223.", // From: pep <performance.>
|
"skilling-j/inbox/223.", // From: pep <performance.>
|
||||||
|
@ -58,20 +60,21 @@ fn test_enron500k() {
|
||||||
"kaminski-v/notes_inbox/95.", // To + CC failed: cats <breaktkhrough.>, risk <breakthrough.>, leaders <breaktkhrough.>
|
"kaminski-v/notes_inbox/95.", // To + CC failed: cats <breaktkhrough.>, risk <breakthrough.>, leaders <breaktkhrough.>
|
||||||
|
|
||||||
|
|
||||||
];
|
]);
|
||||||
|
|
||||||
let known_bad_from = [
|
let known_bad_from = HashSet::from([
|
||||||
"maildir/skilling-j/inbox/223.", // From: pep <performance.>
|
"skilling-j/inbox/223.", // From: pep <performance.>
|
||||||
];
|
]);
|
||||||
|
|
||||||
let mut i = 0;
|
let mut i = 0;
|
||||||
for entry in WalkDir::new(d.as_path()).into_iter().filter_map(|file| file.ok()) {
|
for entry in WalkDir::new(d.as_path()).into_iter().filter_map(|file| file.ok()) {
|
||||||
if entry.metadata().unwrap().is_file() {
|
if entry.metadata().unwrap().is_file() {
|
||||||
//@TODO check list
|
let mail_path = entry.path();
|
||||||
|
let suffix = &mail_path.to_str().unwrap()[prefix_sz..];
|
||||||
|
|
||||||
// read file
|
// read file
|
||||||
let mut raw = Vec::new();
|
let mut raw = Vec::new();
|
||||||
let mut f = File::open(entry.path()).unwrap();
|
let mut f = File::open(mail_path).unwrap();
|
||||||
f.read_to_end(&mut raw).unwrap();
|
f.read_to_end(&mut raw).unwrap();
|
||||||
|
|
||||||
// parse
|
// parse
|
||||||
|
@ -86,16 +89,16 @@ fn test_enron500k() {
|
||||||
|
|
||||||
let p = entry.path();
|
let p = entry.path();
|
||||||
if !ok_date || !ok_from || !ok_fields {
|
if !ok_date || !ok_from || !ok_fields {
|
||||||
println!("Issue with: {}", p.display());
|
println!("Issue with: {}", suffix);
|
||||||
}
|
}
|
||||||
|
|
||||||
assert!(ok_date);
|
assert!(ok_date);
|
||||||
|
|
||||||
if !known_bad_from.iter().any(|&s| p.ends_with(s)) {
|
if !known_bad_from.contains(suffix) {
|
||||||
assert!(ok_from);
|
assert!(ok_from);
|
||||||
}
|
}
|
||||||
|
|
||||||
if !known_bad_fields.iter().any(|&s| p.ends_with(s)) {
|
if !known_bad_fields.contains(suffix) {
|
||||||
assert!(ok_fields);
|
assert!(ok_fields);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue