add some implementation indicators
This commit is contained in:
parent
865305a793
commit
fc2f0943cf
2 changed files with 27 additions and 23 deletions
27
README.md
27
README.md
|
@ -51,7 +51,7 @@ Early development. Not ready.
|
|||
Do not use it in production or any software at all.
|
||||
|
||||
Todo:
|
||||
- [ ] test over enron dataset
|
||||
- [ ] test over the enron dataset
|
||||
- [ ] convert to multipass parser
|
||||
- [ ] implement mime part 3 (encoded headers)
|
||||
- [ ] implement mime part 1 (new headers)
|
||||
|
@ -60,21 +60,22 @@ Todo:
|
|||
- [ ] implement fuzzing through cargo fuzz
|
||||
- [ ] test over other datasets (jpbush, ml, my inbox)
|
||||
- [ ] backport to aerogramme
|
||||
- [ ] fix warnings, add examples, document the public API a little bit
|
||||
|
||||
## Targeted RFC
|
||||
|
||||
| # | Name |
|
||||
|---|------|
|
||||
|822 | ARPA INTERNET TEXT MESSAGES|
|
||||
|2822 | Internet Message Format (2001) |
|
||||
|5322 | Internet Message Format (2008) |
|
||||
|2045 | ↳ Multipurpose Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies |
|
||||
|2046 | ↳ Multipurpose Internet Mail Extensions (MIME) Part Two: Media Types |
|
||||
|2047 | ↳ MIME (Multipurpose Internet Mail Extensions) Part Three: Message Header Extensions for Non-ASCII Text |
|
||||
|2048 | ↳ Multipurpose Internet Mail Extensions (MIME) Part Four: Registration Procedures |
|
||||
|2049 | ↳ Multipurpose Internet Mail Extensions (MIME) Part Five: Conformance Criteria and Examples |
|
||||
|6532 | Internationalized Email Headers |
|
||||
|9228 | Delivered-To Email Header Field |
|
||||
| 🚩 | # | Name |
|
||||
|----|---|------|
|
||||
| 🟩 |822 | ARPA INTERNET TEXT MESSAGES|
|
||||
| 🟩 |2822 | Internet Message Format (2001) |
|
||||
| 🟩 |5322 | Internet Message Format (2008) |
|
||||
| 🔴 |2045 | ↳ Multipurpose Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies |
|
||||
| 🔴 |2046 | ↳ Multipurpose Internet Mail Extensions (MIME) Part Two: Media Types |
|
||||
| 🔴 |2047 | ↳ MIME (Multipurpose Internet Mail Extensions) Part Three: Message Header Extensions for Non-ASCII Text |
|
||||
| 🔴 |2048 | ↳ Multipurpose Internet Mail Extensions (MIME) Part Four: Registration Procedures |
|
||||
| 🔴 |2049 | ↳ Multipurpose Internet Mail Extensions (MIME) Part Five: Conformance Criteria and Examples |
|
||||
| 🟩 |6532 | Internationalized Email Headers |
|
||||
| 🔴 |9228 | Delivered-To Email Header Field |
|
||||
|
||||
## Alternatives
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use std::collections::HashSet;
|
||||
use std::path::PathBuf;
|
||||
use std::fs::File;
|
||||
use std::io::Read;
|
||||
|
@ -10,8 +11,9 @@ use walkdir::WalkDir;
|
|||
fn test_enron500k() {
|
||||
let mut d = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
||||
d.push("resources/enron/maildir/");
|
||||
let prefix_sz = d.as_path().to_str().unwrap().len();
|
||||
|
||||
let known_bad_fields = [
|
||||
let known_bad_fields = HashSet::from([
|
||||
"white-s/calendar/113.", // To: east <7..>
|
||||
|
||||
"skilling-j/inbox/223.", // From: pep <performance.>
|
||||
|
@ -58,20 +60,21 @@ fn test_enron500k() {
|
|||
"kaminski-v/notes_inbox/95.", // To + CC failed: cats <breaktkhrough.>, risk <breakthrough.>, leaders <breaktkhrough.>
|
||||
|
||||
|
||||
];
|
||||
]);
|
||||
|
||||
let known_bad_from = [
|
||||
"maildir/skilling-j/inbox/223.", // From: pep <performance.>
|
||||
];
|
||||
let known_bad_from = HashSet::from([
|
||||
"skilling-j/inbox/223.", // From: pep <performance.>
|
||||
]);
|
||||
|
||||
let mut i = 0;
|
||||
for entry in WalkDir::new(d.as_path()).into_iter().filter_map(|file| file.ok()) {
|
||||
if entry.metadata().unwrap().is_file() {
|
||||
//@TODO check list
|
||||
let mail_path = entry.path();
|
||||
let suffix = &mail_path.to_str().unwrap()[prefix_sz..];
|
||||
|
||||
// read file
|
||||
let mut raw = Vec::new();
|
||||
let mut f = File::open(entry.path()).unwrap();
|
||||
let mut f = File::open(mail_path).unwrap();
|
||||
f.read_to_end(&mut raw).unwrap();
|
||||
|
||||
// parse
|
||||
|
@ -86,16 +89,16 @@ fn test_enron500k() {
|
|||
|
||||
let p = entry.path();
|
||||
if !ok_date || !ok_from || !ok_fields {
|
||||
println!("Issue with: {}", p.display());
|
||||
println!("Issue with: {}", suffix);
|
||||
}
|
||||
|
||||
assert!(ok_date);
|
||||
|
||||
if !known_bad_from.iter().any(|&s| p.ends_with(s)) {
|
||||
if !known_bad_from.contains(suffix) {
|
||||
assert!(ok_from);
|
||||
}
|
||||
|
||||
if !known_bad_fields.iter().any(|&s| p.ends_with(s)) {
|
||||
if !known_bad_fields.contains(suffix) {
|
||||
assert!(ok_fields);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue