fix enron test
This commit is contained in:
parent
14bcf119a7
commit
d30d38f124
1 changed files with 32 additions and 24 deletions
|
@ -2,9 +2,21 @@ use std::collections::HashSet;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::Read;
|
use std::io::Read;
|
||||||
use imf_codec::fragments::header;
|
use imf_codec::multipass;
|
||||||
|
use imf_codec::fragments::section;
|
||||||
use walkdir::WalkDir;
|
use walkdir::WalkDir;
|
||||||
|
|
||||||
|
fn parser<'a, F>(input: &'a [u8], func: F) -> ()
|
||||||
|
where F: FnOnce(&section::Section) -> () {
|
||||||
|
let seg = multipass::segment::new(input).unwrap();
|
||||||
|
let charset = seg.charset();
|
||||||
|
let fields = charset.fields().unwrap();
|
||||||
|
let field_names = fields.names();
|
||||||
|
let field_body = field_names.body();
|
||||||
|
let section = field_body.section();
|
||||||
|
|
||||||
|
func(&section.fields);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[ignore]
|
#[ignore]
|
||||||
|
@ -91,34 +103,30 @@ fn test_enron500k() {
|
||||||
f.read_to_end(&mut raw).unwrap();
|
f.read_to_end(&mut raw).unwrap();
|
||||||
|
|
||||||
// parse
|
// parse
|
||||||
let (email, encoding, malformed) = header::from_bytes(&raw);
|
parser(&raw, |hdrs| {
|
||||||
//println!("Encoding: {:?}, Malformed: {:?}", encoding, malformed);
|
let ok_date = hdrs.date.is_some();
|
||||||
|
let ok_from = hdrs.from.len() > 0;
|
||||||
|
let ok_fields = hdrs.bad_fields.len() == 0;
|
||||||
|
|
||||||
let (input, hdrs) = header::section(&email).unwrap();
|
if !ok_date || !ok_from || !ok_fields {
|
||||||
//println!("{:?}", hdrs);
|
println!("Issue with: {}", suffix);
|
||||||
let ok_date = hdrs.date.is_some();
|
}
|
||||||
let ok_from = hdrs.from.len() > 0;
|
|
||||||
let ok_fields = hdrs.bad_fields.len() == 0;
|
|
||||||
|
|
||||||
let p = entry.path();
|
assert!(ok_date);
|
||||||
if !ok_date || !ok_from || !ok_fields {
|
|
||||||
println!("Issue with: {}", suffix);
|
|
||||||
}
|
|
||||||
|
|
||||||
assert!(ok_date);
|
if !known_bad_from.contains(suffix) {
|
||||||
|
assert!(ok_from);
|
||||||
|
}
|
||||||
|
|
||||||
if !known_bad_from.contains(suffix) {
|
if !known_bad_fields.contains(suffix) {
|
||||||
assert!(ok_from);
|
assert!(ok_fields);
|
||||||
}
|
}
|
||||||
|
|
||||||
if !known_bad_fields.contains(suffix) {
|
i += 1;
|
||||||
assert!(ok_fields);
|
if i % 1000 == 0 {
|
||||||
}
|
println!("Analyzed emails: {}", i);
|
||||||
|
}
|
||||||
i += 1;
|
})
|
||||||
if i % 1000 == 0 {
|
|
||||||
println!("Analyzed emails: {}", i);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue