fix enron test

This commit is contained in:
Quentin 2023-06-22 14:10:50 +02:00
parent 14bcf119a7
commit d30d38f124
Signed by: quentin
GPG key ID: E9602264D639FF68

View file

@ -2,9 +2,21 @@ use std::collections::HashSet;
use std::path::PathBuf; use std::path::PathBuf;
use std::fs::File; use std::fs::File;
use std::io::Read; use std::io::Read;
use imf_codec::fragments::header; use imf_codec::multipass;
use imf_codec::fragments::section;
use walkdir::WalkDir; use walkdir::WalkDir;
/// Drives `input` through the `multipass` stages in order (`segment::new`,
/// `charset`, `fields`, `names`, `body`, `section`) and passes a reference to
/// the `fields` member of the last stage's result to `func`.
///
/// # Panics
///
/// Panics if `multipass::segment::new` or the `fields()` stage does not yield
/// a value, because both results are unwrapped.
fn parser<'a, F>(input: &'a [u8], func: F) -> ()
where
    F: FnOnce(&section::Section) -> (),
{
    // Every stage gets its own binding rather than being chained.
    // NOTE(review): presumably each stage borrows from the previous one, so the
    // bindings keep the earlier stages alive -- confirm against `imf_codec`.
    let segmented = multipass::segment::new(input).unwrap();
    let with_charset = segmented.charset();
    let extracted = with_charset.fields().unwrap();
    let named = extracted.names();
    let with_body = named.body();
    let parsed = with_body.section();

    func(&parsed.fields);
}
#[test] #[test]
#[ignore] #[ignore]
@ -91,16 +103,11 @@ fn test_enron500k() {
f.read_to_end(&mut raw).unwrap(); f.read_to_end(&mut raw).unwrap();
// parse // parse
let (email, encoding, malformed) = header::from_bytes(&raw); parser(&raw, |hdrs| {
//println!("Encoding: {:?}, Malformed: {:?}", encoding, malformed);
let (input, hdrs) = header::section(&email).unwrap();
//println!("{:?}", hdrs);
let ok_date = hdrs.date.is_some(); let ok_date = hdrs.date.is_some();
let ok_from = hdrs.from.len() > 0; let ok_from = hdrs.from.len() > 0;
let ok_fields = hdrs.bad_fields.len() == 0; let ok_fields = hdrs.bad_fields.len() == 0;
let p = entry.path();
if !ok_date || !ok_from || !ok_fields { if !ok_date || !ok_from || !ok_fields {
println!("Issue with: {}", suffix); println!("Issue with: {}", suffix);
} }
@ -119,6 +126,7 @@ fn test_enron500k() {
if i % 1000 == 0 { if i % 1000 == 0 {
println!("Analyzed emails: {}", i); println!("Analyzed emails: {}", i);
} }
})
} }
} }
} }