optimize scoring performance
This commit is contained in:
parent
d9251ce395
commit
876488bb4b
5 changed files with 24 additions and 6 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -2,3 +2,4 @@
|
|||
classification.json
|
||||
db.json
|
||||
api_token
|
||||
profile.json
|
||||
|
|
|
@ -19,3 +19,7 @@ tera = "1"
|
|||
lazy_static = "1"
|
||||
actix-files = "0.6"
|
||||
unicode-segmentation = "1"
|
||||
|
||||
[profile.profiling]
|
||||
inherits = "dev"
|
||||
debug = true
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -20,6 +20,8 @@ struct Counter {
|
|||
#[derive(Default, Debug, Serialize, Deserialize)]
|
||||
pub struct Classifier {
|
||||
token_table: HashMap<String, Counter>,
|
||||
spam_total_count: u32,
|
||||
ham_total_count: u32,
|
||||
}
|
||||
|
||||
impl Classifier {
|
||||
|
@ -56,6 +58,7 @@ impl Classifier {
|
|||
for word in tokens {
|
||||
let counter = self.token_table.entry(word.to_string()).or_default();
|
||||
counter.spam += 1;
|
||||
self.spam_total_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -64,17 +67,18 @@ impl Classifier {
|
|||
for word in tokens {
|
||||
let counter = self.token_table.entry(word.to_string()).or_default();
|
||||
counter.ham += 1;
|
||||
self.ham_total_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the total number of spam in token table.
|
||||
fn spam_total_count(&self) -> u32 {
|
||||
self.token_table.values().map(|x| x.spam).sum()
|
||||
self.spam_total_count
|
||||
}
|
||||
|
||||
/// Return the total number of ham in token table.
|
||||
fn ham_total_count(&self) -> u32 {
|
||||
self.token_table.values().map(|x| x.ham).sum()
|
||||
self.ham_total_count
|
||||
}
|
||||
|
||||
/// Compute the probability of `tokens` to be part of a spam.
|
||||
|
|
14
src/main.rs
14
src/main.rs
|
@ -334,14 +334,22 @@ async fn load_db() -> anyhow::Result<(Db, Classifier)> {
|
|||
}
|
||||
|
||||
fn update_user(db: &mut Db, classifier: &mut Classifier, id: UserId) {
|
||||
let tokens = db.users.get(&id).unwrap().to_tokens();
|
||||
let userdata = db.users.get(&id).unwrap();
|
||||
let tokens =
|
||||
match db.tokens.get(&id) {
|
||||
Some(tokens) => tokens,
|
||||
None => {
|
||||
let tokens = userdata.to_tokens();
|
||||
db.tokens.insert(id, tokens);
|
||||
db.tokens.get(&id).unwrap()
|
||||
}
|
||||
};
|
||||
let score = classifier.score(&tokens);
|
||||
|
||||
for tok in &tokens {
|
||||
for tok in tokens {
|
||||
db.users_of_token.entry(tok.to_string()).or_default().push(id)
|
||||
};
|
||||
|
||||
db.tokens.insert(id, tokens);
|
||||
db.score.insert(id, score);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue