optimize scoring performance

This commit is contained in:
Armaël Guéneau 2024-11-23 11:10:44 +01:00
parent d9251ce395
commit 876488bb4b
5 changed files with 24 additions and 6 deletions

1
.gitignore vendored
View file

@@ -2,3 +2,4 @@
classification.json
db.json
api_token
profile.json

View file

@@ -19,3 +19,7 @@ tera = "1"
lazy_static = "1"
actix-files = "0.6"
unicode-segmentation = "1"
[profile.profiling]
inherits = "dev"
debug = true

File diff suppressed because one or more lines are too long

View file

@@ -20,6 +20,8 @@ struct Counter {
#[derive(Default, Debug, Serialize, Deserialize)]
pub struct Classifier {
token_table: HashMap<String, Counter>,
spam_total_count: u32,
ham_total_count: u32,
}
impl Classifier {
@@ -56,6 +58,7 @@ impl Classifier {
for word in tokens {
let counter = self.token_table.entry(word.to_string()).or_default();
counter.spam += 1;
self.spam_total_count += 1;
}
}
@@ -64,17 +67,18 @@ impl Classifier {
for word in tokens {
let counter = self.token_table.entry(word.to_string()).or_default();
counter.ham += 1;
self.ham_total_count += 1;
}
}
/// Return the total number of spam in token table.
fn spam_total_count(&self) -> u32 {
self.token_table.values().map(|x| x.spam).sum()
self.spam_total_count
}
/// Return the total number of ham in token table.
fn ham_total_count(&self) -> u32 {
self.token_table.values().map(|x| x.ham).sum()
self.ham_total_count
}
/// Compute the probability of `tokens` to be part of a spam.

View file

@@ -334,14 +334,22 @@ async fn load_db() -> anyhow::Result<(Db, Classifier)> {
}
fn update_user(db: &mut Db, classifier: &mut Classifier, id: UserId) {
let tokens = db.users.get(&id).unwrap().to_tokens();
let userdata = db.users.get(&id).unwrap();
let tokens =
match db.tokens.get(&id) {
Some(tokens) => tokens,
None => {
let tokens = userdata.to_tokens();
db.tokens.insert(id, tokens);
db.tokens.get(&id).unwrap()
}
};
let score = classifier.score(&tokens);
for tok in &tokens {
for tok in tokens {
db.users_of_token.entry(tok.to_string()).or_default().push(id)
};
db.tokens.insert(id, tokens);
db.score.insert(id, score);
}