From 483b9860b0d396a7058827dcfc78d414104e023a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arma=C3=ABl=20Gu=C3=A9neau?= Date: Wed, 18 Dec 2024 08:51:23 +0100 Subject: [PATCH] handle classification conflicts between concurrent users --- README.md | 2 -- src/main.rs | 23 ++++++++++++++++------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index b047c38..01f1fd0 100644 --- a/README.md +++ b/README.md @@ -21,8 +21,6 @@ - periodically refresh the database of users from forgejo, and merge them with the local db, handling updates in users data (triggering re-classification if needed) -- properly handle concurrent clients of the webapp (and handle classification - conflicts) - add backend to store data on garage instead of local files - replate the `api_token` file with a better mechanism: oauth maybe? - improve error handling diff --git a/src/main.rs b/src/main.rs index 69c05c3..7a6ffed 100644 --- a/src/main.rs +++ b/src/main.rs @@ -385,22 +385,31 @@ async fn load_db() -> anyhow::Result<(Db, Classifier)> { fn set_spam(db: &mut Db, classifier: &mut Classifier, ids: &[(UserId, bool)]) { eprintln!("updating classifier"); - for (id, is_spam) in ids { - let tokens = db.tokens.get(id).unwrap(); + for (user_id, is_spam) in ids { + // Train classifier with tokens from the user + let tokens = db.tokens.get(user_id).unwrap(); if *is_spam { classifier.train_spam(tokens); } else { classifier.train_ham(tokens); } + + match db.is_spam.get(user_id) { + Some(b) if b != is_spam => { + // classification conflict between concurrent queries. + // In this case we play it safe and erase the classification for this user; + // it will need to be manually classified again. 
+ eprintln!("Classification conflict (uid {:?}), forget current user classification", user_id); + db.is_spam.remove(user_id); + }, + _ => { + db.is_spam.insert(*user_id, *is_spam); + } + } } eprintln!("recomputing user scores"); db.recompute_scores(&classifier); - - eprintln!("updating db with new classification"); - for (user_id, is_spam) in ids { - db.is_spam.insert(*user_id, *is_spam); - } } lazy_static! {