handle classification conflicts between concurrent users

This commit is contained in:
Armaël Guéneau 2024-12-18 08:51:23 +01:00
parent 6f412712e8
commit 483b9860b0
2 changed files with 16 additions and 9 deletions

View file

@ -21,8 +21,6 @@
- periodically refresh the database of users from forgejo, and merge them with - periodically refresh the database of users from forgejo, and merge them with
the local db, handling updates in users data (triggering re-classification if the local db, handling updates in users data (triggering re-classification if
needed) needed)
- properly handle concurrent clients of the webapp (and handle classification
conflicts)
- add backend to store data on garage instead of local files - add backend to store data on garage instead of local files
- replace the `api_token` file with a better mechanism: oauth maybe? - replace the `api_token` file with a better mechanism: oauth maybe?
- improve error handling - improve error handling

View file

@ -385,22 +385,31 @@ async fn load_db() -> anyhow::Result<(Db, Classifier)> {
/// Records a batch of spam/ham classifications.
///
/// For every `(user_id, is_spam)` pair in `ids`:
/// - the classifier is trained (as spam or ham) with the tokens stored for that
///   user in `db.tokens`;
/// - the decision is written to `db.is_spam`, unless a *different* decision is
///   already stored for that user. That case is treated as a conflict between
///   concurrent queries: the stored entry is removed, so the user has to be
///   classified manually again.
///
/// Once all pairs are processed, scores are recomputed with
/// `db.recompute_scores`.
///
/// # Panics
///
/// Panics if `db.tokens` has no entry for one of the given user ids.
fn set_spam(db: &mut Db, classifier: &mut Classifier, ids: &[(UserId, bool)]) {
    eprintln!("updating classifier");

    for (user_id, is_spam) in ids {
        // Train classifier with tokens from the user.
        // NOTE(review): training happens even when a conflict is detected
        // below — confirm this is intended.
        let tokens = db.tokens.get(user_id).unwrap();
        if *is_spam {
            classifier.train_spam(tokens);
        } else {
            classifier.train_ham(tokens);
        }

        match db.is_spam.get(user_id) {
            Some(b) if b != is_spam => {
                // Classification conflict between concurrent queries.
                // In this case we play it safe and erase the classification
                // for this user; it will need to be manually classified again.
                //
                // Rust format strings use `{}` placeholders, not C-style `%d`;
                // `{:?}` assumes `UserId` implements `Debug` (TODO confirm).
                eprintln!(
                    "Classification conflict (uid {:?}), forget current user classification",
                    user_id
                );
                db.is_spam.remove(user_id);
            }
            _ => {
                db.is_spam.insert(*user_id, *is_spam);
            }
        }
    }

    eprintln!("recomputing user scores");
    db.recompute_scores(&classifier);
}
lazy_static! { lazy_static! {