From ff95f3807ba4644661c3d3c6684bfcc7a61d0b0e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Arma=C3=ABl=20Gu=C3=A9neau?=
Date: Sat, 23 Nov 2024 11:35:25 +0100
Subject: [PATCH] =?UTF-8?q?always=20recompute=20the=20score=20of=20all=20u?=
 =?UTF-8?q?sers=20(the=20perf=20=CE=B4=20is=20insignificant=20and=20the=20?=
 =?UTF-8?q?code=20is=20simpler)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/main.rs | 40 ++++++++++------------------------------
 1 file changed, 10 insertions(+), 30 deletions(-)

diff --git a/src/main.rs b/src/main.rs
index 5794d84..634ebe4 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -63,7 +63,6 @@ struct Db {
     // caches: derived from the rest
     score: HashMap,
     tokens: HashMap>,
-    users_of_token: HashMap>,
 }
 
 impl UserData {
@@ -117,9 +116,12 @@ impl Db {
             tokens: HashMap::new(),
             classification: HashMap::new(),
             score: HashMap::new(),
-            users_of_token: HashMap::new(),
         }
     }
+
+    fn all_users(&self) -> Vec {
+        self.users.iter().map(|(id, _)| *id).collect()
+    }
 }
 
 async fn scrape_repos(forge: &Forgejo) -> anyhow::Result> {
@@ -320,8 +322,7 @@ async fn load_db() -> anyhow::Result<(Db, Classifier)> {
     db.users = get_users_data(&forge).await?;
 
     eprintln!("Scoring users...");
-    let ids: Vec<_> = db.users.iter().map(|(id, _)| *id).collect();
-    for &user_id in &ids {
+    for &user_id in &db.all_users() {
         update_user(&mut db, &mut classifier, user_id);
     }
 
@@ -344,13 +345,7 @@ fn update_user(db: &mut Db, classifier: &mut Classifier, id: UserId) {
             db.tokens.get(&id).unwrap()
         }
     };
-    let score = classifier.score(&tokens);
-
-    for tok in tokens {
-        db.users_of_token.entry(tok.to_string()).or_default().push(id)
-    };
-
-    db.score.insert(id, score);
+    db.score.insert(id, classifier.score(&tokens));
 }
 
 fn unclassified_users<'a>(db: &'a Db) -> Vec<(&'a UserId, &'a UserData)> {
@@ -363,7 +358,7 @@ fn unclassified_users<'a>(db: &'a Db) -> Vec<(&'a UserId, &'a UserData)> {
 fn set_spam(db: &mut Db, classifier: &mut Classifier, ids: &[(UserId, bool)]) {
     let mut all_tokens = HashSet::new();
 
-    eprintln!("training classifier");
+    eprintln!("updating classifier");
 
     for (id, is_spam) in ids {
         let tokens = db.tokens.get(id).unwrap();
@@ -379,25 +374,10 @@ fn set_spam(db: &mut Db, classifier: &mut Classifier, ids: &[(UserId, bool)]) {
         }
     }
 
-    eprintln!("computing users to update");
+    eprintln!("recomputing user scores");
 
-    let mut users_to_update = HashSet::new();
-
-    for token in all_tokens {
-        match db.users_of_token.get(&token) {
-            None => (),
-            Some(users) => {
-                for user in users {
-                    users_to_update.insert(*user);
-                }
-            },
-        }
-    }
-
-    eprintln!("recomputing scores for {}/{} users", users_to_update.len(), db.users.len());
-
-    for user in users_to_update {
-        update_user(db, classifier, user)
+    for &user_id in &db.all_users() {
+        update_user(db, classifier, user_id)
     }
 }
 