always recompute the score of all users
(the performance difference is insignificant and the code is simpler)
This commit is contained in:
parent
876488bb4b
commit
ff95f3807b
1 changed file with 10 additions and 30 deletions
40
src/main.rs
40
src/main.rs
|
@ -63,7 +63,6 @@ struct Db {
|
|||
// caches: derived from the rest
|
||||
score: HashMap<UserId, f32>,
|
||||
tokens: HashMap<UserId, Vec<String>>,
|
||||
users_of_token: HashMap<String, Vec<UserId>>,
|
||||
}
|
||||
|
||||
impl UserData {
|
||||
|
@ -117,9 +116,12 @@ impl Db {
|
|||
tokens: HashMap::new(),
|
||||
classification: HashMap::new(),
|
||||
score: HashMap::new(),
|
||||
users_of_token: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the id of every entry in `self.users`, copied into a freshly allocated `Vec`.
///
/// Because the result is an owned `Vec` rather than a borrow of `self.users`, callers
/// can keep iterating over it while mutating the `Db` (as the scoring loops do).
fn all_users(&self) -> Vec<UserId> {
|
||||
self.users.iter().map(|(id, _)| *id).collect()
|
||||
}
|
||||
}
|
||||
|
||||
async fn scrape_repos(forge: &Forgejo) -> anyhow::Result<Vec<forgejo_api::structs::Repository>> {
|
||||
|
@ -320,8 +322,7 @@ async fn load_db() -> anyhow::Result<(Db, Classifier)> {
|
|||
db.users = get_users_data(&forge).await?;
|
||||
|
||||
eprintln!("Scoring users...");
|
||||
let ids: Vec<_> = db.users.iter().map(|(id, _)| *id).collect();
|
||||
for &user_id in &ids {
|
||||
for &user_id in &db.all_users() {
|
||||
update_user(&mut db, &mut classifier, user_id);
|
||||
}
|
||||
|
||||
|
@ -344,13 +345,7 @@ fn update_user(db: &mut Db, classifier: &mut Classifier, id: UserId) {
|
|||
db.tokens.get(&id).unwrap()
|
||||
}
|
||||
};
|
||||
let score = classifier.score(&tokens);
|
||||
|
||||
for tok in tokens {
|
||||
db.users_of_token.entry(tok.to_string()).or_default().push(id)
|
||||
};
|
||||
|
||||
db.score.insert(id, score);
|
||||
db.score.insert(id, classifier.score(&tokens));
|
||||
}
|
||||
|
||||
fn unclassified_users<'a>(db: &'a Db) -> Vec<(&'a UserId, &'a UserData)> {
|
||||
|
@ -363,7 +358,7 @@ fn unclassified_users<'a>(db: &'a Db) -> Vec<(&'a UserId, &'a UserData)> {
|
|||
fn set_spam(db: &mut Db, classifier: &mut Classifier, ids: &[(UserId, bool)]) {
|
||||
let mut all_tokens = HashSet::new();
|
||||
|
||||
eprintln!("training classifier");
|
||||
eprintln!("updating classifier");
|
||||
|
||||
for (id, is_spam) in ids {
|
||||
let tokens = db.tokens.get(id).unwrap();
|
||||
|
@ -379,25 +374,10 @@ fn set_spam(db: &mut Db, classifier: &mut Classifier, ids: &[(UserId, bool)]) {
|
|||
}
|
||||
}
|
||||
|
||||
eprintln!("computing users to update");
|
||||
eprintln!("recomputing user scores");
|
||||
|
||||
let mut users_to_update = HashSet::new();
|
||||
|
||||
for token in all_tokens {
|
||||
match db.users_of_token.get(&token) {
|
||||
None => (),
|
||||
Some(users) => {
|
||||
for user in users {
|
||||
users_to_update.insert(*user);
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
eprintln!("recomputing scores for {}/{} users", users_to_update.len(), db.users.len());
|
||||
|
||||
for user in users_to_update {
|
||||
update_user(db, classifier, user)
|
||||
for &user_id in &db.all_users() {
|
||||
update_user(db, classifier, user_id)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue