Always recompute the score of all users
(the performance difference is insignificant and the code is simpler)
This commit is contained in:
parent
876488bb4b
commit
ff95f3807b
1 changed file with 10 additions and 30 deletions
40
src/main.rs
40
src/main.rs
|
@ -63,7 +63,6 @@ struct Db {
|
||||||
// caches: derived from the rest
|
// caches: derived from the rest
|
||||||
score: HashMap<UserId, f32>,
|
score: HashMap<UserId, f32>,
|
||||||
tokens: HashMap<UserId, Vec<String>>,
|
tokens: HashMap<UserId, Vec<String>>,
|
||||||
users_of_token: HashMap<String, Vec<UserId>>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl UserData {
|
impl UserData {
|
||||||
|
@ -117,9 +116,12 @@ impl Db {
|
||||||
tokens: HashMap::new(),
|
tokens: HashMap::new(),
|
||||||
classification: HashMap::new(),
|
classification: HashMap::new(),
|
||||||
score: HashMap::new(),
|
score: HashMap::new(),
|
||||||
users_of_token: HashMap::new(),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn all_users(&self) -> Vec<UserId> {
|
||||||
|
self.users.iter().map(|(id, _)| *id).collect()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn scrape_repos(forge: &Forgejo) -> anyhow::Result<Vec<forgejo_api::structs::Repository>> {
|
async fn scrape_repos(forge: &Forgejo) -> anyhow::Result<Vec<forgejo_api::structs::Repository>> {
|
||||||
|
@ -320,8 +322,7 @@ async fn load_db() -> anyhow::Result<(Db, Classifier)> {
|
||||||
db.users = get_users_data(&forge).await?;
|
db.users = get_users_data(&forge).await?;
|
||||||
|
|
||||||
eprintln!("Scoring users...");
|
eprintln!("Scoring users...");
|
||||||
let ids: Vec<_> = db.users.iter().map(|(id, _)| *id).collect();
|
for &user_id in &db.all_users() {
|
||||||
for &user_id in &ids {
|
|
||||||
update_user(&mut db, &mut classifier, user_id);
|
update_user(&mut db, &mut classifier, user_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -344,13 +345,7 @@ fn update_user(db: &mut Db, classifier: &mut Classifier, id: UserId) {
|
||||||
db.tokens.get(&id).unwrap()
|
db.tokens.get(&id).unwrap()
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let score = classifier.score(&tokens);
|
db.score.insert(id, classifier.score(&tokens));
|
||||||
|
|
||||||
for tok in tokens {
|
|
||||||
db.users_of_token.entry(tok.to_string()).or_default().push(id)
|
|
||||||
};
|
|
||||||
|
|
||||||
db.score.insert(id, score);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn unclassified_users<'a>(db: &'a Db) -> Vec<(&'a UserId, &'a UserData)> {
|
fn unclassified_users<'a>(db: &'a Db) -> Vec<(&'a UserId, &'a UserData)> {
|
||||||
|
@ -363,7 +358,7 @@ fn unclassified_users<'a>(db: &'a Db) -> Vec<(&'a UserId, &'a UserData)> {
|
||||||
fn set_spam(db: &mut Db, classifier: &mut Classifier, ids: &[(UserId, bool)]) {
|
fn set_spam(db: &mut Db, classifier: &mut Classifier, ids: &[(UserId, bool)]) {
|
||||||
let mut all_tokens = HashSet::new();
|
let mut all_tokens = HashSet::new();
|
||||||
|
|
||||||
eprintln!("training classifier");
|
eprintln!("updating classifier");
|
||||||
|
|
||||||
for (id, is_spam) in ids {
|
for (id, is_spam) in ids {
|
||||||
let tokens = db.tokens.get(id).unwrap();
|
let tokens = db.tokens.get(id).unwrap();
|
||||||
|
@ -379,25 +374,10 @@ fn set_spam(db: &mut Db, classifier: &mut Classifier, ids: &[(UserId, bool)]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
eprintln!("computing users to update");
|
eprintln!("recomputing user scores");
|
||||||
|
|
||||||
let mut users_to_update = HashSet::new();
|
for &user_id in &db.all_users() {
|
||||||
|
update_user(db, classifier, user_id)
|
||||||
for token in all_tokens {
|
|
||||||
match db.users_of_token.get(&token) {
|
|
||||||
None => (),
|
|
||||||
Some(users) => {
|
|
||||||
for user in users {
|
|
||||||
users_to_update.insert(*user);
|
|
||||||
}
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
eprintln!("recomputing scores for {}/{} users", users_to_update.len(), db.users.len());
|
|
||||||
|
|
||||||
for user in users_to_update {
|
|
||||||
update_user(db, classifier, user)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue