use crate::classifier::Classifier; use crate::db::{Db, IsSpam}; use crate::scrape; use forgejo_api::Forgejo; use std::collections::HashMap; use std::path::Path; use std::sync::{Arc, Mutex}; use crate::FORGEJO_POLL_DELAY; use crate::GRACE_PERIOD; use crate::{GUESS_LEGIT_THRESHOLD, GUESS_SPAM_THRESHOLD}; // Worker to refresh user data by periodically polling Forgejo async fn try_refresh_user_data( forge: &Forgejo, db: Arc>, classifier: Arc>, ) -> anyhow::Result<()> { { let db = &db.lock().unwrap(); let d = db.last_scrape.elapsed()?; if d < FORGEJO_POLL_DELAY { return Ok(()); } } eprintln!("Fetching user data"); let users = scrape::get_user_data(forge).await?; let db: &mut Db = &mut *db.lock().unwrap(); let classifier = &classifier.lock().unwrap(); // NB: Some user accounts may have been deleted since last fetch (hopefully // they were spammers). // Such users will appear in the current [db] but not in the new [users]. // We don't want to keep them in the database, so we rebuild a fresh [db] // containing only data for users who still exist. let mut newdb = Db::from_users(users, HashMap::new(), classifier); // Import spam classification from the previous Db for (&user_id, user_data) in &newdb.users { let &score = newdb.score.get(&user_id).unwrap(); if let Some(&user_was_spam) = db.is_spam.get(&user_id) { if (user_was_spam.as_bool() && score < GUESS_SPAM_THRESHOLD) || (!user_was_spam.as_bool() && score > GUESS_LEGIT_THRESHOLD) { eprintln!( "Score for user {} changed past threshold; discarding our current classification", user_data.login ); } else { newdb.is_spam.insert(user_id, user_was_spam); } } } // switch to [newdb] let _ = std::mem::replace(db, newdb); db.store_to_path(Path::new("db.json")).unwrap(); // FIXME Ok(()) } pub async fn refresh_user_data( forge: Arc, db: Arc>, classifier: Arc>, ) { loop { tokio::time::sleep(FORGEJO_POLL_DELAY.mul_f32(0.1)).await; if let Err(e) = try_refresh_user_data(&forge, db.clone(), classifier.clone()).await { eprintln!("Error refreshing user data: {:?}", e); } } } // Worker to delete spam accounts after their grace period expired async fn try_purge_account(forge: &Forgejo, login: &str) -> anyhow::Result<()> { forge .admin_delete_user( login, forgejo_api::structs::AdminDeleteUserQuery { purge: Some(true) }, ) .await?; Ok(()) } pub async fn purge_spammer_accounts(forge: Arc, db: Arc>) { loop { tokio::time::sleep(std::time::Duration::from_secs(3600)).await; let mut classified_users = Vec::new(); { let db = &db.lock().unwrap(); for (id, user, is_spam) in db.classified_users() { classified_users.push((id, user.login.clone(), is_spam)); } } for (user_id, login, is_spam) in classified_users { if let IsSpam::Spam { classified_at } = is_spam { match classified_at.elapsed() { Ok(duration) if duration > GRACE_PERIOD => { if let Err(e) = try_purge_account(&forge, &login).await { eprintln!("Error while deleting spammer account {login}: {:?}", e) } else { eprintln!("Deleted spammer account {login}"); let db = &mut db.lock().unwrap(); db.users.remove(&user_id); db.is_spam.remove(&user_id); db.score.remove(&user_id); db.tokens.remove(&user_id); db.store_to_path(Path::new("db.json")).unwrap(); // FIXME } } _ => (), } } } } }