forgery/src/workers.rs

126 lines
4.1 KiB
Rust
Raw Normal View History

use crate::classifier::Classifier;
use crate::db::{Db, IsSpam};
use crate::scrape;
2024-12-19 11:49:58 +00:00
use forgejo_api::Forgejo;
use std::collections::HashMap;
use std::path::Path;
use std::sync::{Arc, Mutex};
use crate::FORGEJO_POLL_DELAY;
use crate::GRACE_PERIOD;
use crate::{GUESS_LEGIT_THRESHOLD, GUESS_SPAM_THRESHOLD};
// Worker to refresh user data by periodically polling Forgejo
2024-12-19 11:49:58 +00:00
async fn try_refresh_user_data(
forge: &Forgejo,
db: Arc<Mutex<Db>>,
classifier: Arc<Mutex<Classifier>>,
) -> anyhow::Result<()> {
{
let db = &db.lock().unwrap();
let d = db.last_scrape.elapsed()?;
if d < FORGEJO_POLL_DELAY {
return Ok(());
}
}
eprintln!("Fetching user data");
let users = scrape::get_user_data(forge).await?;
let db: &mut Db = &mut *db.lock().unwrap();
let classifier = &classifier.lock().unwrap();
// NB: Some user accounts may have been deleted since last fetch (hopefully
// they were spammers).
// Such users will appear in the current [db] but not in the new [users].
// We don't want to keep them in the database, so we rebuild a fresh [db]
// containing only data for users who still exist.
let mut newdb = Db::from_users(users, HashMap::new(), classifier);
// Import spam classification from the previous Db
for (&user_id, user_data) in &newdb.users {
let &score = newdb.score.get(&user_id).unwrap();
if let Some(&user_was_spam) = db.is_spam.get(&user_id) {
2024-12-19 11:49:58 +00:00
if (user_was_spam.as_bool() && score < GUESS_SPAM_THRESHOLD)
|| (!user_was_spam.as_bool() && score > GUESS_LEGIT_THRESHOLD)
{
eprintln!(
"Score for user {} changed past threshold; discarding our current classification",
user_data.login
);
} else {
newdb.is_spam.insert(user_id, user_was_spam);
}
}
}
// switch to [newdb]
let _ = std::mem::replace(db, newdb);
db.store_to_path(Path::new("db.json")).unwrap(); // FIXME
Ok(())
}
2024-12-19 11:49:58 +00:00
/// Worker loop that keeps the user database up to date.
///
/// Runs forever: each iteration calls `try_refresh_user_data`, logs any error
/// it returns to stderr, then sleeps for one tenth of `FORGEJO_POLL_DELAY`
/// before trying again.
pub async fn refresh_user_data(
    forge: Arc<Forgejo>,
    db: Arc<Mutex<Db>>,
    classifier: Arc<Mutex<Classifier>>,
) {
    // Pause between two attempts; the same value is used on every iteration.
    let retry_interval = FORGEJO_POLL_DELAY.mul_f32(0.1);

    loop {
        match try_refresh_user_data(&forge, Arc::clone(&db), Arc::clone(&classifier)).await {
            Ok(()) => {}
            Err(e) => eprintln!("Error refreshing user data: {:?}", e),
        }

        tokio::time::sleep(retry_interval).await;
    }
}
// Worker to delete spam accounts after their grace period expired
/// Asks Forgejo to delete the account named `login`, with the `purge` option
/// of the delete query set to `Some(true)`.
///
/// # Errors
///
/// Returns the error reported by the `admin_delete_user` call, if any.
async fn try_purge_account(forge: &Forgejo, login: &str) -> anyhow::Result<()> {
    use forgejo_api::structs::AdminDeleteUserQuery;

    let query = AdminDeleteUserQuery { purge: Some(true) };
    forge.admin_delete_user(login, query).await?;
    Ok(())
}
/// Worker loop that deletes accounts classified as spam once their grace
/// period is over.
///
/// Runs forever. Once per hour it snapshots the classified users and, for each
/// one marked `IsSpam::Spam` more than `GRACE_PERIOD` ago, asks Forgejo to
/// purge the account. After a successful deletion the user is removed from the
/// in-memory database, which is then written to `db.json`.
///
/// Failures to delete an account or to save the database are logged to stderr
/// and do not stop the worker.
///
/// # Panics
///
/// Panics if the database mutex is poisoned.
pub async fn purge_spammer_accounts(forge: Arc<Forgejo>, db: Arc<Mutex<Db>>) {
    loop {
        // Copy what we need out of the database so that the lock is released
        // before any deletion request is awaited.
        let mut classified_users = Vec::new();
        {
            let db = &db.lock().unwrap();
            for (id, user, is_spam) in db.classified_users() {
                classified_users.push((id, user.login.clone(), is_spam));
            }
        }

        for (user_id, login, is_spam) in classified_users {
            if let IsSpam::Spam { classified_at } = is_spam {
                // Skip accounts still within their grace period, as well as
                // those whose elapsed time cannot be computed.
                let grace_period_over =
                    matches!(classified_at.elapsed(), Ok(age) if age > GRACE_PERIOD);
                if !grace_period_over {
                    continue;
                }

                if let Err(e) = try_purge_account(&forge, &login).await {
                    eprintln!("Error while deleting spammer account {login}: {:?}", e);
                    continue;
                }
                eprintln!("Deleted spammer account {login}");

                let db = &mut db.lock().unwrap();
                db.users.remove(&user_id);
                db.is_spam.remove(&user_id);
                db.score.remove(&user_id);
                db.tokens.remove(&user_id);

                // Log a failed save instead of panicking: a panic here would
                // end this worker and poison the mutex while it is locked.
                if let Err(e) = db.store_to_path(Path::new("db.json")) {
                    eprintln!("Error while saving database to db.json: {:?}", e);
                }
            }
        }

        tokio::time::sleep(std::time::Duration::from_secs(3600)).await;
    }
}