use crate::classifier::Classifier; use crate::data::UserId; use crate::db::{Db, IsSpam}; use crate::email; use crate::scrape; use anyhow::anyhow; use forgejo_api::Forgejo; use std::collections::HashMap; use std::path::Path; use std::sync::{Arc, Mutex}; use crate::FORGEJO_POLL_DELAY; use crate::GRACE_PERIOD; use crate::{ActuallyBan, Config}; use crate::{GUESS_LEGIT_THRESHOLD, GUESS_SPAM_THRESHOLD}; // Worker to refresh user data by periodically polling Forgejo async fn try_refresh_user_data( forge: &Forgejo, db: Arc>, classifier: Arc>, ) -> anyhow::Result<()> { { let db = &db.lock().unwrap(); let d = db.last_scrape.elapsed()?; if d < FORGEJO_POLL_DELAY { return Ok(()); } } eprintln!("Fetching user data"); let users = scrape::get_user_data(forge).await?; let db: &mut Db = &mut db.lock().unwrap(); let classifier = &classifier.lock().unwrap(); // NB: Some user accounts may have been deleted since last fetch (hopefully // they were spammers). // Such users will appear in the current [db] but not in the new [users]. // We don't want to keep them in the database, so we rebuild a fresh [db] // containing only data for users who still exist. let mut newdb = Db::from_users(users, HashMap::new(), classifier); // Import spam classification from the previous Db for (&user_id, user_data) in &newdb.users { let &score = newdb.score.get(&user_id).unwrap(); if let Some(&user_was_spam) = db.is_spam.get(&user_id) { if (user_was_spam.as_bool() && score < GUESS_SPAM_THRESHOLD) || (!user_was_spam.as_bool() && score > GUESS_LEGIT_THRESHOLD) { eprintln!( "Score for user {} changed past threshold; discarding our current classification", user_data.login ); } else { newdb.is_spam.insert(user_id, user_was_spam); } } } // switch to [newdb] let _ = std::mem::replace(db, newdb); db.store_to_path(Path::new("db.json")).unwrap(); // FIXME Ok(()) } pub async fn refresh_user_data( forge: Arc, db: Arc>, classifier: Arc>, ) { loop { if let Err(e) = try_refresh_user_data(&forge, db.clone(), classifier.clone()).await { eprintln!("Error refreshing user data: {:?}", e); } tokio::time::sleep(FORGEJO_POLL_DELAY.mul_f32(0.1)).await; } } // Worker to delete spam accounts after their grace period expired async fn try_purge_account(config: &Config, forge: &Forgejo, login: &str) -> anyhow::Result<()> { if let ActuallyBan::No = config.actually_ban { eprintln!("[Simulating: delete account of user {login}]"); return Ok(()); } eprintln!("Deleting account of user {login}"); forge .admin_delete_user( login, forgejo_api::structs::AdminDeleteUserQuery { purge: Some(true) }, ) .await?; eprintln!("Success"); Ok(()) } pub async fn purge_spammer_accounts(config: Arc, forge: Arc, db: Arc>) { loop { let mut classified_users = Vec::new(); { let db = &db.lock().unwrap(); for (id, user, is_spam) in db.classified_users() { classified_users.push((id, user.login.clone(), is_spam)); } } for (user_id, login, is_spam) in classified_users { if let IsSpam::Spam { classified_at, locked, notified, } = is_spam { match classified_at.elapsed() { Ok(duration) if duration > GRACE_PERIOD => { if !locked { // NOTE: this is a minimum sanity check, but can this // realistically happen? could we do better than printing a // message in this case? eprintln!("WARN: grace period for {login} expired but account is not locked! Skip user."); continue; } if !notified { // NOTE: we delete accounts even if we failed to notify them by email. // Could we do better? (But if we do not delete in this case, // spammers could perhaps prevent being deleted by providing // non-working emails…) eprintln!( "WARN: grace period for {login} expired but we failed to send a \ notification email. Deleting anyway..." ); } if let Err(e) = try_purge_account(&config, &forge, &login).await { eprintln!("Error while deleting spammer account {login}: {:?}", e) } else { eprintln!("Deleted spammer account {login}"); let db = &mut db.lock().unwrap(); db.users.remove(&user_id); db.is_spam.remove(&user_id); db.score.remove(&user_id); db.tokens.remove(&user_id); db.store_to_path(Path::new("db.json")).unwrap(); // FIXME } } _ => (), } } } tokio::time::sleep(std::time::Duration::from_secs(3600)).await; } } // Lock a user account and send a notification email. // Since this can fail, we put it into a worker that periodically retries on any // user marked as spam and not already locked/notified. async fn lock_user_account(forge: &Forgejo, username: &str) -> anyhow::Result<()> { let opts = forgejo_api::structs::EditUserOption { // boilerplate: we do not change these settings active: None, admin: None, allow_create_organization: None, allow_git_hook: None, allow_import_local: None, description: None, email: None, full_name: None, location: None, login_name: None, max_repo_creation: None, must_change_password: None, password: None, pronouns: None, restricted: None, source_id: None, website: None, // lock the account and set its visibility to private: the user's // description and info will not be publicly visible prohibit_login: Some(true), visibility: Some("private".to_string()), }; forge.admin_edit_user(username, opts).await?; Ok(()) } pub async fn try_lock_and_notify_user( config: &Config, forge: &Forgejo, db: Arc>, user_id: UserId, ) -> anyhow::Result<()> { let (login, email, is_spam) = { let db = &db.lock().unwrap(); let user = db.users.get(&user_id).unwrap(); let is_spam = match db.is_spam.get(&user_id) { Some(IsSpam::Spam { classified_at, locked, notified, }) => Some((*classified_at, *locked, *notified)), _ => None, }; (user.login.clone(), user.email.clone(), is_spam) }; if let Some((classified_at, locked, notified)) = is_spam { if !locked { match &config.actually_ban { ActuallyBan::Yes { .. } => { eprintln!("Locking account of user {login}"); lock_user_account(forge, &login).await?; eprintln!("Success"); } ActuallyBan::No => eprintln!("[Simulating: lock account of user {login}]"), } let db = &mut db.lock().unwrap(); db.is_spam.insert( user_id, IsSpam::Spam { classified_at, locked: true, notified, }, ); db.store_to_path(Path::new("db.json")).unwrap(); // FIXME } if !notified { match &config.actually_ban { ActuallyBan::Yes { smtp } => { eprintln!("Sending notification email to user {login}"); email::send_locked_account_notice(config, smtp, &login, &email).await?; eprintln!("Success"); } ActuallyBan::No => { eprintln!("[Simulating: send notification email to user {login}]") } } let db = &mut db.lock().unwrap(); db.is_spam.insert( user_id, IsSpam::Spam { classified_at, locked: true, notified: true, }, ); db.store_to_path(Path::new("db.json")).unwrap(); // FIXME } Ok(()) } else { Err(anyhow!( "Tried to lock user {} who was not classified as spam", login )) } } pub async fn lock_and_notify_users(config: Arc, forge: Arc, db: Arc>) { let mut spammers = Vec::new(); { let db = &db.lock().unwrap(); for (id, user, is_spam) in db.classified_users() { if is_spam.as_bool() { spammers.push((id, user.login.clone())) } } } for (user_id, login) in spammers { try_lock_and_notify_user(&config, &forge, db.clone(), user_id) .await .unwrap_or_else(|err| eprintln!("Failed to lock or notify user {login}: {err}")); } tokio::time::sleep(std::time::Duration::from_secs(3600)).await; }