From dcbe528c64bab4e8e42e58313b49de1eb4c74285 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arma=C3=ABl=20Gu=C3=A9neau?= Date: Fri, 20 Dec 2024 20:48:13 +0100 Subject: [PATCH] Move "lock account + send email" to a worker with retries --- README.md | 7 ++- src/db.rs | 8 ++- src/main.rs | 65 ++++++++-------------- src/workers.rs | 143 ++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 176 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 81b8624..85055b3 100644 --- a/README.md +++ b/README.md @@ -15,8 +15,11 @@ ## Todos -- take concrete actions for spam accounts: lock the account, send a warning - email, then delete+purge account after some time. +- discuss the current design choices for when locking the account/sending a + notification email fails. + (Current behavior is to periodically retry, avoid deleting if the account + could not be locked, but delete the account after the grace period even if + the email could not be sent…) - add backend to store data on garage instead of local files - replate the `api_token` file with a better mechanism: oauth maybe? - improve error handling diff --git a/src/db.rs b/src/db.rs index bc26e02..a510eff 100644 --- a/src/db.rs +++ b/src/db.rs @@ -11,7 +11,11 @@ use std::time::SystemTime; #[derive(Serialize, Deserialize, Debug, Clone, Copy)] pub enum IsSpam { Legit, - Spam { classified_at: SystemTime }, + Spam { + classified_at: SystemTime, + locked: bool, + notified: bool, + }, } impl IsSpam { @@ -26,6 +30,8 @@ impl IsSpam { if b { IsSpam::Spam { classified_at: SystemTime::now(), + locked: false, + notified: false, } } else { IsSpam::Legit diff --git a/src/main.rs b/src/main.rs index c14356c..d211ddf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -162,53 +162,24 @@ fn set_spam( spammers } -async fn lock_user_account(forge: &Forgejo, username: &str) -> anyhow::Result<()> { - let opts = forgejo_api::structs::EditUserOption { - // boilerplate: we do not change these settings - active: None, - admin: None, - allow_create_organization: None, - allow_git_hook: None, - allow_import_local: None, - description: None, - email: None, - full_name: None, - location: None, - login_name: None, - max_repo_creation: None, - must_change_password: None, - password: None, - pronouns: None, - restricted: None, - source_id: None, - website: None, - // lock the account and set its visibility to private: the user's - // description and info will not be publicly visible - prohibit_login: Some(true), - visibility: Some("private".to_string()), - }; - forge.admin_edit_user(username, opts).await?; - Ok(()) -} - async fn apply_classification( forge: &Forgejo, mailer: &SmtpConfig, - db: &mut Db, + db: Arc>, classifier: &mut Classifier, ids: &[(UserId, bool)], overwrite: bool, -) -> anyhow::Result<()> { - let spammers = set_spam(db, classifier, ids, overwrite); +) { + let spammers = set_spam(&mut db.lock().unwrap(), classifier, ids, overwrite); for user in spammers { - let user = &db.users.get(&user).unwrap(); - lock_user_account(forge, &user.login).await?; - email::send_locked_account_notice(&mailer, &user.login, &user.email).await?; - // TODO: better and more robust error handling: retries, or a worker to send the emails ..? + let login = db.lock().unwrap().users.get(&user).unwrap().login.clone(); + // It is ok for any of these calls to fail now: a worker will periodically retry + // TODO: signal the worker to wake up instead of performing a manual call here + workers::try_lock_and_notify_user(forge, mailer, db.clone(), user) + .await + .unwrap_or_else(|err| eprintln!("Failed to lock or notify user {login}: {err}")); } - - Ok(()) } lazy_static! { @@ -325,8 +296,8 @@ async fn post_classified( ) -> impl Responder { eprintln!("POST {}", req.uri()); - let db = &mut data.db.lock().unwrap(); let classifier = &mut data.classifier.lock().unwrap(); + let db = data.db.clone(); let updates: Vec<(UserId, bool)> = form .iter() @@ -336,15 +307,17 @@ async fn post_classified( apply_classification( &data.forge, &data.mailer, - db, + data.db.clone(), classifier, &updates, overwrite, ) - .await - .unwrap(); // FIXME + .await; - db.store_to_path(Path::new("db.json")).unwrap(); // FIXME + db.lock() + .unwrap() + .store_to_path(Path::new("db.json")) + .unwrap(); // FIXME classifier .save(&mut File::create(Path::new("model.json")).unwrap(), false) .unwrap(); // FIXME @@ -434,6 +407,12 @@ async fn main() -> anyhow::Result<()> { let db = db.clone(); tokio::spawn(async move { workers::purge_spammer_accounts(forge, db) }) }; + let _ = { + let forge = forge.clone(); + let mailer = mailer.clone(); + let db = db.clone(); + tokio::spawn(async move { workers::lock_and_notify_users(forge, mailer, db) }) + }; println!("Listening on http://127.0.0.1:8080"); diff --git a/src/workers.rs b/src/workers.rs index fa065c3..c3ca731 100644 --- a/src/workers.rs +++ b/src/workers.rs @@ -1,6 +1,9 @@ use crate::classifier::Classifier; +use crate::data::UserId; use crate::db::{Db, IsSpam}; +use crate::email::SmtpConfig; use crate::scrape; +use anyhow::anyhow; use forgejo_api::Forgejo; use std::collections::HashMap; use std::path::Path; @@ -100,9 +103,32 @@ pub async fn purge_spammer_accounts(forge: Arc, db: Arc>) { } for (user_id, login, is_spam) in classified_users { - if let IsSpam::Spam { classified_at } = is_spam { + if let IsSpam::Spam { + classified_at, + locked, + notified, + } = is_spam + { match classified_at.elapsed() { Ok(duration) if duration > GRACE_PERIOD => { + if !locked { + // NOTE: this is a minimum sanity check, but can this + // realistically happen? could we do better than printing a + // message in this case? + eprintln!("WARN: grace period for {login} expired but account is not locked! Skip user."); + continue; + } + if !notified { + // NOTE: we delete accounts even if we failed to notify them by email. + // Could we do better? (But if we do not delete in this case, + // spammers could perhaps prevent being deleted by providing + // non-working emails…) + eprintln!( + "WARN: grace period for {login} expired but we failed to send a \ + notification email. Deleting anyway..." + ); + } + if let Err(e) = try_purge_account(&forge, &login).await { eprintln!("Error while deleting spammer account {login}: {:?}", e) } else { @@ -123,3 +149,118 @@ pub async fn purge_spammer_accounts(forge: Arc, db: Arc>) { tokio::time::sleep(std::time::Duration::from_secs(3600)).await; } } + +// Lock a user account and send a notification email. +// Since this can fail, we put it into a worker that periodically retries on any +// user marked as spam and not already locked/notified. + +pub async fn lock_user_account(forge: &Forgejo, username: &str) -> anyhow::Result<()> { + let opts = forgejo_api::structs::EditUserOption { + // boilerplate: we do not change these settings + active: None, + admin: None, + allow_create_organization: None, + allow_git_hook: None, + allow_import_local: None, + description: None, + email: None, + full_name: None, + location: None, + login_name: None, + max_repo_creation: None, + must_change_password: None, + password: None, + pronouns: None, + restricted: None, + source_id: None, + website: None, + // lock the account and set its visibility to private: the user's + // description and info will not be publicly visible + prohibit_login: Some(true), + visibility: Some("private".to_string()), + }; + forge.admin_edit_user(username, opts).await?; + Ok(()) +} + +pub async fn try_lock_and_notify_user( + forge: &Forgejo, + mailer: &SmtpConfig, + db: Arc>, + user_id: UserId, +) -> anyhow::Result<()> { + let (login, email, is_spam) = { + let db = &db.lock().unwrap(); + let user = db.users.get(&user_id).unwrap(); + let is_spam = match db.is_spam.get(&user_id) { + Some(IsSpam::Spam { + classified_at, + locked, + notified, + }) => Some((*classified_at, *locked, *notified)), + _ => None, + }; + (user.login.clone(), user.email.clone(), is_spam) + }; + + if let Some((classified_at, locked, notified)) = is_spam { + if !locked { + lock_user_account(forge, &login).await?; + let db = &mut db.lock().unwrap(); + db.is_spam.insert( + user_id, + IsSpam::Spam { + classified_at, + locked: true, + notified, + }, + ); + db.store_to_path(Path::new("db.json")).unwrap(); // FIXME + } + + if !notified { + crate::email::send_locked_account_notice(mailer, &login, &email).await?; + let db = &mut db.lock().unwrap(); + db.is_spam.insert( + user_id, + IsSpam::Spam { + classified_at, + locked: true, + notified: true, + }, + ); + db.store_to_path(Path::new("db.json")).unwrap(); // FIXME + } + + Ok(()) + } else { + Err(anyhow!( + "Tried to lock user {} who was not classified as spam", + login + )) + } +} + +pub async fn lock_and_notify_users( + forge: Arc, + mailer: Arc, + db: Arc>, +) { + let mut spammers = Vec::new(); + { + let db = &db.lock().unwrap(); + for (id, user, is_spam) in db.classified_users() { + if is_spam.as_bool() { + spammers.push((id, user.login.clone())) + } + } + } + + for (user_id, login) in spammers { + try_lock_and_notify_user(&forge, &mailer, db.clone(), user_id) + .await + .unwrap_or_else(|err| eprintln!("Failed to lock or notify user {login}: {err}")); + } + + tokio::time::sleep(std::time::Duration::from_secs(3600)).await; +}