2024-12-18 12:22:56 +00:00
|
|
|
use crate::classifier::Classifier;
|
2024-12-20 19:48:13 +00:00
|
|
|
use crate::data::UserId;
|
2024-12-19 14:21:33 +00:00
|
|
|
use crate::db::{Db, IsSpam};
|
2024-12-20 20:14:43 +00:00
|
|
|
use crate::email;
|
2024-12-18 12:22:56 +00:00
|
|
|
use crate::scrape;
|
2024-12-20 19:48:13 +00:00
|
|
|
use anyhow::anyhow;
|
2024-12-19 11:49:58 +00:00
|
|
|
use forgejo_api::Forgejo;
|
|
|
|
use std::collections::HashMap;
|
|
|
|
use std::path::Path;
|
|
|
|
use std::sync::{Arc, Mutex};
|
2024-12-18 12:22:56 +00:00
|
|
|
|
|
|
|
use crate::FORGEJO_POLL_DELAY;
|
2024-12-19 14:21:33 +00:00
|
|
|
use crate::GRACE_PERIOD;
|
2024-12-22 14:03:38 +00:00
|
|
|
use crate::{ActuallyBan, Config};
|
2024-12-18 12:22:56 +00:00
|
|
|
use crate::{GUESS_LEGIT_THRESHOLD, GUESS_SPAM_THRESHOLD};
|
|
|
|
|
2024-12-19 14:21:33 +00:00
|
|
|
// Worker to refresh user data by periodically polling Forgejo
|
|
|
|
|
2024-12-19 11:49:58 +00:00
|
|
|
async fn try_refresh_user_data(
|
|
|
|
forge: &Forgejo,
|
|
|
|
db: Arc<Mutex<Db>>,
|
|
|
|
classifier: Arc<Mutex<Classifier>>,
|
|
|
|
) -> anyhow::Result<()> {
|
2024-12-18 12:22:56 +00:00
|
|
|
{
|
|
|
|
let db = &db.lock().unwrap();
|
2024-12-19 10:49:08 +00:00
|
|
|
let d = db.last_scrape.elapsed()?;
|
2024-12-18 12:22:56 +00:00
|
|
|
if d < FORGEJO_POLL_DELAY {
|
|
|
|
return Ok(());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
eprintln!("Fetching user data");
|
|
|
|
let users = scrape::get_user_data(forge).await?;
|
|
|
|
|
|
|
|
let db: &mut Db = &mut *db.lock().unwrap();
|
|
|
|
let classifier = &classifier.lock().unwrap();
|
|
|
|
|
|
|
|
// NB: Some user accounts may have been deleted since last fetch (hopefully
|
|
|
|
// they were spammers).
|
|
|
|
// Such users will appear in the current [db] but not in the new [users].
|
|
|
|
// We don't want to keep them in the database, so we rebuild a fresh [db]
|
|
|
|
// containing only data for users who still exist.
|
|
|
|
|
|
|
|
let mut newdb = Db::from_users(users, HashMap::new(), classifier);
|
|
|
|
|
|
|
|
// Import spam classification from the previous Db
|
|
|
|
for (&user_id, user_data) in &newdb.users {
|
|
|
|
let &score = newdb.score.get(&user_id).unwrap();
|
|
|
|
if let Some(&user_was_spam) = db.is_spam.get(&user_id) {
|
2024-12-19 11:49:58 +00:00
|
|
|
if (user_was_spam.as_bool() && score < GUESS_SPAM_THRESHOLD)
|
|
|
|
|| (!user_was_spam.as_bool() && score > GUESS_LEGIT_THRESHOLD)
|
2024-12-18 12:22:56 +00:00
|
|
|
{
|
|
|
|
eprintln!(
|
|
|
|
"Score for user {} changed past threshold; discarding our current classification",
|
|
|
|
user_data.login
|
|
|
|
);
|
|
|
|
} else {
|
|
|
|
newdb.is_spam.insert(user_id, user_was_spam);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// switch to [newdb]
|
|
|
|
let _ = std::mem::replace(db, newdb);
|
|
|
|
|
|
|
|
db.store_to_path(Path::new("db.json")).unwrap(); // FIXME
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2024-12-19 11:49:58 +00:00
|
|
|
pub async fn refresh_user_data(
|
|
|
|
forge: Arc<Forgejo>,
|
|
|
|
db: Arc<Mutex<Db>>,
|
|
|
|
classifier: Arc<Mutex<Classifier>>,
|
|
|
|
) {
|
2024-12-18 12:22:56 +00:00
|
|
|
loop {
|
|
|
|
if let Err(e) = try_refresh_user_data(&forge, db.clone(), classifier.clone()).await {
|
|
|
|
eprintln!("Error refreshing user data: {:?}", e);
|
|
|
|
}
|
2024-12-20 09:19:54 +00:00
|
|
|
tokio::time::sleep(FORGEJO_POLL_DELAY.mul_f32(0.1)).await;
|
2024-12-18 12:22:56 +00:00
|
|
|
}
|
|
|
|
}
|
2024-12-19 14:21:33 +00:00
|
|
|
|
|
|
|
// Worker to delete spam accounts after their grace period expired
|
|
|
|
|
2024-12-20 20:14:43 +00:00
|
|
|
async fn try_purge_account(
|
2024-12-22 14:03:38 +00:00
|
|
|
config: &Config,
|
2024-12-20 20:14:43 +00:00
|
|
|
forge: &Forgejo,
|
|
|
|
login: &str,
|
|
|
|
) -> anyhow::Result<()> {
|
2024-12-22 14:03:38 +00:00
|
|
|
if let ActuallyBan::No = config.actually_ban {
|
2024-12-20 20:14:43 +00:00
|
|
|
eprintln!("[Simulating: delete account of user {login}]");
|
|
|
|
return Ok(());
|
|
|
|
}
|
|
|
|
|
|
|
|
eprintln!("Deleting account of user {login}");
|
2024-12-19 14:21:33 +00:00
|
|
|
forge
|
|
|
|
.admin_delete_user(
|
|
|
|
login,
|
|
|
|
forgejo_api::structs::AdminDeleteUserQuery { purge: Some(true) },
|
|
|
|
)
|
|
|
|
.await?;
|
2024-12-20 20:14:43 +00:00
|
|
|
eprintln!("Success");
|
2024-12-19 14:21:33 +00:00
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2024-12-20 20:14:43 +00:00
|
|
|
pub async fn purge_spammer_accounts(
|
2024-12-22 14:03:38 +00:00
|
|
|
config: Arc<Config>,
|
2024-12-20 20:14:43 +00:00
|
|
|
forge: Arc<Forgejo>,
|
|
|
|
db: Arc<Mutex<Db>>,
|
|
|
|
) {
|
2024-12-19 14:21:33 +00:00
|
|
|
loop {
|
|
|
|
let mut classified_users = Vec::new();
|
|
|
|
{
|
|
|
|
let db = &db.lock().unwrap();
|
|
|
|
for (id, user, is_spam) in db.classified_users() {
|
|
|
|
classified_users.push((id, user.login.clone(), is_spam));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (user_id, login, is_spam) in classified_users {
|
2024-12-20 19:48:13 +00:00
|
|
|
if let IsSpam::Spam {
|
|
|
|
classified_at,
|
|
|
|
locked,
|
|
|
|
notified,
|
|
|
|
} = is_spam
|
|
|
|
{
|
2024-12-19 14:21:33 +00:00
|
|
|
match classified_at.elapsed() {
|
|
|
|
Ok(duration) if duration > GRACE_PERIOD => {
|
2024-12-20 19:48:13 +00:00
|
|
|
if !locked {
|
|
|
|
// NOTE: this is a minimum sanity check, but can this
|
|
|
|
// realistically happen? could we do better than printing a
|
|
|
|
// message in this case?
|
|
|
|
eprintln!("WARN: grace period for {login} expired but account is not locked! Skip user.");
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if !notified {
|
|
|
|
// NOTE: we delete accounts even if we failed to notify them by email.
|
|
|
|
// Could we do better? (But if we do not delete in this case,
|
|
|
|
// spammers could perhaps prevent being deleted by providing
|
|
|
|
// non-working emails…)
|
|
|
|
eprintln!(
|
|
|
|
"WARN: grace period for {login} expired but we failed to send a \
|
|
|
|
notification email. Deleting anyway..."
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2024-12-22 14:03:38 +00:00
|
|
|
if let Err(e) = try_purge_account(&config, &forge, &login).await {
|
2024-12-19 14:21:33 +00:00
|
|
|
eprintln!("Error while deleting spammer account {login}: {:?}", e)
|
|
|
|
} else {
|
|
|
|
eprintln!("Deleted spammer account {login}");
|
|
|
|
let db = &mut db.lock().unwrap();
|
|
|
|
db.users.remove(&user_id);
|
|
|
|
db.is_spam.remove(&user_id);
|
|
|
|
db.score.remove(&user_id);
|
|
|
|
db.tokens.remove(&user_id);
|
|
|
|
db.store_to_path(Path::new("db.json")).unwrap(); // FIXME
|
|
|
|
}
|
|
|
|
}
|
|
|
|
_ => (),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2024-12-20 09:19:54 +00:00
|
|
|
|
|
|
|
tokio::time::sleep(std::time::Duration::from_secs(3600)).await;
|
2024-12-19 14:21:33 +00:00
|
|
|
}
|
|
|
|
}
|
2024-12-20 19:48:13 +00:00
|
|
|
|
|
|
|
// Lock a user account and send a notification email.
|
|
|
|
// Since this can fail, we put it into a worker that periodically retries on any
|
|
|
|
// user marked as spam and not already locked/notified.
|
|
|
|
|
2024-12-21 09:42:43 +00:00
|
|
|
async fn lock_user_account(forge: &Forgejo, username: &str) -> anyhow::Result<()> {
|
2024-12-20 19:48:13 +00:00
|
|
|
let opts = forgejo_api::structs::EditUserOption {
|
|
|
|
// boilerplate: we do not change these settings
|
|
|
|
active: None,
|
|
|
|
admin: None,
|
|
|
|
allow_create_organization: None,
|
|
|
|
allow_git_hook: None,
|
|
|
|
allow_import_local: None,
|
|
|
|
description: None,
|
|
|
|
email: None,
|
|
|
|
full_name: None,
|
|
|
|
location: None,
|
|
|
|
login_name: None,
|
|
|
|
max_repo_creation: None,
|
|
|
|
must_change_password: None,
|
|
|
|
password: None,
|
|
|
|
pronouns: None,
|
|
|
|
restricted: None,
|
|
|
|
source_id: None,
|
|
|
|
website: None,
|
|
|
|
// lock the account and set its visibility to private: the user's
|
|
|
|
// description and info will not be publicly visible
|
|
|
|
prohibit_login: Some(true),
|
|
|
|
visibility: Some("private".to_string()),
|
|
|
|
};
|
|
|
|
forge.admin_edit_user(username, opts).await?;
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
pub async fn try_lock_and_notify_user(
|
2024-12-22 14:03:38 +00:00
|
|
|
config: &Config,
|
2024-12-20 19:48:13 +00:00
|
|
|
forge: &Forgejo,
|
|
|
|
db: Arc<Mutex<Db>>,
|
|
|
|
user_id: UserId,
|
|
|
|
) -> anyhow::Result<()> {
|
|
|
|
let (login, email, is_spam) = {
|
|
|
|
let db = &db.lock().unwrap();
|
|
|
|
let user = db.users.get(&user_id).unwrap();
|
|
|
|
let is_spam = match db.is_spam.get(&user_id) {
|
|
|
|
Some(IsSpam::Spam {
|
|
|
|
classified_at,
|
|
|
|
locked,
|
|
|
|
notified,
|
|
|
|
}) => Some((*classified_at, *locked, *notified)),
|
|
|
|
_ => None,
|
|
|
|
};
|
|
|
|
(user.login.clone(), user.email.clone(), is_spam)
|
|
|
|
};
|
|
|
|
|
|
|
|
if let Some((classified_at, locked, notified)) = is_spam {
|
|
|
|
if !locked {
|
2024-12-22 14:03:38 +00:00
|
|
|
match &config.actually_ban {
|
2024-12-20 20:14:43 +00:00
|
|
|
ActuallyBan::Yes { .. } => {
|
|
|
|
eprintln!("Locking account of user {login}");
|
|
|
|
lock_user_account(forge, &login).await?;
|
|
|
|
eprintln!("Success");
|
|
|
|
}
|
|
|
|
ActuallyBan::No => eprintln!("[Simulating: lock account of user {login}]"),
|
|
|
|
}
|
|
|
|
|
2024-12-20 19:48:13 +00:00
|
|
|
let db = &mut db.lock().unwrap();
|
|
|
|
db.is_spam.insert(
|
|
|
|
user_id,
|
|
|
|
IsSpam::Spam {
|
|
|
|
classified_at,
|
|
|
|
locked: true,
|
|
|
|
notified,
|
|
|
|
},
|
|
|
|
);
|
|
|
|
db.store_to_path(Path::new("db.json")).unwrap(); // FIXME
|
|
|
|
}
|
|
|
|
|
|
|
|
if !notified {
|
2024-12-22 14:03:38 +00:00
|
|
|
match &config.actually_ban {
|
2024-12-20 20:14:43 +00:00
|
|
|
ActuallyBan::Yes { smtp } => {
|
|
|
|
eprintln!("Sending notification email to user {login}");
|
2024-12-22 14:03:38 +00:00
|
|
|
email::send_locked_account_notice(config, &smtp, &login, &email).await?;
|
2024-12-20 20:14:43 +00:00
|
|
|
eprintln!("Success");
|
|
|
|
}
|
|
|
|
ActuallyBan::No => {
|
|
|
|
eprintln!("[Simulating: send notification email to user {login}]")
|
|
|
|
}
|
|
|
|
}
|
2024-12-20 19:48:13 +00:00
|
|
|
let db = &mut db.lock().unwrap();
|
|
|
|
db.is_spam.insert(
|
|
|
|
user_id,
|
|
|
|
IsSpam::Spam {
|
|
|
|
classified_at,
|
|
|
|
locked: true,
|
|
|
|
notified: true,
|
|
|
|
},
|
|
|
|
);
|
|
|
|
db.store_to_path(Path::new("db.json")).unwrap(); // FIXME
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
} else {
|
|
|
|
Err(anyhow!(
|
|
|
|
"Tried to lock user {} who was not classified as spam",
|
|
|
|
login
|
|
|
|
))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub async fn lock_and_notify_users(
|
2024-12-22 14:03:38 +00:00
|
|
|
config: Arc<Config>,
|
2024-12-20 19:48:13 +00:00
|
|
|
forge: Arc<Forgejo>,
|
|
|
|
db: Arc<Mutex<Db>>,
|
|
|
|
) {
|
|
|
|
let mut spammers = Vec::new();
|
|
|
|
{
|
|
|
|
let db = &db.lock().unwrap();
|
|
|
|
for (id, user, is_spam) in db.classified_users() {
|
|
|
|
if is_spam.as_bool() {
|
|
|
|
spammers.push((id, user.login.clone()))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (user_id, login) in spammers {
|
2024-12-22 14:03:38 +00:00
|
|
|
try_lock_and_notify_user(&config, &forge, db.clone(), user_id)
|
2024-12-20 19:48:13 +00:00
|
|
|
.await
|
|
|
|
.unwrap_or_else(|err| eprintln!("Failed to lock or notify user {login}: {err}"));
|
|
|
|
}
|
|
|
|
|
|
|
|
tokio::time::sleep(std::time::Duration::from_secs(3600)).await;
|
|
|
|
}
|