2024-12-18 12:22:56 +00:00
|
|
|
use crate::classifier::Classifier;
|
2024-12-19 11:49:58 +00:00
|
|
|
use crate::db::Db;
|
2024-12-18 12:22:56 +00:00
|
|
|
use crate::scrape;
|
2024-12-19 11:49:58 +00:00
|
|
|
use forgejo_api::Forgejo;
|
|
|
|
use std::collections::HashMap;
|
|
|
|
use std::path::Path;
|
|
|
|
use std::sync::{Arc, Mutex};
|
2024-12-18 12:22:56 +00:00
|
|
|
|
|
|
|
use crate::FORGEJO_POLL_DELAY;
|
|
|
|
use crate::{GUESS_LEGIT_THRESHOLD, GUESS_SPAM_THRESHOLD};
|
|
|
|
|
2024-12-19 11:49:58 +00:00
|
|
|
async fn try_refresh_user_data(
|
|
|
|
forge: &Forgejo,
|
|
|
|
db: Arc<Mutex<Db>>,
|
|
|
|
classifier: Arc<Mutex<Classifier>>,
|
|
|
|
) -> anyhow::Result<()> {
|
2024-12-18 12:22:56 +00:00
|
|
|
{
|
|
|
|
let db = &db.lock().unwrap();
|
2024-12-19 10:49:08 +00:00
|
|
|
let d = db.last_scrape.elapsed()?;
|
2024-12-18 12:22:56 +00:00
|
|
|
if d < FORGEJO_POLL_DELAY {
|
|
|
|
return Ok(());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
eprintln!("Fetching user data");
|
|
|
|
let users = scrape::get_user_data(forge).await?;
|
|
|
|
|
|
|
|
let db: &mut Db = &mut *db.lock().unwrap();
|
|
|
|
let classifier = &classifier.lock().unwrap();
|
|
|
|
|
|
|
|
// NB: Some user accounts may have been deleted since last fetch (hopefully
|
|
|
|
// they were spammers).
|
|
|
|
// Such users will appear in the current [db] but not in the new [users].
|
|
|
|
// We don't want to keep them in the database, so we rebuild a fresh [db]
|
|
|
|
// containing only data for users who still exist.
|
|
|
|
|
|
|
|
let mut newdb = Db::from_users(users, HashMap::new(), classifier);
|
|
|
|
|
|
|
|
// Import spam classification from the previous Db
|
|
|
|
for (&user_id, user_data) in &newdb.users {
|
|
|
|
let &score = newdb.score.get(&user_id).unwrap();
|
|
|
|
if let Some(&user_was_spam) = db.is_spam.get(&user_id) {
|
2024-12-19 11:49:58 +00:00
|
|
|
if (user_was_spam.as_bool() && score < GUESS_SPAM_THRESHOLD)
|
|
|
|
|| (!user_was_spam.as_bool() && score > GUESS_LEGIT_THRESHOLD)
|
2024-12-18 12:22:56 +00:00
|
|
|
{
|
|
|
|
eprintln!(
|
|
|
|
"Score for user {} changed past threshold; discarding our current classification",
|
|
|
|
user_data.login
|
|
|
|
);
|
|
|
|
} else {
|
|
|
|
newdb.is_spam.insert(user_id, user_was_spam);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// switch to [newdb]
|
|
|
|
let _ = std::mem::replace(db, newdb);
|
|
|
|
|
|
|
|
db.store_to_path(Path::new("db.json")).unwrap(); // FIXME
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2024-12-19 11:49:58 +00:00
|
|
|
pub async fn refresh_user_data(
|
|
|
|
forge: Arc<Forgejo>,
|
|
|
|
db: Arc<Mutex<Db>>,
|
|
|
|
classifier: Arc<Mutex<Classifier>>,
|
|
|
|
) {
|
2024-12-18 12:22:56 +00:00
|
|
|
loop {
|
|
|
|
tokio::time::sleep(FORGEJO_POLL_DELAY.mul_f32(0.1)).await;
|
|
|
|
if let Err(e) = try_refresh_user_data(&forge, db.clone(), classifier.clone()).await {
|
|
|
|
eprintln!("Error refreshing user data: {:?}", e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|