use crate::classifier::Classifier; use crate::db::Db; use crate::scrape; use forgejo_api::Forgejo; use std::collections::HashMap; use std::path::Path; use std::sync::{Arc, Mutex}; use crate::FORGEJO_POLL_DELAY; use crate::{GUESS_LEGIT_THRESHOLD, GUESS_SPAM_THRESHOLD}; async fn try_refresh_user_data( forge: &Forgejo, db: Arc>, classifier: Arc>, ) -> anyhow::Result<()> { { let db = &db.lock().unwrap(); let d = db.last_scrape.elapsed()?; if d < FORGEJO_POLL_DELAY { return Ok(()); } } eprintln!("Fetching user data"); let users = scrape::get_user_data(forge).await?; let db: &mut Db = &mut *db.lock().unwrap(); let classifier = &classifier.lock().unwrap(); // NB: Some user accounts may have been deleted since last fetch (hopefully // they were spammers). // Such users will appear in the current [db] but not in the new [users]. // We don't want to keep them in the database, so we rebuild a fresh [db] // containing only data for users who still exist. let mut newdb = Db::from_users(users, HashMap::new(), classifier); // Import spam classification from the previous Db for (&user_id, user_data) in &newdb.users { let &score = newdb.score.get(&user_id).unwrap(); if let Some(&user_was_spam) = db.is_spam.get(&user_id) { if (user_was_spam.as_bool() && score < GUESS_SPAM_THRESHOLD) || (!user_was_spam.as_bool() && score > GUESS_LEGIT_THRESHOLD) { eprintln!( "Score for user {} changed past threshold; discarding our current classification", user_data.login ); } else { newdb.is_spam.insert(user_id, user_was_spam); } } } // switch to [newdb] let _ = std::mem::replace(db, newdb); db.store_to_path(Path::new("db.json")).unwrap(); // FIXME Ok(()) } pub async fn refresh_user_data( forge: Arc, db: Arc>, classifier: Arc>, ) { loop { tokio::time::sleep(FORGEJO_POLL_DELAY.mul_f32(0.1)).await; if let Err(e) = try_refresh_user_data(&forge, db.clone(), classifier.clone()).await { eprintln!("Error refreshing user data: {:?}", e); } } }