diff --git a/src/main.rs b/src/main.rs index a1f0884..08e7ecf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,8 +12,9 @@ use tera::Tera; mod classifier; mod data; -mod scrape; mod db; +mod scrape; +mod workers; use classifier::Classifier; use data::*; @@ -191,63 +192,6 @@ async fn apply(data: web::Data, req: web::Form>) .finish() } -async fn refresh_user_data(forge: &Forgejo, db: Arc>, classifier: Arc>) -> anyhow::Result<()> { - { - let db = &db.lock().unwrap(); - let d = db.last_scrape().elapsed()?; - if d < FORGEJO_POLL_DELAY { - return Ok(()); - } - } - - eprintln!("Fetching user data"); - let users = scrape::get_user_data(forge).await?; - - let db: &mut Db = &mut *db.lock().unwrap(); - let classifier = &classifier.lock().unwrap(); - - // NB: Some user accounts may have been deleted since last fetch (hopefully - // they were spammers). - // Such users will appear in the current [db] but not in the new [users]. - // We don't want to keep them in the database, so we rebuild a fresh [db] - // containing only data for users who still exist. - - let mut newdb = Db::from_users(users, HashMap::new(), classifier); - - // Import spam classification from the previous Db - for (&user_id, user_data) in &newdb.users { - let &score = newdb.score.get(&user_id).unwrap(); - if let Some(&user_was_spam) = db.is_spam.get(&user_id) { - if (user_was_spam && score < GUESS_SPAM_THRESHOLD) || - (! user_was_spam && score > GUESS_LEGIT_THRESHOLD) - { - eprintln!( - "Score for user {} changed past threshold; discarding our current classification", - user_data.login - ); - } else { - newdb.is_spam.insert(user_id, user_was_spam); - } - } - } - - // switch to [newdb] - let _ = std::mem::replace(db, newdb); - - db.store_to_path(Path::new("db.json")).unwrap(); // FIXME - - Ok(()) -} - -async fn refresh_user_data_loop(forge: Arc, db: Arc>, classifier: Arc>) { - loop { - tokio::time::sleep(FORGEJO_POLL_DELAY.mul_f32(0.1)).await; - if let Err(e) = refresh_user_data(&forge, db.clone(), classifier.clone()).await { - eprintln!("Error refreshing user data: {:?}", e); - } - } -} - #[actix_web::main] async fn main() -> std::io::Result<()> { eprintln!("Eval templates"); @@ -265,7 +209,7 @@ async fn main() -> std::io::Result<()> { }); let _ = tokio::spawn(async move { - refresh_user_data_loop(forge.clone(), db.clone(), classifier.clone()) + workers::refresh_user_data(forge.clone(), db.clone(), classifier.clone()) }); println!("Listening on http://127.0.0.1:8080"); diff --git a/src/workers.rs b/src/workers.rs new file mode 100644 index 0000000..e4f3f48 --- /dev/null +++ b/src/workers.rs @@ -0,0 +1,67 @@ +use std::sync::{Arc, Mutex}; +use std::collections::HashMap; +use std::path::Path; +use forgejo_api::Forgejo; +use crate::db::Db; +use crate::classifier::Classifier; +use crate::scrape; + +use crate::FORGEJO_POLL_DELAY; +use crate::{GUESS_LEGIT_THRESHOLD, GUESS_SPAM_THRESHOLD}; + +async fn try_refresh_user_data(forge: &Forgejo, db: Arc>, classifier: Arc>) -> anyhow::Result<()> { + { + let db = &db.lock().unwrap(); + let d = db.last_scrape().elapsed()?; + if d < FORGEJO_POLL_DELAY { + return Ok(()); + } + } + + eprintln!("Fetching user data"); + let users = scrape::get_user_data(forge).await?; + + let db: &mut Db = &mut *db.lock().unwrap(); + let classifier = &classifier.lock().unwrap(); + + // NB: Some user accounts may have been deleted since last fetch (hopefully + // they were spammers). + // Such users will appear in the current [db] but not in the new [users]. + // We don't want to keep them in the database, so we rebuild a fresh [db] + // containing only data for users who still exist. + + let mut newdb = Db::from_users(users, HashMap::new(), classifier); + + // Import spam classification from the previous Db + for (&user_id, user_data) in &newdb.users { + let &score = newdb.score.get(&user_id).unwrap(); + if let Some(&user_was_spam) = db.is_spam.get(&user_id) { + if (user_was_spam && score < GUESS_SPAM_THRESHOLD) || + (! user_was_spam && score > GUESS_LEGIT_THRESHOLD) + { + eprintln!( + "Score for user {} changed past threshold; discarding our current classification", + user_data.login + ); + } else { + newdb.is_spam.insert(user_id, user_was_spam); + } + } + } + + // switch to [newdb] + let _ = std::mem::replace(db, newdb); + + db.store_to_path(Path::new("db.json")).unwrap(); // FIXME + + Ok(()) +} + +pub async fn refresh_user_data(forge: Arc, db: Arc>, classifier: Arc>) { + loop { + tokio::time::sleep(FORGEJO_POLL_DELAY.mul_f32(0.1)).await; + if let Err(e) = try_refresh_user_data(&forge, db.clone(), classifier.clone()).await { + eprintln!("Error refreshing user data: {:?}", e); + } + } +}