diff --git a/Cargo.lock b/Cargo.lock index a6eb568..6ef4a62 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1213,7 +1213,11 @@ dependencies = [ "serde_json", "tera", "tokio", + "tower", + "tower-http", "tower-sessions", + "tracing", + "tracing-subscriber", "unicode-segmentation", "url", ] @@ -2088,6 +2092,16 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + [[package]] name = "num-conv" version = "0.1.0" @@ -2177,6 +2191,12 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + [[package]] name = "p256" version = "0.11.1" @@ -2852,6 +2872,15 @@ dependencies = [ "digest", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "1.3.0" @@ -3119,6 +3148,16 @@ dependencies = [ "syn", ] +[[package]] +name = "thread_local" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if", + "once_cell", +] + [[package]] name = "time" version = "0.3.37" @@ -3264,6 +3303,22 @@ dependencies = [ "tower-service", ] +[[package]] +name = "tower-http" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "403fa3b783d4b626a8ad51d766ab03cb6d2dbfc46b1c5d4448395e6628dc9697" +dependencies = [ + "bitflags 2.6.0", + "bytes", + "http 1.2.0", + "http-body 1.0.1", + "pin-project-lite", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "tower-layer" version = "0.3.3" @@ -3357,6 +3412,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" +dependencies = [ + "nu-ansi-term", + "sharded-slab", + "smallvec", + "thread_local", + "tracing-core", + "tracing-log", ] [[package]] @@ -3493,6 +3574,12 @@ version = "0.15.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4e8257fbc510f0a46eb602c10215901938b5c2a7d5e70fc11483b1d3c9b5b18c" +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + [[package]] name = "vcpkg" version = "0.2.15" @@ -3622,6 +3709,22 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.9" @@ -3631,6 +3734,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-core" version = "0.52.0" diff --git a/Cargo.toml b/Cargo.toml index 1103bd6..fc6bd6f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,10 @@ aws-config = { version = "1.1.7", features = ["behavior-version-latest"] } aws-sdk-s3 = "1.66.0" new_mime_guess = "4" tower-sessions = "0.14" +tower = "0.5.2" +tower-http = { version = "0.6.2", features = ["trace"] } +tracing-subscriber = "0.3" +tracing = "0.1.41" [profile.profiling] inherits = "dev" diff --git a/src/main.rs b/src/main.rs index 7ae00d7..2962d99 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,7 +14,10 @@ use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; use tera::Tera; +use tower::ServiceBuilder; +use tower_http::trace::TraceLayer; use tower_sessions::{MemoryStore, Session, SessionManagerLayer}; +use tracing::{error, info, warn}; use url::Url; mod classifier; @@ -155,7 +158,7 @@ fn set_spam(db: &Db, ids: &[(UserId, bool)], overwrite: bool) -> Vec { match db.with_userdb(|u| u.is_spam(user_id)) { Some(was_spam) if overwrite && was_spam.as_bool() != set_spam => { - eprintln!("User {login}: changing classification from {was_spam} to {set_spam}"); + info!("User {login}: changing classification from {was_spam} to {set_spam}"); db.set_spam(user_id, Some(IsSpam::from_bool(set_spam))); // We train the classifier again, which is somewhat hackish: we // already trained it on the previous classification, possibly @@ -172,9 +175,7 @@ fn set_spam(db: &Db, ids: &[(UserId, bool)], overwrite: bool) -> Vec { // Classification conflict between concurrent queries. // In this case we play it safe and discard the classification // for this user; the user will need to be manually classified again. - eprintln!( - "Classification conflict for user {login}; discarding our current classification" - ); + info!("Classification conflict for user {login}; discarding our current classification"); db.set_spam(user_id, None); } None => { @@ -230,7 +231,7 @@ async fn apply_classification( // It is ok for any of these calls to fail now: a worker will periodically retry workers::try_lock_and_notify_user(config, storage, forge, db, user) .await - .unwrap_or_else(|err| eprintln!("Failed to lock or notify user {login}: {err}")); + .unwrap_or_else(|err| warn!("Failed to lock or notify user {login}: {err}")); } } @@ -253,7 +254,7 @@ lazy_static! { match tera.add_raw_templates(files) { Ok(()) => tera, Err(e) => { - eprintln!("Parsing error(s): {}", e); + error!("Parsing error(s): {}", e); ::std::process::exit(1); } } @@ -287,10 +288,7 @@ async fn get_index( State(data): State>, Query(q): Query, _user: AuthenticatedUser, - OriginalUri(uri): OriginalUri, ) -> Result, AppError> { - eprintln!("GET {}", uri); - let db = &data.db; let mut users: Vec<(UserId, UserData, f32)> = db.with_userdb(|udb| { @@ -343,21 +341,17 @@ async fn get_index( &(users_count - classified_count), ); context.insert("total_users_count", &users_count); - eprintln!("rendering template..."); let page = TEMPLATES.render("index.html", &context)?; - eprintln!("done"); Ok(Html::from(page)) } async fn post_classified( State(data): State>, - _user: AuthenticatedUser, OriginalUri(uri): OriginalUri, + _user: AuthenticatedUser, Form(form): Form>, overwrite: bool, ) -> Result { - eprintln!("POST {}", uri); - let updates: Vec<(UserId, bool)> = form .iter() .map(|(id, classification)| (UserId(*id), classification == "spam")) @@ -375,8 +369,6 @@ async fn post_classified( storage::store_db(&data.storage, &data.db).await?; - eprintln!("done"); - Ok(( StatusCode::SEE_OTHER, [(header::LOCATION, uri.to_string())], @@ -386,29 +378,26 @@ async fn post_classified( async fn post_classified_index( data: State>, - user: AuthenticatedUser, uri: OriginalUri, + user: AuthenticatedUser, form: Form>, ) -> impl IntoResponse { - post_classified(data, user, uri, form, false).await + post_classified(data, uri, user, form, false).await } async fn post_classified_edit( data: State>, - user: AuthenticatedUser, uri: OriginalUri, + user: AuthenticatedUser, form: Form>, ) -> impl IntoResponse { - post_classified(data, user, uri, form, true).await + post_classified(data, uri, user, form, true).await } async fn get_classified( State(data): State>, _user: AuthenticatedUser, - OriginalUri(uri): OriginalUri, ) -> Result, AppError> { - eprintln!("GET {}", uri); - let db = &data.db; let mut users: Vec<(UserId, UserData, f32, bool)> = db.with_userdb(|udb| { udb.classified_users() @@ -427,20 +416,13 @@ async fn get_classified( let mut context = tera::Context::new(); context.insert("forge_url", &data.config.forge_url.to_string()); context.insert("users", &users); - eprintln!("rendering template..."); let page = TEMPLATES.render("classified.html", &context)?; - eprintln!("done"); Ok(Html::from(page)) } const STATIC_DIR: include_dir::Dir = include_dir::include_dir!("static"); -async fn get_static_( - Path(filename): Path, - OriginalUri(uri): OriginalUri, -) -> impl IntoResponse { - eprintln!("GET {}", uri); - +async fn get_static_(Path(filename): Path) -> impl IntoResponse { match STATIC_DIR.get_file(&filename) { None => (StatusCode::NOT_FOUND, "404 Not found").into_response(), Some(page) => { @@ -450,11 +432,7 @@ async fn get_static_( } } -async fn get_auth( - State(data): State>, - OriginalUri(uri): OriginalUri, -) -> Result, AppError> { - eprintln!("GET {}", uri); +async fn get_auth(State(data): State>) -> Result, AppError> { let mut context = tera::Context::new(); context.insert("forge_url", &data.config.forge_url.to_string()); let page = TEMPLATES.render("auth.html", &context)?; @@ -466,12 +444,7 @@ struct AuthForm { token: String, } -async fn post_auth( - OriginalUri(uri): OriginalUri, - session: Session, - Form(form): Form, -) -> impl IntoResponse { - eprintln!("POST {}", uri); +async fn post_auth(session: Session, Form(form): Form) -> impl IntoResponse { session .insert(session::TOKEN_KEY, &form.token) .await @@ -479,14 +452,15 @@ async fn post_auth( (StatusCode::SEE_OTHER, [(header::LOCATION, "/")]) } -async fn post_logout(OriginalUri(uri): OriginalUri, session: Session) -> impl IntoResponse { - eprintln!("POST {}", uri); +async fn post_logout(session: Session) -> impl IntoResponse { session.flush().await.unwrap(); (StatusCode::SEE_OTHER, [(header::LOCATION, "/auth")]) } #[tokio::main] async fn main() -> anyhow::Result<()> { + tracing_subscriber::fmt::init(); + let config = Arc::new(Config::from_env().await?); let storage = Arc::new(Storage::from_env().await?); let forge = Arc::new(Forgejo::new( @@ -494,10 +468,10 @@ async fn main() -> anyhow::Result<()> { config.forge_url.clone(), )?); - eprintln!("Eval templates"); + // Compute the templates now let _ = *TEMPLATES; - eprintln!("Load users and repos"); + info!("Loading users and repos"); let db = load_db(&storage, &forge).await?; let shared_state = Arc::new(AppState { @@ -534,8 +508,6 @@ async fn main() -> anyhow::Result<()> { println!("Listening on http://{}", &config.bind_addr); - let session_store = MemoryStore::default(); - let session_layer = SessionManagerLayer::new(session_store); let app = Router::new() .route("/", get(get_index).post(post_classified_index)) .route( @@ -546,7 +518,11 @@ async fn main() -> anyhow::Result<()> { .route("/auth", get(get_auth).post(post_auth)) .route("/logout", post(post_logout)) .with_state(shared_state) - .layer(session_layer); + .layer( + ServiceBuilder::new() + .layer(TraceLayer::new_for_http()) + .layer(SessionManagerLayer::new(MemoryStore::default())), + ); let webserver = { let listener = tokio::net::TcpListener::bind(&config.bind_addr) diff --git a/src/scrape.rs b/src/scrape.rs index 9338ba8..439cec9 100644 --- a/src/scrape.rs +++ b/src/scrape.rs @@ -1,6 +1,7 @@ use forgejo_api::Forgejo; use std::collections::HashMap; use tokio::time::{sleep, Duration}; +use tracing::{info, warn}; use crate::data::*; @@ -83,19 +84,19 @@ pub async fn get_user_data(forge: &Forgejo) -> anyhow::Result Some(s), }; - eprintln!("Fetching users..."); + info!("Fetching users..."); for user in scrape_users(forge).await? { let Some(id) = user.id else { - eprintln!("WARN: user with no id"); + warn!("user with no id"); continue; }; let Some(login) = user.login else { - eprintln!("WARN: missing login for user {id}"); + warn!("missing login for user {id}"); continue; }; let Some(email) = user.email else { - eprintln!("WARN: missing email for user {id}"); + warn!("missing email for user {id}"); continue; }; @@ -114,28 +115,28 @@ pub async fn get_user_data(forge: &Forgejo) -> anyhow::Result anyhow::Result a } } - eprintln!("Fetching user data"); + info!("worker: fetching user data..."); let users = scrape::get_user_data(forge).await?; { @@ -51,7 +52,7 @@ async fn try_refresh_user_data(storage: &Storage, forge: &Forgejo, db: &Db) -> a if (user_was_spam.as_bool() && score < GUESS_SPAM_THRESHOLD) || (!user_was_spam.as_bool() && score > GUESS_LEGIT_THRESHOLD) { - eprintln!( + info!( "Score for user {login} changed past threshold; discarding our current classification", ); } else { @@ -73,7 +74,7 @@ async fn try_refresh_user_data(storage: &Storage, forge: &Forgejo, db: &Db) -> a pub async fn refresh_user_data(storage: Arc, forge: Arc, db: Db) { loop { if let Err(e) = try_refresh_user_data(&storage, &forge, &db).await { - eprintln!("Error refreshing user data: {:?}", e); + warn!("Error refreshing user data: {:?}", e); } tokio::time::sleep(FORGEJO_POLL_DELAY.mul_f32(0.1)).await; } @@ -83,18 +84,18 @@ pub async fn refresh_user_data(storage: Arc, forge: Arc, db: D async fn try_purge_account(config: &Config, forge: &Forgejo, login: &str) -> anyhow::Result<()> { if let ActuallyBan::No = config.actually_ban { - eprintln!("[Simulating: delete account of user {login}]"); + info!("[Simulating: delete account of user {login}]"); return Ok(()); } - eprintln!("Deleting account of user {login}"); + debug!("Deleting account of user {login}..."); forge .admin_delete_user( login, forgejo_api::structs::AdminDeleteUserQuery { purge: Some(true) }, ) .await?; - eprintln!("Success"); + info!("Deleted account of user {login}"); Ok(()) } @@ -125,7 +126,7 @@ pub async fn purge_spammer_accounts( // NOTE: this is a minimum sanity check, but can this // realistically happen? could we do better than printing a // message in this case? - eprintln!("WARN: grace period for {login} expired but account is not locked! Skip user."); + warn!("grace period for {login} expired but account is not locked! Skip user."); continue; } if !notified { @@ -133,16 +134,16 @@ pub async fn purge_spammer_accounts( // Could we do better? (But if we do not delete in this case, // spammers could perhaps prevent being deleted by providing // non-working emails…) - eprintln!( - "WARN: grace period for {login} expired but we failed to send a \ - notification email. Deleting anyway..." + warn!( + "grace period for {login} expired but we failed to send a \ + notification email. Deleting anyway..." ); } if let Err(e) = try_purge_account(&config, &forge, &login).await { - eprintln!("Error while deleting spammer account {login}: {:?}", e) + warn!("Error while deleting spammer account {login}: {:?}", e) } else { - eprintln!("Deleted spammer account {login}"); + info!("Deleted spammer account {login}"); { db.remove_user(user_id); } @@ -216,11 +217,11 @@ pub async fn try_lock_and_notify_user( if !locked { match &config.actually_ban { ActuallyBan::Yes { .. } => { - eprintln!("Locking account of user {login}"); + debug!("Locking account of user {login}..."); lock_user_account(forge, &login).await?; - eprintln!("Success"); + info!("Locked account of user {login}"); } - ActuallyBan::No => eprintln!("[Simulating: lock account of user {login}]"), + ActuallyBan::No => info!("[Simulating: lock account of user {login}]"), } db.set_spam( @@ -242,7 +243,7 @@ pub async fn try_lock_and_notify_user( admin_contact_email, org_name, } => { - eprintln!("Sending notification email to user {login}"); + debug!("Sending notification email to user {login}..."); email::send_locked_account_notice( config, smtp, @@ -252,10 +253,10 @@ pub async fn try_lock_and_notify_user( &email, ) .await?; - eprintln!("Success"); + info!("Sent notification email to user {login}"); } ActuallyBan::No => { - eprintln!("[Simulating: send notification email to user {login}]") + info!("[Simulating: send notification email to user {login}]") } } @@ -300,7 +301,7 @@ pub async fn lock_and_notify_users( for (user_id, login) in spammers { try_lock_and_notify_user(&config, &storage, &forge, &db, user_id) .await - .unwrap_or_else(|err| eprintln!("Failed to lock or notify user {login}: {err}")); + .unwrap_or_else(|err| warn!("Failed to lock or notify user {login}: {err}")); } tokio::time::sleep(std::time::Duration::from_secs(3600)).await;