use tracing for the application & webserver logs

This commit is contained in:
Armaël Guéneau 2025-02-03 18:24:14 +01:00
parent 0974d02d02
commit 6342620f70
5 changed files with 176 additions and 85 deletions

109
Cargo.lock generated
View file

@ -1213,7 +1213,11 @@ dependencies = [
"serde_json",
"tera",
"tokio",
"tower",
"tower-http",
"tower-sessions",
"tracing",
"tracing-subscriber",
"unicode-segmentation",
"url",
]
@ -2088,6 +2092,16 @@ dependencies = [
"minimal-lexical",
]
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
dependencies = [
"overload",
"winapi",
]
[[package]]
name = "num-conv"
version = "0.1.0"
@ -2177,6 +2191,12 @@ version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a"
[[package]]
name = "overload"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "p256"
version = "0.11.1"
@ -2852,6 +2872,15 @@ dependencies = [
"digest",
]
[[package]]
name = "sharded-slab"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
dependencies = [
"lazy_static",
]
[[package]]
name = "shlex"
version = "1.3.0"
@ -3119,6 +3148,16 @@ dependencies = [
"syn",
]
[[package]]
name = "thread_local"
version = "1.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c"
dependencies = [
"cfg-if",
"once_cell",
]
[[package]]
name = "time"
version = "0.3.37"
@ -3264,6 +3303,22 @@ dependencies = [
"tower-service",
]
[[package]]
name = "tower-http"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "403fa3b783d4b626a8ad51d766ab03cb6d2dbfc46b1c5d4448395e6628dc9697"
dependencies = [
"bitflags 2.6.0",
"bytes",
"http 1.2.0",
"http-body 1.0.1",
"pin-project-lite",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "tower-layer"
version = "0.3.3"
@ -3357,6 +3412,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c"
dependencies = [
"once_cell",
"valuable",
]
[[package]]
name = "tracing-log"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
dependencies = [
"log",
"once_cell",
"tracing-core",
]
[[package]]
name = "tracing-subscriber"
version = "0.3.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008"
dependencies = [
"nu-ansi-term",
"sharded-slab",
"smallvec",
"thread_local",
"tracing-core",
"tracing-log",
]
[[package]]
@ -3493,6 +3574,12 @@ version = "0.15.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e8257fbc510f0a46eb602c10215901938b5c2a7d5e70fc11483b1d3c9b5b18c"
[[package]]
name = "valuable"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
[[package]]
name = "vcpkg"
version = "0.2.15"
@ -3622,6 +3709,22 @@ dependencies = [
"rustls-pki-types",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.9"
@ -3631,6 +3734,12 @@ dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-core"
version = "0.52.0"

View file

@ -25,6 +25,10 @@ aws-config = { version = "1.1.7", features = ["behavior-version-latest"] }
aws-sdk-s3 = "1.66.0"
new_mime_guess = "4"
tower-sessions = "0.14"
tower = "0.5.2"
tower-http = { version = "0.6.2", features = ["trace"] }
tracing-subscriber = "0.3"
tracing = "0.1.41"
[profile.profiling]
inherits = "dev"

View file

@ -14,7 +14,10 @@ use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
use tera::Tera;
use tower::ServiceBuilder;
use tower_http::trace::TraceLayer;
use tower_sessions::{MemoryStore, Session, SessionManagerLayer};
use tracing::{error, info, warn};
use url::Url;
mod classifier;
@ -155,7 +158,7 @@ fn set_spam(db: &Db, ids: &[(UserId, bool)], overwrite: bool) -> Vec<UserId> {
match db.with_userdb(|u| u.is_spam(user_id)) {
Some(was_spam) if overwrite && was_spam.as_bool() != set_spam => {
eprintln!("User {login}: changing classification from {was_spam} to {set_spam}");
info!("User {login}: changing classification from {was_spam} to {set_spam}");
db.set_spam(user_id, Some(IsSpam::from_bool(set_spam)));
// We train the classifier again, which is somewhat hackish: we
// already trained it on the previous classification, possibly
@ -172,9 +175,7 @@ fn set_spam(db: &Db, ids: &[(UserId, bool)], overwrite: bool) -> Vec<UserId> {
// Classification conflict between concurrent queries.
// In this case we play it safe and discard the classification
// for this user; the user will need to be manually classified again.
eprintln!(
"Classification conflict for user {login}; discarding our current classification"
);
info!("Classification conflict for user {login}; discarding our current classification");
db.set_spam(user_id, None);
}
None => {
@ -230,7 +231,7 @@ async fn apply_classification(
// It is ok for any of these calls to fail now: a worker will periodically retry
workers::try_lock_and_notify_user(config, storage, forge, db, user)
.await
.unwrap_or_else(|err| eprintln!("Failed to lock or notify user {login}: {err}"));
.unwrap_or_else(|err| warn!("Failed to lock or notify user {login}: {err}"));
}
}
@ -253,7 +254,7 @@ lazy_static! {
match tera.add_raw_templates(files) {
Ok(()) => tera,
Err(e) => {
eprintln!("Parsing error(s): {}", e);
error!("Parsing error(s): {}", e);
::std::process::exit(1);
}
}
@ -287,10 +288,7 @@ async fn get_index(
State(data): State<Arc<AppState>>,
Query(q): Query<SortSetting>,
_user: AuthenticatedUser,
OriginalUri(uri): OriginalUri,
) -> Result<Html<String>, AppError> {
eprintln!("GET {}", uri);
let db = &data.db;
let mut users: Vec<(UserId, UserData, f32)> = db.with_userdb(|udb| {
@ -343,21 +341,17 @@ async fn get_index(
&(users_count - classified_count),
);
context.insert("total_users_count", &users_count);
eprintln!("rendering template...");
let page = TEMPLATES.render("index.html", &context)?;
eprintln!("done");
Ok(Html::from(page))
}
async fn post_classified(
State(data): State<Arc<AppState>>,
_user: AuthenticatedUser,
OriginalUri(uri): OriginalUri,
_user: AuthenticatedUser,
Form(form): Form<HashMap<i64, String>>,
overwrite: bool,
) -> Result<impl IntoResponse, AppError> {
eprintln!("POST {}", uri);
let updates: Vec<(UserId, bool)> = form
.iter()
.map(|(id, classification)| (UserId(*id), classification == "spam"))
@ -375,8 +369,6 @@ async fn post_classified(
storage::store_db(&data.storage, &data.db).await?;
eprintln!("done");
Ok((
StatusCode::SEE_OTHER,
[(header::LOCATION, uri.to_string())],
@ -386,29 +378,26 @@ async fn post_classified(
async fn post_classified_index(
data: State<Arc<AppState>>,
user: AuthenticatedUser,
uri: OriginalUri,
user: AuthenticatedUser,
form: Form<HashMap<i64, String>>,
) -> impl IntoResponse {
post_classified(data, user, uri, form, false).await
post_classified(data, uri, user, form, false).await
}
async fn post_classified_edit(
data: State<Arc<AppState>>,
user: AuthenticatedUser,
uri: OriginalUri,
user: AuthenticatedUser,
form: Form<HashMap<i64, String>>,
) -> impl IntoResponse {
post_classified(data, user, uri, form, true).await
post_classified(data, uri, user, form, true).await
}
async fn get_classified(
State(data): State<Arc<AppState>>,
_user: AuthenticatedUser,
OriginalUri(uri): OriginalUri,
) -> Result<Html<String>, AppError> {
eprintln!("GET {}", uri);
let db = &data.db;
let mut users: Vec<(UserId, UserData, f32, bool)> = db.with_userdb(|udb| {
udb.classified_users()
@ -427,20 +416,13 @@ async fn get_classified(
let mut context = tera::Context::new();
context.insert("forge_url", &data.config.forge_url.to_string());
context.insert("users", &users);
eprintln!("rendering template...");
let page = TEMPLATES.render("classified.html", &context)?;
eprintln!("done");
Ok(Html::from(page))
}
const STATIC_DIR: include_dir::Dir = include_dir::include_dir!("static");
async fn get_static_(
Path(filename): Path<String>,
OriginalUri(uri): OriginalUri,
) -> impl IntoResponse {
eprintln!("GET {}", uri);
async fn get_static_(Path(filename): Path<String>) -> impl IntoResponse {
match STATIC_DIR.get_file(&filename) {
None => (StatusCode::NOT_FOUND, "404 Not found").into_response(),
Some(page) => {
@ -450,11 +432,7 @@ async fn get_static_(
}
}
async fn get_auth(
State(data): State<Arc<AppState>>,
OriginalUri(uri): OriginalUri,
) -> Result<Html<String>, AppError> {
eprintln!("GET {}", uri);
async fn get_auth(State(data): State<Arc<AppState>>) -> Result<Html<String>, AppError> {
let mut context = tera::Context::new();
context.insert("forge_url", &data.config.forge_url.to_string());
let page = TEMPLATES.render("auth.html", &context)?;
@ -466,12 +444,7 @@ struct AuthForm {
token: String,
}
async fn post_auth(
OriginalUri(uri): OriginalUri,
session: Session,
Form(form): Form<AuthForm>,
) -> impl IntoResponse {
eprintln!("POST {}", uri);
async fn post_auth(session: Session, Form(form): Form<AuthForm>) -> impl IntoResponse {
session
.insert(session::TOKEN_KEY, &form.token)
.await
@ -479,14 +452,15 @@ async fn post_auth(
(StatusCode::SEE_OTHER, [(header::LOCATION, "/")])
}
async fn post_logout(OriginalUri(uri): OriginalUri, session: Session) -> impl IntoResponse {
eprintln!("POST {}", uri);
async fn post_logout(session: Session) -> impl IntoResponse {
session.flush().await.unwrap();
(StatusCode::SEE_OTHER, [(header::LOCATION, "/auth")])
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt::init();
let config = Arc::new(Config::from_env().await?);
let storage = Arc::new(Storage::from_env().await?);
let forge = Arc::new(Forgejo::new(
@ -494,10 +468,10 @@ async fn main() -> anyhow::Result<()> {
config.forge_url.clone(),
)?);
eprintln!("Eval templates");
// Compute the templates now
let _ = *TEMPLATES;
eprintln!("Load users and repos");
info!("Loading users and repos");
let db = load_db(&storage, &forge).await?;
let shared_state = Arc::new(AppState {
@ -534,8 +508,6 @@ async fn main() -> anyhow::Result<()> {
println!("Listening on http://{}", &config.bind_addr);
let session_store = MemoryStore::default();
let session_layer = SessionManagerLayer::new(session_store);
let app = Router::new()
.route("/", get(get_index).post(post_classified_index))
.route(
@ -546,7 +518,11 @@ async fn main() -> anyhow::Result<()> {
.route("/auth", get(get_auth).post(post_auth))
.route("/logout", post(post_logout))
.with_state(shared_state)
.layer(session_layer);
.layer(
ServiceBuilder::new()
.layer(TraceLayer::new_for_http())
.layer(SessionManagerLayer::new(MemoryStore::default())),
);
let webserver = {
let listener = tokio::net::TcpListener::bind(&config.bind_addr)

View file

@ -1,6 +1,7 @@
use forgejo_api::Forgejo;
use std::collections::HashMap;
use tokio::time::{sleep, Duration};
use tracing::{info, warn};
use crate::data::*;
@ -83,19 +84,19 @@ pub async fn get_user_data(forge: &Forgejo) -> anyhow::Result<HashMap<UserId, Us
Some(s) => Some(s),
};
eprintln!("Fetching users...");
info!("Fetching users...");
for user in scrape_users(forge).await? {
let Some(id) = user.id else {
eprintln!("WARN: user with no id");
warn!("user with no id");
continue;
};
let Some(login) = user.login else {
eprintln!("WARN: missing login for user {id}");
warn!("missing login for user {id}");
continue;
};
let Some(email) = user.email else {
eprintln!("WARN: missing email for user {id}");
warn!("missing email for user {id}");
continue;
};
@ -114,28 +115,28 @@ pub async fn get_user_data(forge: &Forgejo) -> anyhow::Result<HashMap<UserId, Us
);
}
eprintln!("Fetching repos...");
info!("Fetching repos...");
for repo in scrape_repos(forge).await? {
let Some(id) = repo.id else {
eprintln!("WARN: repo with no id");
warn!("repo with no id");
continue;
};
let Some(owner) = repo.owner else {
eprintln!("WARN: repo {} with no owner", id);
warn!("repo {} with no owner", id);
continue;
};
let Some(owner_id) = owner.id else {
eprintln!("WARN: owner for repo {} has no id", id);
warn!("owner for repo {} has no id", id);
continue;
};
let Some(repo_name) = repo.name else {
eprintln!("WARN: repo {} has no name", id);
warn!("repo {} has no name", id);
continue;
};
let Some(forge_owner) = data.get_mut(&UserId(owner_id)) else {
// this currently happens for repos owned by organizations
eprintln!(
"WARN: repo owner {} for repo {} is not in database",
warn!(
"repo owner {} for repo {} is not in database",
owner.login.unwrap_or_default(),
repo_name
);
@ -150,23 +151,23 @@ pub async fn get_user_data(forge: &Forgejo) -> anyhow::Result<HashMap<UserId, Us
));
}
eprintln!("Fetching issues...");
info!("Fetching issues...");
for issue in scrape_issues(forge).await? {
let Some(id) = issue.id else {
eprintln!("WARN: issue with no id");
warn!("issue with no id");
continue;
};
let Some(user) = issue.user else {
eprintln!("WARN: issue {} has no owner", id);
warn!("issue {} has no owner", id);
continue;
};
let Some(user_id) = user.id else {
eprintln!("WARN: user for issue {} has no id", id);
warn!("user for issue {} has no id", id);
continue;
};
let Some(forge_user) = data.get_mut(&UserId(user_id)) else {
eprintln!(
"WARN: issue user {} {} for issue {} is not in database",
warn!(
"issue user {} {} for issue {} is not in database",
user.login.unwrap_or_default(),
user_id,
issue

View file

@ -9,6 +9,7 @@ use forgejo_api::Forgejo;
use std::collections::HashMap;
use std::sync::Arc;
use std::time::SystemTime;
use tracing::{debug, info, warn};
use crate::FORGEJO_POLL_DELAY;
use crate::GRACE_PERIOD;
@ -25,7 +26,7 @@ async fn try_refresh_user_data(storage: &Storage, forge: &Forgejo, db: &Db) -> a
}
}
eprintln!("Fetching user data");
info!("worker: fetching user data...");
let users = scrape::get_user_data(forge).await?;
{
@ -51,7 +52,7 @@ async fn try_refresh_user_data(storage: &Storage, forge: &Forgejo, db: &Db) -> a
if (user_was_spam.as_bool() && score < GUESS_SPAM_THRESHOLD)
|| (!user_was_spam.as_bool() && score > GUESS_LEGIT_THRESHOLD)
{
eprintln!(
info!(
"Score for user {login} changed past threshold; discarding our current classification",
);
} else {
@ -73,7 +74,7 @@ async fn try_refresh_user_data(storage: &Storage, forge: &Forgejo, db: &Db) -> a
pub async fn refresh_user_data(storage: Arc<Storage>, forge: Arc<Forgejo>, db: Db) {
loop {
if let Err(e) = try_refresh_user_data(&storage, &forge, &db).await {
eprintln!("Error refreshing user data: {:?}", e);
warn!("Error refreshing user data: {:?}", e);
}
tokio::time::sleep(FORGEJO_POLL_DELAY.mul_f32(0.1)).await;
}
@ -83,18 +84,18 @@ pub async fn refresh_user_data(storage: Arc<Storage>, forge: Arc<Forgejo>, db: D
async fn try_purge_account(config: &Config, forge: &Forgejo, login: &str) -> anyhow::Result<()> {
if let ActuallyBan::No = config.actually_ban {
eprintln!("[Simulating: delete account of user {login}]");
info!("[Simulating: delete account of user {login}]");
return Ok(());
}
eprintln!("Deleting account of user {login}");
debug!("Deleting account of user {login}...");
forge
.admin_delete_user(
login,
forgejo_api::structs::AdminDeleteUserQuery { purge: Some(true) },
)
.await?;
eprintln!("Success");
info!("Deleted account of user {login}");
Ok(())
}
@ -125,7 +126,7 @@ pub async fn purge_spammer_accounts(
// NOTE: this is a minimum sanity check, but can this
// realistically happen? could we do better than printing a
// message in this case?
eprintln!("WARN: grace period for {login} expired but account is not locked! Skip user.");
warn!("grace period for {login} expired but account is not locked! Skip user.");
continue;
}
if !notified {
@ -133,16 +134,16 @@ pub async fn purge_spammer_accounts(
// Could we do better? (But if we do not delete in this case,
// spammers could perhaps prevent being deleted by providing
// non-working emails…)
eprintln!(
"WARN: grace period for {login} expired but we failed to send a \
warn!(
"grace period for {login} expired but we failed to send a \
notification email. Deleting anyway..."
);
}
if let Err(e) = try_purge_account(&config, &forge, &login).await {
eprintln!("Error while deleting spammer account {login}: {:?}", e)
warn!("Error while deleting spammer account {login}: {:?}", e)
} else {
eprintln!("Deleted spammer account {login}");
info!("Deleted spammer account {login}");
{
db.remove_user(user_id);
}
@ -216,11 +217,11 @@ pub async fn try_lock_and_notify_user(
if !locked {
match &config.actually_ban {
ActuallyBan::Yes { .. } => {
eprintln!("Locking account of user {login}");
debug!("Locking account of user {login}...");
lock_user_account(forge, &login).await?;
eprintln!("Success");
info!("Locked account of user {login}");
}
ActuallyBan::No => eprintln!("[Simulating: lock account of user {login}]"),
ActuallyBan::No => info!("[Simulating: lock account of user {login}]"),
}
db.set_spam(
@ -242,7 +243,7 @@ pub async fn try_lock_and_notify_user(
admin_contact_email,
org_name,
} => {
eprintln!("Sending notification email to user {login}");
debug!("Sending notification email to user {login}...");
email::send_locked_account_notice(
config,
smtp,
@ -252,10 +253,10 @@ pub async fn try_lock_and_notify_user(
&email,
)
.await?;
eprintln!("Success");
info!("Sent notification email to user {login}");
}
ActuallyBan::No => {
eprintln!("[Simulating: send notification email to user {login}]")
info!("[Simulating: send notification email to user {login}]")
}
}
@ -300,7 +301,7 @@ pub async fn lock_and_notify_users(
for (user_id, login) in spammers {
try_lock_and_notify_user(&config, &storage, &forge, &db, user_id)
.await
.unwrap_or_else(|err| eprintln!("Failed to lock or notify user {login}: {err}"));
.unwrap_or_else(|err| warn!("Failed to lock or notify user {login}: {err}"));
}
tokio::time::sleep(std::time::Duration::from_secs(3600)).await;