cargo clippy
This commit is contained in:
parent
13f8e76ae3
commit
af38eae2c3
7 changed files with 68 additions and 51 deletions
|
@ -20,7 +20,7 @@ lazy_static = "1"
|
||||||
actix-files = "0.6"
|
actix-files = "0.6"
|
||||||
unicode-segmentation = "1"
|
unicode-segmentation = "1"
|
||||||
lettre = { version = "0.11", features = ["builder", "smtp-transport", "rustls-tls"], default-features = false }
|
lettre = { version = "0.11", features = ["builder", "smtp-transport", "rustls-tls"], default-features = false }
|
||||||
include_dir = "0.7.4"
|
include_dir = "0.7"
|
||||||
|
|
||||||
[profile.profiling]
|
[profile.profiling]
|
||||||
inherits = "dev"
|
inherits = "dev"
|
||||||
|
|
|
@ -84,7 +84,7 @@ impl Classifier {
|
||||||
/// Compute the probability of `tokens` to be part of a spam.
|
/// Compute the probability of `tokens` to be part of a spam.
|
||||||
fn rate_words(&self, tokens: &[String]) -> Vec<f32> {
|
fn rate_words(&self, tokens: &[String]) -> Vec<f32> {
|
||||||
tokens
|
tokens
|
||||||
.into_iter()
|
.iter()
|
||||||
.map(|word| {
|
.map(|word| {
|
||||||
// If word was previously added in the model
|
// If word was previously added in the model
|
||||||
if let Some(counter) = self.token_table.get(word) {
|
if let Some(counter) = self.token_table.get(word) {
|
||||||
|
|
|
@ -57,17 +57,17 @@ impl UserData {
|
||||||
}
|
}
|
||||||
|
|
||||||
match &self.location {
|
match &self.location {
|
||||||
Some(s) => add(&s),
|
Some(s) => add(s),
|
||||||
None => add("__NO_LOCATION__"),
|
None => add("__NO_LOCATION__"),
|
||||||
}
|
}
|
||||||
|
|
||||||
match &self.website {
|
match &self.website {
|
||||||
Some(s) => add(&s),
|
Some(s) => add(s),
|
||||||
None => add("__NO_WEBSITE__"),
|
None => add("__NO_WEBSITE__"),
|
||||||
}
|
}
|
||||||
|
|
||||||
match &self.description {
|
match &self.description {
|
||||||
Some(s) => add(&s),
|
Some(s) => add(s),
|
||||||
None => add("__NO_USER_DESCRIPTION__"),
|
None => add("__NO_USER_DESCRIPTION__"),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -116,20 +116,20 @@ impl Db {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn unclassified_users<'a>(&'a self) -> Vec<(UserId, &'a UserData)> {
|
pub fn unclassified_users(&self) -> Vec<(UserId, &UserData)> {
|
||||||
self.users
|
self.users
|
||||||
.iter()
|
.iter()
|
||||||
.filter(|(user_id, _)| !self.is_spam.contains_key(&user_id))
|
.filter(|(user_id, _)| !self.is_spam.contains_key(user_id))
|
||||||
.map(|(id, d)| (*id, d))
|
.map(|(id, d)| (*id, d))
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn classified_users<'a>(&'a self) -> Vec<(UserId, &'a UserData, IsSpam)> {
|
pub fn classified_users(&self) -> Vec<(UserId, &UserData, IsSpam)> {
|
||||||
self.users
|
self.users
|
||||||
.iter()
|
.iter()
|
||||||
.filter_map(|(user_id, user_data)| {
|
.filter_map(|(user_id, user_data)| {
|
||||||
self.is_spam
|
self.is_spam
|
||||||
.get(&user_id)
|
.get(user_id)
|
||||||
.map(|is_spam| (user_id, user_data, *is_spam))
|
.map(|is_spam| (user_id, user_data, *is_spam))
|
||||||
})
|
})
|
||||||
.map(|(id, d, s)| (*id, d, s))
|
.map(|(id, d, s)| (*id, d, s))
|
||||||
|
|
73
src/main.rs
73
src/main.rs
|
@ -60,19 +60,15 @@ impl Config {
|
||||||
let admin_contact_email = std::env::var("ADMIN_CONTACT_EMAIL")
|
let admin_contact_email = std::env::var("ADMIN_CONTACT_EMAIL")
|
||||||
.context("reading the ADMIN_CONTACT_EMAIL environment variable")?;
|
.context("reading the ADMIN_CONTACT_EMAIL environment variable")?;
|
||||||
|
|
||||||
let actually_ban = match std::env::var("ACTUALLY_BAN_USERS") {
|
let actually_ban = match std::env::var("ACTUALLY_BAN_USERS").as_deref() {
|
||||||
Ok(s) => {
|
Ok("true") => ActuallyBan::Yes {
|
||||||
if &s == "true" {
|
smtp: SmtpConfig::from_env().await?,
|
||||||
ActuallyBan::Yes {
|
},
|
||||||
smtp: SmtpConfig::from_env().await?,
|
Ok("false") => ActuallyBan::No,
|
||||||
}
|
Ok(_) => {
|
||||||
} else if &s == "false" {
|
return Err(anyhow!(
|
||||||
ActuallyBan::No
|
"ACTUALLY_BAN_USERS: unknown value (expected: true/false)"
|
||||||
} else {
|
));
|
||||||
return Err(anyhow!(
|
|
||||||
"ACTUALLY_BAN_USERS: unknown value (expected: true/false)"
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Err(_) => ActuallyBan::No,
|
Err(_) => ActuallyBan::No,
|
||||||
};
|
};
|
||||||
|
@ -125,7 +121,7 @@ async fn load_db(forge: &Forgejo) -> anyhow::Result<(Db, Classifier)> {
|
||||||
Db::from_path(db_path, &classifier)?
|
Db::from_path(db_path, &classifier)?
|
||||||
} else {
|
} else {
|
||||||
let db = Db::from_users(
|
let db = Db::from_users(
|
||||||
scrape::get_user_data(&forge).await?,
|
scrape::get_user_data(forge).await?,
|
||||||
HashMap::new(),
|
HashMap::new(),
|
||||||
&classifier,
|
&classifier,
|
||||||
);
|
);
|
||||||
|
@ -216,7 +212,7 @@ fn set_spam(
|
||||||
}
|
}
|
||||||
|
|
||||||
eprintln!("recomputing user scores");
|
eprintln!("recomputing user scores");
|
||||||
db.recompute_scores(&classifier);
|
db.recompute_scores(classifier);
|
||||||
|
|
||||||
spammers
|
spammers
|
||||||
}
|
}
|
||||||
|
@ -225,11 +221,14 @@ async fn apply_classification(
|
||||||
config: &Config,
|
config: &Config,
|
||||||
forge: &Forgejo,
|
forge: &Forgejo,
|
||||||
db: Arc<Mutex<Db>>,
|
db: Arc<Mutex<Db>>,
|
||||||
classifier: &mut Classifier,
|
classifier: Arc<Mutex<Classifier>>,
|
||||||
ids: &[(UserId, bool)],
|
ids: &[(UserId, bool)],
|
||||||
overwrite: bool,
|
overwrite: bool,
|
||||||
) {
|
) {
|
||||||
let spammers = set_spam(&mut db.lock().unwrap(), classifier, ids, overwrite);
|
let spammers = {
|
||||||
|
let classifier = &mut classifier.lock().unwrap();
|
||||||
|
set_spam(&mut db.lock().unwrap(), classifier, ids, overwrite)
|
||||||
|
};
|
||||||
|
|
||||||
for user in spammers {
|
for user in spammers {
|
||||||
let login = db.lock().unwrap().users.get(&user).unwrap().login.clone();
|
let login = db.lock().unwrap().users.get(&user).unwrap().login.clone();
|
||||||
|
@ -247,7 +246,6 @@ lazy_static! {
|
||||||
pub static ref TEMPLATES: Tera = {
|
pub static ref TEMPLATES: Tera = {
|
||||||
let files: Vec<_> = TEMPLATES_DIR
|
let files: Vec<_> = TEMPLATES_DIR
|
||||||
.files()
|
.files()
|
||||||
.into_iter()
|
|
||||||
.map(|f| {
|
.map(|f| {
|
||||||
(
|
(
|
||||||
f.path().to_str().unwrap(),
|
f.path().to_str().unwrap(),
|
||||||
|
@ -310,7 +308,7 @@ async fn index(
|
||||||
|
|
||||||
users.shuffle(&mut rng);
|
users.shuffle(&mut rng);
|
||||||
|
|
||||||
let sorting_req = q.sort.as_ref().map(|s| s.as_str());
|
let sorting_req = q.sort.as_deref();
|
||||||
match &sorting_req {
|
match &sorting_req {
|
||||||
// sort "legit first": by increasing score
|
// sort "legit first": by increasing score
|
||||||
Some("legit") => users.sort_by_key(|(_, _, score)| (score * 1000.) as u64),
|
Some("legit") => users.sort_by_key(|(_, _, score)| (score * 1000.) as u64),
|
||||||
|
@ -364,9 +362,6 @@ async fn post_classified(
|
||||||
) -> impl Responder {
|
) -> impl Responder {
|
||||||
eprintln!("POST {}", req.uri());
|
eprintln!("POST {}", req.uri());
|
||||||
|
|
||||||
let classifier = &mut data.classifier.lock().unwrap();
|
|
||||||
let db = data.db.clone();
|
|
||||||
|
|
||||||
let updates: Vec<(UserId, bool)> = form
|
let updates: Vec<(UserId, bool)> = form
|
||||||
.iter()
|
.iter()
|
||||||
.map(|(id, classification)| (UserId(*id), classification == "spam"))
|
.map(|(id, classification)| (UserId(*id), classification == "spam"))
|
||||||
|
@ -376,17 +371,21 @@ async fn post_classified(
|
||||||
&data.config,
|
&data.config,
|
||||||
&data.forge,
|
&data.forge,
|
||||||
data.db.clone(),
|
data.db.clone(),
|
||||||
classifier,
|
data.classifier.clone(),
|
||||||
&updates,
|
&updates,
|
||||||
overwrite,
|
overwrite,
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
db.lock()
|
data.db
|
||||||
|
.lock()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.store_to_path(Path::new("db.json"))
|
.store_to_path(Path::new("db.json"))
|
||||||
.unwrap(); // FIXME
|
.unwrap(); // FIXME
|
||||||
classifier
|
|
||||||
|
data.classifier
|
||||||
|
.lock()
|
||||||
|
.unwrap()
|
||||||
.save(&mut File::create(Path::new("model.json")).unwrap(), false)
|
.save(&mut File::create(Path::new("model.json")).unwrap(), false)
|
||||||
.unwrap(); // FIXME
|
.unwrap(); // FIXME
|
||||||
|
|
||||||
|
@ -479,28 +478,30 @@ async fn main() -> anyhow::Result<()> {
|
||||||
config: config.clone(),
|
config: config.clone(),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
let mut workers = tokio::task::JoinSet::new();
|
||||||
|
|
||||||
let _ = {
|
let _ = {
|
||||||
let forge = forge.clone();
|
let forge = forge.clone();
|
||||||
let db = db.clone();
|
let db = db.clone();
|
||||||
let classifier = classifier.clone();
|
let classifier = classifier.clone();
|
||||||
tokio::spawn(async move { workers::refresh_user_data(forge, db, classifier) })
|
workers.spawn(async move { workers::refresh_user_data(forge, db, classifier).await })
|
||||||
};
|
};
|
||||||
let _ = {
|
let _ = {
|
||||||
let config = config.clone();
|
let config = config.clone();
|
||||||
let forge = forge.clone();
|
let forge = forge.clone();
|
||||||
let db = db.clone();
|
let db = db.clone();
|
||||||
tokio::spawn(async move { workers::purge_spammer_accounts(config, forge, db) })
|
workers.spawn(async move { workers::purge_spammer_accounts(config, forge, db).await })
|
||||||
};
|
};
|
||||||
let _ = {
|
let _ = {
|
||||||
let config = config.clone();
|
let config = config.clone();
|
||||||
let forge = forge.clone();
|
let forge = forge.clone();
|
||||||
let db = db.clone();
|
let db = db.clone();
|
||||||
tokio::spawn(async move { workers::lock_and_notify_users(config, forge, db) })
|
workers.spawn(async move { workers::lock_and_notify_users(config, forge, db).await })
|
||||||
};
|
};
|
||||||
|
|
||||||
println!("Listening on http://127.0.0.1:8080");
|
println!("Listening on http://127.0.0.1:8080");
|
||||||
|
|
||||||
HttpServer::new(move || {
|
let webserver = HttpServer::new(move || {
|
||||||
App::new()
|
App::new()
|
||||||
.app_data(st.clone())
|
.app_data(st.clone())
|
||||||
.service(static_)
|
.service(static_)
|
||||||
|
@ -510,8 +511,18 @@ async fn main() -> anyhow::Result<()> {
|
||||||
.service(post_classified_edit)
|
.service(post_classified_edit)
|
||||||
})
|
})
|
||||||
.bind(("127.0.0.1", 8080))?
|
.bind(("127.0.0.1", 8080))?
|
||||||
.run()
|
.run();
|
||||||
.await?;
|
|
||||||
|
tokio::select! {
|
||||||
|
_ = workers.join_all() => {
|
||||||
|
unreachable!()
|
||||||
|
},
|
||||||
|
_ = tokio::signal::ctrl_c() => {
|
||||||
|
},
|
||||||
|
res = webserver => {
|
||||||
|
res?
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,8 +6,10 @@ use crate::data::*;
|
||||||
|
|
||||||
async fn scrape_repos(forge: &Forgejo) -> anyhow::Result<Vec<forgejo_api::structs::Repository>> {
|
async fn scrape_repos(forge: &Forgejo) -> anyhow::Result<Vec<forgejo_api::structs::Repository>> {
|
||||||
let mut repos = Vec::new();
|
let mut repos = Vec::new();
|
||||||
let mut query = forgejo_api::structs::RepoSearchQuery::default();
|
let mut query = forgejo_api::structs::RepoSearchQuery {
|
||||||
query.limit = Some(50);
|
limit: Some(50),
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
let mut page: u32 = 1;
|
let mut page: u32 = 1;
|
||||||
loop {
|
loop {
|
||||||
query.page = Some(page);
|
query.page = Some(page);
|
||||||
|
@ -29,8 +31,10 @@ async fn scrape_repos(forge: &Forgejo) -> anyhow::Result<Vec<forgejo_api::struct
|
||||||
|
|
||||||
async fn scrape_issues(forge: &Forgejo) -> anyhow::Result<Vec<forgejo_api::structs::Issue>> {
|
async fn scrape_issues(forge: &Forgejo) -> anyhow::Result<Vec<forgejo_api::structs::Issue>> {
|
||||||
let mut issues = Vec::new();
|
let mut issues = Vec::new();
|
||||||
let mut query = forgejo_api::structs::IssueSearchIssuesQuery::default();
|
let mut query = forgejo_api::structs::IssueSearchIssuesQuery {
|
||||||
query.limit = Some(50);
|
limit: Some(50),
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
let mut page: u32 = 1;
|
let mut page: u32 = 1;
|
||||||
loop {
|
loop {
|
||||||
query.page = Some(page);
|
query.page = Some(page);
|
||||||
|
@ -47,8 +51,10 @@ async fn scrape_issues(forge: &Forgejo) -> anyhow::Result<Vec<forgejo_api::struc
|
||||||
|
|
||||||
async fn scrape_users(forge: &Forgejo) -> anyhow::Result<Vec<forgejo_api::structs::User>> {
|
async fn scrape_users(forge: &Forgejo) -> anyhow::Result<Vec<forgejo_api::structs::User>> {
|
||||||
let mut users = Vec::new();
|
let mut users = Vec::new();
|
||||||
let mut query = forgejo_api::structs::UserSearchQuery::default();
|
let mut query = forgejo_api::structs::UserSearchQuery {
|
||||||
query.limit = Some(50);
|
limit: Some(50),
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
let mut page: u32 = 1;
|
let mut page: u32 = 1;
|
||||||
loop {
|
loop {
|
||||||
query.page = Some(page);
|
query.page = Some(page);
|
||||||
|
@ -78,7 +84,7 @@ pub async fn get_user_data(forge: &Forgejo) -> anyhow::Result<HashMap<UserId, Us
|
||||||
};
|
};
|
||||||
|
|
||||||
eprintln!("Fetching users...");
|
eprintln!("Fetching users...");
|
||||||
for user in scrape_users(&forge).await? {
|
for user in scrape_users(forge).await? {
|
||||||
let Some(id) = user.id else {
|
let Some(id) = user.id else {
|
||||||
eprintln!("WARN: user with no id");
|
eprintln!("WARN: user with no id");
|
||||||
continue;
|
continue;
|
||||||
|
@ -109,7 +115,7 @@ pub async fn get_user_data(forge: &Forgejo) -> anyhow::Result<HashMap<UserId, Us
|
||||||
}
|
}
|
||||||
|
|
||||||
eprintln!("Fetching repos...");
|
eprintln!("Fetching repos...");
|
||||||
for repo in scrape_repos(&forge).await? {
|
for repo in scrape_repos(forge).await? {
|
||||||
let Some(id) = repo.id else {
|
let Some(id) = repo.id else {
|
||||||
eprintln!("WARN: repo with no id");
|
eprintln!("WARN: repo with no id");
|
||||||
continue;
|
continue;
|
||||||
|
@ -145,7 +151,7 @@ pub async fn get_user_data(forge: &Forgejo) -> anyhow::Result<HashMap<UserId, Us
|
||||||
}
|
}
|
||||||
|
|
||||||
eprintln!("Fetching issues...");
|
eprintln!("Fetching issues...");
|
||||||
for issue in scrape_issues(&forge).await? {
|
for issue in scrape_issues(forge).await? {
|
||||||
let Some(id) = issue.id else {
|
let Some(id) = issue.id else {
|
||||||
eprintln!("WARN: issue with no id");
|
eprintln!("WARN: issue with no id");
|
||||||
continue;
|
continue;
|
||||||
|
|
|
@ -32,7 +32,7 @@ async fn try_refresh_user_data(
|
||||||
eprintln!("Fetching user data");
|
eprintln!("Fetching user data");
|
||||||
let users = scrape::get_user_data(forge).await?;
|
let users = scrape::get_user_data(forge).await?;
|
||||||
|
|
||||||
let db: &mut Db = &mut *db.lock().unwrap();
|
let db: &mut Db = &mut db.lock().unwrap();
|
||||||
let classifier = &classifier.lock().unwrap();
|
let classifier = &classifier.lock().unwrap();
|
||||||
|
|
||||||
// NB: Some user accounts may have been deleted since last fetch (hopefully
|
// NB: Some user accounts may have been deleted since last fetch (hopefully
|
||||||
|
@ -238,7 +238,7 @@ pub async fn try_lock_and_notify_user(
|
||||||
match &config.actually_ban {
|
match &config.actually_ban {
|
||||||
ActuallyBan::Yes { smtp } => {
|
ActuallyBan::Yes { smtp } => {
|
||||||
eprintln!("Sending notification email to user {login}");
|
eprintln!("Sending notification email to user {login}");
|
||||||
email::send_locked_account_notice(config, &smtp, &login, &email).await?;
|
email::send_locked_account_notice(config, smtp, &login, &email).await?;
|
||||||
eprintln!("Success");
|
eprintln!("Success");
|
||||||
}
|
}
|
||||||
ActuallyBan::No => {
|
ActuallyBan::No => {
|
||||||
|
|
Loading…
Reference in a new issue