store user classifications
This commit is contained in:
parent
3f4f93826c
commit
0f8368031f
5 changed files with 45 additions and 15 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -216,6 +216,7 @@ dependencies = [
|
||||||
"forgejo-api",
|
"forgejo-api",
|
||||||
"reqwest 0.12.9",
|
"reqwest 0.12.9",
|
||||||
"serde",
|
"serde",
|
||||||
|
"serde_json",
|
||||||
"tokio",
|
"tokio",
|
||||||
"url",
|
"url",
|
||||||
]
|
]
|
||||||
|
|
|
@ -13,3 +13,4 @@ forgejo-api = "0.4"
|
||||||
url = "2"
|
url = "2"
|
||||||
anyhow = "1.0.93"
|
anyhow = "1.0.93"
|
||||||
bayespam = "1.1.0"
|
bayespam = "1.1.0"
|
||||||
|
serde_json = "1.0.133"
|
||||||
|
|
1
classification.json
Normal file
1
classification.json
Normal file
|
@ -0,0 +1 @@
|
||||||
|
{"users":{"5847":"Spam","5637":"Spam","4640":"Spam","3590":"Spam","137":"Legit","2176":"Spam","3489":"Spam","4357":"Spam","1985":"Legit","1905":"Spam","4683":"Spam","5006":"Spam","4248":"Spam","4780":"Spam","1790":"Spam","5778":"Spam","2101":"Spam","768":"Legit","2117":"Spam","5516":"Spam","1552":"Legit","946":"Legit","5968":"Spam","3077":"Spam","1376":"Legit","5571":"Spam","4832":"Spam","5513":"Spam","5620":"Spam","3879":"Spam","5366":"Spam","3299":"Spam","12":"Legit","4940":"Spam","5611":"Spam","5524":"Spam","3760":"Spam","4759":"Spam","5184":"Spam","400":"Legit","5695":"Spam","4629":"Spam","5235":"Spam"}}
|
File diff suppressed because one or more lines are too long
57
src/main.rs
57
src/main.rs
|
@ -37,6 +37,7 @@ enum Classification {
|
||||||
Legit,
|
Legit,
|
||||||
Unknown,
|
Unknown,
|
||||||
}
|
}
|
||||||
|
use Classification::*;
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
struct Db {
|
struct Db {
|
||||||
|
@ -81,6 +82,14 @@ impl UserData {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Db {
|
||||||
|
fn new() -> Db {
|
||||||
|
Db {
|
||||||
|
users: HashMap::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async fn scrape_repos(forge: &Forgejo) -> anyhow::Result<Vec<Repository>> {
|
async fn scrape_repos(forge: &Forgejo) -> anyhow::Result<Vec<Repository>> {
|
||||||
let mut repos = Vec::new();
|
let mut repos = Vec::new();
|
||||||
let mut query = RepoSearchQuery::default();
|
let mut query = RepoSearchQuery::default();
|
||||||
|
@ -196,10 +205,6 @@ async fn get_users_repos(forge: &Forgejo) -> anyhow::Result<HashMap<UserId, User
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> anyhow::Result<()> {
|
async fn main() -> anyhow::Result<()> {
|
||||||
let forge = Forgejo::new(Auth::None, url::Url::parse("https://git.deuxfleurs.fr")?)?;
|
|
||||||
let data = get_users_repos(&forge).await?;
|
|
||||||
println!("got {} users", data.len());
|
|
||||||
|
|
||||||
let model_path = Path::new("model.json");
|
let model_path = Path::new("model.json");
|
||||||
let mut classifier = if model_path.is_file() {
|
let mut classifier = if model_path.is_file() {
|
||||||
Classifier::new_from_pre_trained(&mut File::open(model_path)?)?
|
Classifier::new_from_pre_trained(&mut File::open(model_path)?)?
|
||||||
|
@ -207,37 +212,59 @@ async fn main() -> anyhow::Result<()> {
|
||||||
Classifier::new()
|
Classifier::new()
|
||||||
};
|
};
|
||||||
|
|
||||||
for (_, user) in data {
|
let db_path = Path::new("classification.json");
|
||||||
|
let mut db = if db_path.is_file() {
|
||||||
|
let file = File::open(db_path)?;
|
||||||
|
let reader = std::io::BufReader::new(file);
|
||||||
|
serde_json::from_reader(reader)?
|
||||||
|
} else {
|
||||||
|
Db::new()
|
||||||
|
};
|
||||||
|
|
||||||
|
let forge = Forgejo::new(Auth::None, url::Url::parse("https://git.deuxfleurs.fr")?)?;
|
||||||
|
let data = get_users_repos(&forge).await?;
|
||||||
|
println!("got {} users", data.len());
|
||||||
|
|
||||||
|
for (user_id, user) in data {
|
||||||
|
if db.users.contains_key(&user_id) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
println!("{:#?}", user);
|
println!("{:#?}", user);
|
||||||
let user_text = user.to_text();
|
let user_text = user.to_text();
|
||||||
|
|
||||||
println!("SCORE: {}", classifier.score(&user_text));
|
println!("SCORE: {}", classifier.score(&user_text));
|
||||||
|
|
||||||
let is_spam = {
|
let c = {
|
||||||
let mut resp = String::new();
|
let mut resp = String::new();
|
||||||
loop {
|
loop {
|
||||||
println!("SPAM? (y/n/?) ");
|
println!("SPAM? (y/n/?) ");
|
||||||
std::io::stdin().read_line(&mut resp)?;
|
std::io::stdin().read_line(&mut resp)?;
|
||||||
match resp.as_str() {
|
match resp.as_str() {
|
||||||
"y\n" => break Some(true),
|
"y\n" => break Spam,
|
||||||
"n\n" => break Some(false),
|
"n\n" => break Legit,
|
||||||
"?\n" => break None,
|
"?\n" => break Unknown,
|
||||||
_ => resp.clear()
|
_ => resp.clear()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
match is_spam {
|
match c {
|
||||||
Some(true) => classifier.train_spam(&user_text),
|
Spam => classifier.train_spam(&user_text),
|
||||||
Some(false) => classifier.train_ham(&user_text),
|
Legit => classifier.train_ham(&user_text),
|
||||||
None => ()
|
Unknown => ()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
db.users.insert(user_id, c);
|
||||||
|
|
||||||
{
|
{
|
||||||
classifier.save(&mut File::create(model_path)?, false)?;
|
classifier.save(&mut File::create(model_path)?, false)?;
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
let file = File::create(db_path)?;
|
||||||
|
let writer = std::io::BufWriter::new(file);
|
||||||
|
serde_json::to_writer(writer, &db)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue