store user classifications
This commit is contained in:
parent
3f4f93826c
commit
0f8368031f
5 changed files with 45 additions and 15 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -216,6 +216,7 @@ dependencies = [
|
|||
"forgejo-api",
|
||||
"reqwest 0.12.9",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tokio",
|
||||
"url",
|
||||
]
|
||||
|
|
|
@ -13,3 +13,4 @@ forgejo-api = "0.4"
|
|||
url = "2"
|
||||
anyhow = "1.0.93"
|
||||
bayespam = "1.1.0"
|
||||
serde_json = "1.0.133"
|
||||
|
|
1
classification.json
Normal file
1
classification.json
Normal file
|
@ -0,0 +1 @@
|
|||
{"users":{"5847":"Spam","5637":"Spam","4640":"Spam","3590":"Spam","137":"Legit","2176":"Spam","3489":"Spam","4357":"Spam","1985":"Legit","1905":"Spam","4683":"Spam","5006":"Spam","4248":"Spam","4780":"Spam","1790":"Spam","5778":"Spam","2101":"Spam","768":"Legit","2117":"Spam","5516":"Spam","1552":"Legit","946":"Legit","5968":"Spam","3077":"Spam","1376":"Legit","5571":"Spam","4832":"Spam","5513":"Spam","5620":"Spam","3879":"Spam","5366":"Spam","3299":"Spam","12":"Legit","4940":"Spam","5611":"Spam","5524":"Spam","3760":"Spam","4759":"Spam","5184":"Spam","400":"Legit","5695":"Spam","4629":"Spam","5235":"Spam"}}
|
File diff suppressed because one or more lines are too long
57
src/main.rs
57
src/main.rs
|
@ -37,6 +37,7 @@ enum Classification {
|
|||
Legit,
|
||||
Unknown,
|
||||
}
|
||||
use Classification::*;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct Db {
|
||||
|
@ -81,6 +82,14 @@ impl UserData {
|
|||
}
|
||||
}
|
||||
|
||||
impl Db {
|
||||
fn new() -> Db {
|
||||
Db {
|
||||
users: HashMap::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn scrape_repos(forge: &Forgejo) -> anyhow::Result<Vec<Repository>> {
|
||||
let mut repos = Vec::new();
|
||||
let mut query = RepoSearchQuery::default();
|
||||
|
@ -196,10 +205,6 @@ async fn get_users_repos(forge: &Forgejo) -> anyhow::Result<HashMap<UserId, User
|
|||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let forge = Forgejo::new(Auth::None, url::Url::parse("https://git.deuxfleurs.fr")?)?;
|
||||
let data = get_users_repos(&forge).await?;
|
||||
println!("got {} users", data.len());
|
||||
|
||||
let model_path = Path::new("model.json");
|
||||
let mut classifier = if model_path.is_file() {
|
||||
Classifier::new_from_pre_trained(&mut File::open(model_path)?)?
|
||||
|
@ -207,37 +212,59 @@ async fn main() -> anyhow::Result<()> {
|
|||
Classifier::new()
|
||||
};
|
||||
|
||||
for (_, user) in data {
|
||||
let db_path = Path::new("classification.json");
|
||||
let mut db = if db_path.is_file() {
|
||||
let file = File::open(db_path)?;
|
||||
let reader = std::io::BufReader::new(file);
|
||||
serde_json::from_reader(reader)?
|
||||
} else {
|
||||
Db::new()
|
||||
};
|
||||
|
||||
let forge = Forgejo::new(Auth::None, url::Url::parse("https://git.deuxfleurs.fr")?)?;
|
||||
let data = get_users_repos(&forge).await?;
|
||||
println!("got {} users", data.len());
|
||||
|
||||
for (user_id, user) in data {
|
||||
if db.users.contains_key(&user_id) {
|
||||
continue;
|
||||
}
|
||||
|
||||
println!("{:#?}", user);
|
||||
let user_text = user.to_text();
|
||||
|
||||
println!("SCORE: {}", classifier.score(&user_text));
|
||||
|
||||
let is_spam = {
|
||||
let c = {
|
||||
let mut resp = String::new();
|
||||
loop {
|
||||
println!("SPAM? (y/n/?) ");
|
||||
std::io::stdin().read_line(&mut resp)?;
|
||||
match resp.as_str() {
|
||||
"y\n" => break Some(true),
|
||||
"n\n" => break Some(false),
|
||||
"?\n" => break None,
|
||||
"y\n" => break Spam,
|
||||
"n\n" => break Legit,
|
||||
"?\n" => break Unknown,
|
||||
_ => resp.clear()
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
match is_spam {
|
||||
Some(true) => classifier.train_spam(&user_text),
|
||||
Some(false) => classifier.train_ham(&user_text),
|
||||
None => ()
|
||||
match c {
|
||||
Spam => classifier.train_spam(&user_text),
|
||||
Legit => classifier.train_ham(&user_text),
|
||||
Unknown => ()
|
||||
}
|
||||
|
||||
db.users.insert(user_id, c);
|
||||
|
||||
{
|
||||
classifier.save(&mut File::create(model_path)?, false)?;
|
||||
}
|
||||
}
|
||||
|
||||
let file = File::create(db_path)?;
|
||||
let writer = std::io::BufWriter::new(file);
|
||||
serde_json::to_writer(writer, &db)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue