also fetch issues created by users
This commit is contained in:
parent
0f8368031f
commit
4aa2aeb1fc
4 changed files with 87 additions and 11 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -1 +1,4 @@
|
|||
/target
|
||||
classification.json
|
||||
model.json
|
||||
api_token
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
{"users":{"5847":"Spam","5637":"Spam","4640":"Spam","3590":"Spam","137":"Legit","2176":"Spam","3489":"Spam","4357":"Spam","1985":"Legit","1905":"Spam","4683":"Spam","5006":"Spam","4248":"Spam","4780":"Spam","1790":"Spam","5778":"Spam","2101":"Spam","768":"Legit","2117":"Spam","5516":"Spam","1552":"Legit","946":"Legit","5968":"Spam","3077":"Spam","1376":"Legit","5571":"Spam","4832":"Spam","5513":"Spam","5620":"Spam","3879":"Spam","5366":"Spam","3299":"Spam","12":"Legit","4940":"Spam","5611":"Spam","5524":"Spam","3760":"Spam","4759":"Spam","5184":"Spam","400":"Legit","5695":"Spam","4629":"Spam","5235":"Spam"}}
|
File diff suppressed because one or more lines are too long
93
src/main.rs
93
src/main.rs
|
@ -1,4 +1,3 @@
|
|||
use forgejo_api::structs::{RepoSearchQuery, Repository, User, UserSearchQuery};
|
||||
use forgejo_api::{Auth, Forgejo};
|
||||
use std::collections::HashMap;
|
||||
use tokio::time::{sleep, Duration};
|
||||
|
@ -17,6 +16,16 @@ struct RepoData {
|
|||
description: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Hash, PartialEq, Eq)]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
struct IssueId(i64);
|
||||
|
||||
#[derive(Debug)]
|
||||
struct IssueData {
|
||||
title: String,
|
||||
body: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Hash, PartialEq, Eq)]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
struct UserId(i64);
|
||||
|
@ -28,7 +37,9 @@ struct UserData {
|
|||
location: Option<String>,
|
||||
website: Option<String>,
|
||||
description: Option<String>,
|
||||
// TODO: visibility
|
||||
repos: Vec<(RepoId, RepoData)>,
|
||||
issues: Vec<(IssueId, IssueData)>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
|
@ -78,6 +89,11 @@ impl UserData {
|
|||
}
|
||||
}
|
||||
|
||||
for (_id, issue) in &self.issues {
|
||||
add(&issue.title);
|
||||
add(&issue.body);
|
||||
}
|
||||
|
||||
text
|
||||
}
|
||||
}
|
||||
|
@ -90,9 +106,9 @@ impl Db {
|
|||
}
|
||||
}
|
||||
|
||||
async fn scrape_repos(forge: &Forgejo) -> anyhow::Result<Vec<Repository>> {
|
||||
async fn scrape_repos(forge: &Forgejo) -> anyhow::Result<Vec<forgejo_api::structs::Repository>> {
|
||||
let mut repos = Vec::new();
|
||||
let mut query = RepoSearchQuery::default();
|
||||
let mut query = forgejo_api::structs::RepoSearchQuery::default();
|
||||
query.limit = Some(50);
|
||||
let mut page: u32 = 1;
|
||||
loop {
|
||||
|
@ -113,9 +129,27 @@ async fn scrape_repos(forge: &Forgejo) -> anyhow::Result<Vec<Repository>> {
|
|||
Ok(repos)
|
||||
}
|
||||
|
||||
async fn scrape_users(forge: &Forgejo) -> anyhow::Result<Vec<User>> {
|
||||
async fn scrape_issues(forge: &Forgejo) -> anyhow::Result<Vec<forgejo_api::structs::Issue>> {
|
||||
let mut issues = Vec::new();
|
||||
let mut query = forgejo_api::structs::IssueSearchIssuesQuery::default();
|
||||
query.limit = Some(50);
|
||||
let mut page: u32 = 1;
|
||||
loop {
|
||||
query.page = Some(page);
|
||||
let mut resp = forge.issue_search_issues(query.clone()).await?;
|
||||
if resp.is_empty() {
|
||||
break;
|
||||
}
|
||||
issues.append(&mut resp);
|
||||
page += 1;
|
||||
sleep(Duration::from_millis(100)).await;
|
||||
}
|
||||
Ok(issues)
|
||||
}
|
||||
|
||||
async fn scrape_users(forge: &Forgejo) -> anyhow::Result<Vec<forgejo_api::structs::User>> {
|
||||
let mut users = Vec::new();
|
||||
let mut query = UserSearchQuery::default();
|
||||
let mut query = forgejo_api::structs::UserSearchQuery::default();
|
||||
query.limit = Some(50);
|
||||
let mut page: u32 = 1;
|
||||
loop {
|
||||
|
@ -136,8 +170,10 @@ async fn scrape_users(forge: &Forgejo) -> anyhow::Result<Vec<User>> {
|
|||
Ok(users)
|
||||
}
|
||||
|
||||
async fn get_users_repos(forge: &Forgejo) -> anyhow::Result<HashMap<UserId, UserData>> {
|
||||
async fn get_users_data(forge: &Forgejo) -> anyhow::Result<HashMap<UserId, UserData>> {
|
||||
let mut data = HashMap::new();
|
||||
|
||||
eprintln!("Fetching users...");
|
||||
for user in scrape_users(&forge).await? {
|
||||
let Some(id) = user.id else {
|
||||
eprintln!("WARN: user with no id");
|
||||
|
@ -153,6 +189,7 @@ async fn get_users_repos(forge: &Forgejo) -> anyhow::Result<HashMap<UserId, User
|
|||
eprintln!("WARN: missing email for user {id}");
|
||||
continue;
|
||||
};
|
||||
|
||||
data.insert(
|
||||
UserId(id),
|
||||
UserData {
|
||||
|
@ -162,10 +199,12 @@ async fn get_users_repos(forge: &Forgejo) -> anyhow::Result<HashMap<UserId, User
|
|||
website: user.website,
|
||||
description: user.description,
|
||||
repos: Vec::new(),
|
||||
issues: Vec::new(),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
eprintln!("Fetching repos...");
|
||||
for repo in scrape_repos(&forge).await? {
|
||||
let Some(id) = repo.id else {
|
||||
eprintln!("WARN: repo with no id");
|
||||
|
@ -187,7 +226,7 @@ async fn get_users_repos(forge: &Forgejo) -> anyhow::Result<HashMap<UserId, User
|
|||
// this currently happens for repos owned by organizations
|
||||
eprintln!(
|
||||
"WARN: repo owner {} for repo {} is not in database",
|
||||
owner.login.unwrap(),
|
||||
owner.login.unwrap_or_default(),
|
||||
repo_name
|
||||
);
|
||||
continue;
|
||||
|
@ -200,6 +239,36 @@ async fn get_users_repos(forge: &Forgejo) -> anyhow::Result<HashMap<UserId, User
|
|||
}));
|
||||
}
|
||||
|
||||
eprintln!("Fetching issues...");
|
||||
for issue in scrape_issues(&forge).await? {
|
||||
let Some(id) = issue.id else {
|
||||
eprintln!("WARN: issue with no id");
|
||||
continue;
|
||||
};
|
||||
let Some(user) = issue.user else {
|
||||
eprintln!("WARN: issue {} has no owner", id);
|
||||
continue;
|
||||
};
|
||||
let Some(user_id) = user.id else {
|
||||
eprintln!("WARN: user for issue {} has no id", id);
|
||||
continue;
|
||||
};
|
||||
let Some(forge_user) = data.get_mut(&UserId(user_id)) else {
|
||||
eprintln!("WARN: issue user {} {} for issue {} is not in database",
|
||||
user.login.unwrap_or_default(),
|
||||
user_id,
|
||||
issue.html_url.map_or(String::from(""), |url| url.as_str().to_string())
|
||||
);
|
||||
continue;
|
||||
};
|
||||
forge_user.issues.push((
|
||||
IssueId(id),
|
||||
IssueData {
|
||||
title: issue.title.unwrap_or_default(),
|
||||
body: issue.body.unwrap_or_default(),
|
||||
}));
|
||||
}
|
||||
|
||||
Ok(data)
|
||||
}
|
||||
|
||||
|
@ -221,8 +290,14 @@ async fn main() -> anyhow::Result<()> {
|
|||
Db::new()
|
||||
};
|
||||
|
||||
let forge = Forgejo::new(Auth::None, url::Url::parse("https://git.deuxfleurs.fr")?)?;
|
||||
let data = get_users_repos(&forge).await?;
|
||||
let api_token =
|
||||
std::fs::read_to_string(Path::new("api_token"))?
|
||||
.trim().to_string();
|
||||
let forge = Forgejo::new(
|
||||
Auth::Token(&api_token),
|
||||
url::Url::parse("https://git.deuxfleurs.fr")?
|
||||
)?;
|
||||
let data = get_users_data(&forge).await?;
|
||||
println!("got {} users", data.len());
|
||||
|
||||
for (user_id, user) in data {
|
||||
|
|
Loading…
Reference in a new issue