also fetch issues created by users
This commit is contained in:
parent
0f8368031f
commit
4aa2aeb1fc
4 changed files with 87 additions and 11 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -1 +1,4 @@
|
||||||
/target
|
/target
|
||||||
|
classification.json
|
||||||
|
model.json
|
||||||
|
api_token
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
{"users":{"5847":"Spam","5637":"Spam","4640":"Spam","3590":"Spam","137":"Legit","2176":"Spam","3489":"Spam","4357":"Spam","1985":"Legit","1905":"Spam","4683":"Spam","5006":"Spam","4248":"Spam","4780":"Spam","1790":"Spam","5778":"Spam","2101":"Spam","768":"Legit","2117":"Spam","5516":"Spam","1552":"Legit","946":"Legit","5968":"Spam","3077":"Spam","1376":"Legit","5571":"Spam","4832":"Spam","5513":"Spam","5620":"Spam","3879":"Spam","5366":"Spam","3299":"Spam","12":"Legit","4940":"Spam","5611":"Spam","5524":"Spam","3760":"Spam","4759":"Spam","5184":"Spam","400":"Legit","5695":"Spam","4629":"Spam","5235":"Spam"}}
|
|
File diff suppressed because one or more lines are too long
93
src/main.rs
93
src/main.rs
|
@ -1,4 +1,3 @@
|
||||||
use forgejo_api::structs::{RepoSearchQuery, Repository, User, UserSearchQuery};
|
|
||||||
use forgejo_api::{Auth, Forgejo};
|
use forgejo_api::{Auth, Forgejo};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use tokio::time::{sleep, Duration};
|
use tokio::time::{sleep, Duration};
|
||||||
|
@ -17,6 +16,16 @@ struct RepoData {
|
||||||
description: Option<String>,
|
description: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Hash, PartialEq, Eq)]
|
||||||
|
#[derive(Serialize, Deserialize)]
|
||||||
|
struct IssueId(i64);
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct IssueData {
|
||||||
|
title: String,
|
||||||
|
body: String,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Hash, PartialEq, Eq)]
|
#[derive(Debug, Hash, PartialEq, Eq)]
|
||||||
#[derive(Serialize, Deserialize)]
|
#[derive(Serialize, Deserialize)]
|
||||||
struct UserId(i64);
|
struct UserId(i64);
|
||||||
|
@ -28,7 +37,9 @@ struct UserData {
|
||||||
location: Option<String>,
|
location: Option<String>,
|
||||||
website: Option<String>,
|
website: Option<String>,
|
||||||
description: Option<String>,
|
description: Option<String>,
|
||||||
|
// TODO: visibility
|
||||||
repos: Vec<(RepoId, RepoData)>,
|
repos: Vec<(RepoId, RepoData)>,
|
||||||
|
issues: Vec<(IssueId, IssueData)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
|
@ -78,6 +89,11 @@ impl UserData {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (_id, issue) in &self.issues {
|
||||||
|
add(&issue.title);
|
||||||
|
add(&issue.body);
|
||||||
|
}
|
||||||
|
|
||||||
text
|
text
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -90,9 +106,9 @@ impl Db {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn scrape_repos(forge: &Forgejo) -> anyhow::Result<Vec<Repository>> {
|
async fn scrape_repos(forge: &Forgejo) -> anyhow::Result<Vec<forgejo_api::structs::Repository>> {
|
||||||
let mut repos = Vec::new();
|
let mut repos = Vec::new();
|
||||||
let mut query = RepoSearchQuery::default();
|
let mut query = forgejo_api::structs::RepoSearchQuery::default();
|
||||||
query.limit = Some(50);
|
query.limit = Some(50);
|
||||||
let mut page: u32 = 1;
|
let mut page: u32 = 1;
|
||||||
loop {
|
loop {
|
||||||
|
@ -113,9 +129,27 @@ async fn scrape_repos(forge: &Forgejo) -> anyhow::Result<Vec<Repository>> {
|
||||||
Ok(repos)
|
Ok(repos)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn scrape_users(forge: &Forgejo) -> anyhow::Result<Vec<User>> {
|
async fn scrape_issues(forge: &Forgejo) -> anyhow::Result<Vec<forgejo_api::structs::Issue>> {
|
||||||
|
let mut issues = Vec::new();
|
||||||
|
let mut query = forgejo_api::structs::IssueSearchIssuesQuery::default();
|
||||||
|
query.limit = Some(50);
|
||||||
|
let mut page: u32 = 1;
|
||||||
|
loop {
|
||||||
|
query.page = Some(page);
|
||||||
|
let mut resp = forge.issue_search_issues(query.clone()).await?;
|
||||||
|
if resp.is_empty() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
issues.append(&mut resp);
|
||||||
|
page += 1;
|
||||||
|
sleep(Duration::from_millis(100)).await;
|
||||||
|
}
|
||||||
|
Ok(issues)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn scrape_users(forge: &Forgejo) -> anyhow::Result<Vec<forgejo_api::structs::User>> {
|
||||||
let mut users = Vec::new();
|
let mut users = Vec::new();
|
||||||
let mut query = UserSearchQuery::default();
|
let mut query = forgejo_api::structs::UserSearchQuery::default();
|
||||||
query.limit = Some(50);
|
query.limit = Some(50);
|
||||||
let mut page: u32 = 1;
|
let mut page: u32 = 1;
|
||||||
loop {
|
loop {
|
||||||
|
@ -136,8 +170,10 @@ async fn scrape_users(forge: &Forgejo) -> anyhow::Result<Vec<User>> {
|
||||||
Ok(users)
|
Ok(users)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn get_users_repos(forge: &Forgejo) -> anyhow::Result<HashMap<UserId, UserData>> {
|
async fn get_users_data(forge: &Forgejo) -> anyhow::Result<HashMap<UserId, UserData>> {
|
||||||
let mut data = HashMap::new();
|
let mut data = HashMap::new();
|
||||||
|
|
||||||
|
eprintln!("Fetching users...");
|
||||||
for user in scrape_users(&forge).await? {
|
for user in scrape_users(&forge).await? {
|
||||||
let Some(id) = user.id else {
|
let Some(id) = user.id else {
|
||||||
eprintln!("WARN: user with no id");
|
eprintln!("WARN: user with no id");
|
||||||
|
@ -153,6 +189,7 @@ async fn get_users_repos(forge: &Forgejo) -> anyhow::Result<HashMap<UserId, User
|
||||||
eprintln!("WARN: missing email for user {id}");
|
eprintln!("WARN: missing email for user {id}");
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
|
|
||||||
data.insert(
|
data.insert(
|
||||||
UserId(id),
|
UserId(id),
|
||||||
UserData {
|
UserData {
|
||||||
|
@ -162,10 +199,12 @@ async fn get_users_repos(forge: &Forgejo) -> anyhow::Result<HashMap<UserId, User
|
||||||
website: user.website,
|
website: user.website,
|
||||||
description: user.description,
|
description: user.description,
|
||||||
repos: Vec::new(),
|
repos: Vec::new(),
|
||||||
|
issues: Vec::new(),
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
eprintln!("Fetching repos...");
|
||||||
for repo in scrape_repos(&forge).await? {
|
for repo in scrape_repos(&forge).await? {
|
||||||
let Some(id) = repo.id else {
|
let Some(id) = repo.id else {
|
||||||
eprintln!("WARN: repo with no id");
|
eprintln!("WARN: repo with no id");
|
||||||
|
@ -187,7 +226,7 @@ async fn get_users_repos(forge: &Forgejo) -> anyhow::Result<HashMap<UserId, User
|
||||||
// this currently happens for repos owned by organizations
|
// this currently happens for repos owned by organizations
|
||||||
eprintln!(
|
eprintln!(
|
||||||
"WARN: repo owner {} for repo {} is not in database",
|
"WARN: repo owner {} for repo {} is not in database",
|
||||||
owner.login.unwrap(),
|
owner.login.unwrap_or_default(),
|
||||||
repo_name
|
repo_name
|
||||||
);
|
);
|
||||||
continue;
|
continue;
|
||||||
|
@ -200,6 +239,36 @@ async fn get_users_repos(forge: &Forgejo) -> anyhow::Result<HashMap<UserId, User
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
eprintln!("Fetching issues...");
|
||||||
|
for issue in scrape_issues(&forge).await? {
|
||||||
|
let Some(id) = issue.id else {
|
||||||
|
eprintln!("WARN: issue with no id");
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
let Some(user) = issue.user else {
|
||||||
|
eprintln!("WARN: issue {} has no owner", id);
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
let Some(user_id) = user.id else {
|
||||||
|
eprintln!("WARN: user for issue {} has no id", id);
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
let Some(forge_user) = data.get_mut(&UserId(user_id)) else {
|
||||||
|
eprintln!("WARN: issue user {} {} for issue {} is not in database",
|
||||||
|
user.login.unwrap_or_default(),
|
||||||
|
user_id,
|
||||||
|
issue.html_url.map_or(String::from(""), |url| url.as_str().to_string())
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
forge_user.issues.push((
|
||||||
|
IssueId(id),
|
||||||
|
IssueData {
|
||||||
|
title: issue.title.unwrap_or_default(),
|
||||||
|
body: issue.body.unwrap_or_default(),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
Ok(data)
|
Ok(data)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -221,8 +290,14 @@ async fn main() -> anyhow::Result<()> {
|
||||||
Db::new()
|
Db::new()
|
||||||
};
|
};
|
||||||
|
|
||||||
let forge = Forgejo::new(Auth::None, url::Url::parse("https://git.deuxfleurs.fr")?)?;
|
let api_token =
|
||||||
let data = get_users_repos(&forge).await?;
|
std::fs::read_to_string(Path::new("api_token"))?
|
||||||
|
.trim().to_string();
|
||||||
|
let forge = Forgejo::new(
|
||||||
|
Auth::Token(&api_token),
|
||||||
|
url::Url::parse("https://git.deuxfleurs.fr")?
|
||||||
|
)?;
|
||||||
|
let data = get_users_data(&forge).await?;
|
||||||
println!("got {} users", data.len());
|
println!("got {} users", data.len());
|
||||||
|
|
||||||
for (user_id, user) in data {
|
for (user_id, user) in data {
|
||||||
|
|
Loading…
Reference in a new issue