cache scrape results (no logic to update the cache yet)

This commit is contained in:
Armaël Guéneau 2024-11-19 13:40:24 +01:00
parent 4aa2aeb1fc
commit b57ead4a5c

View file

@ -5,12 +5,14 @@ use bayespam::classifier::Classifier;
use serde::{Serialize, Deserialize}; use serde::{Serialize, Deserialize};
use std::path::Path; use std::path::Path;
use std::fs::File; use std::fs::File;
use std::io::{BufReader, BufWriter};
#[derive(Debug, Hash, PartialEq, Eq)] #[derive(Debug, Hash, PartialEq, Eq)]
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
struct RepoId(i64); struct RepoId(i64);
#[derive(Debug)] #[derive(Debug)]
#[derive(Serialize, Deserialize)]
struct RepoData { struct RepoData {
name: String, name: String,
description: Option<String>, description: Option<String>,
@ -21,6 +23,7 @@ struct RepoData {
struct IssueId(i64); struct IssueId(i64);
#[derive(Debug)] #[derive(Debug)]
#[derive(Serialize, Deserialize)]
struct IssueData { struct IssueData {
title: String, title: String,
body: String, body: String,
@ -31,6 +34,7 @@ struct IssueData {
struct UserId(i64); struct UserId(i64);
#[derive(Debug)] #[derive(Debug)]
#[derive(Serialize, Deserialize)]
struct UserData { struct UserData {
// login: String, // login: String,
email: String, email: String,
@ -284,8 +288,7 @@ async fn main() -> anyhow::Result<()> {
let db_path = Path::new("classification.json"); let db_path = Path::new("classification.json");
let mut db = if db_path.is_file() { let mut db = if db_path.is_file() {
let file = File::open(db_path)?; let file = File::open(db_path)?;
let reader = std::io::BufReader::new(file); serde_json::from_reader(BufReader::new(file))?
serde_json::from_reader(reader)?
} else { } else {
Db::new() Db::new()
}; };
@ -297,7 +300,17 @@ async fn main() -> anyhow::Result<()> {
Auth::Token(&api_token), Auth::Token(&api_token),
url::Url::parse("https://git.deuxfleurs.fr")? url::Url::parse("https://git.deuxfleurs.fr")?
)?; )?;
// Load the scraped user data from the on-disk cache when it exists;
// otherwise scrape it from the forge and write the cache for later runs.
// NOTE: nothing refreshes or invalidates `data.json` yet, so an existing
// cache file is always used as-is.
let data_path = Path::new("data.json");
let data = if data_path.is_file() {
    let file = File::open(data_path)?;
    serde_json::from_reader(BufReader::new(file))?
} else {
    let data = get_users_data(&forge).await?;
    let file = File::create(data_path)?;
    let mut writer = BufWriter::new(file);
    // Serialize the freshly scraped `data` (not the classification `db`):
    // this file is read back above as the scrape result.
    serde_json::to_writer(&mut writer, &data)?;
    // Flush explicitly so that a failed write is reported here instead of
    // being silently discarded when the `BufWriter` is dropped.
    std::io::Write::flush(&mut writer)?;
    data
};
println!("got {} users", data.len()); println!("got {} users", data.len());
for (user_id, user) in data { for (user_id, user) in data {
@ -336,8 +349,7 @@ async fn main() -> anyhow::Result<()> {
classifier.save(&mut File::create(model_path)?, false)?; classifier.save(&mut File::create(model_path)?, false)?;
let file = File::create(db_path)?; let file = File::create(db_path)?;
let writer = std::io::BufWriter::new(file); serde_json::to_writer(BufWriter::new(file), &db)?;
serde_json::to_writer(writer, &db)?;
} }
} }