// forgery/src/data.rs

use crate::classifier::Classifier;
// (page-extraction residue: 2024-12-19 11:49:58 +00:00)
use serde::{Deserialize, Serialize};
/// Identifier of a user: a newtype around a raw `i64`, giving it a type
/// distinct from [`RepoId`] and [`IssueId`].
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub struct UserId(pub i64);
/// Profile fields and content attached to one user.
///
/// This is the input to [`UserData::is_empty`] and [`UserData::to_tokens`].
#[derive(Debug, Serialize, Deserialize)]
pub struct UserData {
/// The user's login name.
pub login: String,
/// Email address; `to_tokens` splits it on `'@'` and adds every part.
pub email: String,
/// Optional full name; checked by `is_empty` but not added by `to_tokens`.
pub full_name: Option<String>,
/// Optional location; `to_tokens` adds `__NO_LOCATION__` when it is absent.
pub location: Option<String>,
/// Optional website; `to_tokens` adds `__NO_WEBSITE__` when it is absent.
pub website: Option<String>,
/// Optional profile description; `to_tokens` adds `__NO_USER_DESCRIPTION__`
/// when it is absent.
pub description: Option<String>,
// TODO: visibility
/// Repositories paired with their ids (presumably the ones owned by this
/// user; confirm against the code that fills this struct).
pub repos: Vec<(RepoId, RepoData)>,
/// Issues paired with their ids (presumably the ones opened by this user;
/// confirm against the code that fills this struct).
pub issues: Vec<(IssueId, IssueData)>,
}
/// Identifier of a repository: a newtype around a raw `i64`.
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub struct RepoId(pub i64);
/// Textual fields of a repository that `UserData::to_tokens` feeds to the
/// classifier.
#[derive(Debug, Serialize, Deserialize)]
pub struct RepoData {
/// Repository name.
pub name: String,
/// Optional repository description; `UserData::to_tokens` adds
/// `__NO_REPO_DESCRIPTION__` when it is absent.
pub description: Option<String>,
}
/// Identifier of an issue: a newtype around a raw `i64`.
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub struct IssueId(pub i64);
/// Textual fields of an issue that `UserData::to_tokens` feeds to the
/// classifier.
#[derive(Debug, Serialize, Deserialize)]
pub struct IssueData {
/// Issue title.
pub title: String,
/// Issue body text.
pub body: String,
}
impl UserData {
pub fn is_empty(&self) -> bool {
self.full_name.is_none()
&& self.location.is_none()
&& self.website.is_none()
&& self.description.is_none()
&& self.repos.is_empty()
&& self.issues.is_empty()
}
pub fn to_tokens(&self) -> Vec<String> {
let mut text = String::new();
let mut add = |s: &str| {
text += s;
text += " "
};
for email_part in self.email.split('@') {
add(email_part)
}
match &self.location {
Some(s) => add(&s),
None => add("__NO_LOCATION__"),
}
match &self.website {
Some(s) => add(&s),
None => add("__NO_WEBSITE__"),
}
match &self.description {
Some(s) => add(&s),
None => add("__NO_USER_DESCRIPTION__"),
}
for (_id, repo) in &self.repos {
add(&repo.name);
match &repo.description {
Some(s) => add(s),
None => add("__NO_REPO_DESCRIPTION__"),
}
}
for (_id, issue) in &self.issues {
add(&issue.title);
add(&issue.body);
}
Classifier::into_word_list(&text)
}
}