forgery/src/data.rs

use crate::classifier::Classifier;
use serde::{Deserialize, Serialize};

/// Newtype around an `i64` used as a user identifier.
///
/// Kept as a distinct type so it cannot be mixed up with [`RepoId`] or
/// [`IssueId`], which wrap the same primitive.
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub struct UserId(pub i64);

/// Serializable record describing one user: two mandatory strings, four
/// optional profile fields, and the repositories and issues attached to it.
#[derive(Debug, Serialize, Deserialize)]
pub struct UserData {
    /// Login string. Always present; not consulted by `is_empty` or `to_tokens`.
    pub login: String,
    /// E-mail string. Always present; `to_tokens` splits it on `'@'`.
    pub email: String,
    /// Optional full name. Checked by `is_empty`, not included by `to_tokens`.
    pub full_name: Option<String>,
    /// Optional location text.
    pub location: Option<String>,
    /// Optional website text.
    pub website: Option<String>,
    /// Optional free-form description of the user.
    pub description: Option<String>,
    // TODO: visibility
    /// Repositories attached to this record, each paired with its id.
    pub repos: Vec<(RepoId, RepoData)>,
    /// Issues attached to this record, each paired with its id.
    pub issues: Vec<(IssueId, IssueData)>,
}

/// Newtype around an `i64` used as a repository identifier.
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub struct RepoId(pub i64);

/// Serializable record for a single repository.
#[derive(Debug, Serialize, Deserialize)]
pub struct RepoData {
    /// Repository name. Always present.
    pub name: String,
    /// Optional repository description.
    pub description: Option<String>,
}

/// Newtype around an `i64` used as an issue identifier.
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub struct IssueId(pub i64);

/// Serializable record for a single issue.
#[derive(Debug, Serialize, Deserialize)]
pub struct IssueData {
    /// Issue title. Always present.
    pub title: String,
    /// Issue body text. Always present.
    pub body: String,
}

impl UserData {
    pub fn is_empty(&self) -> bool {
        self.full_name.is_none()
            && self.location.is_none()
            && self.website.is_none()
            && self.description.is_none()
            && self.repos.is_empty()
            && self.issues.is_empty()
    }

    pub fn to_tokens(&self) -> Vec<String> {
        let mut text = String::new();
        let mut add = |s: &str| {
            text += s;
            text += " "
        };

        for email_part in self.email.split('@') {
            add(email_part)
        }

        match &self.location {
            Some(s) => add(&s),
            None => add("__NO_LOCATION__"),
        }

        match &self.website {
            Some(s) => add(&s),
            None => add("__NO_WEBSITE__"),
        }

        match &self.description {
            Some(s) => add(&s),
            None => add("__NO_USER_DESCRIPTION__"),
        }

        for (_id, repo) in &self.repos {
            add(&repo.name);
            match &repo.description {
                Some(s) => add(s),
                None => add("__NO_REPO_DESCRIPTION__"),
            }
        }

        for (_id, issue) in &self.issues {
            add(&issue.title);
            add(&issue.body);
        }

        Classifier::into_word_list(&text)
    }
}
refactoring: split off parts of main.rs into auxiliary files 2024-12-18 08:28:07 +00:00			`use crate::classifier::Classifier;`
cargo fmt 2024-12-19 11:49:58 +00:00			`use serde::{Deserialize, Serialize};`
refactoring: split off parts of main.rs into auxiliary files 2024-12-18 08:28:07 +00:00
			`#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, Serialize, Deserialize)]`
			`pub struct UserId(pub i64);`

			`#[derive(Debug, Serialize, Deserialize)]`
			`pub struct UserData {`
			`pub login: String,`
			`pub email: String,`
			`pub full_name: Option<String>,`
			`pub location: Option<String>,`
			`pub website: Option<String>,`
			`pub description: Option<String>,`
			`// TODO: visibility`
			`pub repos: Vec<(RepoId, RepoData)>,`
			`pub issues: Vec<(IssueId, IssueData)>,`
			`}`

			`#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, Serialize, Deserialize)]`
			`pub struct RepoId(pub i64);`

			`#[derive(Debug, Serialize, Deserialize)]`
			`pub struct RepoData {`
			`pub name: String,`
			`pub description: Option<String>,`
			`}`

			`#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, Serialize, Deserialize)]`
			`pub struct IssueId(pub i64);`

			`#[derive(Debug, Serialize, Deserialize)]`
			`pub struct IssueData {`
			`pub title: String,`
			`pub body: String,`
			`}`

			`impl UserData {`
			`pub fn is_empty(&self) -> bool {`
			`self.full_name.is_none()`
			`&& self.location.is_none()`
			`&& self.website.is_none()`
			`&& self.description.is_none()`
			`&& self.repos.is_empty()`
			`&& self.issues.is_empty()`
			`}`

			`pub fn to_tokens(&self) -> Vec<String> {`
			`let mut text = String::new();`
			`let mut add = \|s: &str\| {`
			`text += s;`
			`text += " "`
			`};`

			`for email_part in self.email.split('@') {`
			`add(email_part)`
			`}`

			`match &self.location {`
			`Some(s) => add(&s),`
			`None => add("__NO_LOCATION__"),`
			`}`

			`match &self.website {`
			`Some(s) => add(&s),`
			`None => add("__NO_WEBSITE__"),`
			`}`

			`match &self.description {`
			`Some(s) => add(&s),`
			`None => add("__NO_USER_DESCRIPTION__"),`
			`}`

			`for (_id, repo) in &self.repos {`
			`add(&repo.name);`
			`match &repo.description {`
			`Some(s) => add(s),`
			`None => add("__NO_REPO_DESCRIPTION__"),`
			`}`
			`}`

			`for (_id, issue) in &self.issues {`
			`add(&issue.title);`
			`add(&issue.body);`
			`}`

			`Classifier::into_word_list(&text)`
			`}`
			`}`