nettext/src/dec/decode.rs

268 lines
7.8 KiB
Rust

use std::collections::HashMap;
use nom::{
branch::alt,
bytes::complete::{tag, take_while, take_while1},
combinator::{opt, map},
multi::{separated_list0, separated_list1},
IResult,
InputLength,
};
use crate::dec::{NonListTerm, Term, debug};
// Delimiter bytes used by the dictionary parsers in this file.
/// Byte sequence that opens a dictionary.
const DICT_OPEN: &[u8] = b"{";
/// Byte sequence that closes a dictionary.
const DICT_CLOSE: &[u8] = b"}";
/// Byte sequence placed between a dictionary key and its value.
const DICT_ASSIGN: &[u8] = b"=";
/// Byte sequence that separates two dictionary items.
const DICT_DELIM: &[u8] = b",";
/// Bytes accepted inside a string in addition to ASCII letters and digits
/// (see `is_string_char`).
const STR_EXTRA_CHARS: &[u8] = b"._-*?";
// ----
/// Errors reported by [`decode`].
///
/// Every variant that carries a slice borrows it from the input being
/// decoded, hence the `'a` lifetime.
#[derive(Eq, PartialEq)]
pub enum Error<'a> {
/// A term was parsed successfully, but non-whitespace bytes remain after
/// it; the payload is that remaining input.
Garbage(&'a [u8]),
/// The underlying nom parser reported `nom::Err::Incomplete`.
IncompleteInput,
/// The underlying nom parser reported an error or a failure; the payload
/// is the input position and the nom error kind.
NomError(&'a [u8], nom::error::ErrorKind),
}
/// Hand-written `Debug` so that byte slices are rendered through the
/// crate's `debug` helper rather than with the derived slice formatting.
impl<'a> std::fmt::Debug for Error<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::IncompleteInput => f.write_str("Incomplete input"),
            Self::Garbage(leftover) => {
                f.write_fmt(format_args!("Garbage: `{}`", debug(leftover)))
            }
            Self::NomError(at, kind) => {
                f.write_fmt(format_args!("Nom: {:?}, at: `{}`", kind, debug(at)))
            }
        }
    }
}
/// Result type of this module: `T` on success, or an [`Error`] borrowing
/// from the decoded input on failure.
pub type Result<'a, T> = std::result::Result<T, Error<'a>>;
impl<'a> From<nom::Err<nom::error::Error<&'a [u8]>>> for Error<'a> {
fn from(e: nom::Err<nom::error::Error<&'a [u8]>>) -> Error<'a> {
match e {
nom::Err::Incomplete(_) => Error::IncompleteInput,
nom::Err::Error(e) | nom::Err::Failure(e) => Error::NomError(e.input, e.code),
}
}
}
// ----
pub fn decode<'a>(input: &'a [u8]) -> Result<'a, Term<'a>> {
let (rest, term) = decode_term(input)?;
let (end, _) = take_while(is_whitespace)(rest)?;
if !end.is_empty() {
return Err(Error::Garbage(end));
}
Ok(term)
}
/// Parses one term from the start of `input`.
///
/// Leading whitespace is skipped, then one or more non-list terms separated
/// by whitespace are read. A single element is returned as that term
/// converted into a [`Term`]; several elements are returned as
/// [`Term::List`] together with the raw input slice running from the first
/// element to the end of the last one.
///
/// Returns the unparsed remainder of the input alongside the term.
///
/// # Errors
///
/// Fails with the underlying nom error if no non-list term can be parsed
/// after the leading whitespace.
pub fn decode_term<'a>(input: &'a [u8]) -> IResult<&'a [u8], Term<'a>> {
    let (start, _) = take_while(is_whitespace)(input)?;
    let (rest, mut list) =
        separated_list1(take_while1(is_whitespace), decode_nonlist_term)(start)?;
    if list.len() == 1 {
        // A lone element is not wrapped in a list.
        let only = list
            .pop()
            .expect("length was just checked to be exactly one");
        Ok((rest, only.into()))
    } else {
        // The raw slice is whatever the list parser consumed from `start`.
        let raw_len = start.input_len() - rest.input_len();
        let list_raw = &start[..raw_len];
        Ok((rest, Term::List(list_raw, list)))
    }
}
/// Parses a single non-list term from the start of `input`: a string, or
/// failing that a dictionary.
///
/// No leading whitespace is skipped here. Returns the unparsed remainder
/// alongside the term.
///
/// # Errors
///
/// Fails with the underlying nom error if the input starts with neither a
/// string nor a dictionary.
pub fn decode_nonlist_term<'a>(input: &'a [u8]) -> IResult<&'a [u8], NonListTerm<'a>> {
    alt((
        map(decode_str, NonListTerm::Str),
        map(decode_dict, |(raw, d)| NonListTerm::Dict(raw, d)),
    ))(input)
}
/// Parses a string: the longest non-empty run of bytes accepted by
/// `is_string_char` at the start of `input`.
///
/// Returns the unparsed remainder alongside the matched bytes, and fails if
/// the first byte is not a string character.
fn decode_str<'a>(input: &'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
    take_while1(is_string_char)(input)
}
/// Output of `decode_dict`: the raw input slice covering the whole
/// dictionary, and the parsed key/value map.
type DictType<'a> = (&'a [u8], HashMap<&'a [u8], Term<'a>>);
/// Parses a dictionary starting at `dict_begin`.
///
/// The accepted shape is the opening delimiter, zero or more
/// `key = term` items separated by the item delimiter, an optional trailing
/// delimiter, optional whitespace, and the closing delimiter.
///
/// Returns the unparsed remainder, the raw slice from the opening delimiter
/// through the closing delimiter, and the items collected into a map.
/// Because the items are collected into a `HashMap`, items sharing a key
/// collapse into a single entry.
///
/// # Errors
///
/// Fails with the underlying nom error if the input does not start with the
/// opening delimiter or the closing delimiter is not found where expected.
fn decode_dict<'a>(dict_begin: &'a [u8]) -> IResult<&'a [u8], DictType<'a>> {
    let (d, _) = tag(DICT_OPEN)(dict_begin)?;
    let (d, items) = separated_list0(dict_separator, decode_dict_item)(d)?;
    // Tolerate one trailing separator and whitespace before the close.
    let (d, _) = opt(dict_separator)(d)?;
    let (d, _) = take_while(is_whitespace)(d)?;
    let (dict_end, _) = tag(DICT_CLOSE)(d)?;

    let dict = items.into_iter().collect::<HashMap<_, _>>();

    // The raw slice is everything consumed, delimiters included.
    let raw_len = dict_begin.input_len() - dict_end.input_len();
    let dict_raw = &dict_begin[..raw_len];

    Ok((dict_end, (dict_raw, dict)))
}
/// Parses the separator between two dictionary items: optional whitespace
/// followed by the item delimiter.
///
/// Whitespace after the delimiter is not consumed here.
fn dict_separator<'a>(d: &'a [u8]) -> IResult<&'a [u8], ()> {
    let (after_ws, _) = take_while(is_whitespace)(d)?;
    tag(DICT_DELIM)(after_ws).map(|(rest, _)| (rest, ()))
}
/// Parses one dictionary item of the form `key = term`.
///
/// Whitespace is allowed before the key and between the key and the
/// assignment delimiter; the value is parsed by `decode_term`, which skips
/// its own leading whitespace.
///
/// Returns the unparsed remainder alongside the `(key, value)` pair.
///
/// # Errors
///
/// Fails with the underlying nom error if the key, the assignment delimiter
/// or the value cannot be parsed.
fn decode_dict_item<'a>(d: &'a [u8]) -> IResult<&'a [u8], (&'a [u8], Term<'a>)> {
    let (d, _) = take_while(is_whitespace)(d)?;
    let (d, key) = decode_str(d)?;
    let (d, _) = take_while(is_whitespace)(d)?;
    let (d, _) = tag(DICT_ASSIGN)(d)?;
    let (d, value) = decode_term(d)?;
    Ok((d, (key, value)))
}
/// Returns `true` if `c` may appear inside a string: an ASCII letter or
/// digit, or one of the bytes listed in `STR_EXTRA_CHARS`.
fn is_string_char(c: u8) -> bool {
    let is_extra = STR_EXTRA_CHARS.iter().any(|&extra| extra == c);
    is_extra || matches!(c, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9')
}
/// Returns `true` if `c` is an ASCII whitespace byte: space, tab, line
/// feed, form feed or carriage return.
fn is_whitespace(c: u8) -> bool {
    matches!(c, b' ' | b'\t' | b'\n' | b'\x0C' | b'\r')
}
// ----
// Unit tests for `decode`. Each test feeds a byte-string literal to the
// decoder and compares the result with a hand-built expected `Term`.
#[cfg(test)]
mod tests {
use super::*;
// A single string surrounded by whitespace decodes to that string alone.
#[test]
fn simple_str() {
let bytes = b" plop ";
assert_eq!(decode(bytes), Ok(Term::Str(b"plop")));
}
// Several whitespace-separated strings decode to a list whose raw slice
// excludes the surrounding whitespace.
#[test]
fn list_of_str_str() {
let bytes = b" plop plap plip ploup ";
assert_eq!(
decode(bytes),
Ok(Term::List(
b"plop plap plip ploup",
vec![
NonListTerm::Str(b"plop"),
NonListTerm::Str(b"plap"),
NonListTerm::Str(b"plip"),
NonListTerm::Str(b"ploup"),
]
))
);
}
// A dictionary whose values are a string, a two-element list and a string.
#[test]
fn simple_dict() {
let bytes = b" { aze = hello, by = bojzkz pipo, ccde = ke } ";
assert_eq!(
decode(bytes),
Ok(Term::Dict(
b"{ aze = hello, by = bojzkz pipo, ccde = ke }",
[
(&b"aze"[..], Term::Str(b"hello")),
(
&b"by"[..],
Term::List(
b"bojzkz pipo",
vec![NonListTerm::Str(b"bojzkz"), NonListTerm::Str(b"pipo")]
)
),
(&b"ccde"[..], Term::Str(b"ke")),
]
.into_iter()
.collect()
))
);
}
// Same dictionary, with whitespace before the separators and a trailing
// separator before the closing brace.
#[test]
fn simple_dict_2() {
let bytes = b" { aze = hello, by = bojzkz pipo , ccde = ke , } ";
assert_eq!(
decode(bytes),
Ok(Term::Dict(
b"{ aze = hello, by = bojzkz pipo , ccde = ke , }",
[
(&b"aze"[..], Term::Str(b"hello")),
(
&b"by"[..],
Term::List(
b"bojzkz pipo",
vec![NonListTerm::Str(b"bojzkz"), NonListTerm::Str(b"pipo")]
)
),
(&b"ccde"[..], Term::Str(b"ke")),
]
.into_iter()
.collect()
))
);
}
// Two-element list with no surrounding whitespace.
#[test]
fn real_world_1() {
let bytes = b"HEAD alexpubkey";
assert_eq!(
decode(bytes),
Ok(Term::List(
b"HEAD alexpubkey",
vec![NonListTerm::Str(b"HEAD"), NonListTerm::Str(b"alexpubkey")]
)),
);
}
// A list ending in a dictionary, one of whose values is itself a list
// containing a nested dictionary.
#[test]
fn real_world_2() {
let bytes = b"STANCE sthash stsign { author = alexpubkey, height = 12, parent = parenthash, data = MESSAGE { text = hello } }";
assert_eq!(
decode(bytes),
Ok(Term::List(
&bytes[..],
vec![
NonListTerm::Str(b"STANCE"),
NonListTerm::Str(b"sthash"),
NonListTerm::Str(b"stsign"),
NonListTerm::Dict(b"{ author = alexpubkey, height = 12, parent = parenthash, data = MESSAGE { text = hello } }",
[
(&b"author"[..], Term::Str(b"alexpubkey")),
(&b"height"[..], Term::Str(b"12")),
(&b"parent"[..], Term::Str(b"parenthash")),
(&b"data"[..], Term::List(
b"MESSAGE { text = hello }",
vec![
NonListTerm::Str(b"MESSAGE"),
NonListTerm::Dict(
b"{ text = hello }",
[
(&b"text"[..], Term::Str(b"hello")),
]
.into_iter()
.collect()
)
]
))
].into_iter().collect()
),
])),
);
}
}