nettext/src/dec/decode.rs

277 lines
8.4 KiB
Rust
Raw Normal View History

use std::collections::HashMap;
use nom::{
branch::alt,
bytes::complete::{tag, take_while, take_while1},
combinator::{map, opt},
multi::{separated_list0, separated_list1},
IResult, InputLength,
};
use crate::dec::{debug, AnyTerm, NonListTerm, Term};
/// Byte sequence that opens a dictionary; `decode_dict` requires it first.
const DICT_OPEN: &[u8] = b"{";
/// Byte sequence that closes a dictionary; `decode_dict` requires it last.
const DICT_CLOSE: &[u8] = b"}";
/// Byte sequence between a key and its value in `decode_dict_item`.
const DICT_ASSIGN: &[u8] = b"=";
/// Byte sequence between dictionary items, matched by `dict_separator`.
const DICT_DELIM: &[u8] = b",";
/// Bytes accepted by `is_string_char` in addition to ASCII alphanumerics.
const STR_EXTRA_CHARS: &[u8] = b"._-*?";
// ----
/// The error kind returned by the `decode` function.
///
/// The lifetime `'a` is that of the input buffer: the byte slices carried by
/// the variants are sub-slices of the bytes handed to `decode`.
#[derive(Eq, PartialEq)]
pub enum DecodeError<'a> {
/// A term was decoded, but something other than whitespace follows it.
/// Carries the remaining input, starting at the first non-whitespace byte.
Garbage(&'a [u8]),
/// The underlying parser reported `nom::Err::Incomplete`, i.e. the input
/// does not represent a complete NetText term.
IncompleteInput,
/// Syntax error in the decoded term. Carries the input slice and the
/// `nom` error kind reported by the parser that failed.
NomError(&'a [u8], nom::error::ErrorKind),
}
impl<'a> std::fmt::Debug for DecodeError<'a> {
    /// Renders the error as a short human-readable message; byte slices are
    /// passed through the crate's `debug` helper before being printed.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::result::Result<(), std::fmt::Error> {
        match self {
            Self::IncompleteInput => f.write_str("Incomplete input"),
            Self::Garbage(trailing) => write!(f, "Garbage: `{}`", debug(trailing)),
            Self::NomError(at, kind) => write!(f, "Nom: {:?}, at: `{}`", kind, debug(at)),
        }
    }
}
/// The result type returned by the `decode` function: `Ok(T)` on success, or
/// a `DecodeError` borrowing from the input (lifetime `'a`) on failure.
pub type DecodeResult<'a, T> = std::result::Result<T, DecodeError<'a>>;
impl<'a> From<nom::Err<nom::error::Error<&'a [u8]>>> for DecodeError<'a> {
fn from(e: nom::Err<nom::error::Error<&'a [u8]>>) -> DecodeError<'a> {
match e {
nom::Err::Incomplete(_) => DecodeError::IncompleteInput,
nom::Err::Error(e) | nom::Err::Failure(e) => DecodeError::NomError(e.input, e.code),
}
}
}
// ----
/// Decodes a NetText string into the term it represents.
2022-11-17 14:02:33 +00:00
pub fn decode(input: &[u8]) -> DecodeResult<'_, Term<'_, 'static>> {
let (rest, term) = decode_term(input)?;
let (end, _) = take_while(is_whitespace)(rest)?;
if !end.is_empty() {
return Err(DecodeError::Garbage(end));
}
Ok(Term(term))
}
2022-11-17 14:02:33 +00:00
/// Parses one term: a single non-list term, or a whitespace-separated
/// sequence of at least two non-list terms.
///
/// Leading whitespace is skipped. When exactly one item is parsed it is
/// converted with `.into()`; otherwise the result is `AnyTerm::List` carrying
/// the raw bytes spanned by the list (first item through last item) and the
/// parsed items. Whitespace after the last item is left in the returned
/// remainder.
fn decode_term(input: &[u8]) -> IResult<&'_ [u8], AnyTerm<'_, 'static>> {
    let (start, _) = take_while(is_whitespace)(input)?;
    let (rest, mut list) =
        separated_list1(take_while1(is_whitespace), decode_nonlist_term)(start)?;

    if list.len() == 1 {
        let single = list.pop().expect("length was just checked to be 1");
        Ok((rest, single.into()))
    } else {
        // The raw slice is whatever the list parser consumed from `start`.
        let raw_len = start.input_len() - rest.input_len();
        let list_raw = &start[..raw_len];
        Ok((rest, AnyTerm::List(list_raw, list)))
    }
}
2022-11-17 14:02:33 +00:00
/// Parses a single non-list term: a bare string or a dictionary.
///
/// The string parser is tried first; the dictionary parser is tried only if
/// it fails.
fn decode_nonlist_term(input: &[u8]) -> IResult<&'_ [u8], NonListTerm<'_, 'static>> {
    alt((
        map(decode_str, NonListTerm::Str),
        map(decode_dict, |(raw, d)| NonListTerm::Dict(raw, d)),
    ))(input)
}
2022-11-17 14:02:33 +00:00
/// Parses a non-empty run of string characters (see `is_string_char`) and
/// returns it as a slice of the input.
///
/// Fails if the input does not start with at least one string character.
fn decode_str(input: &[u8]) -> IResult<&'_ [u8], &'_ [u8]> {
    take_while1(is_string_char)(input)
}
/// A parsed dictionary as produced by `decode_dict`: the raw bytes of the
/// whole dictionary, paired with a map from key bytes to the decoded value.
type DictType<'a> = (&'a [u8], HashMap<&'a [u8], AnyTerm<'a, 'static>>);
2022-11-17 14:02:33 +00:00
/// Parses a dictionary: `DICT_OPEN`, zero or more `key = value` items
/// separated by `DICT_DELIM`, an optional trailing delimiter, optional
/// whitespace, then `DICT_CLOSE`.
///
/// Returns the raw bytes spanned by the dictionary (opening through closing
/// delimiter) together with the key/value map. If a key appears more than
/// once, the last occurrence wins, because the items are collected into a
/// `HashMap`.
fn decode_dict(dict_begin: &[u8]) -> IResult<&'_ [u8], DictType<'_>> {
    let (d, _) = tag(DICT_OPEN)(dict_begin)?;
    let (d, items) = separated_list0(dict_separator, decode_dict_item)(d)?;
    // `separated_list0` does not consume a separator that is not followed by
    // an item, so a trailing delimiter has to be accepted explicitly here.
    let (d, _) = opt(dict_separator)(d)?;
    let (d, _) = take_while(is_whitespace)(d)?;
    let (dict_end, _) = tag(DICT_CLOSE)(d)?;

    let dict = items.into_iter().collect::<HashMap<_, _>>();

    // The raw slice is everything consumed between the two delimiters,
    // delimiters included.
    let raw_len = dict_begin.input_len() - dict_end.input_len();
    let dict_raw = &dict_begin[..raw_len];
    Ok((dict_end, (dict_raw, dict)))
}
2022-11-17 14:02:33 +00:00
/// Matches the separator between dictionary items: optional whitespace
/// followed by `DICT_DELIM`. Produces no value.
fn dict_separator(d: &[u8]) -> IResult<&'_ [u8], ()> {
    let (after_ws, _) = take_while(is_whitespace)(d)?;
    tag(DICT_DELIM)(after_ws).map(|(remaining, _)| (remaining, ()))
}
2022-11-17 14:02:33 +00:00
/// Parses one dictionary entry of the form `key = value`.
///
/// Whitespace is allowed before the key and between the key and
/// `DICT_ASSIGN`. The key is parsed with `decode_str` and the value with
/// `decode_term`, which skips its own leading whitespace. Returns the key
/// bytes and the decoded value.
fn decode_dict_item(d: &[u8]) -> IResult<&'_ [u8], (&'_ [u8], AnyTerm<'_, 'static>)> {
    let (d, _) = take_while(is_whitespace)(d)?;
    let (d, key) = decode_str(d)?;
    let (d, _) = take_while(is_whitespace)(d)?;
    let (d, _) = tag(DICT_ASSIGN)(d)?;
    let (d, value) = decode_term(d)?;
    Ok((d, (key, value)))
}
/// Returns `true` if `c` may appear in a bare string: an ASCII letter, an
/// ASCII digit, or one of the bytes listed in `STR_EXTRA_CHARS`.
fn is_string_char(c: u8) -> bool {
    let alphanumeric = matches!(c, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9');
    alphanumeric || STR_EXTRA_CHARS.iter().any(|&extra| extra == c)
}
/// Returns `true` if `c` is ASCII whitespace: space, horizontal tab, line
/// feed, form feed or carriage return.
fn is_whitespace(c: u8) -> bool {
    matches!(c, b' ' | b'\t' | b'\n' | b'\x0C' | b'\r')
}
// ----
// Unit tests for `decode`, comparing its result against hand-built terms.
#[cfg(test)]
mod tests {
use super::*;
// A single string; the surrounding whitespace is not part of the result.
#[test]
fn simple_str() {
let bytes = b" plop ";
assert_eq!(decode(bytes), Ok(AnyTerm::Str(b"plop").into()));
}
// Several whitespace-separated strings decode to a list whose raw slice
// excludes the leading and trailing whitespace.
#[test]
fn list_of_str_str() {
let bytes = b" plop plap plip ploup ";
assert_eq!(
decode(bytes),
Ok(AnyTerm::List(
b"plop plap plip ploup",
vec![
NonListTerm::Str(b"plop"),
NonListTerm::Str(b"plap"),
NonListTerm::Str(b"plip"),
NonListTerm::Str(b"ploup"),
]
)
.into())
);
}
// A dictionary whose values are a string, a list and a string.
#[test]
fn simple_dict() {
let bytes = b" { aze = hello, by = bojzkz pipo, ccde = ke } ";
assert_eq!(
decode(bytes),
Ok(AnyTerm::Dict(
b"{ aze = hello, by = bojzkz pipo, ccde = ke }",
[
(&b"aze"[..], AnyTerm::Str(b"hello")),
(
&b"by"[..],
AnyTerm::List(
b"bojzkz pipo",
vec![NonListTerm::Str(b"bojzkz"), NonListTerm::Str(b"pipo")]
)
),
(&b"ccde"[..], AnyTerm::Str(b"ke")),
]
.into_iter()
.collect()
)
.into())
);
}
// Same dictionary, with whitespace before the commas and a trailing comma;
// both are accepted and are included in the dictionary's raw slice.
#[test]
fn simple_dict_2() {
let bytes = b" { aze = hello, by = bojzkz pipo , ccde = ke , } ";
assert_eq!(
decode(bytes),
Ok(AnyTerm::Dict(
b"{ aze = hello, by = bojzkz pipo , ccde = ke , }",
[
(&b"aze"[..], AnyTerm::Str(b"hello")),
(
&b"by"[..],
AnyTerm::List(
b"bojzkz pipo",
vec![NonListTerm::Str(b"bojzkz"), NonListTerm::Str(b"pipo")]
)
),
(&b"ccde"[..], AnyTerm::Str(b"ke")),
]
.into_iter()
.collect()
)
.into())
);
}
// A two-element list with no surrounding whitespace.
#[test]
fn real_world_1() {
let bytes = b"HEAD alexpubkey";
assert_eq!(
decode(bytes),
Ok(AnyTerm::List(
b"HEAD alexpubkey",
vec![NonListTerm::Str(b"HEAD"), NonListTerm::Str(b"alexpubkey")]
)
.into()),
);
}
// A list ending in a dictionary, one of whose values is itself a list
// containing a nested dictionary.
#[test]
fn real_world_2() {
let bytes = b"STANCE sthash stsign { author = alexpubkey, height = 12, parent = parenthash, data = MESSAGE { text = hello } }";
assert_eq!(
decode(bytes),
Ok(AnyTerm::List(
&bytes[..],
vec![
NonListTerm::Str(b"STANCE"),
NonListTerm::Str(b"sthash"),
NonListTerm::Str(b"stsign"),
NonListTerm::Dict(b"{ author = alexpubkey, height = 12, parent = parenthash, data = MESSAGE { text = hello } }",
[
(&b"author"[..], AnyTerm::Str(b"alexpubkey")),
(&b"height"[..], AnyTerm::Str(b"12")),
(&b"parent"[..], AnyTerm::Str(b"parenthash")),
(&b"data"[..], AnyTerm::List(
b"MESSAGE { text = hello }",
vec![
NonListTerm::Str(b"MESSAGE"),
NonListTerm::Dict(
b"{ text = hello }",
[
(&b"text"[..], AnyTerm::Str(b"hello")),
]
.into_iter()
.collect()
)
]
))
].into_iter().collect()
),
]).into(),
));
}
}