First commit including a decoder and a readme file
commit 911da57d74
6 changed files with 427 additions and 0 deletions
2  .gitignore  vendored  Normal file
@@ -0,0 +1,2 @@
/target
/Cargo.lock
13  Cargo.toml  Normal file
@@ -0,0 +1,13 @@
[package]
name = "nettext"
description = "A text-based data format for cryptographic network protocols"
authors = ["Alex Auvolat <alex@adnab.me>"]
version = "0.1.0"
edition = "2021"
license = "AGPL-3.0"
readme = "README.md"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
nom = "7.1"
74  README.md  Normal file
@@ -0,0 +1,74 @@
# NetText

A text-based data format for cryptographic network protocols.

## Principles

- Only uses a limited subset of ASCII characters
- Has a minimal set of fundamental data types
- Retains the raw representation of complex data structures for hashing and cryptographic signing
- Minimal value data type: a string type that can only be used to represent identifiers, numbers and base64-encoded byte strings.

## Fundamental types
A term can be of any of the following kinds:

- a string, which may contain only ASCII alphanumeric characters and `.-_*?`
- a dict, which maps strings (as defined above) to any term type
- a list, which is a consecutive sequence of at least 2 strings or dicts (can be mixed), simply separated by whitespace

Dicts are represented as follows:

```
{
  key1 = value1,
  key2 = value2
}
```

Lists are represented as follows:

```
term1 term2 term3
```

As a consequence, complex data structures can be defined as follows:

```
SENDTO alex {
  topic = blah,
  body = blah blah
}
```

The raw representation of a parsed dict or list is retained for hashing purposes.
It is the sequence of bytes, within the encoded string, trimmed of whitespace at both ends,
that represents the encoded dict or list in that string.

In the complex example above, here are the lists and dicts and their raw representations:

- the toplevel term is a list, whose raw representation is the entire encoded string (assuming no whitespace at the beginning or end)
- the third term of the list is a dict, whose raw representation starts at `{` and ends at `}`
- the second mapping of the dict is a list, whose raw representation is exactly `blah blah`.

Since strings cannot contain whitespace, they are always equivalent to their raw representation.
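To make the raw-representation rule concrete, here is a minimal sketch that runs the decoder added in this commit (`nettext::dec::decode`) on the `SENDTO` example above, written on a single line with no surrounding whitespace; the assertions simply restate the points listed above:

```rust
use nettext::dec::{decode, Term};

fn main() {
    // The SENDTO example from above, on one line, with no leading or
    // trailing whitespace.
    let bytes = b"SENDTO alex { topic = blah, body = blah blah }";

    match decode(bytes) {
        // The toplevel term is a list (SENDTO, alex, and the dict);
        // its raw representation is the whole encoded string.
        Ok(Term::List(raw, items)) => {
            assert_eq!(raw, &bytes[..]);
            assert_eq!(items.len(), 3);
        }
        other => panic!("unexpected decoding result: {:?}", other),
    }
}
```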
## Structural mappings

Terms can be interpreted in a number of different ways, depending on the context (a sketch of the VARIANT mapping is given after the list):

- RAW: the term is interpreted as its raw encoding (see above)
- STRING: if the term is a string or a list composed exclusively of strings, the term is interpreted as its raw encoding
- VARIANT: if the term is a list whose first item is a string, it is interpreted as a variant with the following properties:
  - a discriminator (the first item)
  - a value, which is either the second item if there are only two items, or the list composed of all items starting from the second if there are more than two
- DICT: if the term is a dict, interpret it as such
- LIST: if the term is a string or a dict, interpret it as a list composed of that single term. Otherwise, the term is a list; interpret it as a list of terms.
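The mapping functions themselves are not part of this commit; the following is only a hypothetical sketch of how the VARIANT mapping could be expressed against the `Term` and `NonListTerm` types from `src/dec/mod.rs` (the helper name `as_variant` is illustrative):

```rust
use nettext::dec::{NonListTerm, Term};

/// Hypothetical helper (not in this commit): applies the VARIANT mapping.
/// The discriminator is the first item of the list; the returned slice holds
/// the remaining items (a single item when the list has exactly two terms).
fn as_variant<'a, 'b>(term: &'b Term<'a>) -> Option<(&'a [u8], &'b [NonListTerm<'a>])> {
    match term {
        Term::List(_raw, items) => match items.as_slice() {
            [NonListTerm::Str(discriminator), rest @ ..] if !rest.is_empty() => {
                Some((*discriminator, rest))
            }
            _ => None,
        },
        _ => None,
    }
}
```

For the `HEAD alexpubkey` input used in the decoder tests, this would yield the discriminator `HEAD` and a one-item rest.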
## Data mappings

Terms further have mappings as different data types (a sketch of the INT mapping is given after the list):

- BYTES: if the term maps as a STRING, decode it using base64
- INT: if the term maps as a STRING, decode it as an integer written in decimal notation
- HASH, PUBKEY, SECKEY, SIGNATURE, ENCKEY, DECKEY, SYMKEY: a bunch of things that interpret BYTES as specific cryptographic items
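Again as a hypothetical sketch (none of these mappings exist in this commit), an INT mapping over a plain-string term could look like this; it deliberately ignores the list-of-strings case of the STRING mapping:

```rust
use nettext::dec::Term;

/// Hypothetical helper (not in this commit): applies the INT mapping to a
/// plain string term, parsing it as a decimal integer.
fn as_int(term: &Term<'_>) -> Option<i64> {
    match term {
        Term::Str(s) => std::str::from_utf8(s).ok()?.parse::<i64>().ok(),
        _ => None,
    }
}
```

In the `STANCE` test case in `src/dec/decode.rs`, the `height = 12` entry is the kind of string such a mapping would turn into an integer.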
267  src/dec/decode.rs  Normal file
@@ -0,0 +1,267 @@
use std::collections::HashMap;

use nom::{
    branch::alt,
    bytes::complete::{tag, take_while, take_while1},
    combinator::{opt, map},
    multi::{separated_list0, separated_list1},
    IResult,
    InputLength,
};

use crate::dec::{NonListTerm, Term, debug};

const DICT_OPEN: &[u8] = b"{";
const DICT_CLOSE: &[u8] = b"}";
const DICT_ASSIGN: &[u8] = b"=";
const DICT_DELIM: &[u8] = b",";
const STR_EXTRA_CHARS: &[u8] = b"._-*?";

// ----

/// Errors that can be returned by `decode`.
#[derive(Eq, PartialEq)]
pub enum Error<'a> {
    Garbage(&'a [u8]),
    IncompleteInput,
    NomError(&'a [u8], nom::error::ErrorKind),
}

impl<'a> std::fmt::Debug for Error<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::result::Result<(), std::fmt::Error> {
        match self {
            Error::Garbage(g) => write!(f, "Garbage: `{}`", debug(g)),
            Error::IncompleteInput => write!(f, "Incomplete input"),
            Error::NomError(s, e) => write!(f, "Nom: {:?}, at: `{}`", e, debug(s)),
        }
    }
}

pub type Result<'a, T> = std::result::Result<T, Error<'a>>;

impl<'a> From<nom::Err<nom::error::Error<&'a [u8]>>> for Error<'a> {
    fn from(e: nom::Err<nom::error::Error<&'a [u8]>>) -> Error<'a> {
        match e {
            nom::Err::Incomplete(_) => Error::IncompleteInput,
            nom::Err::Error(e) | nom::Err::Failure(e) => Error::NomError(e.input, e.code),
        }
    }
}

// ----

/// Decodes a whole input buffer as a single term; anything other than
/// trailing whitespace left after the term is reported as `Error::Garbage`.
pub fn decode<'a>(input: &'a [u8]) -> Result<'a, Term<'a>> {
    let (rest, term) = decode_term(input)?;
    let (end, _) = take_while(is_whitespace)(rest)?;
    if !end.is_empty() {
        return Err(Error::Garbage(end));
    }
    Ok(term)
}

/// Parses a term: one or more whitespace-separated non-list terms.
/// A single item is returned as-is; several items form a list whose raw
/// representation (without surrounding whitespace) is kept alongside it.
pub fn decode_term<'a>(input: &'a [u8]) -> IResult<&'a [u8], Term<'a>> {
    eprintln!("DT: `{}`", debug(input));
    let (start, _) = take_while(is_whitespace)(input)?;
    eprintln!("DT2: `{}`", debug(start));
    let (rest, list) = separated_list1(take_while1(is_whitespace), decode_nonlist_term)(start)?;
    eprintln!("DT3: `{}`", debug(rest));

    if list.len() == 1 {
        Ok((rest, list.into_iter().next().unwrap().into()))
    } else {
        let raw_len = start.input_len() - rest.input_len();
        let list_raw = &start[..raw_len];
        Ok((rest, Term::List(list_raw, list)))
    }
}

/// Parses a term that is not a list: either a string or a dict.
pub fn decode_nonlist_term<'a>(input: &'a [u8]) -> IResult<&'a [u8], NonListTerm<'a>> {
    eprintln!("DNLT: `{}`", debug(input));
    let (rest, term) = alt((
        map(decode_str, NonListTerm::Str),
        map(decode_dict, |(raw, d)| NonListTerm::Dict(raw, d)),
    ))(input)?;
    eprintln!("DNLTend: `{}` {:?}", debug(rest), term);
    Ok((rest, term))
}

fn decode_str<'a>(input: &'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
    eprintln!("DS: `{}`", debug(input));
    let (rest, data) = take_while1(is_string_char)(input)?;
    Ok((rest, data))
}

type DictType<'a> = (&'a [u8], HashMap<&'a [u8], Term<'a>>);

/// Parses a `{ key = value, ... }` dict (a trailing comma is allowed) and
/// returns its raw representation together with the key/value map.
fn decode_dict<'a>(dict_begin: &'a [u8]) -> IResult<&'a [u8], DictType<'a>> {
    eprintln!("DDbegin: `{}`", debug(dict_begin));
    let (d, _) = tag(DICT_OPEN)(dict_begin)?;
    eprintln!("DD2: `{}`", debug(d));
    let (d, items) = separated_list0(dict_separator, decode_dict_item)(d)?;
    eprintln!("DD3: `{}`", debug(d));
    let (d, _) = opt(dict_separator)(d)?;
    let (d, _) = take_while(is_whitespace)(d)?;
    eprintln!("DD4: `{}`", debug(d));
    let (dict_end, _) = tag(DICT_CLOSE)(d)?;
    eprintln!("DDend: `{}`", debug(dict_end));

    let dict = items.into_iter().collect::<HashMap<_, _>>();

    let raw_len = dict_begin.input_len() - dict_end.input_len();
    let dict_raw = &dict_begin[..raw_len];

    Ok((dict_end, (dict_raw, dict)))
}

fn dict_separator<'a>(d: &'a [u8]) -> IResult<&'a [u8], ()> {
    let (d, _) = take_while(is_whitespace)(d)?;
    let (d, _) = tag(DICT_DELIM)(d)?;
    Ok((d, ()))
}

fn decode_dict_item<'a>(d: &'a [u8]) -> IResult<&'a [u8], (&'a [u8], Term<'a>)> {
    eprintln!("DDI: `{}`", debug(d));
    let (d, _) = take_while(is_whitespace)(d)?;
    eprintln!("DDI1: `{}`", debug(d));
    let (d, key) = decode_str(d)?;
    eprintln!("DDI2: `{}`", debug(d));
    let (d, _) = take_while(is_whitespace)(d)?;
    let (d, _) = tag(DICT_ASSIGN)(d)?;
    eprintln!("DDI3: `{}`", debug(d));
    let (d, value) = decode_term(d)?;
    eprintln!("DDI4: `{}`", debug(d));
    Ok((d, (key, value)))
}

fn is_string_char(c: u8) -> bool {
    c.is_ascii_alphanumeric() || STR_EXTRA_CHARS.contains(&c)
}

fn is_whitespace(c: u8) -> bool {
    c.is_ascii_whitespace()
}

// ----

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn simple_str() {
        let bytes = b" plop ";
        assert_eq!(decode(bytes), Ok(Term::Str(b"plop")));
    }

    #[test]
    fn list_of_str_str() {
        let bytes = b" plop plap plip ploup ";
        assert_eq!(
            decode(bytes),
            Ok(Term::List(
                b"plop plap plip ploup",
                vec![
                    NonListTerm::Str(b"plop"),
                    NonListTerm::Str(b"plap"),
                    NonListTerm::Str(b"plip"),
                    NonListTerm::Str(b"ploup"),
                ]
            ))
        );
    }

    #[test]
    fn simple_dict() {
        let bytes = b" { aze = hello, by = bojzkz pipo, ccde = ke } ";
        assert_eq!(
            decode(bytes),
            Ok(Term::Dict(
                b"{ aze = hello, by = bojzkz pipo, ccde = ke }",
                [
                    (&b"aze"[..], Term::Str(b"hello")),
                    (
                        &b"by"[..],
                        Term::List(
                            b"bojzkz pipo",
                            vec![NonListTerm::Str(b"bojzkz"), NonListTerm::Str(b"pipo")]
                        )
                    ),
                    (&b"ccde"[..], Term::Str(b"ke")),
                ]
                .into_iter()
                .collect()
            ))
        );
    }

    #[test]
    fn simple_dict_2() {
        let bytes = b" { aze = hello, by = bojzkz pipo , ccde = ke , } ";
        assert_eq!(
            decode(bytes),
            Ok(Term::Dict(
                b"{ aze = hello, by = bojzkz pipo , ccde = ke , }",
                [
                    (&b"aze"[..], Term::Str(b"hello")),
                    (
                        &b"by"[..],
                        Term::List(
                            b"bojzkz pipo",
                            vec![NonListTerm::Str(b"bojzkz"), NonListTerm::Str(b"pipo")]
                        )
                    ),
                    (&b"ccde"[..], Term::Str(b"ke")),
                ]
                .into_iter()
                .collect()
            ))
        );
    }

    #[test]
    fn real_world_1() {
        let bytes = b"HEAD alexpubkey";
        assert_eq!(
            decode(bytes),
            Ok(Term::List(
                b"HEAD alexpubkey",
                vec![NonListTerm::Str(b"HEAD"), NonListTerm::Str(b"alexpubkey")]
            )),
        );
    }

    #[test]
    fn real_world_2() {
        let bytes = b"STANCE sthash stsign { author = alexpubkey, height = 12, parent = parenthash, data = MESSAGE { text = hello } }";
        assert_eq!(
            decode(bytes),
            Ok(Term::List(
                &bytes[..],
                vec![
                    NonListTerm::Str(b"STANCE"),
                    NonListTerm::Str(b"sthash"),
                    NonListTerm::Str(b"stsign"),
                    NonListTerm::Dict(
                        b"{ author = alexpubkey, height = 12, parent = parenthash, data = MESSAGE { text = hello } }",
                        [
                            (&b"author"[..], Term::Str(b"alexpubkey")),
                            (&b"height"[..], Term::Str(b"12")),
                            (&b"parent"[..], Term::Str(b"parenthash")),
                            (
                                &b"data"[..],
                                Term::List(
                                    b"MESSAGE { text = hello }",
                                    vec![
                                        NonListTerm::Str(b"MESSAGE"),
                                        NonListTerm::Dict(
                                            b"{ text = hello }",
                                            [(&b"text"[..], Term::Str(b"hello"))]
                                                .into_iter()
                                                .collect()
                                        )
                                    ]
                                )
                            ),
                        ]
                        .into_iter()
                        .collect()
                    ),
                ]
            )),
        );
    }
}
70  src/dec/mod.rs  Normal file
@@ -0,0 +1,70 @@
mod decode;

use std::collections::HashMap;

pub use decode::*;

/// A decoded nettext term. Dicts and lists keep a reference to their raw
/// encoding so that it can later be hashed or signed.
#[derive(Eq, PartialEq)]
pub enum Term<'a> {
    Str(&'a [u8]),
    Dict(&'a [u8], HashMap<&'a [u8], Term<'a>>),
    List(&'a [u8], Vec<NonListTerm<'a>>),
}

/// A term that cannot itself be a list; lists are sequences of these.
#[derive(Eq, PartialEq)]
pub enum NonListTerm<'a> {
    Str(&'a [u8]),
    Dict(&'a [u8], HashMap<&'a [u8], Term<'a>>),
}

impl<'a> From<NonListTerm<'a>> for Term<'a> {
    fn from(x: NonListTerm<'a>) -> Term<'a> {
        match x {
            NonListTerm::Str(s) => Term::Str(s),
            NonListTerm::Dict(raw, d) => Term::Dict(raw, d),
        }
    }
}

// ----

/// Best-effort display of a byte slice as UTF-8, for debug output.
pub fn debug<'a>(x: &'a [u8]) -> &'a str {
    std::str::from_utf8(x).unwrap_or("<invalid ascii>")
}

impl<'a> std::fmt::Debug for Term<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::result::Result<(), std::fmt::Error> {
        match self {
            Term::Str(s) => write!(f, "Str(`{}`)", debug(s)),
            Term::Dict(raw, d) => {
                write!(f, "Dict<`{}`", debug(raw))?;
                for (k, v) in d.iter() {
                    write!(f, "\n `{}`={:?}", debug(k), v)?;
                }
                write!(f, ">")
            }
            Term::List(raw, l) => {
                write!(f, "List[`{}`", debug(raw))?;
                for i in l.iter() {
                    write!(f, "\n {:?}", i)?;
                }
                write!(f, "]")
            }
        }
    }
}

impl<'a> std::fmt::Debug for NonListTerm<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::result::Result<(), std::fmt::Error> {
        match self {
            NonListTerm::Str(s) => write!(f, "Str(`{}`)", debug(s)),
            NonListTerm::Dict(raw, d) => {
                write!(f, "Dict<`{}`", debug(raw))?;
                for (k, v) in d.iter() {
                    write!(f, "\n `{}`={:?}", debug(k), v)?;
                }
                write!(f, ">")
            }
        }
    }
}
1  src/lib.rs  Normal file
@@ -0,0 +1 @@
pub mod dec;