First commit including a decoder and a readme file
This commit is contained in:
commit
911da57d74
6 changed files with 427 additions and 0 deletions
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
/target
|
||||
/Cargo.lock
|
13
Cargo.toml
Normal file
13
Cargo.toml
Normal file
|
@ -0,0 +1,13 @@
|
|||
[package]
|
||||
name = "nettext"
|
||||
description = "A text-based data format for cryptographic network protocols"
|
||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
license = "AGPL-3.0"
|
||||
readme = "README.md"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
nom = "7.1"
|
74
README.md
Normal file
74
README.md
Normal file
|
@ -0,0 +1,74 @@
|
|||
# NetText
|
||||
|
||||
A text-based data format for cryptographic network protocols.
|
||||
|
||||
## Principles
|
||||
|
||||
- Only uses a limited subset of ASCII characters
|
||||
- Has a minimal set of fundamental data types
|
||||
- Retains the raw representation of complex data structures for hashing and cryptographic signing
|
||||
- Minimal value data type: a string type that can only be used to represent identifiers, numbers and base64-encoded byte strings.
|
||||
|
||||
## Fundamental types
|
||||
|
||||
A term can be of any of the following kinds:
|
||||
|
||||
- a string, which may contain only ASCII alphanumeric characters and `.-_*?`
|
||||
- a dict, which maps strings (as defined above) to any term type
|
||||
- a list, which is a consecutive sequence of at least 2 strings or dicts (can be mixed), simply separated by whitespace
|
||||
|
||||
Dicts are represented as follows:
|
||||
|
||||
```
|
||||
{
|
||||
key1 = value1,
|
||||
key2 = value2
|
||||
}
|
||||
```
|
||||
|
||||
Lists are represented as follows:
|
||||
|
||||
```
|
||||
term1 term2 term3
|
||||
```
|
||||
|
||||
As a consequence, complex data structures can be defined as follows:
|
||||
|
||||
```
|
||||
SENDTO alex {
|
||||
topic = blah,
|
||||
body = blah blah
|
||||
}
|
||||
```
|
||||
|
||||
The raw representation of a parsed dict or list is retained for hashing purposes.
|
||||
It is the sequence of bytes, in the encoded string, trimmed of whitespace at both ends,
|
||||
that represents the encoded dict or list in that string.
|
||||
|
||||
In the complex data structure example above, here are the lists and dicts and their raw representations:
|
||||
|
||||
- the toplevel term is a list, whose raw representation is the entire encoded string (assuming no whitespace at beginning or end)
|
||||
- the third term of the list is a dict, whose raw representation starts at `{` and ends at `}`
|
||||
- the second mapping of the dict is a list, whose raw representation is exactly `blah blah`.
|
||||
|
||||
Since strings cannot contain whitespace, they are always equivalent to their raw representation.
|
||||
|
||||
## Structural mappings
|
||||
|
||||
Terms can be interpreted in a number of different ways, depending on the context:
|
||||
|
||||
- RAW: the term is interpreted as its raw encoding (see above)
|
||||
- STRING: if the term is a string or a list composed exclusively of strings, the term is interpreted as its raw encoding
|
||||
- VARIANT: if the term is a list whose first item is a string, it is interpreted as a variant with the following properties:
|
||||
- a discriminator (the first item)
|
||||
- a value, which is either the second item in case there are only two items, or the list composed of all items starting from the second if there are more than two
|
||||
- DICT: if the term is a dict, interpret it as such
|
||||
- LIST: if the term is a string or a dict, interpret it as a list composed of that single term. Otherwise, the term is a list, interpret it as a list of terms.
|
||||
|
||||
## Data mappings
|
||||
|
||||
Terms further have mappings as different data types:
|
||||
|
||||
- BYTES: if the term maps as a STRING, decode it using base64
|
||||
- INT: if the term maps as a STRING, decode it as an integer written in decimal notation
|
||||
- HASH, PUBKEY, SECKEY, SIGNATURE, ENCKEY, DECKEY, SYMKEY: a bunch of things that interpret BYTES as specific cryptographic items
|
267
src/dec/decode.rs
Normal file
267
src/dec/decode.rs
Normal file
|
@ -0,0 +1,267 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
use nom::{
|
||||
branch::alt,
|
||||
bytes::complete::{tag, take_while, take_while1},
|
||||
combinator::{opt, map},
|
||||
multi::{separated_list0, separated_list1},
|
||||
IResult,
|
||||
InputLength,
|
||||
};
|
||||
|
||||
use crate::dec::{NonListTerm, Term, debug};
|
||||
|
||||
/// Byte sequence that opens a dict.
const DICT_OPEN: &[u8] = b"{";
/// Byte sequence that closes a dict.
const DICT_CLOSE: &[u8] = b"}";
/// Byte sequence placed between a dict key and its value.
const DICT_ASSIGN: &[u8] = b"=";
/// Byte sequence that separates consecutive dict entries.
const DICT_DELIM: &[u8] = b",";
/// Non-alphanumeric bytes that are additionally accepted inside a string term.
const STR_EXTRA_CHARS: &[u8] = b"._-*?";
|
||||
|
||||
// ----
|
||||
|
||||
#[derive(Eq, PartialEq)]
|
||||
pub enum Error<'a> {
|
||||
Garbage(&'a [u8]),
|
||||
IncompleteInput,
|
||||
NomError(&'a [u8], nom::error::ErrorKind),
|
||||
}
|
||||
|
||||
impl<'a> std::fmt::Debug for Error<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::result::Result<(), std::fmt::Error> {
|
||||
match self {
|
||||
Error::Garbage(g) => write!(f, "Garbage: `{}`", debug(g)),
|
||||
Error::IncompleteInput => write!(f, "Incomplete input"),
|
||||
Error::NomError(s, e) => write!(f, "Nom: {:?}, at: `{}`", e, debug(s)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Result type of the decoder, with [`Error`] as the error type.
pub type Result<'a, T> = std::result::Result<T, Error<'a>>;
|
||||
|
||||
impl<'a> From<nom::Err<nom::error::Error<&'a [u8]>>> for Error<'a> {
|
||||
fn from(e: nom::Err<nom::error::Error<&'a [u8]>>) -> Error<'a> {
|
||||
match e {
|
||||
nom::Err::Incomplete(_) => Error::IncompleteInput,
|
||||
nom::Err::Error(e) | nom::Err::Failure(e) => Error::NomError(e.input, e.code),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ----
|
||||
|
||||
pub fn decode<'a>(input: &'a [u8]) -> Result<'a, Term<'a>> {
|
||||
let (rest, term) = decode_term(input)?;
|
||||
let (end, _) = take_while(is_whitespace)(rest)?;
|
||||
if !end.is_empty() {
|
||||
return Err(Error::Garbage(end));
|
||||
}
|
||||
Ok(term)
|
||||
}
|
||||
|
||||
pub fn decode_term<'a>(input: &'a [u8]) -> IResult<&'a [u8], Term<'a>> {
|
||||
eprintln!("DT: `{}`", debug(input));
|
||||
let (start, _) = take_while(is_whitespace)(input)?;
|
||||
eprintln!("DT2: `{}`", debug(start));
|
||||
let (rest, list) = separated_list1(take_while1(is_whitespace), decode_nonlist_term)(start)?;
|
||||
eprintln!("DT3: `{}`", debug(rest));
|
||||
|
||||
if list.len() == 1 {
|
||||
Ok((rest, list.into_iter().next().unwrap().into()))
|
||||
} else {
|
||||
let raw_len = start.input_len() - rest.input_len();
|
||||
let list_raw = &start[..raw_len];
|
||||
Ok((rest, Term::List(list_raw, list)))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn decode_nonlist_term<'a>(input: &'a [u8]) -> IResult<&'a [u8], NonListTerm<'a>> {
|
||||
eprintln!("DNLT: `{}`", debug(input));
|
||||
let (rest, term) = alt((
|
||||
map(decode_str, NonListTerm::Str),
|
||||
map(decode_dict, |(raw, d)| NonListTerm::Dict(raw, d)),
|
||||
))(input)?;
|
||||
eprintln!("DNLTend: `{}` {:?}", debug(rest), term);
|
||||
Ok((rest, term))
|
||||
}
|
||||
|
||||
/// Parses a non-empty run of string characters (see `is_string_char`).
///
/// Returns the remaining input together with the bytes of the string.
fn decode_str<'a>(input: &'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
    take_while1(is_string_char)(input)
}
|
||||
|
||||
/// Output of `decode_dict`: the raw bytes of the dict and its decoded entries.
type DictType<'a> = (&'a [u8], HashMap<&'a [u8], Term<'a>>);
|
||||
|
||||
fn decode_dict<'a>(dict_begin: &'a [u8]) -> IResult<&'a [u8], DictType<'a>> {
|
||||
eprintln!("DDbegin: `{}`", debug(dict_begin));
|
||||
let (d, _) = tag(DICT_OPEN)(dict_begin)?;
|
||||
eprintln!("DD2: `{}`", debug(d));
|
||||
let (d, items) = separated_list0(dict_separator, decode_dict_item)(d)?;
|
||||
eprintln!("DD3: `{}`", debug(d));
|
||||
let (d, _) = opt(dict_separator)(d)?;
|
||||
let (d, _) = take_while(is_whitespace)(d)?;
|
||||
eprintln!("DD4: `{}`", debug(d));
|
||||
let (dict_end, _) = tag(DICT_CLOSE)(d)?;
|
||||
eprintln!("DDend: `{}`", debug(dict_end));
|
||||
|
||||
let dict = items.into_iter().collect::<HashMap<_, _>>();
|
||||
|
||||
let raw_len = dict_begin.input_len() - dict_end.input_len();
|
||||
let dict_raw = &dict_begin[..raw_len];
|
||||
|
||||
Ok((dict_end, (dict_raw, dict)))
|
||||
}
|
||||
|
||||
fn dict_separator<'a>(d: &'a [u8]) -> IResult<&'a [u8], ()> {
|
||||
let (d, _) = take_while(is_whitespace)(d)?;
|
||||
let (d, _) = tag(DICT_DELIM)(d)?;
|
||||
Ok((d, ()))
|
||||
}
|
||||
|
||||
fn decode_dict_item<'a>(d: &'a [u8]) -> IResult<&'a [u8], (&'a [u8], Term<'a>)> {
|
||||
eprintln!("DDI: `{}`", debug(d));
|
||||
let (d, _) = take_while(is_whitespace)(d)?;
|
||||
eprintln!("DDI1: `{}`", debug(d));
|
||||
let (d, key) = decode_str(d)?;
|
||||
eprintln!("DDI2: `{}`", debug(d));
|
||||
let (d, _) = take_while(is_whitespace)(d)?;
|
||||
let (d, _) = tag(DICT_ASSIGN)(d)?;
|
||||
eprintln!("DDI3: `{}`", debug(d));
|
||||
let (d, value) = decode_term(d)?;
|
||||
eprintln!("DDI4: `{}`", debug(d));
|
||||
Ok((d, (key, value)))
|
||||
}
|
||||
|
||||
fn is_string_char(c: u8) -> bool {
|
||||
c.is_ascii_alphanumeric() || STR_EXTRA_CHARS.contains(&c)
|
||||
}
|
||||
|
||||
/// Returns `true` if `c` is an ASCII whitespace byte
/// (space, tab, line feed, form feed or carriage return).
fn is_whitespace(c: u8) -> bool {
    matches!(c, b' ' | b'\t' | b'\n' | b'\x0C' | b'\r')
}
|
||||
|
||||
// ----
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a string item inside a list.
    fn item(s: &[u8]) -> NonListTerm<'_> {
        NonListTerm::Str(s)
    }

    /// Dict contents expected by both `simple_dict` and `simple_dict_2`.
    fn sample_entries() -> HashMap<&'static [u8], Term<'static>> {
        let by = Term::List(b"bojzkz pipo", vec![item(b"bojzkz"), item(b"pipo")]);
        [
            (&b"aze"[..], Term::Str(b"hello")),
            (&b"by"[..], by),
            (&b"ccde"[..], Term::Str(b"ke")),
        ]
        .into_iter()
        .collect()
    }

    #[test]
    fn simple_str() {
        let bytes = b" plop ";
        let expected = Term::Str(b"plop");
        assert_eq!(decode(bytes), Ok(expected));
    }

    #[test]
    fn list_of_str_str() {
        let bytes = b" plop plap plip ploup ";
        let expected = Term::List(
            b"plop plap plip ploup",
            vec![item(b"plop"), item(b"plap"), item(b"plip"), item(b"ploup")],
        );
        assert_eq!(decode(bytes), Ok(expected));
    }

    #[test]
    fn simple_dict() {
        let bytes = b" { aze = hello, by = bojzkz pipo, ccde = ke } ";
        let expected = Term::Dict(
            b"{ aze = hello, by = bojzkz pipo, ccde = ke }",
            sample_entries(),
        );
        assert_eq!(decode(bytes), Ok(expected));
    }

    #[test]
    fn simple_dict_2() {
        // Whitespace before delimiters and a trailing delimiter are accepted.
        let bytes = b" { aze = hello, by = bojzkz pipo , ccde = ke , } ";
        let expected = Term::Dict(
            b"{ aze = hello, by = bojzkz pipo , ccde = ke , }",
            sample_entries(),
        );
        assert_eq!(decode(bytes), Ok(expected));
    }

    #[test]
    fn real_world_1() {
        let bytes = b"HEAD alexpubkey";
        let expected = Term::List(b"HEAD alexpubkey", vec![item(b"HEAD"), item(b"alexpubkey")]);
        assert_eq!(decode(bytes), Ok(expected));
    }

    #[test]
    fn real_world_2() {
        let bytes = b"STANCE sthash stsign { author = alexpubkey, height = 12, parent = parenthash, data = MESSAGE { text = hello } }";

        let message = Term::List(
            b"MESSAGE { text = hello }",
            vec![
                item(b"MESSAGE"),
                NonListTerm::Dict(
                    b"{ text = hello }",
                    [(&b"text"[..], Term::Str(b"hello"))].into_iter().collect(),
                ),
            ],
        );
        let stance = NonListTerm::Dict(
            b"{ author = alexpubkey, height = 12, parent = parenthash, data = MESSAGE { text = hello } }",
            [
                (&b"author"[..], Term::Str(b"alexpubkey")),
                (&b"height"[..], Term::Str(b"12")),
                (&b"parent"[..], Term::Str(b"parenthash")),
                (&b"data"[..], message),
            ]
            .into_iter()
            .collect(),
        );
        let expected = Term::List(
            &bytes[..],
            vec![item(b"STANCE"), item(b"sthash"), item(b"stsign"), stance],
        );

        assert_eq!(decode(bytes), Ok(expected));
    }
}
|
70
src/dec/mod.rs
Normal file
70
src/dec/mod.rs
Normal file
|
@ -0,0 +1,70 @@
|
|||
mod decode;
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
pub use decode::*;
|
||||
|
||||
#[derive(Eq, PartialEq)]
|
||||
pub enum Term<'a> {
|
||||
Str(&'a [u8]),
|
||||
Dict(&'a [u8], HashMap<&'a [u8], Term<'a>>),
|
||||
List(&'a [u8], Vec<NonListTerm<'a>>),
|
||||
}
|
||||
|
||||
#[derive(Eq, PartialEq)]
|
||||
pub enum NonListTerm<'a> {
|
||||
Str(&'a [u8]),
|
||||
Dict(&'a [u8], HashMap<&'a [u8], Term<'a>>),
|
||||
}
|
||||
|
||||
impl<'a> From<NonListTerm<'a>> for Term<'a> {
|
||||
fn from(x: NonListTerm<'a>) -> Term<'a> {
|
||||
match x {
|
||||
NonListTerm::Str(s) => Term::Str(s),
|
||||
NonListTerm::Dict(raw, d) => Term::Dict(raw, d),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ----
|
||||
|
||||
/// Views `x` as a string slice for diagnostic output.
///
/// Returns the placeholder `"<invalid ascii>"` when `x` is not valid UTF-8.
pub fn debug<'a>(x: &'a [u8]) -> &'a str {
    match std::str::from_utf8(x) {
        Ok(text) => text,
        Err(_) => "<invalid ascii>",
    }
}
|
||||
|
||||
impl<'a> std::fmt::Debug for Term<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::result::Result<(), std::fmt::Error> {
|
||||
match self {
|
||||
Term::Str(s) => write!(f, "Str(`{}`)", debug(s)),
|
||||
Term::Dict(raw, d) => {
|
||||
write!(f, "Dict<`{}`", debug(raw))?;
|
||||
for (k, v) in d.iter() {
|
||||
write!(f, "\n `{}`={:?}", debug(k), v)?;
|
||||
}
|
||||
write!(f, ">")
|
||||
}
|
||||
Term::List(raw, l) => {
|
||||
write!(f, "List[`{}`", debug(raw))?;
|
||||
for i in l.iter() {
|
||||
write!(f, "\n {:?}", i)?;
|
||||
}
|
||||
write!(f, "]")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> std::fmt::Debug for NonListTerm<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::result::Result<(), std::fmt::Error> {
|
||||
match self {
|
||||
NonListTerm::Str(s) => write!(f, "Str(`{}`)", debug(s)),
|
||||
NonListTerm::Dict(raw, d) => {
|
||||
write!(f, "Dict<`{}`", debug(raw))?;
|
||||
for (k, v) in d.iter() {
|
||||
write!(f, "\n `{}`={:?}", debug(k), v)?;
|
||||
}
|
||||
write!(f, ">")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
1
src/lib.rs
Normal file
1
src/lib.rs
Normal file
|
@ -0,0 +1 @@
|
|||
pub mod dec;
|
Loading…
Reference in a new issue