Fixes for bytes, fix outer newlines, NESTED datatypes

This commit is contained in:
Alex 2022-11-18 01:59:00 +01:00
parent 22fe9568bd
commit e7ea915121
No known key found for this signature in database
GPG key ID: 09EC5284AA804D3C
4 changed files with 126 additions and 27 deletions

View file

@ -17,6 +17,9 @@ A term can be of any of the following kinds:
- a dict, which maps strings (as defined above) to any term type
- a list, which is a consecutive sequence of at least 2 strings or dicts (can be mixed), simply separated by whitespace
Nested lists can be represented using a special dictionary with a single key, `.`,
for instance `TEST a { . = 0 4 2 1 9 7 0 } c`.
Dicts are represented as follows:
```
@ -64,6 +67,7 @@ Terms can be interpreted in a number of different ways, depending on the context
- a value, which is either the second item in case there are only two items, or the list composed of all items starting from the second if there are more than two
- DICT: if the term is a dict, interpret it as such
- LIST: if the term is a string or a dict, interpret it as a list composed of that single term. Otherwise, the term is a list, interpret it as a list of terms.
- NESTED: if the term is a dict with a single key `.`, interpret it as the term associated with that key
## Data mappings

View file

@ -356,6 +356,31 @@ impl<'a, 'b> Term<'a, 'b> {
}
}
/// Unwraps a NESTED term: a dictionary whose one and only key is `.`.
///
/// On success, the term stored under `.` is returned. Any other term
/// (not a dictionary, a dictionary whose size is not exactly one, or a
/// single-key dictionary whose key is not `.`) yields
/// `TypeError::WrongType("NESTED")`.
///
/// Example:
///
/// ```
/// use nettext::dec::decode;
///
/// let term = decode(b"{ . = a b c d e }").unwrap();
/// assert_eq!(term.nested().unwrap().raw(), b"a b c d e");
/// ```
pub fn nested(&self) -> Result<Term<'a, '_>, TypeError> {
    if let AnyTerm::DictRef(_, entries) = self.0.mkref() {
        if entries.len() == 1 {
            // Exactly one entry exists, so the iterator cannot be empty here.
            let (key, inner) = entries.iter().next().unwrap();
            if key == b"." {
                return Ok(Term(inner.mkref()));
            }
        }
    }
    // Every non-matching shape ends up with the same type error.
    Err(TypeError::WrongType("NESTED"))
}
// ---- TYPE CASTS ----
/// Try to interpret this str as an i64

View file

@ -22,6 +22,7 @@ pub enum Error {
InvalidCharacter(u8),
InvalidRaw,
NotADictionnary,
ListInList,
}
// ---- helpers to transform datatypes into encoder terms ----
@ -84,6 +85,7 @@ pub fn list<'a, I: IntoIterator<Item = Term<'a>>>(terms: I) -> Term<'a> {
for t in terms {
match t.0 {
T::Err(e) => return Term(T::Err(e)),
T::List(_) => return Term(T::Err(Error::ListInList)),
x => tmp.push(x),
}
}
@ -113,7 +115,7 @@ pub fn dict<'a, I: IntoIterator<Item = (&'a str, Term<'a>)>>(pairs: I) -> Term<'
Term(T::Dict(tmp))
}
/// Term corresponding to a byte slice,
/// Term corresponding to a byte slice,
/// encoding using base64 url-safe encoding without padding
///
/// Example:
@ -124,6 +126,16 @@ pub fn dict<'a, I: IntoIterator<Item = (&'a str, Term<'a>)>>(pairs: I) -> Term<'
/// assert_eq!(encode(bytes(b"hello, world!")).unwrap(), b"aGVsbG8sIHdvcmxkIQ");
/// ```
pub fn bytes(bytes: &[u8]) -> Term<'static> {
    // URL-safe base64 without padding; the resulting text is stored as an owned string term.
    let encoded = base64::encode_config(bytes, base64::URL_SAFE_NO_PAD);
    Term(T::OwnedStr(encoded.into_bytes()))
}
/// Same as `bytes()`, but splits the byte slice in 48-byte chunks
/// and encodes each chunk separately, putting them in a list of terms.
/// Useful for long byte slices to have cleaner representations,
/// mainly useful for dictionary keys.
pub fn bytes_split(bytes: &[u8]) -> Term<'static> {
let chunks = bytes
.chunks(48)
.map(|b| T::OwnedStr(base64::encode_config(b, base64::URL_SAFE_NO_PAD).into_bytes()))
@ -138,6 +150,7 @@ pub fn bytes(bytes: &[u8]) -> Term<'static> {
impl<'a> Term<'a> {
/// Append a term to an existing term.
/// Transforms the initial term into a list if necessary.
#[must_use]
pub fn append(self, t: Term<'a>) -> Term<'a> {
match t.0 {
T::Err(e) => Term(T::Err(e)),
@ -153,6 +166,7 @@ impl<'a> Term<'a> {
/// Inserts a key-value pair into a term that is a dictionary.
/// Fails if `self` is not a dictionary.
#[must_use]
pub fn insert(self, k: &'a str, v: Term<'a>) -> Term<'a> {
match v.0 {
T::Err(e) => Term(T::Err(e)),
@ -165,6 +179,21 @@ impl<'a> Term<'a> {
},
}
}
/// Wraps this term into a NESTED term, i.e. a dictionary whose
/// only key is `.` and whose value is the original term.
///
/// Example:
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(encode(list([string("hello"), string("world")]).nested()).unwrap(), b"{ . = hello world }");
/// ```
#[must_use]
pub fn nested(self) -> Term<'a> {
    // A one-pair iterator is all `dict` needs to build the wrapper.
    dict(std::iter::once((".", self)))
}
}
// ---- encoding function ----
@ -172,38 +201,54 @@ impl<'a> Term<'a> {
/// Generate the nettext representation of a term
pub fn encode(t: Term<'_>) -> Result<Vec<u8>, Error> {
let mut buf = Vec::with_capacity(128);
encode_aux(&mut buf, t.0, 0)?;
encode_aux(&mut buf, t.0, 0, true)?;
Ok(buf)
}
fn encode_aux(buf: &mut Vec<u8>, term: T<'_>, indent: usize) -> Result<(), Error> {
fn encode_aux(
buf: &mut Vec<u8>,
term: T<'_>,
indent: usize,
is_toplevel: bool,
) -> Result<(), Error> {
match term {
T::Str(s) => buf.extend_from_slice(s),
T::OwnedStr(s) => buf.extend_from_slice(&s),
T::Dict(mut d) => {
buf.extend_from_slice(b"{\n");
let indent2 = indent + 2;
let mut keys = d.keys().cloned().collect::<Vec<_>>();
keys.sort();
for k in keys {
let v = d.remove(k).unwrap();
for _ in 0..indent2 {
buf.push(b' ');
}
if d.is_empty() {
buf.extend_from_slice(b"{}");
} else if d.len() == 1 {
buf.extend_from_slice(b"{ ");
let (k, v) = d.into_iter().next().unwrap();
buf.extend_from_slice(k);
buf.extend_from_slice(b" = ");
encode_aux(buf, v, indent2)?;
buf.extend_from_slice(b",\n");
encode_aux(buf, v, indent + 2, false)?;
buf.extend_from_slice(b" }");
} else {
buf.extend_from_slice(b"{\n");
let indent2 = indent + 2;
let mut keys = d.keys().cloned().collect::<Vec<_>>();
keys.sort();
for k in keys {
let v = d.remove(k).unwrap();
for _ in 0..indent2 {
buf.push(b' ');
}
buf.extend_from_slice(k);
buf.extend_from_slice(b" = ");
encode_aux(buf, v, indent2, false)?;
buf.extend_from_slice(b",\n");
}
for _ in 0..indent {
buf.push(b' ');
}
buf.push(b'}');
}
for _ in 0..indent {
buf.push(b' ');
}
buf.push(b'}');
}
T::List(l) => {
let indent2 = indent + 2;
for (i, v) in l.into_iter().enumerate() {
if buf.iter().rev().take_while(|c| **c != b'\n').count() >= 70 {
if !is_toplevel && buf.iter().rev().take_while(|c| **c != b'\n').count() >= 70 {
buf.push(b'\n');
for _ in 0..indent2 {
buf.push(b' ');
@ -211,7 +256,7 @@ fn encode_aux(buf: &mut Vec<u8>, term: T<'_>, indent: usize) -> Result<(), Error
} else if i > 0 {
buf.push(b' ');
}
encode_aux(buf, v, indent2)?;
encode_aux(buf, v, indent2, is_toplevel)?;
}
}
T::Err(e) => return Err(e),
@ -244,4 +289,24 @@ mod tests {
eprintln!("{}", std::str::from_utf8(&expected[..]).unwrap());
assert_eq!(&enc, &expected[..]);
}
#[test]
fn nested() {
    // A list placed directly inside another list is rejected by `list`,
    // so encoding the result must fail.
    let bare_inner = list([string("c"), string("d")]);
    let rejected = list([string("a"), string("b"), bare_inner]);
    assert!(encode(rejected).is_err());

    // Wrapping the inner list with `.nested()` turns it into a dict,
    // which is allowed as a list item.
    let wrapped_inner = list([string("c"), string("d")]).nested();
    let accepted = list([string("a"), string("b"), wrapped_inner]);
    assert_eq!(encode(accepted).unwrap(), b"a b { . = c d }");
}
}

View file

@ -15,6 +15,7 @@
//! ("a", string("hello")),
//! ("b", string("world")),
//! ("c", raw(b"{ a = 12, b = 42 }")),
//! ("d", bytes_split(&((0..128u8).collect::<Vec<_>>()))),
//! ]),
//! keypair.public.term(),
//! ])).unwrap();
@ -55,21 +56,25 @@
//! a = hello,
//! b = world,
//! c = { a = 12, b = 42 },
//! } YutjCfgXXYNkNR1IQiNi3pFKpvqfwICkLc3EJOekcq4
//! d = AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4v
//! MDEyMzQ1Njc4OTo7PD0-P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5f
//! YGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn8,
//! } 1hUAS2C0lzHXHWIvXqwuhUYVPlu3BbZ7ANLUMH_OYjo
//! ```
//!
//! And the value of `text2` would be as follows:
//! ```raw
//! {
//! hash = IT4ay3XM4SycgYjxV8_Ioxqqt9JwdFK0sZqd-TOhOl9IGxbTQwK8vPy409h59xCV
//! NrMjDC1YIS7bXIrrv_Tvbw,
//! hash = Se6Wmbh3fbFQ9_ilE6zGbxNaEd9v5CHAb30p46Fxpi74iblRb9fXmGAiMkXnSe4DePTwb16zGAz_Ux4ZAG9s3w,
//! payload = CALL myfunction {
//! a = hello,
//! b = world,
//! c = { a = 12, b = 42 },
//! } YutjCfgXXYNkNR1IQiNi3pFKpvqfwICkLc3EJOekcq4,
//! signature = UFje_N6vnrN23-ygB1yr8LwSipSwxrMLEB2ov6bvU4rR9BmfLjxyq8zTzKxb_VNw
//! UABMRcy-KiITwpY_b3UdBg,
//! d = AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4v
//! MDEyMzQ1Njc4OTo7PD0-P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5f
//! YGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn8,
//! } 1hUAS2C0lzHXHWIvXqwuhUYVPlu3BbZ7ANLUMH_OYjo,
//! signature = 8mo3aeQD7JAdqbDcm7oVdaU0XamDwg03JtC3mfsWhEy_ZkNmWBFZefIDlzBR3XpnF0szTzEwtoPFfnR1fz6fAA,
//! }
//! ```
//!
@ -86,7 +91,7 @@ pub(crate) const DICT_OPEN: u8 = b'{';
pub(crate) const DICT_CLOSE: u8 = b'}';
pub(crate) const DICT_ASSIGN: u8 = b'=';
pub(crate) const DICT_DELIM: u8 = b',';
pub(crate) const STR_EXTRA_CHARS: &[u8] = b"._-*?";
pub(crate) const STR_EXTRA_CHARS: &[u8] = b"._-+*?";
pub(crate) fn is_string_char(c: u8) -> bool {
c.is_ascii_alphanumeric() || STR_EXTRA_CHARS.contains(&c)