From e7ea9151211925da6e379c713b2977bdd5b193d3 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 18 Nov 2022 01:59:00 +0100 Subject: [PATCH] Fixes for bytes, fix outer newlines, NESTED datatypes --- README.md | 4 ++ src/dec/mod.rs | 25 ++++++++++++ src/enc/mod.rs | 105 +++++++++++++++++++++++++++++++++++++++---------- src/lib.rs | 19 +++++---- 4 files changed, 126 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index a0a532d..c76ef64 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,9 @@ A term can be of any of the following kinds: - a dict, which maps strings (as defined above) to any term type - a list, which is a consecutive sequence of at least 2 strings or dicts (can be mixed), simply separated by whitespace +Nested lists can be represented using a special dictionnary with a single key, `.`, +for instance `TEST a { . = 0 4 2 1 9 7 0 } c`. + Dicts are represented as follows: ``` @@ -64,6 +67,7 @@ Terms can be interpreted in a number of different ways, depending on the context - a value, which is either the second item in case there are only two items, or the list composed of all items starting from the second if there are more than two - DICT: if the term is a dict, interpret it as such - LIST: if the term is a string or a dict, interpret it as a list composed of that single term. Otherwise, the term is a list, interpret it as a list of terms. +- NESTED: if the term is a dict with a single key `.`, interpret it as the term associated to that key ## Data mappings diff --git a/src/dec/mod.rs b/src/dec/mod.rs index 5c50550..4d9e6f7 100644 --- a/src/dec/mod.rs +++ b/src/dec/mod.rs @@ -356,6 +356,31 @@ impl<'a, 'b> Term<'a, 'b> { } } + /// Checks term is a dictionary with a single key `.`, + /// and returns the associated value. + /// + /// Example: + /// + /// ``` + /// use nettext::dec::decode; + /// + /// let term = decode(b"{ . = a b c d e }").unwrap(); + /// assert_eq!(term.nested().unwrap().raw(), b"a b c d e"); + /// ``` + pub fn nested(&self) -> Result, TypeError> { + match self.0.mkref() { + AnyTerm::DictRef(_, d) if d.len() == 1 => { + let (k, v) = d.iter().next().unwrap(); + if k != b"." { + Err(TypeError::WrongType("NESTED")) + } else { + Ok(Term(v.mkref())) + } + } + _ => Err(TypeError::WrongType("NESTED")), + } + } + // ---- TYPE CASTS ---- /// Try to interpret this str as an i64 diff --git a/src/enc/mod.rs b/src/enc/mod.rs index 03029f8..711bff7 100644 --- a/src/enc/mod.rs +++ b/src/enc/mod.rs @@ -22,6 +22,7 @@ pub enum Error { InvalidCharacter(u8), InvalidRaw, NotADictionnary, + ListInList, } // ---- helpers to transform datatypes into encoder terms ---- @@ -84,6 +85,7 @@ pub fn list<'a, I: IntoIterator>>(terms: I) -> Term<'a> { for t in terms { match t.0 { T::Err(e) => return Term(T::Err(e)), + T::List(_) => return Term(T::Err(Error::ListInList)), x => tmp.push(x), } } @@ -113,7 +115,7 @@ pub fn dict<'a, I: IntoIterator)>>(pairs: I) -> Term<' Term(T::Dict(tmp)) } -/// Term corresponding to a byte slice, +/// Term corresponding to a byte slice, /// encoding using base64 url-safe encoding without padding /// /// Example: @@ -124,6 +126,16 @@ pub fn dict<'a, I: IntoIterator)>>(pairs: I) -> Term<' /// assert_eq!(encode(bytes(b"hello, world!")).unwrap(), b"aGVsbG8sIHdvcmxkIQ"); /// ``` pub fn bytes(bytes: &[u8]) -> Term<'static> { + Term(T::OwnedStr( + base64::encode_config(bytes, base64::URL_SAFE_NO_PAD).into_bytes(), + )) +} + +/// Same as `bytes()`, but splits the byte slice in 48-byte chunks +/// and encodes each chunk separately, putting them in a list of terms. +/// Usefull for long byte slices to have cleaner representations, +/// mainly usefull for dictionnary keys. +pub fn bytes_split(bytes: &[u8]) -> Term<'static> { let chunks = bytes .chunks(48) .map(|b| T::OwnedStr(base64::encode_config(b, base64::URL_SAFE_NO_PAD).into_bytes())) @@ -138,6 +150,7 @@ pub fn bytes(bytes: &[u8]) -> Term<'static> { impl<'a> Term<'a> { /// Append a term to an existing term. /// Transforms the initial term into a list if necessary. + #[must_use] pub fn append(self, t: Term<'a>) -> Term<'a> { match t.0 { T::Err(e) => Term(T::Err(e)), @@ -153,6 +166,7 @@ impl<'a> Term<'a> { /// Inserts a key-value pair into a term that is a dictionnary. /// Fails if `self` is not a dictionnary. + #[must_use] pub fn insert(self, k: &'a str, v: Term<'a>) -> Term<'a> { match v.0 { T::Err(e) => Term(T::Err(e)), @@ -165,6 +179,21 @@ impl<'a> Term<'a> { }, } } + + /// Makes a NESTED term of this term, by putting it in a dict + /// with a single key `.`. + /// + /// Example: + /// + /// ``` + /// use nettext::enc::*; + /// + /// assert_eq!(encode(list([string("hello"), string("world")]).nested()).unwrap(), b"{ . = hello world }"); + /// ``` + #[must_use] + pub fn nested(self) -> Term<'a> { + dict([(".", self)]) + } } // ---- encoding function ---- @@ -172,38 +201,54 @@ impl<'a> Term<'a> { /// Generate the nettext representation of a term pub fn encode(t: Term<'_>) -> Result, Error> { let mut buf = Vec::with_capacity(128); - encode_aux(&mut buf, t.0, 0)?; + encode_aux(&mut buf, t.0, 0, true)?; Ok(buf) } -fn encode_aux(buf: &mut Vec, term: T<'_>, indent: usize) -> Result<(), Error> { +fn encode_aux( + buf: &mut Vec, + term: T<'_>, + indent: usize, + is_toplevel: bool, +) -> Result<(), Error> { match term { T::Str(s) => buf.extend_from_slice(s), T::OwnedStr(s) => buf.extend_from_slice(&s), T::Dict(mut d) => { - buf.extend_from_slice(b"{\n"); - let indent2 = indent + 2; - let mut keys = d.keys().cloned().collect::>(); - keys.sort(); - for k in keys { - let v = d.remove(k).unwrap(); - for _ in 0..indent2 { - buf.push(b' '); - } + if d.is_empty() { + buf.extend_from_slice(b"{}"); + } else if d.len() == 1 { + buf.extend_from_slice(b"{ "); + let (k, v) = d.into_iter().next().unwrap(); buf.extend_from_slice(k); buf.extend_from_slice(b" = "); - encode_aux(buf, v, indent2)?; - buf.extend_from_slice(b",\n"); + encode_aux(buf, v, indent + 2, false)?; + buf.extend_from_slice(b" }"); + } else { + buf.extend_from_slice(b"{\n"); + let indent2 = indent + 2; + let mut keys = d.keys().cloned().collect::>(); + keys.sort(); + for k in keys { + let v = d.remove(k).unwrap(); + for _ in 0..indent2 { + buf.push(b' '); + } + buf.extend_from_slice(k); + buf.extend_from_slice(b" = "); + encode_aux(buf, v, indent2, false)?; + buf.extend_from_slice(b",\n"); + } + for _ in 0..indent { + buf.push(b' '); + } + buf.push(b'}'); } - for _ in 0..indent { - buf.push(b' '); - } - buf.push(b'}'); } T::List(l) => { let indent2 = indent + 2; for (i, v) in l.into_iter().enumerate() { - if buf.iter().rev().take_while(|c| **c != b'\n').count() >= 70 { + if !is_toplevel && buf.iter().rev().take_while(|c| **c != b'\n').count() >= 70 { buf.push(b'\n'); for _ in 0..indent2 { buf.push(b' '); @@ -211,7 +256,7 @@ fn encode_aux(buf: &mut Vec, term: T<'_>, indent: usize) -> Result<(), Error } else if i > 0 { buf.push(b' '); } - encode_aux(buf, v, indent2)?; + encode_aux(buf, v, indent2, is_toplevel)?; } } T::Err(e) => return Err(e), @@ -244,4 +289,24 @@ mod tests { eprintln!("{}", std::str::from_utf8(&expected[..]).unwrap()); assert_eq!(&enc, &expected[..]); } + + #[test] + fn nested() { + assert!(encode(list([ + string("a"), + string("b"), + list([string("c"), string("d")]) + ])) + .is_err()); + + assert_eq!( + encode(list([ + string("a"), + string("b"), + list([string("c"), string("d")]).nested() + ])) + .unwrap(), + b"a b { . = c d }" + ); + } } diff --git a/src/lib.rs b/src/lib.rs index 4555f14..c565bc5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,6 +15,7 @@ //! ("a", string("hello")), //! ("b", string("world")), //! ("c", raw(b"{ a = 12, b = 42 }")), +//! ("d", bytes_split(&((0..128u8).collect::>()))), //! ]), //! keypair.public.term(), //! ])).unwrap(); @@ -55,21 +56,25 @@ //! a = hello, //! b = world, //! c = { a = 12, b = 42 }, -//! } YutjCfgXXYNkNR1IQiNi3pFKpvqfwICkLc3EJOekcq4 +//! d = AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4v +//! MDEyMzQ1Njc4OTo7PD0-P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5f +//! YGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn8, +//! } 1hUAS2C0lzHXHWIvXqwuhUYVPlu3BbZ7ANLUMH_OYjo //! ``` //! //! And the value of `text2` would be as follows: //! ```raw //! { -//! hash = IT4ay3XM4SycgYjxV8_Ioxqqt9JwdFK0sZqd-TOhOl9IGxbTQwK8vPy409h59xCV -//! NrMjDC1YIS7bXIrrv_Tvbw, +//! hash = Se6Wmbh3fbFQ9_ilE6zGbxNaEd9v5CHAb30p46Fxpi74iblRb9fXmGAiMkXnSe4DePTwb16zGAz_Ux4ZAG9s3w, //! payload = CALL myfunction { //! a = hello, //! b = world, //! c = { a = 12, b = 42 }, -//! } YutjCfgXXYNkNR1IQiNi3pFKpvqfwICkLc3EJOekcq4, -//! signature = UFje_N6vnrN23-ygB1yr8LwSipSwxrMLEB2ov6bvU4rR9BmfLjxyq8zTzKxb_VNw -//! UABMRcy-KiITwpY_b3UdBg, +//! d = AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4v +//! MDEyMzQ1Njc4OTo7PD0-P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5f +//! YGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn8, +//! } 1hUAS2C0lzHXHWIvXqwuhUYVPlu3BbZ7ANLUMH_OYjo, +//! signature = 8mo3aeQD7JAdqbDcm7oVdaU0XamDwg03JtC3mfsWhEy_ZkNmWBFZefIDlzBR3XpnF0szTzEwtoPFfnR1fz6fAA, //! } //! ``` //! @@ -86,7 +91,7 @@ pub(crate) const DICT_OPEN: u8 = b'{'; pub(crate) const DICT_CLOSE: u8 = b'}'; pub(crate) const DICT_ASSIGN: u8 = b'='; pub(crate) const DICT_DELIM: u8 = b','; -pub(crate) const STR_EXTRA_CHARS: &[u8] = b"._-*?"; +pub(crate) const STR_EXTRA_CHARS: &[u8] = b"._-+*?"; pub(crate) fn is_string_char(c: u8) -> bool { c.is_ascii_alphanumeric() || STR_EXTRA_CHARS.contains(&c)