From e7ea9151211925da6e379c713b2977bdd5b193d3 Mon Sep 17 00:00:00 2001
From: Alex Auvolat <alex@adnab.me>
Date: Fri, 18 Nov 2022 01:59:00 +0100
Subject: [PATCH] Fixes for bytes, fix outer newlines, NESTED datatypes

---
 README.md      |   4 ++
 src/dec/mod.rs |  25 ++++++++++++
 src/enc/mod.rs | 105 +++++++++++++++++++++++++++++++++++++++----------
 src/lib.rs     |  19 +++++----
 4 files changed, 126 insertions(+), 27 deletions(-)
diff --git a/README.md b/README.md
index a0a532d..c76ef64 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,9 @@ A term can be of any of the following kinds:
 - a dict, which maps strings (as defined above) to any term type
 - a list, which is a consecutive sequence of at least 2 strings or dicts (can be mixed), simply separated by whitespace
 
+Nested lists can be represented using a special dictionary with a single key, `.`,
+for instance `TEST a { . = 0 4 2 1 9 7 0 } c`.
+
 Dicts are represented as follows:
 
 ```
@@ -64,6 +67,7 @@ Terms can be interpreted in a number of different ways, depending on the context
   - a value, which is either the second item in case there are only two items, or the list composed of all items starting from the second if there are more than two
 - DICT: if the term is a dict, interpret it as such
 - LIST: if the term is a string or a dict, interpret it as a list composed of that single term. Otherwise, the term is a list, interpret it as a list of terms.
+- NESTED: if the term is a dict with a single key `.`, interpret it as the term associated with that key
 
 ## Data mappings
 
diff --git a/src/dec/mod.rs b/src/dec/mod.rs
index 5c50550..4d9e6f7 100644
--- a/src/dec/mod.rs
+++ b/src/dec/mod.rs
@@ -356,6 +356,31 @@ impl<'a, 'b> Term<'a, 'b> {
         }
     }
 
+    /// Checks that the term is a dictionary with a single key `.`,
+    /// and returns the associated value.
+    ///
+    /// Example:
+    ///
+    /// ```
+    /// use nettext::dec::decode;
+    ///
+    /// let term = decode(b"{ . = a b c d e }").unwrap();
+    /// assert_eq!(term.nested().unwrap().raw(), b"a b c d e");
+    /// ```
+    pub fn nested(&self) -> Result<Term<'a, '_>, TypeError> {
+        match self.0.mkref() {
+            AnyTerm::DictRef(_, d) if d.len() == 1 => {
+                let (k, v) = d.iter().next().unwrap();
+                if k != b"." {
+                    Err(TypeError::WrongType("NESTED"))
+                } else {
+                    Ok(Term(v.mkref()))
+                }
+            }
+            _ => Err(TypeError::WrongType("NESTED")),
+        }
+    }
+
     // ---- TYPE CASTS ----
 
     /// Try to interpret this str as an i64
diff --git a/src/enc/mod.rs b/src/enc/mod.rs
index 03029f8..711bff7 100644
--- a/src/enc/mod.rs
+++ b/src/enc/mod.rs
@@ -22,6 +22,7 @@ pub enum Error {
     InvalidCharacter(u8),
     InvalidRaw,
     NotADictionnary,
+    ListInList,
 }
 
 // ---- helpers to transform datatypes into encoder terms ----
@@ -84,6 +85,7 @@ pub fn list<'a, I: IntoIterator<Item = Term<'a>>>(terms: I) -> Term<'a> {
     for t in terms {
         match t.0 {
             T::Err(e) => return Term(T::Err(e)),
+            T::List(_) => return Term(T::Err(Error::ListInList)),
             x => tmp.push(x),
         }
     }
@@ -113,7 +115,7 @@ pub fn dict<'a, I: IntoIterator<Item = (&'a str, Term<'a>)>>(pairs: I) -> Term<'
     Term(T::Dict(tmp))
 }
 
-/// Term corresponding to  a byte slice,
+/// Term corresponding to a byte slice,
 /// encoding using base64 url-safe encoding without padding
 ///
 /// Example:
@@ -124,6 +126,16 @@ pub fn dict<'a, I: IntoIterator<Item = (&'a str, Term<'a>)>>(pairs: I) -> Term<'
 /// assert_eq!(encode(bytes(b"hello, world!")).unwrap(), b"aGVsbG8sIHdvcmxkIQ");
 /// ```
 pub fn bytes(bytes: &[u8]) -> Term<'static> {
+    Term(T::OwnedStr(
+        base64::encode_config(bytes, base64::URL_SAFE_NO_PAD).into_bytes(),
+    ))
+}
+
+/// Same as `bytes()`, but splits the byte slice into 48-byte chunks
+/// and encodes each chunk separately, putting them in a list of terms.
+/// Useful for long byte slices to have cleaner representations,
+/// mainly useful for dictionary keys.
+pub fn bytes_split(bytes: &[u8]) -> Term<'static> {
     let chunks = bytes
         .chunks(48)
         .map(|b| T::OwnedStr(base64::encode_config(b, base64::URL_SAFE_NO_PAD).into_bytes()))
@@ -138,6 +150,7 @@ pub fn bytes(bytes: &[u8]) -> Term<'static> {
 impl<'a> Term<'a> {
     /// Append a term to an existing term.
     /// Transforms the initial term into a list if necessary.
+    #[must_use]
     pub fn append(self, t: Term<'a>) -> Term<'a> {
         match t.0 {
             T::Err(e) => Term(T::Err(e)),
@@ -153,6 +166,7 @@ impl<'a> Term<'a> {
 
     /// Inserts a key-value pair into a term that is a dictionnary.
     /// Fails if `self` is not a dictionnary.
+    #[must_use]
     pub fn insert(self, k: &'a str, v: Term<'a>) -> Term<'a> {
         match v.0 {
             T::Err(e) => Term(T::Err(e)),
@@ -165,6 +179,21 @@ impl<'a> Term<'a> {
             },
         }
     }
+
+    /// Makes a NESTED term of this term, by putting it in a dict
+    /// with a single key `.`.
+    ///
+    /// Example:
+    ///
+    /// ```
+    /// use nettext::enc::*;
+    ///
+    /// assert_eq!(encode(list([string("hello"), string("world")]).nested()).unwrap(), b"{ . = hello world }");
+    /// ```
+    #[must_use]
+    pub fn nested(self) -> Term<'a> {
+        dict([(".", self)])
+    }
 }
 
 // ---- encoding function ----
@@ -172,38 +201,54 @@ impl<'a> Term<'a> {
 /// Generate the nettext representation of a term
 pub fn encode(t: Term<'_>) -> Result<Vec<u8>, Error> {
     let mut buf = Vec::with_capacity(128);
-    encode_aux(&mut buf, t.0, 0)?;
+    encode_aux(&mut buf, t.0, 0, true)?;
     Ok(buf)
 }
 
-fn encode_aux(buf: &mut Vec<u8>, term: T<'_>, indent: usize) -> Result<(), Error> {
+fn encode_aux(
+    buf: &mut Vec<u8>,
+    term: T<'_>,
+    indent: usize,
+    is_toplevel: bool,
+) -> Result<(), Error> {
     match term {
         T::Str(s) => buf.extend_from_slice(s),
         T::OwnedStr(s) => buf.extend_from_slice(&s),
         T::Dict(mut d) => {
-            buf.extend_from_slice(b"{\n");
-            let indent2 = indent + 2;
-            let mut keys = d.keys().cloned().collect::<Vec<_>>();
-            keys.sort();
-            for k in keys {
-                let v = d.remove(k).unwrap();
-                for _ in 0..indent2 {
-                    buf.push(b' ');
-                }
+            if d.is_empty() {
+                buf.extend_from_slice(b"{}");
+            } else if d.len() == 1 {
+                buf.extend_from_slice(b"{ ");
+                let (k, v) = d.into_iter().next().unwrap();
                 buf.extend_from_slice(k);
                 buf.extend_from_slice(b" = ");
-                encode_aux(buf, v, indent2)?;
-                buf.extend_from_slice(b",\n");
+                encode_aux(buf, v, indent + 2, false)?;
+                buf.extend_from_slice(b" }");
+            } else {
+                buf.extend_from_slice(b"{\n");
+                let indent2 = indent + 2;
+                let mut keys = d.keys().cloned().collect::<Vec<_>>();
+                keys.sort();
+                for k in keys {
+                    let v = d.remove(k).unwrap();
+                    for _ in 0..indent2 {
+                        buf.push(b' ');
+                    }
+                    buf.extend_from_slice(k);
+                    buf.extend_from_slice(b" = ");
+                    encode_aux(buf, v, indent2, false)?;
+                    buf.extend_from_slice(b",\n");
+                }
+                for _ in 0..indent {
+                    buf.push(b' ');
+                }
+                buf.push(b'}');
             }
-            for _ in 0..indent {
-                buf.push(b' ');
-            }
-            buf.push(b'}');
         }
         T::List(l) => {
             let indent2 = indent + 2;
             for (i, v) in l.into_iter().enumerate() {
-                if buf.iter().rev().take_while(|c| **c != b'\n').count() >= 70 {
+                if !is_toplevel && buf.iter().rev().take_while(|c| **c != b'\n').count() >= 70 {
                     buf.push(b'\n');
                     for _ in 0..indent2 {
                         buf.push(b' ');
@@ -211,7 +256,7 @@ fn encode_aux(buf: &mut Vec<u8>, term: T<'_>, indent: usize) -> Result<(), Error
                 } else if i > 0 {
                     buf.push(b' ');
                 }
-                encode_aux(buf, v, indent2)?;
+                encode_aux(buf, v, indent2, is_toplevel)?;
             }
         }
         T::Err(e) => return Err(e),
@@ -244,4 +289,24 @@ mod tests {
         eprintln!("{}", std::str::from_utf8(&expected[..]).unwrap());
         assert_eq!(&enc, &expected[..]);
     }
+
+    #[test]
+    fn nested() {
+        assert!(encode(list([
+            string("a"),
+            string("b"),
+            list([string("c"), string("d")])
+        ]))
+        .is_err());
+
+        assert_eq!(
+            encode(list([
+                string("a"),
+                string("b"),
+                list([string("c"), string("d")]).nested()
+            ]))
+            .unwrap(),
+            b"a b { . = c d }"
+        );
+    }
 }
diff --git a/src/lib.rs b/src/lib.rs
index 4555f14..c565bc5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -15,6 +15,7 @@
 //!         ("a", string("hello")),
 //!         ("b", string("world")),
 //!         ("c", raw(b"{ a = 12, b = 42 }")),
+//!         ("d", bytes_split(&((0..128u8).collect::<Vec<_>>()))),
 //!     ]),
 //!     keypair.public.term(),
 //! ])).unwrap();
@@ -55,21 +56,25 @@
 //!     a = hello,
 //!     b = world,
 //!     c = { a = 12, b = 42 },
-//!   } YutjCfgXXYNkNR1IQiNi3pFKpvqfwICkLc3EJOekcq4
+//!     d = AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4v
+//!       MDEyMzQ1Njc4OTo7PD0-P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5f
+//!       YGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn8,
+//!   } 1hUAS2C0lzHXHWIvXqwuhUYVPlu3BbZ7ANLUMH_OYjo
 //! ```
 //!
 //! And the value of `text2` would be as follows:
 //! ```raw
 //! {
-//!   hash = IT4ay3XM4SycgYjxV8_Ioxqqt9JwdFK0sZqd-TOhOl9IGxbTQwK8vPy409h59xCV
-//!     NrMjDC1YIS7bXIrrv_Tvbw,
+//!   hash = Se6Wmbh3fbFQ9_ilE6zGbxNaEd9v5CHAb30p46Fxpi74iblRb9fXmGAiMkXnSe4DePTwb16zGAz_Ux4ZAG9s3w,
 //!   payload = CALL myfunction {
 //!     a = hello,
 //!     b = world,
 //!     c = { a = 12, b = 42 },
-//!   } YutjCfgXXYNkNR1IQiNi3pFKpvqfwICkLc3EJOekcq4,
-//!   signature = UFje_N6vnrN23-ygB1yr8LwSipSwxrMLEB2ov6bvU4rR9BmfLjxyq8zTzKxb_VNw
-//!     UABMRcy-KiITwpY_b3UdBg,
+//!     d = AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4v
+//!       MDEyMzQ1Njc4OTo7PD0-P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5f
+//!       YGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn8,
+//!   } 1hUAS2C0lzHXHWIvXqwuhUYVPlu3BbZ7ANLUMH_OYjo,
+//!   signature = 8mo3aeQD7JAdqbDcm7oVdaU0XamDwg03JtC3mfsWhEy_ZkNmWBFZefIDlzBR3XpnF0szTzEwtoPFfnR1fz6fAA,
 //! }
 //! ```
 //!
@@ -86,7 +91,7 @@ pub(crate) const DICT_OPEN: u8 = b'{';
 pub(crate) const DICT_CLOSE: u8 = b'}';
 pub(crate) const DICT_ASSIGN: u8 = b'=';
 pub(crate) const DICT_DELIM: u8 = b',';
-pub(crate) const STR_EXTRA_CHARS: &[u8] = b"._-*?";
+pub(crate) const STR_EXTRA_CHARS: &[u8] = b"._-+*?";
 
 pub(crate) fn is_string_char(c: u8) -> bool {
     c.is_ascii_alphanumeric() || STR_EXTRA_CHARS.contains(&c)