Fixes for bytes, fix outer newlines, NESTED datatypes

2022-11-18 01:59:00 +01:00 · 2022-11-18 01:59:00 +01:00 · e7ea915121
commit e7ea915121
parent 22fe9568bd
4 changed files with 126 additions and 27 deletions
--- a/README.md
+++ b/README.md
@ -17,6 +17,9 @@ A term can be of any of the following kinds:
 - a dict, which maps strings (as defined above) to any term type
 - a list, which is a consecutive sequence of at least 2 strings or dicts (can be mixed), simply separated by whitespace
 Nested lists can be represented using a special dictionary with a single key, `.`,
 for instance `TEST a { . = 0 4 2 1 9 7 0 } c`.
 Dicts are represented as follows:
 ```
@ -64,6 +67,7 @@ Terms can be interpreted in a number of different ways, depending on the context
  - a value, which is either the second item in case there are only two items, or the list composed of all items starting from the second if there are more than two
 - DICT: if the term is a dict, interpret it as such
 - LIST: if the term is a string or a dict, interpret it as a list composed of that single term. Otherwise, the term is already a list: interpret it as a list of terms.
 - NESTED: if the term is a dict with a single key `.`, interpret it as the term associated to that key
 ## Data mappings
--- a/src/dec/mod.rs
+++ b/src/dec/mod.rs
@ -356,6 +356,31 @@ impl<'a, 'b> Term<'a, 'b> {
        }
    }
    /// Unwraps a NESTED term, i.e. a dictionary whose only key is `.`,
    /// and returns the term stored under that key.
    ///
    /// Any other term yields `TypeError::WrongType("NESTED")`.
    ///
    /// Example:
    ///
    /// ```
    /// use nettext::dec::decode;
    ///
    /// let term = decode(b"{ . = a b c d e }").unwrap();
    /// assert_eq!(term.nested().unwrap().raw(), b"a b c d e");
    /// ```
    pub fn nested(&self) -> Result<Term<'a, '_>, TypeError> {
        match self.0.mkref() {
            // Exactly one entry: it is a NESTED term only if its key is `.`.
            AnyTerm::DictRef(_, entries) if entries.len() == 1 => match entries.iter().next() {
                Some((key, value)) if key == b"." => Ok(Term(value.mkref())),
                _ => Err(TypeError::WrongType("NESTED")),
            },
            _ => Err(TypeError::WrongType("NESTED")),
        }
    }
    // ---- TYPE CASTS ----
    /// Try to interpret this str as an i64
--- a/src/enc/mod.rs
+++ b/src/enc/mod.rs
@ -22,6 +22,7 @@ pub enum Error {
    InvalidCharacter(u8),
    InvalidRaw,
    NotADictionnary,
    ListInList,
 }
 // ---- helpers to transform datatypes into encoder terms ----
@ -84,6 +85,7 @@ pub fn list<'a, I: IntoIterator<Item = Term<'a>>>(terms: I) -> Term<'a> {
    for t in terms {
        match t.0 {
            T::Err(e) => return Term(T::Err(e)),
            T::List(_) => return Term(T::Err(Error::ListInList)),
            x => tmp.push(x),
        }
    }
@ -124,6 +126,16 @@ pub fn dict<'a, I: IntoIterator<Item = (&'a str, Term<'a>)>>(pairs: I) -> Term<'
 /// assert_eq!(encode(bytes(b"hello, world!")).unwrap(), b"aGVsbG8sIHdvcmxkIQ");
 /// ```
 pub fn bytes(bytes: &[u8]) -> Term<'static> {
    // URL-safe base64 without padding, stored as an owned string term.
    let encoded = base64::encode_config(bytes, base64::URL_SAFE_NO_PAD);
    Term(T::OwnedStr(encoded.into_bytes()))
 }
 /// Same as `bytes()`, but splits the byte slice in 48-byte chunks
 /// and encodes each chunk separately, putting them in a list of terms.
 /// Useful for long byte slices to have cleaner representations,
 /// mainly useful for dictionary keys.
 pub fn bytes_split(bytes: &[u8]) -> Term<'static> {
    let chunks = bytes
        .chunks(48)
        .map(|b| T::OwnedStr(base64::encode_config(b, base64::URL_SAFE_NO_PAD).into_bytes()))
@ -138,6 +150,7 @@ pub fn bytes(bytes: &[u8]) -> Term<'static> {
 impl<'a> Term<'a> {
    /// Append a term to an existing term.
    /// Transforms the initial term into a list if necessary.
    #[must_use]
    pub fn append(self, t: Term<'a>) -> Term<'a> {
        match t.0 {
            T::Err(e) => Term(T::Err(e)),
@ -153,6 +166,7 @@ impl<'a> Term<'a> {
    /// Inserts a key-value pair into a term that is a dictionary.
    /// Fails if `self` is not a dictionary.
    #[must_use]
    pub fn insert(self, k: &'a str, v: Term<'a>) -> Term<'a> {
        match v.0 {
            T::Err(e) => Term(T::Err(e)),
@ -165,6 +179,21 @@ impl<'a> Term<'a> {
            },
        }
    }
    /// Wraps this term into a NESTED term: a dict whose single key `.`
    /// maps to this term.
    ///
    /// Example:
    ///
    /// ```
    /// use nettext::enc::*;
    ///
    /// let inner = list([string("hello"), string("world")]);
    /// assert_eq!(encode(inner.nested()).unwrap(), b"{ . = hello world }");
    /// ```
    #[must_use]
    pub fn nested(self) -> Term<'a> {
        // A one-item iterator yields the single `.` entry of the wrapping dict.
        dict(std::iter::once((".", self)))
    }
 }
 // ---- encoding function ----
@ -172,15 +201,30 @@ impl<'a> Term<'a> {
 /// Generate the nettext representation of a term
 pub fn encode(t: Term<'_>) -> Result<Vec<u8>, Error> {
    let mut buf = Vec::with_capacity(128);
-    encode_aux(&mut buf, t.0, 0)?;
+    encode_aux(&mut buf, t.0, 0, true)?;
    Ok(buf)
 }
-fn encode_aux(buf: &mut Vec<u8>, term: T<'_>, indent: usize) -> Result<(), Error> {
+fn encode_aux(
    buf: &mut Vec<u8>,
    term: T<'_>,
    indent: usize,
    is_toplevel: bool,
 ) -> Result<(), Error> {
    match term {
        T::Str(s) => buf.extend_from_slice(s),
        T::OwnedStr(s) => buf.extend_from_slice(&s),
        T::Dict(mut d) => {
            if d.is_empty() {
                buf.extend_from_slice(b"{}");
            } else if d.len() == 1 {
                buf.extend_from_slice(b"{ ");
                let (k, v) = d.into_iter().next().unwrap();
                buf.extend_from_slice(k);
                buf.extend_from_slice(b" = ");
                encode_aux(buf, v, indent + 2, false)?;
                buf.extend_from_slice(b" }");
            } else {
                buf.extend_from_slice(b"{\n");
                let indent2 = indent + 2;
                let mut keys = d.keys().cloned().collect::<Vec<_>>();
@ -192,7 +236,7 @@ fn encode_aux(buf: &mut Vec<u8>, term: T<'_>, indent: usize) -> Result<(), Error
                    }
                    buf.extend_from_slice(k);
                    buf.extend_from_slice(b" = ");
-                encode_aux(buf, v, indent2)?;
+                    encode_aux(buf, v, indent2, false)?;
                    buf.extend_from_slice(b",\n");
                }
                for _ in 0..indent {
@ -200,10 +244,11 @@ fn encode_aux(buf: &mut Vec<u8>, term: T<'_>, indent: usize) -> Result<(), Error
                }
                buf.push(b'}');
            }
        }
        T::List(l) => {
            let indent2 = indent + 2;
            for (i, v) in l.into_iter().enumerate() {
-                if buf.iter().rev().take_while(|c| **c != b'\n').count() >= 70 {
+                if !is_toplevel && buf.iter().rev().take_while(|c| **c != b'\n').count() >= 70 {
                    buf.push(b'\n');
                    for _ in 0..indent2 {
                        buf.push(b' ');
@ -211,7 +256,7 @@ fn encode_aux(buf: &mut Vec<u8>, term: T<'_>, indent: usize) -> Result<(), Error
                } else if i > 0 {
                    buf.push(b' ');
                }
-                encode_aux(buf, v, indent2)?;
+                encode_aux(buf, v, indent2, is_toplevel)?;
            }
        }
        T::Err(e) => return Err(e),
@ -244,4 +289,24 @@ mod tests {
        eprintln!("{}", std::str::from_utf8(&expected[..]).unwrap());
        assert_eq!(&enc, &expected[..]);
    }
    #[test]
    fn nested() {
        // A list placed directly inside another list must be rejected.
        let inner = list([string("c"), string("d")]);
        let bare = list([string("a"), string("b"), inner]);
        assert!(encode(bare).is_err());

        // The same inner list is accepted once wrapped as a NESTED term.
        let inner = list([string("c"), string("d")]);
        let wrapped = list([string("a"), string("b"), inner.nested()]);
        let encoded = encode(wrapped).unwrap();
        assert_eq!(encoded, b"a b { . = c d }");
    }
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@ -15,6 +15,7 @@
 //!         ("a", string("hello")),
 //!         ("b", string("world")),
 //!         ("c", raw(b"{ a = 12, b = 42 }")),
 //!         ("d", bytes_split(&((0..128u8).collect::<Vec<_>>()))),
 //!     ]),
 //!     keypair.public.term(),
 //! ])).unwrap();
@ -55,21 +56,25 @@
 //!     a = hello,
 //!     b = world,
 //!     c = { a = 12, b = 42 },
-//!   } YutjCfgXXYNkNR1IQiNi3pFKpvqfwICkLc3EJOekcq4
+//!     d = AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4v
 //!       MDEyMzQ1Njc4OTo7PD0-P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5f
 //!       YGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn8,
 //!   } 1hUAS2C0lzHXHWIvXqwuhUYVPlu3BbZ7ANLUMH_OYjo
 //! ```
 //!
 //! And the value of `text2` would be as follows:
 //! ```raw
 //! {
-//!   hash = IT4ay3XM4SycgYjxV8_Ioxqqt9JwdFK0sZqd-TOhOl9IGxbTQwK8vPy409h59xCV
+//!   hash = Se6Wmbh3fbFQ9_ilE6zGbxNaEd9v5CHAb30p46Fxpi74iblRb9fXmGAiMkXnSe4DePTwb16zGAz_Ux4ZAG9s3w,
 //!     NrMjDC1YIS7bXIrrv_Tvbw,
 //!   payload = CALL myfunction {
 //!     a = hello,
 //!     b = world,
 //!     c = { a = 12, b = 42 },
-//!   } YutjCfgXXYNkNR1IQiNi3pFKpvqfwICkLc3EJOekcq4,
+//!     d = AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4v
-//!   signature = UFje_N6vnrN23-ygB1yr8LwSipSwxrMLEB2ov6bvU4rR9BmfLjxyq8zTzKxb_VNw
+//!       MDEyMzQ1Njc4OTo7PD0-P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5f
-//!     UABMRcy-KiITwpY_b3UdBg,
+//!       YGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn8,
 //!   } 1hUAS2C0lzHXHWIvXqwuhUYVPlu3BbZ7ANLUMH_OYjo,
 //!   signature = 8mo3aeQD7JAdqbDcm7oVdaU0XamDwg03JtC3mfsWhEy_ZkNmWBFZefIDlzBR3XpnF0szTzEwtoPFfnR1fz6fAA,
 //! }
 //! ```
 //!
@ -86,7 +91,7 @@ pub(crate) const DICT_OPEN: u8 = b'{';
 pub(crate) const DICT_CLOSE: u8 = b'}';
 pub(crate) const DICT_ASSIGN: u8 = b'=';
 pub(crate) const DICT_DELIM: u8 = b',';
-pub(crate) const STR_EXTRA_CHARS: &[u8] = b"._-*?";
+pub(crate) const STR_EXTRA_CHARS: &[u8] = b"._-+*?";
 pub(crate) fn is_string_char(c: u8) -> bool {
    c.is_ascii_alphanumeric() || STR_EXTRA_CHARS.contains(&c)