Begin working encoder

Alex 2022-11-17 17:55:50 +01:00
parent dc5bcec658
commit ab8c3e70c3
Signed by: lx
GPG Key ID: 0E496D15096376BE
3 changed files with 210 additions and 23 deletions


@@ -9,12 +9,7 @@ use nom::{
};
use crate::dec::{debug, AnyTerm, NonListTerm, Term};
-const DICT_OPEN: &[u8] = b"{";
-const DICT_CLOSE: &[u8] = b"}";
-const DICT_ASSIGN: &[u8] = b"=";
-const DICT_DELIM: &[u8] = b",";
-const STR_EXTRA_CHARS: &[u8] = b"._-*?";
+use crate::{is_string_char, is_whitespace, DICT_ASSIGN, DICT_CLOSE, DICT_DELIM, DICT_OPEN};
// ----
@@ -51,7 +46,7 @@ impl<'a> From<nom::Err<nom::error::Error<&'a [u8]>>> for DecodeError<'a> {
// ----
/// Decodes a nettext string into the term it represents.
-pub fn decode(input: &[u8]) -> std::result::Result<Term<'_, 'static>, DecodeError<'_>> {
+pub fn decode(input: &[u8]) -> std::result::Result<Term<'_, '_>, DecodeError<'_>> {
let (rest, term) = decode_term(input)?;
let (end, _) = take_while(is_whitespace)(rest)?;
if !end.is_empty() {
@@ -60,7 +55,7 @@ pub fn decode(input: &[u8]) -> std::result::Result<Term<'_, 'static>, DecodeError
Ok(Term(term))
}
-fn decode_term(input: &[u8]) -> IResult<&'_ [u8], AnyTerm<'_, 'static>> {
+fn decode_term(input: &[u8]) -> IResult<&'_ [u8], AnyTerm<'_, '_>> {
let (start, _) = take_while(is_whitespace)(input)?;
let (rest, list) = separated_list1(take_while1(is_whitespace), decode_nonlist_term)(start)?;
@@ -73,7 +68,7 @@ fn decode_term(input: &[u8]) -> IResult<&'_ [u8], AnyTerm<'_, 'static>> {
}
}
-fn decode_nonlist_term(input: &[u8]) -> IResult<&'_ [u8], NonListTerm<'_, 'static>> {
+fn decode_nonlist_term(input: &[u8]) -> IResult<&'_ [u8], NonListTerm<'_, '_>> {
let (rest, term) = alt((
map(decode_str, NonListTerm::Str),
map(decode_dict, |(raw, d)| NonListTerm::Dict(raw, d)),
@@ -86,14 +81,14 @@ fn decode_str(input: &[u8]) -> IResult<&'_ [u8], &'_ [u8]> {
Ok((rest, data))
}
-type DictType<'a> = (&'a [u8], HashMap<&'a [u8], AnyTerm<'a, 'static>>);
+type DictType<'a> = (&'a [u8], HashMap<&'a [u8], AnyTerm<'a, 'a>>);
fn decode_dict(dict_begin: &[u8]) -> IResult<&'_ [u8], DictType<'_>> {
-let (d, _) = tag(DICT_OPEN)(dict_begin)?;
+let (d, _) = tag(&[DICT_OPEN][..])(dict_begin)?;
let (d, items) = separated_list0(dict_separator, decode_dict_item)(d)?;
let (d, _) = opt(dict_separator)(d)?;
let (d, _) = take_while(is_whitespace)(d)?;
-let (dict_end, _) = tag(DICT_CLOSE)(d)?;
+let (dict_end, _) = tag(&[DICT_CLOSE][..])(d)?;
let dict = items.into_iter().collect::<HashMap<_, _>>();
@@ -105,27 +100,19 @@ fn decode_dict(dict_begin: &[u8]) -> IResult<&'_ [u8], DictType<'_>> {
fn dict_separator(d: &[u8]) -> IResult<&'_ [u8], ()> {
let (d, _) = take_while(is_whitespace)(d)?;
-let (d, _) = tag(DICT_DELIM)(d)?;
+let (d, _) = tag(&[DICT_DELIM][..])(d)?;
Ok((d, ()))
}
-fn decode_dict_item(d: &[u8]) -> IResult<&'_ [u8], (&'_ [u8], AnyTerm<'_, 'static>)> {
+fn decode_dict_item(d: &[u8]) -> IResult<&'_ [u8], (&'_ [u8], AnyTerm<'_, '_>)> {
let (d, _) = take_while(is_whitespace)(d)?;
let (d, key) = decode_str(d)?;
let (d, _) = take_while(is_whitespace)(d)?;
-let (d, _) = tag(DICT_ASSIGN)(d)?;
+let (d, _) = tag(&[DICT_ASSIGN][..])(d)?;
let (d, value) = decode_term(d)?;
Ok((d, (key, value)))
}
-fn is_string_char(c: u8) -> bool {
-c.is_ascii_alphanumeric() || STR_EXTRA_CHARS.contains(&c)
-}
-fn is_whitespace(c: u8) -> bool {
-c.is_ascii_whitespace()
-}
// ----
#[cfg(test)]
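The decoder changes above follow from the syntax constants moving into the crate root as single u8 values (the last hunk in this commit): nom's tag combinator matches a byte-slice pattern, so a u8 constant now has to be wrapped in a one-element slice. Below is a minimal sketch of that pattern, assuming nom 7's complete byte parsers; the function name open_brace is illustrative and not part of the commit.

use nom::bytes::complete::tag;
use nom::IResult;

// Mirrors the crate-root constant introduced by this commit.
const DICT_OPEN: u8 = b'{';

// `tag` compares the input against a byte slice, so the single-byte
// constant is wrapped as a one-element slice before being handed to it.
fn open_brace(input: &[u8]) -> IResult<&[u8], &[u8]> {
    tag(&[DICT_OPEN][..])(input)
}

fn main() {
    assert_eq!(open_brace(b"{ a = b }"), Ok((&b" a = b }"[..], &b"{"[..])));
}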

src/enc/mod.rs (new file, 183 lines added)

@@ -0,0 +1,183 @@
use std::collections::HashMap;
use crate::dec::decode;
use crate::{is_string_char, is_whitespace};
pub struct Term<'a>(T<'a>);
enum T<'a> {
Str(&'a [u8]),
OwnedStr(Vec<u8>),
Dict(HashMap<&'a [u8], T<'a>>),
List(Vec<T<'a>>),
}
#[derive(Debug)]
pub enum Error {
InvalidCharacter(u8),
InvalidRaw,
NotADictionary,
}
// ---- helpers to build terms ----
/// Encode a string (may contain whitespace)
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(encode(&string("Hello world .").unwrap()), b"Hello world .");
/// ```
pub fn string(s: &str) -> Result<Term<'_>, Error> {
for c in s.as_bytes().iter() {
if !(is_string_char(*c) || is_whitespace(*c)) {
return Err(Error::InvalidCharacter(*c));
}
}
Ok(Term(T::Str(s.as_bytes())))
}
/// Include a raw nettext value
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(encode(&raw(b"Hello { a = b, c = d} .").unwrap()), b"Hello { a = b, c = d} .");
/// ```
pub fn raw(bytes: &[u8]) -> Result<Term<'_>, Error> {
if decode(bytes).is_err() {
return Err(Error::InvalidRaw);
}
Ok(Term(T::Str(bytes)))
}
/// Encode a list of items
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(encode(&list([
/// string("Hello").unwrap(),
/// string("world").unwrap()
/// ])), b"Hello world");
/// ```
pub fn list<'a, I: IntoIterator<Item = Term<'a>>>(terms: I) -> Term<'a> {
Term(T::List(terms.into_iter().map(|x| x.0).collect()))
}
/// Encode a dictionary of items
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(encode(&dict([
/// ("a", string("Hello").unwrap()),
/// ("b", string("world").unwrap())
/// ])), b"{\n  a = Hello,\n  b = world,\n}");
/// ```
pub fn dict<'a, I: IntoIterator<Item = (&'a str, Term<'a>)>>(pairs: I) -> Term<'a> {
Term(T::Dict(
pairs
.into_iter()
.map(|(k, v)| (k.as_bytes(), v.0))
.collect(),
))
}
impl<'a> Term<'a> {
pub fn push(self, t: Term<'a>) -> Term<'a> {
match self.0 {
T::List(mut v) => {
v.push(t.0);
Term(T::List(v))
}
x => Term(T::List(vec![x, t.0])),
}
}
pub fn insert(self, k: &'a str, v: Term<'a>) -> Result<Term<'a>, Error> {
match self.0 {
T::Dict(mut d) => {
d.insert(k.as_bytes(), v.0);
Ok(Term(T::Dict(d)))
}
_ => Err(Error::NotADictionary),
}
}
}
// ---- encoding function ----
pub fn encode<'a>(t: &Term<'a>) -> Vec<u8> {
let mut buf = Vec::with_capacity(128);
encode_aux(&mut buf, &t.0, 0);
buf
}
fn encode_aux<'a>(buf: &mut Vec<u8>, term: &T<'a>, indent: usize) {
match term {
T::Str(s) => buf.extend_from_slice(s),
T::OwnedStr(s) => buf.extend_from_slice(&s),
T::Dict(d) => {
buf.extend_from_slice(b"{\n");
let indent2 = indent + 2;
let mut keys = d.keys().collect::<Vec<_>>();
keys.sort();
for k in keys {
let v = d.get(k).unwrap();
for _ in 0..indent2 {
buf.push(b' ');
}
buf.extend_from_slice(k);
buf.extend_from_slice(b" = ");
encode_aux(buf, v, indent2);
buf.extend_from_slice(b",\n");
}
for _ in 0..indent {
buf.push(b' ');
}
buf.push(b'}');
}
T::List(l) => {
let indent2 = indent + 2;
for (i, v) in l.iter().enumerate() {
if buf.iter().rev().take_while(|c| **c != b'\n').count() > 80 {
buf.push(b'\n');
for _ in 0..indent2 {
buf.push(b' ');
}
} else if i > 0 {
buf.push(b' ');
}
encode_aux(buf, v, indent2);
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn complex1() {
let input = list([
string("HELLO").unwrap(),
string("alexhelloworld").unwrap(),
dict([
("from", string("jxx").unwrap()),
("subject", string("hello").unwrap()),
("data", raw(b"{ f1 = plop, f2 = kuko }").unwrap()),
]),
]);
let expected = b"HELLO alexhelloworld {
    data = { f1 = plop, f2 = kuko },
    from = jxx,
    subject = hello,
  }";
let enc = encode(&input);
eprintln!("{}", std::str::from_utf8(&enc).unwrap());
eprintln!("{}", std::str::from_utf8(&expected[..]).unwrap());
assert_eq!(encode(&input), expected);
}
}
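Taken together, the helpers above form a small builder-style API. Here is a usage sketch from outside the crate, following the nettext::enc paths used in the doctests; the values ("CALL", "get_user", "1234") are made up for illustration.

use nettext::enc::{dict, encode, list, string};

fn main() {
    // Build a list term containing a bare string and a dictionary,
    // then serialize it with the encoder added in this commit.
    let term = list([
        string("CALL").unwrap(),
        dict([
            ("method", string("get_user").unwrap()),
            ("id", string("1234").unwrap()),
        ]),
    ]);
    let bytes = encode(&term);
    // Dictionary keys are emitted in sorted order, one per line,
    // as the complex1 test above expects.
    println!("{}", String::from_utf8(bytes).unwrap());
}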


@@ -1,2 +1,19 @@
pub mod crypto;
pub mod dec;
pub mod enc;
// ---- syntactic elements of the data format ----
pub(crate) const DICT_OPEN: u8 = b'{';
pub(crate) const DICT_CLOSE: u8 = b'}';
pub(crate) const DICT_ASSIGN: u8 = b'=';
pub(crate) const DICT_DELIM: u8 = b',';
pub(crate) const STR_EXTRA_CHARS: &[u8] = b"._-*?";
pub(crate) fn is_string_char(c: u8) -> bool {
c.is_ascii_alphanumeric() || STR_EXTRA_CHARS.contains(&c)
}
pub(crate) fn is_whitespace(c: u8) -> bool {
c.is_ascii_whitespace()
}
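For reference, a test-style sketch (hypothetical, not part of the commit) of what the shared predicate accepts; since is_string_char is pub(crate), such a test would have to live next to the definitions above.

#[cfg(test)]
mod charset_tests {
    use super::*;

    #[test]
    fn string_chars() {
        // Bare strings are ASCII alphanumerics plus the extra characters . _ - * ?
        for c in b"azAZ09._-*?" {
            assert!(is_string_char(*c));
        }
        // Whitespace and the dict syntax characters are rejected, which is
        // what lets them delimit strings and dictionaries in the decoder.
        for c in b" \t{}=," {
            assert!(!is_string_char(*c));
        }
    }
}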