nettext/src/enc/mod.rs

//! Functions to generate nettext representations of data structures
//!
//! Example:
//!
//! ```
//! use nettext::enc::*;
//!
//! let nettext_encoding = list([
//!     string("CALL").unwrap(),
//!     string("myfunction").unwrap(),
//!     dict([
//!         ("a", string("hello").unwrap()),
//!         ("b", string("world").unwrap()),
//!         ("c", raw(b"{ a = 12, b = 42 }").unwrap()),
//!         ("d", bytes_split(&((0..128u8).collect::<Vec<_>>()))),
//!     ]).unwrap(),
//! ]).unwrap().encode();
//! ```

mod error;

use std::borrow::{Borrow, Cow};
use std::collections::HashMap;

use crate::dec::{self, decode};
use crate::{is_string_char, is_whitespace};

pub use error::Error;

/// A term meant to be encoded into a nettext representation
///
/// Terms are built with the helper functions of this module (`string`,
/// `list`, `dict`, `bytes`, ...) and turned into bytes with `encode`.
///
/// `Debug` is implemented so that terms can be used with `dbg!`,
/// `assert_eq!` messages and `Result::unwrap_err` / `Result::expect_err`.
#[derive(Debug)]
pub struct Term<'a>(T<'a>);

/// Internal representation of a term.
#[derive(Debug)]
enum T<'a> {
    /// Bytes borrowed from the caller, written as-is when encoding.
    Str(&'a [u8]),
    /// Owned bytes, written as-is when encoding.
    OwnedStr(Vec<u8>),
    /// Key/value pairs; keys are raw bytes, either borrowed or owned.
    Dict(HashMap<Cow<'a, [u8]>, T<'a>>),
    /// Sequence of terms, written separated by whitespace when encoding.
    List(Vec<T<'a>>),
}

/// Result of building a [`Term`]: the term on success, an encoding [`Error`] otherwise.
pub type Result<'a> = std::result::Result<Term<'a>, Error>;

// ---- helpers to transform datatypes into encoder terms ----

/// Trait for anything that can be encoded as nettext
pub trait Encode {
    /// Builds the encoder [`Term`] for `self`, or returns an [`Error`] if the
    /// conversion fails. The returned term may borrow from `self`.
    fn term(&self) -> Result<'_>;
}

impl<'a, 'b> Encode for dec::Term<'a, 'b> {
    /// Re-encodes a decoded term by wrapping the bytes returned by its
    /// `raw()` accessor; this conversion never fails.
    fn term(&self) -> Result<'_> {
        let encoded = self.raw();
        Ok(Term(T::Str(encoded)))
    }
}

// ---- helpers to build terms ----

/// Term corresponding to a string (that may contain whitespace)
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(string("Hello world  .").unwrap().encode(), b"Hello world  .");
/// ```
pub fn string(s: &str) -> Result<'_> {
    for c in s.as_bytes().iter() {
        if !(is_string_char(*c) || is_whitespace(*c)) {
            return Err(Error::InvalidCharacter(*c));
        }
    }
    Ok(Term(T::Str(s.as_bytes())))
}

/// Same as `string` but takes an owned String
pub fn string_owned(s: String) -> Result<'static> {
    for c in s.as_bytes().iter() {
        if !(is_string_char(*c) || is_whitespace(*c)) {
            return Err(Error::InvalidCharacter(*c));
        }
    }
    Ok(Term(T::OwnedStr(s.into_bytes())))
}

/// Include a raw nettext value
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(raw(b"Hello { a = b, c = d}  .").unwrap().encode(), b"Hello { a = b, c = d}  .");
/// ```
pub fn raw(bytes: &[u8]) -> Result<'_> {
    if decode(bytes).is_err() {
        return Err(Error::InvalidRaw);
    }
    Ok(Term(T::Str(bytes)))
}

/// Term corresponding to a list of terms
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(list([
///     string("Hello").unwrap(),
///     string("world").unwrap()
/// ]).unwrap().encode(), b"Hello world");
/// ```
pub fn list<'a, I: IntoIterator<Item = Term<'a>>>(terms: I) -> Result<'a> {
    let mut tmp = Vec::with_capacity(8);
    for t in terms {
        match t.0 {
            T::List(_) => return Err(Error::ListInList),
            x => tmp.push(x),
        }
    }
    Ok(Term(T::List(tmp)))
}

/// Term corresponding to a list of terms. Sub-lists are flattenned.
pub fn list_flatten<'a, I: IntoIterator<Item = Term<'a>>>(terms: I) -> Result<'a> {
    let mut tmp = Vec::with_capacity(8);
    for t in terms {
        match t.0 {
            T::List(t) => tmp.extend(t),
            x => tmp.push(x),
        }
    }
    Ok(Term(T::List(tmp)))
}

/// Term corresponding to a list of terms. Sub-lists are represented as NESTED: `{.= sub list items }`.
pub fn list_nested<'a, I: IntoIterator<Item = Term<'a>>>(terms: I) -> Result<'a> {
    let mut tmp = Vec::with_capacity(8);
    for t in terms {
        match t.0 {
            T::List(t) => tmp.push(Term(T::List(t)).nested().0),
            x => tmp.push(x),
        }
    }
    Ok(Term(T::List(tmp)))
}

/// Term corresponding to a dictionnary of items
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(dict([
///     ("a", string("Hello").unwrap()),
///     ("b", string("world").unwrap())
/// ]).unwrap().encode(), b"{\n  a = Hello,\n  b = world,\n}");
/// ```
pub fn dict<'a, I: IntoIterator<Item = (&'a str, Term<'a>)>>(pairs: I) -> Result<'a> {
    let mut tmp = HashMap::new();
    for (k, v) in pairs {
        if tmp.insert(Cow::from(k.as_bytes()), v.0).is_some() {
            return Err(Error::DuplicateKey(k.to_string()));
        }
    }
    Ok(Term(T::Dict(tmp)))
}

/// Term corresponding to a byte slice,
/// encoded using base64 url-safe encoding without padding
///
/// Example:
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(bytes(b"hello, world!").encode(), b"aGVsbG8sIHdvcmxkIQ");
/// ```
pub fn bytes(bytes: &[u8]) -> Term<'static> {
    let encoded = base64::encode_config(bytes, base64::URL_SAFE_NO_PAD);
    Term(T::OwnedStr(encoded.into_bytes()))
}

/// Same as `bytes()`, but splits the byte slice in 48-byte chunks
/// and encodes each chunk separately, putting them in a list of terms.
/// Usefull for long byte slices to have cleaner representations,
/// mainly usefull for dictionnary keys.
pub fn bytes_split(bytes: &[u8]) -> Term<'static> {
    let chunks = bytes
        .chunks(48)
        .map(|b| T::OwnedStr(base64::encode_config(b, base64::URL_SAFE_NO_PAD).into_bytes()))
        .collect::<Vec<_>>();
    if chunks.len() > 1 {
        Term(T::List(chunks))
    } else {
        Term(chunks.into_iter().next().unwrap_or(T::Str(b".")))
    }
}

impl<'a> Term<'a> {
    /// Append a term to an existing term.
    /// Transforms the initial term into a list if necessary.
    #[must_use]
    pub fn append(self, t: Term<'a>) -> Term<'a> {
        match self.0 {
            T::List(mut v) => {
                v.push(t.0);
                Term(T::List(v))
            }
            x => Term(T::List(vec![x, t.0])),
        }
    }

    /// Inserts a key-value pair into a term that is a dictionnary.
    /// Fails if `self` is not a dictionnary.
    pub fn insert(self, k: &'a str, v: Term<'a>) -> Result<'a> {
        match self.0 {
            T::Dict(mut d) => {
                if d.insert(Cow::from(k.as_bytes()), v.0).is_some() {
                    return Err(Error::DuplicateKey(k.to_string()));
                }
                Ok(Term(T::Dict(d)))
            }
            _ => Err(Error::NotADictionnary),
        }
    }

    /// Makes a NESTED term of this term, by putting it in a dict
    /// with a single key `.`.
    ///
    /// Example:
    ///
    /// ```
    /// use nettext::enc::*;
    ///
    /// assert_eq!(list([string("hello").unwrap(), string("world").unwrap()]).unwrap().nested().encode(), b"{.= hello world }");
    /// ```
    #[must_use]
    pub fn nested(self) -> Term<'a> {
        dict([(".", self)]).unwrap()
    }
}

// ---- additional internal functions for serde module ----

/// Builds a dictionary term from owned byte-string keys.
///
/// Counterpart of `dict` for the serde module, where keys are already
/// encoded as bytes. Like `dict`, it fails with `Error::DuplicateKey` when
/// the same key appears twice instead of silently dropping the earlier
/// value; the key is reported as lossily-decoded UTF-8.
#[cfg(feature = "serde")]
pub(crate) fn dict_owned_u8<'a, I: IntoIterator<Item = (Vec<u8>, Term<'a>)>>(
    pairs: I,
) -> Result<'a> {
    use std::collections::hash_map::Entry;

    let mut tmp: HashMap<Cow<'a, [u8]>, T<'a>> = HashMap::new();
    for (k, v) in pairs {
        // The entry API detects the duplicate without cloning the key.
        match tmp.entry(Cow::Owned(k)) {
            Entry::Occupied(slot) => {
                let key = String::from_utf8_lossy(slot.key()).into_owned();
                return Err(Error::DuplicateKey(key));
            }
            Entry::Vacant(slot) => {
                slot.insert(v.0);
            }
        }
    }
    Ok(Term(T::Dict(tmp)))
}

/// Wraps borrowed bytes as a term without running any validation on them.
#[cfg(feature = "serde")]
pub(crate) fn safe_raw(bytes: &[u8]) -> Term<'_> {
    let inner = T::Str(bytes);
    Term(inner)
}

/// Wraps owned bytes as a term without running any validation on them.
#[cfg(feature = "serde")]
pub(crate) fn safe_raw_owned(bytes: Vec<u8>) -> Term<'static> {
    let inner = T::OwnedStr(bytes);
    Term(inner)
}

// ---- encoding function ----

impl<'a> Term<'a> {
    /// Generate the nettext representation of a term
    pub fn encode(self) -> Vec<u8> {
        let mut buf = Vec::with_capacity(128);
        self.0.encode_aux(&mut buf, 0, true);
        buf
    }
}

impl<'a> T<'a> {
    /// Appends the nettext representation of `self` to `buf`.
    ///
    /// * `indent` is the indentation (in spaces) of the enclosing term; nested
    ///   dictionary entries and wrapped list lines are indented two spaces more.
    /// * `is_toplevel` disables line wrapping for lists; it is propagated to
    ///   list items and reset to `false` inside dictionaries.
    ///
    /// Layout rules:
    /// * strings are copied verbatim;
    /// * an empty dictionary is `{}`, a single-entry dictionary is written on
    ///   one line (`{ k = v }`, or `{.= v }` when the key is `.`), larger
    ///   dictionaries get one entry per line, sorted by key;
    /// * list items are separated by a space, or by a line break when the
    ///   current line already holds at least 70 bytes (non top-level only).
    fn encode_aux(self, buf: &mut Vec<u8>, indent: usize, is_toplevel: bool) {
        // Number of bytes on the current line from which a nested list wraps.
        const WRAP_AT: usize = 70;

        // Appends `n` spaces to `buf`.
        fn push_indent(buf: &mut Vec<u8>, n: usize) {
            buf.resize(buf.len() + n, b' ');
        }

        // True when the current (last) line of `buf` holds at least `WRAP_AT`
        // bytes. Only the last `WRAP_AT` bytes are inspected, so the cost does
        // not grow with the line length.
        fn line_is_full(buf: &[u8]) -> bool {
            buf.iter()
                .rev()
                .take(WRAP_AT)
                .take_while(|c| **c != b'\n')
                .count()
                == WRAP_AT
        }

        match self {
            T::Str(s) => buf.extend_from_slice(s),
            T::OwnedStr(s) => buf.extend_from_slice(&s),
            T::Dict(d) => match d.len() {
                0 => buf.extend_from_slice(b"{}"),
                1 => {
                    // Exactly one iteration: compact single-line form.
                    for (k, v) in d {
                        if k.as_ref() == b"." {
                            buf.extend_from_slice(b"{.= ");
                        } else {
                            buf.extend_from_slice(b"{ ");
                            buf.extend_from_slice(k.borrow());
                            buf.extend_from_slice(b" = ");
                        }
                        v.encode_aux(buf, indent + 2, false);
                        buf.extend_from_slice(b" }");
                    }
                }
                _ => {
                    buf.extend_from_slice(b"{\n");
                    let indent2 = indent + 2;
                    // Sort the owned entries directly: no key clone and no
                    // extra hash lookup per entry. Keys are unique, so an
                    // unstable sort yields a deterministic order.
                    let mut entries = d.into_iter().collect::<Vec<_>>();
                    entries.sort_unstable_by(|x, y| x.0.cmp(&y.0));
                    for (k, v) in entries {
                        push_indent(buf, indent2);
                        buf.extend_from_slice(k.borrow());
                        buf.extend_from_slice(b" = ");
                        v.encode_aux(buf, indent2, false);
                        buf.extend_from_slice(b",\n");
                    }
                    push_indent(buf, indent);
                    buf.push(b'}');
                }
            },
            T::List(l) => {
                let indent2 = indent + 2;
                for (i, v) in l.into_iter().enumerate() {
                    if !is_toplevel && line_is_full(buf.as_slice()) {
                        // The line break doubles as the item separator.
                        buf.push(b'\n');
                        push_indent(buf, indent2);
                    } else if i > 0 {
                        buf.push(b' ');
                    }
                    v.encode_aux(buf, indent2, is_toplevel);
                }
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A top-level list ending with a multi-entry dictionary: entries are
    /// sorted by key and written one per line.
    #[test]
    fn complex1() {
        let headers = dict([
            ("from", string("jxx").unwrap()),
            ("subject", string("hello").unwrap()),
            ("data", raw(b"{ f1 = plop, f2 = kuko }").unwrap()),
        ])
        .unwrap();
        let input = list([
            string("HELLO").unwrap(),
            string("alexhelloworld").unwrap(),
            headers,
        ])
        .unwrap();
        let expected: &[u8] = b"HELLO alexhelloworld {\n    data = { f1 = plop, f2 = kuko },\n    from = jxx,\n    subject = hello,\n  }";
        let enc = input.encode();
        eprintln!("{}", std::str::from_utf8(&enc).unwrap());
        eprintln!("{}", std::str::from_utf8(expected).unwrap());
        assert_eq!(enc.as_slice(), expected);
    }

    /// A list directly inside a list is rejected, but is accepted once
    /// wrapped with `nested()`.
    #[test]
    fn nested() {
        let inner = || list([string("c").unwrap(), string("d").unwrap()]).unwrap();

        let direct = list([string("a").unwrap(), string("b").unwrap(), inner()]);
        assert!(direct.is_err());

        let wrapped = list([
            string("a").unwrap(),
            string("b").unwrap(),
            inner().nested(),
        ])
        .unwrap();
        assert_eq!(wrapped.encode(), b"a b {.= c d }");
    }
}