//! Functions to generate nettext representations of data structures //! //! Example: //! //! ``` //! use nettext::enc::*; //! //! let nettext_encoding = seq([ //! string("CALL").unwrap(), //! string("myfunction").unwrap(), //! dict([ //! ("a", string("hello").unwrap()), //! ("b", string("world").unwrap()), //! ("c", raw(b"{ a = 12, b = 42 }").unwrap()), //! ("d", bytes_split(&((0..128u8).collect::>()))), //! ]).unwrap(), //! ]).unwrap().encode(); //! ``` mod error; use std::borrow::{Borrow, Cow}; use std::collections::HashMap; use crate::dec::{self, decode}; use crate::{is_string_char, is_whitespace}; pub use error::Error; /// A term meant to be encoded into a nettext representation pub struct Term<'a>(T<'a>); enum T<'a> { Str(&'a [u8]), OwnedStr(Vec), Dict(HashMap, T<'a>>), List(Vec>), Seq(Vec>), } /// The result type for trying to encode something as nettext pub type Result<'a> = std::result::Result, Error>; // ---- helpers to transform datatypes into encoder terms ---- /// Trait for anything that can be encoded as nettext pub trait Encode { fn term(&self) -> Result<'_>; } impl<'a, 'b> Encode for dec::Term<'a, 'b> { fn term(&self) -> Result<'_> { Ok(Term(T::Str(self.raw()))) } } // ---- helpers to build terms ---- /// Term corresponding to a string (that may contain whitespace) /// /// ``` /// use nettext::enc::*; /// /// assert_eq!(string("Hello world .").unwrap().encode(), b"Hello world ."); /// ``` pub fn string(s: &str) -> Result<'_> { for c in s.as_bytes().iter() { if !(is_string_char(*c) || is_whitespace(*c)) { return Err(Error::InvalidCharacter(*c)); } } Ok(Term(T::Str(s.as_bytes()))) } /// Same as `string` but takes an owned String pub fn string_owned(s: String) -> Result<'static> { for c in s.as_bytes().iter() { if !(is_string_char(*c) || is_whitespace(*c)) { return Err(Error::InvalidCharacter(*c)); } } Ok(Term(T::OwnedStr(s.into_bytes()))) } /// Include a raw nettext value /// /// ``` /// use nettext::enc::*; /// /// assert_eq!(raw(b"Hello { a = b, c = d} .").unwrap().encode(), b"Hello { a = b, c = d} ."); /// ``` pub fn raw(bytes: &[u8]) -> Result<'_> { if decode(bytes).is_err() { return Err(Error::InvalidRaw); } Ok(Term(T::Str(bytes))) } /// Term corresponding to a sequence of terms. Subsequences are banned and will raise an error. /// /// ``` /// use nettext::enc::*; /// /// assert_eq!(seq([ /// string("Hello").unwrap(), /// string("world").unwrap() /// ]).unwrap().encode(), b"Hello world"); /// ``` pub fn seq<'a, I: IntoIterator>>(terms: I) -> Result<'a> { let mut tmp = Vec::with_capacity(8); for t in terms { match t.0 { T::Seq(_) => return Err(Error::SeqInSeq), x => tmp.push(x), } } Ok(Term(T::Seq(tmp))) } /// Term corresponding to a sequence of terms. Sub-sequences are flattenned. pub fn seq_flatten<'a, I: IntoIterator>>(terms: I) -> Term<'a> { let mut tmp = Vec::with_capacity(8); for t in terms { match t.0 { T::Seq(t) => tmp.extend(t), x => tmp.push(x), } } Term(T::Seq(tmp)) } /// Term corresponding to a list of terms. /// /// ``` /// use nettext::enc::*; /// /// assert_eq!(list([ /// string("Hello").unwrap(), /// string("world").unwrap() /// ]).encode(), b"[\n Hello,\n world,\n]"); /// ``` pub fn list<'a, I: IntoIterator>>(terms: I) -> Term<'a> { let terms = terms.into_iter().map(|x| x.0).collect::>(); Term(T::List(terms)) } /// Term corresponding to a dictionnary of items /// /// ``` /// use nettext::enc::*; /// /// assert_eq!(dict([ /// ("a", string("Hello").unwrap()), /// ("b", string("world").unwrap()) /// ]).unwrap().encode(), b"{\n a = Hello,\n b = world,\n}"); /// ``` pub fn dict<'a, I: IntoIterator)>>(pairs: I) -> Result<'a> { let mut tmp = HashMap::new(); for (k, v) in pairs { if tmp.insert(Cow::from(k.as_bytes()), v.0).is_some() { return Err(Error::DuplicateKey(k.to_string())); } } Ok(Term(T::Dict(tmp))) } /// Term corresponding to a byte slice, /// encoding using base64 url-safe encoding without padding /// /// Example: /// /// ``` /// use nettext::enc::*; /// /// assert_eq!(bytes(b"hello, world!").encode(), b"aGVsbG8sIHdvcmxkIQ"); /// ``` pub fn bytes(bytes: &[u8]) -> Term<'static> { Term(T::OwnedStr( base64::encode_config(bytes, base64::URL_SAFE_NO_PAD).into_bytes(), )) } /// Same as `bytes()`, but splits the byte slice in 48-byte chunks /// and encodes each chunk separately, putting them in a sequence of terms. /// Usefull for long byte slices to have cleaner representations, /// mainly usefull for dictionnary keys. pub fn bytes_split(bytes: &[u8]) -> Term<'static> { let chunks = bytes .chunks(48) .map(|b| T::OwnedStr(base64::encode_config(b, base64::URL_SAFE_NO_PAD).into_bytes())) .collect::>(); if chunks.len() > 1 { Term(T::Seq(chunks)) } else { Term(chunks.into_iter().next().unwrap_or(T::Str(b"."))) } } impl<'a> Term<'a> { /// Append a term to an existing term. /// Transforms the initial term into a seq if necessary. #[must_use] pub fn append(self, t: Term<'a>) -> Term<'a> { match self.0 { T::Seq(mut v) => { v.push(t.0); Term(T::Seq(v)) } x => Term(T::Seq(vec![x, t.0])), } } /// Inserts a key-value pair into a term that is a dictionnary. /// Fails if `self` is not a dictionnary. pub fn insert(self, k: &'a str, v: Term<'a>) -> Result<'a> { match self.0 { T::Dict(mut d) => { if d.insert(Cow::from(k.as_bytes()), v.0).is_some() { return Err(Error::DuplicateKey(k.to_string())); } Ok(Term(T::Dict(d))) } _ => Err(Error::NotADictionnary), } } } // ---- additional internal functions for serde module ---- #[cfg(feature = "serde")] pub(crate) fn dict_owned_u8<'a, I: IntoIterator, Term<'a>)>>( pairs: I, ) -> Result<'a> { let mut tmp = HashMap::new(); for (k, v) in pairs { tmp.insert(Cow::from(k), v.0); } Ok(Term(T::Dict(tmp))) } #[cfg(feature = "serde")] pub(crate) fn safe_raw(bytes: &[u8]) -> Term<'_> { Term(T::Str(bytes)) } #[cfg(feature = "serde")] pub(crate) fn safe_raw_owned(bytes: Vec) -> Term<'static> { Term(T::OwnedStr(bytes)) } // ---- encoding function ---- impl<'a> Term<'a> { /// Generate the nettext representation of a term pub fn encode(self) -> Vec { let mut buf = Vec::with_capacity(128); self.0.encode_aux(&mut buf, 0, true); buf } /// Generate the nettext representation of a term, as a String pub fn encode_string(self) -> String { unsafe { String::from_utf8_unchecked(self.encode()) } } /// Generate the concise nettext representation of a term pub fn encode_concise(self) -> Vec { let mut buf = Vec::with_capacity(128); self.0.encode_concise_aux(&mut buf); buf } } impl<'a> T<'a> { fn encode_aux(self, buf: &mut Vec, indent: usize, is_toplevel: bool) { match self { T::Str(s) => buf.extend_from_slice(s), T::OwnedStr(s) => buf.extend_from_slice(&s), T::Dict(mut d) => { if d.is_empty() { buf.extend_from_slice(b"{}"); } else if d.len() == 1 { let (k, v) = d.into_iter().next().unwrap(); buf.extend_from_slice(b"{ "); buf.extend_from_slice(k.borrow()); buf.extend_from_slice(b" = "); v.encode_aux(buf, indent + 2, false); buf.extend_from_slice(b" }"); } else { buf.extend_from_slice(b"{\n"); let indent2 = indent + 2; let mut keys = d.keys().cloned().collect::>(); keys.sort(); for k in keys { let v = d.remove(&k).unwrap(); for _ in 0..indent2 { buf.push(b' '); } buf.extend_from_slice(k.borrow()); buf.extend_from_slice(b" = "); v.encode_aux(buf, indent2, false); buf.extend_from_slice(b",\n"); } for _ in 0..indent { buf.push(b' '); } buf.push(b'}'); } } T::List(l) => { if l.len() == 0 { buf.extend_from_slice(b"[]"); } else if l.len() == 1 { buf.extend_from_slice(b"[ "); l.into_iter().next().unwrap().encode_aux(buf, indent + 2, false); buf.extend_from_slice(b" ]"); } else { let indent2 = indent + 2; buf.extend_from_slice(b"[\n"); for item in l { for _ in 0..indent2 { buf.push(b' '); } item.encode_aux(buf, indent2, false); buf.extend_from_slice(b",\n"); } for _ in 0..indent { buf.push(b' '); } buf.push(b']'); } } T::Seq(l) => { let indent2 = indent + 2; for (i, v) in l.into_iter().enumerate() { if !is_toplevel && buf.iter().rev().take_while(|c| **c != b'\n').count() >= 70 { buf.push(b'\n'); for _ in 0..indent2 { buf.push(b' '); } } else if i > 0 { buf.push(b' '); } v.encode_aux(buf, indent2, is_toplevel); } } } } fn encode_concise_aux(self, buf: &mut Vec) { match self { T::Str(s) => buf.extend_from_slice(s), T::OwnedStr(s) => buf.extend_from_slice(&s), T::Dict(mut d) => { buf.push(b'{'); let mut keys = d.keys().cloned().collect::>(); keys.sort(); for (i, k) in keys.into_iter().enumerate() { if i > 0 { buf.push(b','); } let v = d.remove(&k).unwrap(); buf.extend_from_slice(k.borrow()); buf.push(b'='); v.encode_concise_aux(buf); } buf.push(b'}'); } T::List(l) => { buf.push(b'['); for (i,item) in l.into_iter().enumerate() { if i > 0 { buf.push(b','); } item.encode_concise_aux(buf); } buf.push(b']'); } T::Seq(l) => { for (i, v) in l.into_iter().enumerate() { if i > 0 { buf.push(b' '); } v.encode_concise_aux(buf); } } } } } #[cfg(test)] mod tests { use crate::debug; use super::*; #[test] fn complex1() { let input = seq([ string("HELLO").unwrap(), string("alexhelloworld").unwrap(), list([ string("dude").unwrap(), string("why").unwrap(), ]), dict([ ("from", string("jxx").unwrap()), ("subject", string("hello").unwrap()), ("data", raw(b"{ f1 = plop, f2 = kuko }").unwrap()), ]) .unwrap(), ]) .unwrap(); let expected = "HELLO alexhelloworld [ dude, why, ] { data = { f1 = plop, f2 = kuko }, from = jxx, subject = hello, }"; assert_eq!(debug(&input.encode()), expected); } #[test] fn complex1_concise() { let input = seq([ string("HELLO").unwrap(), string("alexhelloworld").unwrap(), list([ string("dude").unwrap(), string("why").unwrap(), ]), dict([ ("from", string("jxx").unwrap()), ("subject", string("hello").unwrap()), ("data", raw(b"{ f1 = plop, f2 = kuko }").unwrap()), ]) .unwrap(), ]) .unwrap(); let expected_concise = "HELLO alexhelloworld [dude,why] {data={ f1 = plop, f2 = kuko },from=jxx,subject=hello}"; assert_eq!(debug(&input.encode_concise()), expected_concise); } }