//! Functions to generate nettext representations of data structures
//!
//! Example:
//!
//! ```
//! use nettext::enc::*;
//!
//! let nettext_encoding = seq([
//!     string("CALL").unwrap(),
//!     string("myfunction").unwrap(),
//!     dict([
//!         ("a", string("hello").unwrap()),
//!         ("b", string("world").unwrap()),
//!         ("c", raw(b"{ a = 12; b = 42 }").unwrap()),
//!         ("d", bytes_split(&((0..128u8).collect::<Vec<_>>()))),
//!     ]).unwrap(),
//! ]).unwrap().encode();
//! ```

mod error;

use std::borrow::{Borrow, Cow};
use std::collections::HashMap;

use crate::*;

use crate::dec::{self, decode};
use crate::{is_string_char, is_whitespace, BytesEncoding};

pub use error::Error;

/// A term meant to be encoded into a nettext representation
pub struct Term<'a>(T<'a>);

enum T<'a> {
    Str(&'a [u8]),
    OwnedStr(Vec<u8>),
    Dict(HashMap<Cow<'a, [u8]>, T<'a>>),
    List(Vec<T<'a>>),
    Seq(Vec<T<'a>>),
}

/// The result type for trying to encode something as nettext
pub type Result<'a> = std::result::Result<Term<'a>, Error>;

// ---- helpers to transform datatypes into encoder terms ----

/// Trait for anything that can be encoded as nettext
pub trait Encode {
    fn term(&self) -> Result<'_>;
}

impl<'a, 'b> Encode for dec::Term<'a, 'b> {
    fn term(&self) -> Result<'_> {
        Ok(Term(T::Str(self.raw())))
    }
}

// ---- helpers to build terms ----

/// Term corresponding to a string (that may contain whitespace)
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(string("Hello world .").unwrap().encode(), b"Hello world .");
/// ```
pub fn string(s: &str) -> Result<'_> {
    for c in s.as_bytes().iter() {
        if !(is_string_char(*c) || is_whitespace(*c)) {
            return Err(Error::InvalidCharacter(*c));
        }
    }
    Ok(Term(T::Str(s.as_bytes())))
}

/// Same as `string`, but takes an owned `String`
pub fn string_owned(s: String) -> Result<'static> {
    for c in s.as_bytes().iter() {
        if !(is_string_char(*c) || is_whitespace(*c)) {
            return Err(Error::InvalidCharacter(*c));
        }
    }
    Ok(Term(T::OwnedStr(s.into_bytes())))
}

/// Include a raw nettext value
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(raw(b"Hello { a = b; c = d} .").unwrap().encode(), b"Hello { a = b; c = d} .");
/// ```
pub fn raw(bytes: &[u8]) -> Result<'_> {
    if decode(bytes).is_err() {
        return Err(Error::InvalidRaw);
    }
    Ok(Term(T::Str(bytes)))
}

/// Term corresponding to a byte slice,
/// encoded using url-safe base64 without padding.
/// Since empty strings are not possible in nettext,
/// an empty byte string is encoded as an empty list (`[]`).
///
/// Example:
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(bytes(b"hello, world!").encode(), b"aGVsbG8sIHdvcmxkIQ");
/// ```
pub fn bytes(bytes: &[u8]) -> Term<'static> {
    bytes_format(bytes, BytesEncoding::Base64 { split: false })
}

/// Same as `bytes()`, but splits the byte slice into 48-byte chunks
/// and encodes each chunk separately, producing a sequence of terms.
/// Useful to obtain cleaner representations of long byte slices,
/// in particular when they are used as dictionary keys.
pub fn bytes_split(bytes: &[u8]) -> Term<'static> {
    bytes_format(bytes, BytesEncoding::Base64 { split: true })
}
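// A minimal illustrative check of the splitting behaviour described above.
// This test module is an addition for illustration, not part of the
// library's original test suite: 128 bytes split into 48-byte chunks
// should encode as a sequence of three base64 terms.
#[cfg(test)]
mod bytes_split_example {
    use super::*;

    #[test]
    fn splits_into_three_chunks() {
        let data = (0..128u8).collect::<Vec<_>>();
        // 48 + 48 + 32 bytes -> three whitespace-separated base64 terms
        let encoded = bytes_split(&data).encode_string();
        assert_eq!(encoded.split_whitespace().count(), 3);
    }
}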
/// Term corresponding to a byte slice, encoded with an explicitly chosen
/// [`BytesEncoding`] instead of the default unsplit base64.
pub fn bytes_format(bytes: &[u8], encoding: BytesEncoding) -> Term<'static> {
    match encoding {
        BytesEncoding::Base64 { .. } | BytesEncoding::Hex { .. } if bytes.is_empty() => {
            Term(T::List(vec![]))
        }
        BytesEncoding::Base64 { split: false } => Term(T::OwnedStr(
            base64::encode_config(bytes, base64::URL_SAFE_NO_PAD).into_bytes(),
        )),
        BytesEncoding::Base64 { split: true } => {
            let chunks = bytes
                .chunks(48)
                .map(|b| {
                    T::OwnedStr(base64::encode_config(b, base64::URL_SAFE_NO_PAD).into_bytes())
                })
                .collect::<Vec<_>>();
            if chunks.len() > 1 {
                Term(T::Seq(chunks))
            } else {
                Term(chunks.into_iter().next().unwrap())
            }
        }
        BytesEncoding::Hex { split: false } => Term(T::OwnedStr(hex::encode(bytes).into_bytes())),
        BytesEncoding::Hex { split: true } => {
            let chunks = bytes
                .chunks(32)
                .map(|b| T::OwnedStr(hex::encode(b).into_bytes()))
                .collect::<Vec<_>>();
            if chunks.len() > 1 {
                Term(T::Seq(chunks))
            } else {
                Term(chunks.into_iter().next().unwrap())
            }
        }
    }
}

// ---- composed terms -----

/// Term corresponding to a sequence of terms. Subsequences are banned and will raise an error.
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(seq([
///     string("Hello").unwrap(),
///     string("world").unwrap()
/// ]).unwrap().encode(), b"Hello world");
/// ```
pub fn seq<'a, I: IntoIterator<Item = Term<'a>>>(terms: I) -> Result<'a> {
    let mut tmp = Vec::with_capacity(8);
    for t in terms {
        match t.0 {
            T::Seq(_) => return Err(Error::SeqInSeq),
            x => tmp.push(x),
        }
    }
    Ok(Term(T::Seq(tmp)))
}

/// Term corresponding to a sequence of terms. Sub-sequences are flattened.
pub fn seq_flatten<'a, I: IntoIterator<Item = Term<'a>>>(terms: I) -> Term<'a> {
    let mut tmp = Vec::with_capacity(8);
    for t in terms {
        match t.0 {
            T::Seq(t) => tmp.extend(t),
            x => tmp.push(x),
        }
    }
    Term(T::Seq(tmp))
}

/// Term corresponding to a list of terms.
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(list([
///     string("Hello").unwrap(),
///     string("world").unwrap()
/// ]).encode(), b"[\n  Hello;\n  world;\n]");
/// ```
pub fn list<'a, I: IntoIterator<Item = Term<'a>>>(terms: I) -> Term<'a> {
    let terms = terms.into_iter().map(|x| x.0).collect::<Vec<_>>();
    Term(T::List(terms))
}

/// Term corresponding to a dictionary of items
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(dict([
///     ("a", string("Hello").unwrap()),
///     ("b", string("world").unwrap())
/// ]).unwrap().encode(), b"{\n  a = Hello;\n  b = world;\n}");
/// ```
pub fn dict<'a, I: IntoIterator<Item = (&'a str, Term<'a>)>>(pairs: I) -> Result<'a> {
    let mut tmp = HashMap::new();
    for (k, v) in pairs {
        if tmp.insert(Cow::from(k.as_bytes()), v.0).is_some() {
            return Err(Error::DuplicateKey(k.to_string()));
        }
    }
    Ok(Term(T::Dict(tmp)))
}
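// A minimal sketch contrasting `seq_flatten` with `seq`. This test module
// is an illustrative addition: where `seq` rejects a nested sequence with
// `Error::SeqInSeq`, `seq_flatten` splices it into the parent.
#[cfg(test)]
mod seq_flatten_example {
    use super::*;

    #[test]
    fn flattens_nested_seqs() {
        let inner = seq([string("b").unwrap(), string("c").unwrap()]).unwrap();
        assert!(matches!(
            seq([string("a").unwrap(), inner]),
            Err(Error::SeqInSeq)
        ));

        let inner = seq([string("b").unwrap(), string("c").unwrap()]).unwrap();
        let flat = seq_flatten([string("a").unwrap(), inner]);
        assert_eq!(flat.encode(), b"a b c");
    }
}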
impl<'a> Term<'a> {
    /// Append a term to an existing term.
    /// Transforms the initial term into a seq if necessary.
    #[must_use]
    pub fn append(self, t: Term<'a>) -> Term<'a> {
        match self.0 {
            T::Seq(mut v) => {
                v.push(t.0);
                Term(T::Seq(v))
            }
            x => Term(T::Seq(vec![x, t.0])),
        }
    }

    /// Inserts a key-value pair into a term that is a dictionary.
    /// Fails if `self` is not a dictionary.
    pub fn insert(self, k: &'a str, v: Term<'a>) -> Result<'a> {
        match self.0 {
            T::Dict(mut d) => {
                if d.insert(Cow::from(k.as_bytes()), v.0).is_some() {
                    return Err(Error::DuplicateKey(k.to_string()));
                }
                Ok(Term(T::Dict(d)))
            }
            _ => Err(Error::NotADictionnary),
        }
    }
}

// ---- additional internal functions for serde module ----

#[cfg(feature = "serde")]
pub(crate) fn dict_owned_u8<'a, I: IntoIterator<Item = (Vec<u8>, Term<'a>)>>(
    pairs: I,
) -> Result<'a> {
    let mut tmp = HashMap::new();
    for (k, v) in pairs {
        tmp.insert(Cow::from(k), v.0);
    }
    Ok(Term(T::Dict(tmp)))
}

#[cfg(feature = "serde")]
pub(crate) fn safe_raw(bytes: &[u8]) -> Term<'_> {
    Term(T::Str(bytes))
}

#[cfg(feature = "serde")]
pub(crate) fn safe_raw_owned(bytes: Vec<u8>) -> Term<'static> {
    Term(T::OwnedStr(bytes))
}

// ---- encoding function ----

impl<'a> Term<'a> {
    /// Generate the nettext representation of a term
    pub fn encode(self) -> Vec<u8> {
        let mut buf = Vec::with_capacity(128);
        self.0.encode_aux(&mut buf, 0, true);
        buf
    }

    /// Generate the nettext representation of a term, as a `String`
    pub fn encode_string(self) -> String {
        // Safety: the public constructors validate their input (or produce
        // ASCII base64/hex), so the encoded buffer is valid UTF-8.
        unsafe { String::from_utf8_unchecked(self.encode()) }
    }

    /// Generate the concise nettext representation of a term
    pub fn encode_concise(self) -> Vec<u8> {
        let mut buf = Vec::with_capacity(128);
        self.0.encode_concise_aux(&mut buf);
        buf
    }
}
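// A small usage sketch for `append` and `insert` (an illustrative addition,
// not part of the original test suite): a dictionary is extended with
// `insert`, then appended to a string term, which promotes the string to a
// sequence; keys come out sorted in the concise encoding.
#[cfg(test)]
mod builder_example {
    use super::*;

    #[test]
    fn append_and_insert() {
        let d = dict([("a", string("x").unwrap())])
            .unwrap()
            .insert("b", string("y").unwrap())
            .unwrap();
        let term = string("CALL").unwrap().append(d);
        assert_eq!(term.encode_concise(), b"CALL {a=x;b=y}");
    }
}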
impl<'a> T<'a> {
    fn encode_aux(self, buf: &mut Vec<u8>, indent: usize, is_toplevel: bool) {
        match self {
            T::Str(s) => buf.extend_from_slice(s),
            T::OwnedStr(s) => buf.extend_from_slice(&s),
            T::Dict(mut d) => {
                if d.is_empty() {
                    buf.extend_from_slice(&[DICT_OPEN, DICT_CLOSE]);
                } else if d.len() == 1 {
                    // A single-entry dict fits on one line
                    let (k, v) = d.into_iter().next().unwrap();
                    buf.extend_from_slice(&[DICT_OPEN, b' ']);
                    buf.extend_from_slice(k.borrow());
                    buf.extend_from_slice(&[b' ', DICT_ASSIGN, b' ']);
                    v.encode_aux(buf, indent + 2, false);
                    buf.extend_from_slice(&[b' ', DICT_CLOSE]);
                } else {
                    buf.extend_from_slice(&[DICT_OPEN, b'\n']);
                    let indent2 = indent + 2;
                    // Sort keys so that the encoding is deterministic
                    let mut keys = d.keys().cloned().collect::<Vec<_>>();
                    keys.sort();
                    for k in keys {
                        let v = d.remove(&k).unwrap();
                        for _ in 0..indent2 {
                            buf.push(b' ');
                        }
                        buf.extend_from_slice(k.borrow());
                        buf.extend_from_slice(&[b' ', DICT_ASSIGN, b' ']);
                        v.encode_aux(buf, indent2, false);
                        buf.extend_from_slice(&[DICT_DELIM, b'\n']);
                    }
                    for _ in 0..indent {
                        buf.push(b' ');
                    }
                    buf.push(DICT_CLOSE);
                }
            }
            T::List(l) => {
                if l.is_empty() {
                    buf.extend_from_slice(&[LIST_OPEN, LIST_CLOSE]);
                } else if l.len() == 1 {
                    buf.extend_from_slice(&[LIST_OPEN, b' ']);
                    l.into_iter()
                        .next()
                        .unwrap()
                        .encode_aux(buf, indent + 2, false);
                    buf.extend_from_slice(&[b' ', LIST_CLOSE]);
                } else {
                    let indent2 = indent + 2;
                    buf.extend_from_slice(&[LIST_OPEN, b'\n']);
                    for item in l {
                        for _ in 0..indent2 {
                            buf.push(b' ');
                        }
                        item.encode_aux(buf, indent2, false);
                        buf.extend_from_slice(&[LIST_DELIM, b'\n']);
                    }
                    for _ in 0..indent {
                        buf.push(b' ');
                    }
                    buf.push(LIST_CLOSE);
                }
            }
            T::Seq(l) => {
                let indent2 = indent + 2;
                for (i, v) in l.into_iter().enumerate() {
                    // In nested sequences, wrap to a new indented line once
                    // the current line reaches 70 bytes
                    if !is_toplevel && buf.iter().rev().take_while(|c| **c != b'\n').count() >= 70 {
                        buf.push(b'\n');
                        for _ in 0..indent2 {
                            buf.push(b' ');
                        }
                    } else if i > 0 {
                        buf.push(b' ');
                    }
                    v.encode_aux(buf, indent2, is_toplevel);
                }
            }
        }
    }

    fn encode_concise_aux(self, buf: &mut Vec<u8>) {
        match self {
            T::Str(s) => buf.extend_from_slice(s),
            T::OwnedStr(s) => buf.extend_from_slice(&s),
            T::Dict(mut d) => {
                buf.push(DICT_OPEN);
                // Sort keys so that the encoding is deterministic
                let mut keys = d.keys().cloned().collect::<Vec<_>>();
                keys.sort();
                for (i, k) in keys.into_iter().enumerate() {
                    if i > 0 {
                        buf.push(DICT_DELIM);
                    }
                    let v = d.remove(&k).unwrap();
                    buf.extend_from_slice(k.borrow());
                    buf.push(DICT_ASSIGN);
                    v.encode_concise_aux(buf);
                }
                buf.push(DICT_CLOSE);
            }
            T::List(l) => {
                buf.push(LIST_OPEN);
                for (i, item) in l.into_iter().enumerate() {
                    if i > 0 {
                        buf.push(LIST_DELIM);
                    }
                    item.encode_concise_aux(buf);
                }
                buf.push(LIST_CLOSE);
            }
            T::Seq(l) => {
                for (i, v) in l.into_iter().enumerate() {
                    if i > 0 {
                        buf.push(b' ');
                    }
                    v.encode_concise_aux(buf);
                }
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::debug;

    #[test]
    fn complex1() {
        let input = seq([
            string("HELLO").unwrap(),
            string("alexhelloworld").unwrap(),
            list([string("dude").unwrap(), string("why").unwrap()]),
            dict([
                ("from", string("jxx").unwrap()),
                ("subject", string("hello").unwrap()),
                ("data", raw(b"{ f1 = plop; f2 = kuko }").unwrap()),
            ])
            .unwrap(),
        ])
        .unwrap();
        let expected = "HELLO alexhelloworld [
    dude;
    why;
  ] {
    data = { f1 = plop; f2 = kuko };
    from = jxx;
    subject = hello;
  }";
        assert_eq!(debug(&input.encode()), expected);
    }

    #[test]
    fn complex1_concise() {
        let input = seq([
            string("HELLO").unwrap(),
            string("alexhelloworld").unwrap(),
            list([string("dude").unwrap(), string("why").unwrap()]),
            dict([
                ("from", string("jxx").unwrap()),
                ("subject", string("hello").unwrap()),
                ("data", raw(b"{ f1 = plop; f2 = kuko }").unwrap()),
            ])
            .unwrap(),
        ])
        .unwrap();
        let expected_concise =
            "HELLO alexhelloworld [dude;why] {data={ f1 = plop; f2 = kuko };from=jxx;subject=hello}";
        assert_eq!(debug(&input.encode_concise()), expected_concise);
    }
}
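// A minimal check of the edge case documented on `bytes()` (this test module
// is an illustrative addition): since nettext has no empty string, an empty
// byte slice encodes as the empty list `[]` in both representations.
#[cfg(test)]
mod empty_bytes_example {
    use super::*;

    #[test]
    fn empty_bytes_is_empty_list() {
        assert_eq!(bytes(b"").encode(), b"[]");
        assert_eq!(bytes(b"").encode_concise(), b"[]");
    }
}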