nettext/src/enc/mod.rs

313 lines
8.4 KiB
Rust

//! Functions to generate nettext representations of data structures
use std::collections::HashMap;
use crate::dec::{self, decode};
use crate::{is_string_char, is_whitespace};
/// A term meant to be encoded into a nettext representation.
///
/// This is an opaque wrapper around the private `T` enum. Values are built
/// with the constructor functions of this module (`string`, `raw`, `list`,
/// `dict`, `bytes`, `bytes_split`) and turned into bytes with `encode`.
pub struct Term<'a>(T<'a>);
/// Internal representation of an encoder term.
enum T<'a> {
/// Borrowed bytes, copied verbatim into the output by the encoder.
Str(&'a [u8]),
/// Owned bytes, copied verbatim into the output by the encoder.
OwnedStr(Vec<u8>),
/// Key/value pairs; the encoder emits multi-key dictionaries in sorted key order.
Dict(HashMap<&'a [u8], T<'a>>),
/// Sequence of terms, emitted separated by spaces or line breaks.
List(Vec<T<'a>>),
/// Error recorded while building the term; `encode` returns it instead of output.
Err(Error),
}
/// An error that happened when creating a nettext encoder term
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Error {
    /// A string contained a byte that is neither a string character nor whitespace
    InvalidCharacter(u8),
    /// A raw value could not be decoded as nettext
    InvalidRaw,
    /// A dictionary operation was attempted on a term that is not a dictionary
    NotADictionnary,
    /// A list was placed directly inside another list
    ListInList,
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::InvalidCharacter(c) => {
                write!(f, "invalid character in string: 0x{:02x}", c)
            }
            Error::InvalidRaw => f.write_str("raw value is not valid nettext"),
            Error::NotADictionnary => f.write_str("term is not a dictionary"),
            Error::ListInList => f.write_str("a list cannot be directly nested in a list"),
        }
    }
}

// Lets callers use `?` into `Box<dyn std::error::Error>` and similar containers.
impl std::error::Error for Error {}
// ---- helpers to transform datatypes into encoder terms ----
/// Trait for anything that can be encoded as nettext
pub trait Encode {
/// Returns an encoder term representing `self`.
///
/// The returned term may borrow from `self` (its lifetime is tied to `&self`).
fn term(&self) -> Term<'_>;
}
impl<'a, 'b> Encode for dec::Term<'a, 'b> {
    /// Wraps the bytes returned by `raw()` in a borrowed string term,
    /// without validating them again.
    fn term(&self) -> Term<'_> {
        let raw_bytes = self.raw();
        Term(T::Str(raw_bytes))
    }
}
// ---- helpers to build terms ----
/// Term corresponding to a string (that may contain whitespace)
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(encode(string("Hello world .")).unwrap(), b"Hello world .");
/// ```
pub fn string(s: &str) -> Term<'_> {
for c in s.as_bytes().iter() {
if !(is_string_char(*c) || is_whitespace(*c)) {
return Term(T::Err(Error::InvalidCharacter(*c)));
}
}
Term(T::Str(s.as_bytes()))
}
/// Include a raw nettext value
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(encode(raw(b"Hello { a = b, c = d} .")).unwrap(), b"Hello { a = b, c = d} .");
/// ```
pub fn raw(bytes: &[u8]) -> Term<'_> {
if decode(bytes).is_err() {
return Term(T::Err(Error::InvalidRaw));
}
Term(T::Str(bytes))
}
/// Term corresponding to a list of terms
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(encode(list([
/// string("Hello"),
/// string("world")
/// ])).unwrap(), b"Hello world");
/// ```
pub fn list<'a, I: IntoIterator<Item = Term<'a>>>(terms: I) -> Term<'a> {
let mut tmp = Vec::with_capacity(8);
for t in terms {
match t.0 {
T::Err(e) => return Term(T::Err(e)),
T::List(_) => return Term(T::Err(Error::ListInList)),
x => tmp.push(x),
}
}
Term(T::List(tmp))
}
/// Term corresponding to a dictionnary of items
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(encode(dict([
/// ("a", string("Hello")),
/// ("b", string("world"))
/// ])).unwrap(), b"{\n a = Hello,\n b = world,\n}");
/// ```
pub fn dict<'a, I: IntoIterator<Item = (&'a str, Term<'a>)>>(pairs: I) -> Term<'a> {
let mut tmp = HashMap::new();
for (k, v) in pairs {
match v.0 {
T::Err(e) => return Term(T::Err(e)),
vv => {
tmp.insert(k.as_bytes(), vv);
}
}
}
Term(T::Dict(tmp))
}
/// Builds a term from a byte slice, encoded as URL-safe base64 without padding.
///
/// The encoded text is owned by the returned term, so it does not borrow
/// from `bytes`.
///
/// Example:
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(encode(bytes(b"hello, world!")).unwrap(), b"aGVsbG8sIHdvcmxkIQ");
/// ```
pub fn bytes(bytes: &[u8]) -> Term<'static> {
    let encoded = base64::encode_config(bytes, base64::URL_SAFE_NO_PAD);
    Term(T::OwnedStr(encoded.into_bytes()))
}
/// Same as `bytes()`, but splits the byte slice in 48-byte chunks
/// and encodes each chunk separately, putting them in a list of terms.
/// Usefull for long byte slices to have cleaner representations,
/// mainly usefull for dictionnary keys.
pub fn bytes_split(bytes: &[u8]) -> Term<'static> {
let chunks = bytes
.chunks(48)
.map(|b| T::OwnedStr(base64::encode_config(b, base64::URL_SAFE_NO_PAD).into_bytes()))
.collect::<Vec<_>>();
if chunks.len() > 1 {
Term(T::List(chunks))
} else {
Term(chunks.into_iter().next().unwrap_or(T::Str(b".")))
}
}
impl<'a> Term<'a> {
/// Append a term to an existing term.
/// Transforms the initial term into a list if necessary.
#[must_use]
pub fn append(self, t: Term<'a>) -> Term<'a> {
match t.0 {
T::Err(e) => Term(T::Err(e)),
tt => match self.0 {
T::List(mut v) => {
v.push(tt);
Term(T::List(v))
}
x => Term(T::List(vec![x, tt])),
},
}
}
/// Inserts a key-value pair into a term that is a dictionnary.
/// Fails if `self` is not a dictionnary.
#[must_use]
pub fn insert(self, k: &'a str, v: Term<'a>) -> Term<'a> {
match v.0 {
T::Err(e) => Term(T::Err(e)),
vv => match self.0 {
T::Dict(mut d) => {
d.insert(k.as_bytes(), vv);
Term(T::Dict(d))
}
_ => Term(T::Err(Error::NotADictionnary)),
},
}
}
/// Makes a NESTED term of this term, by putting it in a dict
/// with a single key `.`.
///
/// Example:
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(encode(list([string("hello"), string("world")]).nested()).unwrap(), b"{ . = hello world }");
/// ```
#[must_use]
pub fn nested(self) -> Term<'a> {
dict([(".", self)])
}
}
// ---- encoding function ----
/// Generate the nettext representation of a term
pub fn encode(t: Term<'_>) -> Result<Vec<u8>, Error> {
let mut buf = Vec::with_capacity(128);
encode_aux(&mut buf, t.0, 0, true)?;
Ok(buf)
}
/// Recursively writes `term` into `buf`.
///
/// * `indent` is the indentation, in spaces, of the enclosing construct.
///   Nested content is indented by two more spaces.
/// * `is_toplevel` disables line wrapping of list items. It is forwarded to
///   list items and cleared for dictionary values.
///
/// Returns the first error term encountered. `buf` may already contain
/// partial output when that happens.
fn encode_aux(
    buf: &mut Vec<u8>,
    term: T<'_>,
    indent: usize,
    is_toplevel: bool,
) -> Result<(), Error> {
    /// Appends `width` spaces to `out`.
    fn pad(out: &mut Vec<u8>, width: usize) {
        out.extend(std::iter::repeat(b' ').take(width));
    }

    let child_indent = indent + 2;
    match term {
        T::Err(e) => Err(e),
        T::Str(s) => {
            buf.extend_from_slice(s);
            Ok(())
        }
        T::OwnedStr(s) => {
            buf.extend_from_slice(&s);
            Ok(())
        }
        T::Dict(entries) => {
            match entries.len() {
                0 => buf.extend_from_slice(b"{}"),
                1 => {
                    // A single pair is written inline: `{ key = value }`.
                    buf.extend_from_slice(b"{ ");
                    if let Some((key, value)) = entries.into_iter().next() {
                        buf.extend_from_slice(key);
                        buf.extend_from_slice(b" = ");
                        encode_aux(buf, value, child_indent, false)?;
                    }
                    buf.extend_from_slice(b" }");
                }
                _ => {
                    // Several pairs: one per line, in sorted key order.
                    buf.extend_from_slice(b"{\n");
                    let mut sorted: Vec<_> = entries.into_iter().collect();
                    sorted.sort_by(|(a, _), (b, _)| a.cmp(b));
                    for (key, value) in sorted {
                        pad(buf, child_indent);
                        buf.extend_from_slice(key);
                        buf.extend_from_slice(b" = ");
                        encode_aux(buf, value, child_indent, false)?;
                        buf.extend_from_slice(b",\n");
                    }
                    pad(buf, indent);
                    buf.push(b'}');
                }
            }
            Ok(())
        }
        T::List(items) => {
            for (pos, item) in items.into_iter().enumerate() {
                // Outside the top level, break the line once the current one
                // (bytes since the last '\n') has reached 70 bytes.
                let line_is_full = !is_toplevel
                    && buf.iter().rev().take_while(|&&b| b != b'\n').count() >= 70;
                if line_is_full {
                    buf.push(b'\n');
                    pad(buf, child_indent);
                } else if pos > 0 {
                    buf.push(b' ');
                }
                encode_aux(buf, item, child_indent, is_toplevel)?;
            }
            Ok(())
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A top-level list mixing plain strings with a multi-key dictionary.
    #[test]
    fn complex1() {
        let input = list([
            string("HELLO"),
            string("alexhelloworld"),
            dict([
                ("from", string("jxx")),
                ("subject", string("hello")),
                ("data", raw(b"{ f1 = plop, f2 = kuko }")),
            ]),
        ]);
        // Built line by line so the expected indentation is explicit. The
        // dictionary is an item of the top-level list, so its keys are
        // indented by 4 spaces and its closing brace by 2. Keys come out sorted.
        let expected = [
            "HELLO alexhelloworld {",
            "    data = { f1 = plop, f2 = kuko },",
            "    from = jxx,",
            "    subject = hello,",
            "  }",
        ]
        .join("\n");
        let enc = encode(input).unwrap();
        eprintln!("{}", std::str::from_utf8(&enc).unwrap());
        eprintln!("{}", expected);
        assert_eq!(enc, expected.as_bytes());
    }

    /// A list directly inside a list is rejected, but is accepted once it
    /// has been wrapped with `nested()`.
    #[test]
    fn nested() {
        assert!(encode(list([
            string("a"),
            string("b"),
            list([string("c"), string("d")])
        ]))
        .is_err());
        assert_eq!(
            encode(list([
                string("a"),
                string("b"),
                list([string("c"), string("d")]).nested()
            ]))
            .unwrap(),
            b"a b { . = c d }"
        );
    }
}