Fixes for bytes, fix outer newlines, NESTED datatypes
This commit is contained in:
parent
22fe9568bd
commit
e7ea915121
4 changed files with 126 additions and 27 deletions
|
@ -17,6 +17,9 @@ A term can be of any of the following kinds:
|
||||||
- a dict, which maps strings (as defined above) to any term type
|
- a dict, which maps strings (as defined above) to any term type
|
||||||
- a list, which is a consecutive sequence of at least 2 strings or dicts (can be mixed), simply separated by whitespace
|
- a list, which is a consecutive sequence of at least 2 strings or dicts (can be mixed), simply separated by whitespace
|
||||||
|
|
||||||
|
Nested lists can be represented using a special dictionary with a single key, `.`,
|
||||||
|
for instance `TEST a { . = 0 4 2 1 9 7 0 } c`.
|
||||||
|
|
||||||
Dicts are represented as follows:
|
Dicts are represented as follows:
|
||||||
|
|
||||||
```
|
```
|
||||||
|
@ -64,6 +67,7 @@ Terms can be interpreted in a number of different ways, depending on the context
|
||||||
- a value, which is either the second item in case there are only two items, or the list composed of all items starting from the second if there are more than two
|
- a value, which is either the second item in case there are only two items, or the list composed of all items starting from the second if there are more than two
|
||||||
- DICT: if the term is a dict, interpret it as such
|
- DICT: if the term is a dict, interpret it as such
|
||||||
- LIST: if the term is a string or a dict, interpret it as a list composed of that single term. Otherwise, the term is a list, interpret it as a list of terms.
|
- LIST: if the term is a string or a dict, interpret it as a list composed of that single term. Otherwise, the term is a list, interpret it as a list of terms.
|
||||||
|
- NESTED: if the term is a dict with a single key `.`, interpret it as the term associated to that key
|
||||||
|
|
||||||
## Data mappings
|
## Data mappings
|
||||||
|
|
||||||
|
|
|
@ -356,6 +356,31 @@ impl<'a, 'b> Term<'a, 'b> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Checks term is a dictionary with a single key `.`,
|
||||||
|
/// and returns the associated value.
|
||||||
|
///
|
||||||
|
/// Example:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use nettext::dec::decode;
|
||||||
|
///
|
||||||
|
/// let term = decode(b"{ . = a b c d e }").unwrap();
|
||||||
|
/// assert_eq!(term.nested().unwrap().raw(), b"a b c d e");
|
||||||
|
/// ```
|
||||||
|
pub fn nested(&self) -> Result<Term<'a, '_>, TypeError> {
|
||||||
|
match self.0.mkref() {
|
||||||
|
AnyTerm::DictRef(_, d) if d.len() == 1 => {
|
||||||
|
let (k, v) = d.iter().next().unwrap();
|
||||||
|
if k != b"." {
|
||||||
|
Err(TypeError::WrongType("NESTED"))
|
||||||
|
} else {
|
||||||
|
Ok(Term(v.mkref()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => Err(TypeError::WrongType("NESTED")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ---- TYPE CASTS ----
|
// ---- TYPE CASTS ----
|
||||||
|
|
||||||
/// Try to interpret this str as an i64
|
/// Try to interpret this str as an i64
|
||||||
|
|
105
src/enc/mod.rs
105
src/enc/mod.rs
|
@ -22,6 +22,7 @@ pub enum Error {
|
||||||
InvalidCharacter(u8),
|
InvalidCharacter(u8),
|
||||||
InvalidRaw,
|
InvalidRaw,
|
||||||
NotADictionnary,
|
NotADictionnary,
|
||||||
|
ListInList,
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---- helpers to transform datatypes into encoder terms ----
|
// ---- helpers to transform datatypes into encoder terms ----
|
||||||
|
@ -84,6 +85,7 @@ pub fn list<'a, I: IntoIterator<Item = Term<'a>>>(terms: I) -> Term<'a> {
|
||||||
for t in terms {
|
for t in terms {
|
||||||
match t.0 {
|
match t.0 {
|
||||||
T::Err(e) => return Term(T::Err(e)),
|
T::Err(e) => return Term(T::Err(e)),
|
||||||
|
T::List(_) => return Term(T::Err(Error::ListInList)),
|
||||||
x => tmp.push(x),
|
x => tmp.push(x),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -113,7 +115,7 @@ pub fn dict<'a, I: IntoIterator<Item = (&'a str, Term<'a>)>>(pairs: I) -> Term<'
|
||||||
Term(T::Dict(tmp))
|
Term(T::Dict(tmp))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Term corresponding to a byte slice,
|
/// Term corresponding to a byte slice,
|
||||||
/// encoding using base64 url-safe encoding without padding
|
/// encoding using base64 url-safe encoding without padding
|
||||||
///
|
///
|
||||||
/// Example:
|
/// Example:
|
||||||
|
@ -124,6 +126,16 @@ pub fn dict<'a, I: IntoIterator<Item = (&'a str, Term<'a>)>>(pairs: I) -> Term<'
|
||||||
/// assert_eq!(encode(bytes(b"hello, world!")).unwrap(), b"aGVsbG8sIHdvcmxkIQ");
|
/// assert_eq!(encode(bytes(b"hello, world!")).unwrap(), b"aGVsbG8sIHdvcmxkIQ");
|
||||||
/// ```
|
/// ```
|
||||||
pub fn bytes(bytes: &[u8]) -> Term<'static> {
|
pub fn bytes(bytes: &[u8]) -> Term<'static> {
|
||||||
|
Term(T::OwnedStr(
|
||||||
|
base64::encode_config(bytes, base64::URL_SAFE_NO_PAD).into_bytes(),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Same as `bytes()`, but splits the byte slice in 48-byte chunks
|
||||||
|
/// and encodes each chunk separately, putting them in a list of terms.
|
||||||
|
/// Useful for long byte slices to have cleaner representations,
|
||||||
|
/// mainly useful for dictionary keys.
|
||||||
|
pub fn bytes_split(bytes: &[u8]) -> Term<'static> {
|
||||||
let chunks = bytes
|
let chunks = bytes
|
||||||
.chunks(48)
|
.chunks(48)
|
||||||
.map(|b| T::OwnedStr(base64::encode_config(b, base64::URL_SAFE_NO_PAD).into_bytes()))
|
.map(|b| T::OwnedStr(base64::encode_config(b, base64::URL_SAFE_NO_PAD).into_bytes()))
|
||||||
|
@ -138,6 +150,7 @@ pub fn bytes(bytes: &[u8]) -> Term<'static> {
|
||||||
impl<'a> Term<'a> {
|
impl<'a> Term<'a> {
|
||||||
/// Append a term to an existing term.
|
/// Append a term to an existing term.
|
||||||
/// Transforms the initial term into a list if necessary.
|
/// Transforms the initial term into a list if necessary.
|
||||||
|
#[must_use]
|
||||||
pub fn append(self, t: Term<'a>) -> Term<'a> {
|
pub fn append(self, t: Term<'a>) -> Term<'a> {
|
||||||
match t.0 {
|
match t.0 {
|
||||||
T::Err(e) => Term(T::Err(e)),
|
T::Err(e) => Term(T::Err(e)),
|
||||||
|
@ -153,6 +166,7 @@ impl<'a> Term<'a> {
|
||||||
|
|
||||||
/// Inserts a key-value pair into a term that is a dictionnary.
|
/// Inserts a key-value pair into a term that is a dictionnary.
|
||||||
/// Fails if `self` is not a dictionnary.
|
/// Fails if `self` is not a dictionnary.
|
||||||
|
#[must_use]
|
||||||
pub fn insert(self, k: &'a str, v: Term<'a>) -> Term<'a> {
|
pub fn insert(self, k: &'a str, v: Term<'a>) -> Term<'a> {
|
||||||
match v.0 {
|
match v.0 {
|
||||||
T::Err(e) => Term(T::Err(e)),
|
T::Err(e) => Term(T::Err(e)),
|
||||||
|
@ -165,6 +179,21 @@ impl<'a> Term<'a> {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Makes a NESTED term of this term, by putting it in a dict
|
||||||
|
/// with a single key `.`.
|
||||||
|
///
|
||||||
|
/// Example:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use nettext::enc::*;
|
||||||
|
///
|
||||||
|
/// assert_eq!(encode(list([string("hello"), string("world")]).nested()).unwrap(), b"{ . = hello world }");
|
||||||
|
/// ```
|
||||||
|
#[must_use]
|
||||||
|
pub fn nested(self) -> Term<'a> {
|
||||||
|
dict([(".", self)])
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---- encoding function ----
|
// ---- encoding function ----
|
||||||
|
@ -172,38 +201,54 @@ impl<'a> Term<'a> {
|
||||||
/// Generate the nettext representation of a term
|
/// Generate the nettext representation of a term
|
||||||
pub fn encode(t: Term<'_>) -> Result<Vec<u8>, Error> {
|
pub fn encode(t: Term<'_>) -> Result<Vec<u8>, Error> {
|
||||||
let mut buf = Vec::with_capacity(128);
|
let mut buf = Vec::with_capacity(128);
|
||||||
encode_aux(&mut buf, t.0, 0)?;
|
encode_aux(&mut buf, t.0, 0, true)?;
|
||||||
Ok(buf)
|
Ok(buf)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn encode_aux(buf: &mut Vec<u8>, term: T<'_>, indent: usize) -> Result<(), Error> {
|
fn encode_aux(
|
||||||
|
buf: &mut Vec<u8>,
|
||||||
|
term: T<'_>,
|
||||||
|
indent: usize,
|
||||||
|
is_toplevel: bool,
|
||||||
|
) -> Result<(), Error> {
|
||||||
match term {
|
match term {
|
||||||
T::Str(s) => buf.extend_from_slice(s),
|
T::Str(s) => buf.extend_from_slice(s),
|
||||||
T::OwnedStr(s) => buf.extend_from_slice(&s),
|
T::OwnedStr(s) => buf.extend_from_slice(&s),
|
||||||
T::Dict(mut d) => {
|
T::Dict(mut d) => {
|
||||||
buf.extend_from_slice(b"{\n");
|
if d.is_empty() {
|
||||||
let indent2 = indent + 2;
|
buf.extend_from_slice(b"{}");
|
||||||
let mut keys = d.keys().cloned().collect::<Vec<_>>();
|
} else if d.len() == 1 {
|
||||||
keys.sort();
|
buf.extend_from_slice(b"{ ");
|
||||||
for k in keys {
|
let (k, v) = d.into_iter().next().unwrap();
|
||||||
let v = d.remove(k).unwrap();
|
|
||||||
for _ in 0..indent2 {
|
|
||||||
buf.push(b' ');
|
|
||||||
}
|
|
||||||
buf.extend_from_slice(k);
|
buf.extend_from_slice(k);
|
||||||
buf.extend_from_slice(b" = ");
|
buf.extend_from_slice(b" = ");
|
||||||
encode_aux(buf, v, indent2)?;
|
encode_aux(buf, v, indent + 2, false)?;
|
||||||
buf.extend_from_slice(b",\n");
|
buf.extend_from_slice(b" }");
|
||||||
|
} else {
|
||||||
|
buf.extend_from_slice(b"{\n");
|
||||||
|
let indent2 = indent + 2;
|
||||||
|
let mut keys = d.keys().cloned().collect::<Vec<_>>();
|
||||||
|
keys.sort();
|
||||||
|
for k in keys {
|
||||||
|
let v = d.remove(k).unwrap();
|
||||||
|
for _ in 0..indent2 {
|
||||||
|
buf.push(b' ');
|
||||||
|
}
|
||||||
|
buf.extend_from_slice(k);
|
||||||
|
buf.extend_from_slice(b" = ");
|
||||||
|
encode_aux(buf, v, indent2, false)?;
|
||||||
|
buf.extend_from_slice(b",\n");
|
||||||
|
}
|
||||||
|
for _ in 0..indent {
|
||||||
|
buf.push(b' ');
|
||||||
|
}
|
||||||
|
buf.push(b'}');
|
||||||
}
|
}
|
||||||
for _ in 0..indent {
|
|
||||||
buf.push(b' ');
|
|
||||||
}
|
|
||||||
buf.push(b'}');
|
|
||||||
}
|
}
|
||||||
T::List(l) => {
|
T::List(l) => {
|
||||||
let indent2 = indent + 2;
|
let indent2 = indent + 2;
|
||||||
for (i, v) in l.into_iter().enumerate() {
|
for (i, v) in l.into_iter().enumerate() {
|
||||||
if buf.iter().rev().take_while(|c| **c != b'\n').count() >= 70 {
|
if !is_toplevel && buf.iter().rev().take_while(|c| **c != b'\n').count() >= 70 {
|
||||||
buf.push(b'\n');
|
buf.push(b'\n');
|
||||||
for _ in 0..indent2 {
|
for _ in 0..indent2 {
|
||||||
buf.push(b' ');
|
buf.push(b' ');
|
||||||
|
@ -211,7 +256,7 @@ fn encode_aux(buf: &mut Vec<u8>, term: T<'_>, indent: usize) -> Result<(), Error
|
||||||
} else if i > 0 {
|
} else if i > 0 {
|
||||||
buf.push(b' ');
|
buf.push(b' ');
|
||||||
}
|
}
|
||||||
encode_aux(buf, v, indent2)?;
|
encode_aux(buf, v, indent2, is_toplevel)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
T::Err(e) => return Err(e),
|
T::Err(e) => return Err(e),
|
||||||
|
@ -244,4 +289,24 @@ mod tests {
|
||||||
eprintln!("{}", std::str::from_utf8(&expected[..]).unwrap());
|
eprintln!("{}", std::str::from_utf8(&expected[..]).unwrap());
|
||||||
assert_eq!(&enc, &expected[..]);
|
assert_eq!(&enc, &expected[..]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn nested() {
|
||||||
|
assert!(encode(list([
|
||||||
|
string("a"),
|
||||||
|
string("b"),
|
||||||
|
list([string("c"), string("d")])
|
||||||
|
]))
|
||||||
|
.is_err());
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
encode(list([
|
||||||
|
string("a"),
|
||||||
|
string("b"),
|
||||||
|
list([string("c"), string("d")]).nested()
|
||||||
|
]))
|
||||||
|
.unwrap(),
|
||||||
|
b"a b { . = c d }"
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
19
src/lib.rs
19
src/lib.rs
|
@ -15,6 +15,7 @@
|
||||||
//! ("a", string("hello")),
|
//! ("a", string("hello")),
|
||||||
//! ("b", string("world")),
|
//! ("b", string("world")),
|
||||||
//! ("c", raw(b"{ a = 12, b = 42 }")),
|
//! ("c", raw(b"{ a = 12, b = 42 }")),
|
||||||
|
//! ("d", bytes_split(&((0..128u8).collect::<Vec<_>>()))),
|
||||||
//! ]),
|
//! ]),
|
||||||
//! keypair.public.term(),
|
//! keypair.public.term(),
|
||||||
//! ])).unwrap();
|
//! ])).unwrap();
|
||||||
|
@ -55,21 +56,25 @@
|
||||||
//! a = hello,
|
//! a = hello,
|
||||||
//! b = world,
|
//! b = world,
|
||||||
//! c = { a = 12, b = 42 },
|
//! c = { a = 12, b = 42 },
|
||||||
//! } YutjCfgXXYNkNR1IQiNi3pFKpvqfwICkLc3EJOekcq4
|
//! d = AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4v
|
||||||
|
//! MDEyMzQ1Njc4OTo7PD0-P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5f
|
||||||
|
//! YGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn8,
|
||||||
|
//! } 1hUAS2C0lzHXHWIvXqwuhUYVPlu3BbZ7ANLUMH_OYjo
|
||||||
//! ```
|
//! ```
|
||||||
//!
|
//!
|
||||||
//! And the value of `text2` would be as follows:
|
//! And the value of `text2` would be as follows:
|
||||||
//! ```raw
|
//! ```raw
|
||||||
//! {
|
//! {
|
||||||
//! hash = IT4ay3XM4SycgYjxV8_Ioxqqt9JwdFK0sZqd-TOhOl9IGxbTQwK8vPy409h59xCV
|
//! hash = Se6Wmbh3fbFQ9_ilE6zGbxNaEd9v5CHAb30p46Fxpi74iblRb9fXmGAiMkXnSe4DePTwb16zGAz_Ux4ZAG9s3w,
|
||||||
//! NrMjDC1YIS7bXIrrv_Tvbw,
|
|
||||||
//! payload = CALL myfunction {
|
//! payload = CALL myfunction {
|
||||||
//! a = hello,
|
//! a = hello,
|
||||||
//! b = world,
|
//! b = world,
|
||||||
//! c = { a = 12, b = 42 },
|
//! c = { a = 12, b = 42 },
|
||||||
//! } YutjCfgXXYNkNR1IQiNi3pFKpvqfwICkLc3EJOekcq4,
|
//! d = AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4v
|
||||||
//! signature = UFje_N6vnrN23-ygB1yr8LwSipSwxrMLEB2ov6bvU4rR9BmfLjxyq8zTzKxb_VNw
|
//! MDEyMzQ1Njc4OTo7PD0-P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5f
|
||||||
//! UABMRcy-KiITwpY_b3UdBg,
|
//! YGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn8,
|
||||||
|
//! } 1hUAS2C0lzHXHWIvXqwuhUYVPlu3BbZ7ANLUMH_OYjo,
|
||||||
|
//! signature = 8mo3aeQD7JAdqbDcm7oVdaU0XamDwg03JtC3mfsWhEy_ZkNmWBFZefIDlzBR3XpnF0szTzEwtoPFfnR1fz6fAA,
|
||||||
//! }
|
//! }
|
||||||
//! ```
|
//! ```
|
||||||
//!
|
//!
|
||||||
|
@ -86,7 +91,7 @@ pub(crate) const DICT_OPEN: u8 = b'{';
|
||||||
pub(crate) const DICT_CLOSE: u8 = b'}';
|
pub(crate) const DICT_CLOSE: u8 = b'}';
|
||||||
pub(crate) const DICT_ASSIGN: u8 = b'=';
|
pub(crate) const DICT_ASSIGN: u8 = b'=';
|
||||||
pub(crate) const DICT_DELIM: u8 = b',';
|
pub(crate) const DICT_DELIM: u8 = b',';
|
||||||
pub(crate) const STR_EXTRA_CHARS: &[u8] = b"._-*?";
|
pub(crate) const STR_EXTRA_CHARS: &[u8] = b"._-+*?";
|
||||||
|
|
||||||
pub(crate) fn is_string_char(c: u8) -> bool {
|
pub(crate) fn is_string_char(c: u8) -> bool {
|
||||||
c.is_ascii_alphanumeric() || STR_EXTRA_CHARS.contains(&c)
|
c.is_ascii_alphanumeric() || STR_EXTRA_CHARS.contains(&c)
|
||||||
|
|
Loading…
Reference in a new issue