Switch64 encoding available in serde encoder (TODO decoder)

This commit is contained in:
Alex 2022-12-15 16:47:04 +01:00
parent 1579030b44
commit eae4a0443a
Signed by: lx
GPG Key ID: 0E496D15096376BE
7 changed files with 285 additions and 52 deletions

View File

@ -2,7 +2,7 @@
name = "nettext"
description = "A text-based data format for cryptographic network protocols"
authors = ["Alex Auvolat <alex@adnab.me>"]
version = "0.3.1"
version = "0.3.2"
edition = "2021"
license = "AGPL-3.0"
readme = "README.md"
@ -12,6 +12,7 @@ readme = "README.md"
[dependencies]
nom = "7.1"
base64 = "0.13"
hex = "0.4"
err-derive = "0.3"
dryoc = { version = "0.4", optional = true }

View File

@ -23,7 +23,7 @@ use std::borrow::{Borrow, Cow};
use std::collections::HashMap;
use crate::dec::{self, decode};
use crate::{is_string_char, is_whitespace};
use crate::{is_string_char, is_whitespace, switch64, BytesEncoding};
pub use error::Error;
@ -96,6 +96,71 @@ pub fn raw(bytes: &[u8]) -> Result<'_> {
Ok(Term(T::Str(bytes)))
}
/// Term corresponding to a byte slice,
/// encoded using base64 url-safe encoding without padding
/// (delegates to `bytes_format` with `Base64 { split: false }`).
/// Since empty strings are not possible in nettext,
/// an empty byte string is encoded as an empty list (`[]`).
///
/// Example:
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(bytes(b"hello, world!").encode(), b"aGVsbG8sIHdvcmxkIQ");
/// ```
pub fn bytes(bytes: &[u8]) -> Term<'static> {
    bytes_format(bytes, BytesEncoding::Base64 { split: false })
}
/// Same as `bytes()`, but splits the byte slice in 48-byte chunks
/// and encodes each chunk separately, putting them in a sequence of terms.
/// Useful for long byte slices to have cleaner representations,
/// mainly useful for dictionary keys.
pub fn bytes_split(bytes: &[u8]) -> Term<'static> {
    bytes_format(bytes, BytesEncoding::Base64 { split: true })
}
/// Term corresponding to a byte slice, encoded with the requested
/// `BytesEncoding`.
///
/// Since empty strings are not possible in nettext, an empty byte
/// string is encoded as an empty list (`[]`) regardless of the
/// requested encoding.
pub fn bytes_format(bytes: &[u8], encoding: BytesEncoding) -> Term<'static> {
    // An empty string term is not representable in nettext, so an empty
    // byte string must become an empty list for EVERY encoding. The
    // previous guard only covered Base64/Hex; Switch64 of b"" would have
    // produced an invalid empty string term.
    if bytes.is_empty() {
        return Term(T::List(vec![]));
    }
    match encoding {
        BytesEncoding::Base64 { split: false } => Term(T::OwnedStr(
            base64::encode_config(bytes, base64::URL_SAFE_NO_PAD).into_bytes(),
        )),
        BytesEncoding::Base64 { split: true } => {
            // Encode each 48-byte chunk on its own so long values wrap
            // into a sequence of shorter terms.
            let chunks = bytes
                .chunks(48)
                .map(|b| {
                    T::OwnedStr(base64::encode_config(b, base64::URL_SAFE_NO_PAD).into_bytes())
                })
                .collect::<Vec<_>>();
            if chunks.len() > 1 {
                Term(T::Seq(chunks))
            } else {
                Term(chunks.into_iter().next().unwrap())
            }
        }
        BytesEncoding::Hex { split: false } => Term(T::OwnedStr(hex::encode(bytes).into_bytes())),
        BytesEncoding::Hex { split: true } => {
            // 32 raw bytes -> 64 hex characters per chunk.
            let chunks = bytes
                .chunks(32)
                .map(|b| T::OwnedStr(hex::encode(b).into_bytes()))
                .collect::<Vec<_>>();
            if chunks.len() > 1 {
                Term(T::Seq(chunks))
            } else {
                Term(chunks.into_iter().next().unwrap())
            }
        }
        BytesEncoding::Switch64 { allow_whitespace } => {
            Term(T::OwnedStr(switch64::encode(bytes, allow_whitespace)))
        }
    }
}
// ---- composed terms -----
/// Term corresponding to a sequence of terms. Subsequences are banned and will raise an error.
///
/// ```
@ -164,38 +229,6 @@ pub fn dict<'a, I: IntoIterator<Item = (&'a str, Term<'a>)>>(pairs: I) -> Result
Ok(Term(T::Dict(tmp)))
}
/// Term corresponding to a byte slice,
/// encoding using base64 url-safe encoding without padding
///
/// Example:
///
/// ```
/// use nettext::enc::*;
///
/// assert_eq!(bytes(b"hello, world!").encode(), b"aGVsbG8sIHdvcmxkIQ");
/// ```
pub fn bytes(bytes: &[u8]) -> Term<'static> {
Term(T::OwnedStr(
base64::encode_config(bytes, base64::URL_SAFE_NO_PAD).into_bytes(),
))
}
/// Same as `bytes()`, but splits the byte slice in 48-byte chunks
/// and encodes each chunk separately, putting them in a sequence of terms.
/// Useful for long byte slices to have cleaner representations,
/// mainly useful for dictionary keys.
pub fn bytes_split(bytes: &[u8]) -> Term<'static> {
let chunks = bytes
.chunks(48)
.map(|b| T::OwnedStr(base64::encode_config(b, base64::URL_SAFE_NO_PAD).into_bytes()))
.collect::<Vec<_>>();
if chunks.len() > 1 {
Term(T::Seq(chunks))
} else {
Term(chunks.into_iter().next().unwrap_or(T::Str(b".")))
}
}
impl<'a> Term<'a> {
/// Append a term to an existing term.
/// Transforms the initial term into a seq if necessary.

View File

@ -87,6 +87,7 @@
pub mod dec;
pub mod enc;
pub mod switch64;
#[cfg(feature = "dryoc")]
pub mod crypto;
@ -94,6 +95,35 @@ pub mod crypto;
#[cfg(feature = "serde")]
pub mod serde;
/// Possible encodings for byte strings in NetText
// Derives extended with Debug/PartialEq/Eq: backward-compatible and
// expected on a small public Copy enum (diagnostics, comparisons, tests).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BytesEncoding {
    /// Base64 encoding (default); `split` cuts long values into chunked terms
    Base64 { split: bool },
    /// Hexadecimal encoding; `split` cuts long values into chunked terms
    Hex { split: bool },
    /// Switch64 encoding, a mix of plain text and base64
    Switch64 { allow_whitespace: bool },
}

impl Default for BytesEncoding {
    fn default() -> Self {
        BytesEncoding::Base64 { split: true }
    }
}

impl BytesEncoding {
    /// Returns a copy of this encoding with every whitespace-introducing
    /// option disabled (no chunk splitting, no whitespace in Switch64),
    /// e.g. for encoding dictionary keys.
    pub fn without_whitespace(&self) -> Self {
        match self {
            BytesEncoding::Base64 { .. } => BytesEncoding::Base64 { split: false },
            BytesEncoding::Hex { .. } => BytesEncoding::Hex { split: false },
            BytesEncoding::Switch64 { .. } => BytesEncoding::Switch64 {
                allow_whitespace: false,
            },
        }
    }
}
// ---- syntactic elements of the data format ----
pub(crate) const DICT_OPEN: u8 = b'{';
@ -103,12 +133,17 @@ pub(crate) const DICT_DELIM: u8 = b',';
pub(crate) const LIST_OPEN: u8 = b'[';
pub(crate) const LIST_CLOSE: u8 = b']';
pub(crate) const LIST_DELIM: u8 = b',';
pub(crate) const STR_EXTRA_CHARS: &[u8] = b"._-+*?@:";
pub(crate) const STR_EXTRA_CHARS: &[u8] = b"._-+*?@:/\\";
pub(crate) const SWITCH64_SEPARATOR: u8 = b'\\';
pub(crate) const SWITCH64_EXTRA_CHARS: &[u8] = b"._-+*?@:/";
/// Returns true if `c` may appear in a bare (unquoted) nettext string:
/// ASCII alphanumerics plus the extra punctuation set.
#[inline]
pub(crate) fn is_string_char(c: u8) -> bool {
    STR_EXTRA_CHARS.contains(&c) || c.is_ascii_alphanumeric()
}
/// Returns true if `c` is ASCII whitespace.
#[inline]
pub(crate) fn is_whitespace(c: u8) -> bool {
    // Exactly the set recognized by `u8::is_ascii_whitespace`:
    // space, horizontal tab, line feed, form feed, carriage return.
    matches!(c, b' ' | b'\t' | b'\n' | b'\x0C' | b'\r')
}

View File

@ -14,7 +14,7 @@ use crate::serde::error::{Error, Result};
pub struct Deserializer<'de, 'a>(Term<'de, 'a>);
impl<'de, 'a> Deserializer<'de, 'a> {
fn from_term(input: &'a Term<'de, 'a>) -> Deserializer<'de, 'a> {
pub fn from_term(input: &'a Term<'de, 'a>) -> Deserializer<'de, 'a> {
Deserializer(Term(input.0.mkref()))
}
}

View File

@ -4,6 +4,7 @@ mod de;
mod error;
mod ser;
pub use crate::BytesEncoding;
pub use de::{from_bytes, from_term, Deserializer};
pub use error::{Error, Result};
pub use ser::{to_bytes, to_term, Serializer};
@ -31,6 +32,17 @@ mod tests {
eprintln!("Serialized (concise): {}", ser_concise);
assert_eq!(ser_concise, expected_concise);
assert_eq!(from_bytes::<T>(ser_concise.as_bytes()).unwrap(), input);
let ser_str_hex = input
.serialize(&mut Serializer {
string_format: BytesEncoding::Switch64 {
allow_whitespace: true,
},
bytes_format: BytesEncoding::Hex { split: true },
})
.unwrap()
.encode();
panic!("{}", debug(&ser_str_hex));
}
#[test]

View File

@ -2,17 +2,22 @@ use serde::{ser, Serialize};
use crate::enc::*;
use crate::serde::error::{Error, Result};
use crate::BytesEncoding;
use serde::ser::Error as SerError;
/// Serde serializer for nettext
pub struct Serializer;
#[derive(Clone, Copy, Default)]
pub struct Serializer {
    // Encoding applied to Rust strings (used by `serialize_str`)
    pub string_format: BytesEncoding,
    // Encoding applied to raw byte slices (used by `serialize_bytes`)
    pub bytes_format: BytesEncoding,
}
/// Serialize value to nettext encoder term
pub fn to_term<T>(value: &T) -> Result<Term<'static>>
where
T: Serialize,
{
value.serialize(&mut Serializer)
value.serialize(&mut Serializer::default())
}
/// Serialize value to nettext
@ -20,7 +25,7 @@ pub fn to_bytes<T>(value: &T) -> Result<Vec<u8>>
where
T: Serialize,
{
Ok(value.serialize(&mut Serializer)?.encode())
Ok(value.serialize(&mut Serializer::default())?.encode())
}
impl<'a> ser::Serializer for &'a mut Serializer {
@ -89,11 +94,11 @@ impl<'a> ser::Serializer for &'a mut Serializer {
}
fn serialize_str(self, v: &str) -> Result<Self::Ok> {
Ok(bytes(v.as_bytes()))
Ok(bytes_format(v.as_bytes(), self.string_format))
}
fn serialize_bytes(self, v: &[u8]) -> Result<Self::Ok> {
Ok(bytes(v))
Ok(bytes_format(v, self.bytes_format))
}
fn serialize_none(self) -> Result<Self::Ok> {
@ -148,12 +153,16 @@ impl<'a> ser::Serializer for &'a mut Serializer {
}
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq> {
Ok(ListSerializer { items: vec![] })
Ok(ListSerializer {
items: vec![],
ser: *self,
})
}
fn serialize_tuple(self, len: usize) -> Result<Self::SerializeTuple> {
Ok(SeqSerializer {
items: Vec::with_capacity(len),
ser: *self,
})
}
@ -164,7 +173,7 @@ impl<'a> ser::Serializer for &'a mut Serializer {
) -> Result<Self::SerializeTupleStruct> {
let mut items = Vec::with_capacity(len + 1);
items.push(string(name)?);
Ok(SeqSerializer { items })
Ok(SeqSerializer { items, ser: *self })
}
fn serialize_tuple_variant(
@ -176,13 +185,14 @@ impl<'a> ser::Serializer for &'a mut Serializer {
) -> Result<Self::SerializeTupleVariant> {
let mut items = Vec::with_capacity(len + 1);
items.push(string_owned(format!("{}.{}", name, variant))?);
Ok(SeqSerializer { items })
Ok(SeqSerializer { items, ser: *self })
}
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap> {
Ok(MapSerializer {
next: None,
fields: vec![],
ser: *self,
})
}
@ -190,6 +200,7 @@ impl<'a> ser::Serializer for &'a mut Serializer {
Ok(StructSerializer {
name,
fields: Vec::with_capacity(len),
ser: *self,
})
}
@ -204,6 +215,7 @@ impl<'a> ser::Serializer for &'a mut Serializer {
name,
variant,
fields: Vec::with_capacity(len),
ser: *self,
})
}
}
@ -212,6 +224,7 @@ impl<'a> ser::Serializer for &'a mut Serializer {
pub struct SeqSerializer {
items: Vec<Term<'static>>,
ser: Serializer,
}
impl ser::SerializeTuple for SeqSerializer {
@ -222,7 +235,7 @@ impl ser::SerializeTuple for SeqSerializer {
where
T: ?Sized + Serialize,
{
self.items.push(value.serialize(&mut Serializer)?);
self.items.push(value.serialize(&mut self.ser)?);
Ok(())
}
@ -239,7 +252,7 @@ impl ser::SerializeTupleStruct for SeqSerializer {
where
T: ?Sized + Serialize,
{
self.items.push(value.serialize(&mut Serializer)?);
self.items.push(value.serialize(&mut self.ser)?);
Ok(())
}
@ -256,7 +269,7 @@ impl ser::SerializeTupleVariant for SeqSerializer {
where
T: ?Sized + Serialize,
{
self.items.push(value.serialize(&mut Serializer)?);
self.items.push(value.serialize(&mut self.ser)?);
Ok(())
}
@ -267,6 +280,7 @@ impl ser::SerializeTupleVariant for SeqSerializer {
pub struct ListSerializer {
items: Vec<Term<'static>>,
ser: Serializer,
}
impl ser::SerializeSeq for ListSerializer {
type Ok = Term<'static>;
@ -276,7 +290,7 @@ impl ser::SerializeSeq for ListSerializer {
where
T: ?Sized + Serialize,
{
self.items.push(value.serialize(&mut Serializer)?);
self.items.push(value.serialize(&mut self.ser)?);
Ok(())
}
@ -288,6 +302,7 @@ impl ser::SerializeSeq for ListSerializer {
pub struct MapSerializer {
next: Option<Vec<u8>>,
fields: Vec<(Vec<u8>, Term<'static>)>,
ser: Serializer,
}
impl ser::SerializeMap for MapSerializer {
@ -298,7 +313,11 @@ impl ser::SerializeMap for MapSerializer {
where
T: ?Sized + Serialize,
{
self.next = Some(key.serialize(&mut Serializer)?.encode());
let mut ser = Serializer {
string_format: self.ser.string_format.without_whitespace(),
bytes_format: self.ser.bytes_format.without_whitespace(),
};
self.next = Some(key.serialize(&mut ser)?.encode());
Ok(())
}
@ -310,7 +329,7 @@ impl ser::SerializeMap for MapSerializer {
self.next
.take()
.ok_or_else(|| Self::Error::custom("no key"))?,
value.serialize(&mut Serializer)?,
value.serialize(&mut self.ser)?,
));
Ok(())
}
@ -323,6 +342,7 @@ impl ser::SerializeMap for MapSerializer {
pub struct StructSerializer {
name: &'static str,
fields: Vec<(&'static str, Term<'static>)>,
ser: Serializer,
}
impl ser::SerializeStruct for StructSerializer {
@ -333,7 +353,7 @@ impl ser::SerializeStruct for StructSerializer {
where
T: ?Sized + Serialize,
{
self.fields.push((key, value.serialize(&mut Serializer)?));
self.fields.push((key, value.serialize(&mut self.ser)?));
Ok(())
}
@ -346,6 +366,7 @@ pub struct StructVariantSerializer {
name: &'static str,
variant: &'static str,
fields: Vec<(&'static str, Term<'static>)>,
ser: Serializer,
}
impl ser::SerializeStructVariant for StructVariantSerializer {
@ -356,7 +377,7 @@ impl ser::SerializeStructVariant for StructVariantSerializer {
where
T: ?Sized + Serialize,
{
self.fields.push((key, value.serialize(&mut Serializer)?));
self.fields.push((key, value.serialize(&mut self.ser)?));
Ok(())
}

131
src/switch64.rs Normal file
View File

@ -0,0 +1,131 @@
//! The Switch64 encoding for text strings
//!
//! Allowed characters are encoded as-is.
//! Others are encoded using base64.
//! Plain parts and base64-encoded parts are separated by a backslash `\`
use crate::{SWITCH64_EXTRA_CHARS, SWITCH64_SEPARATOR};
/// Encode a byte string using the Switch64 encoding.
///
/// Bytes accepted by `is_valid_plaintext_char` are copied as-is; runs of
/// other bytes are base64-encoded (URL-safe alphabet, no padding). The two
/// kinds of segments are separated by `SWITCH64_SEPARATOR` (`\`), and the
/// output always starts with a (possibly empty) plain-text segment.
pub fn encode(bytes: &[u8], allow_whitespace: bool) -> Vec<u8> {
    let mut output = Vec::with_capacity(bytes.len());
    let mut pos = 0;
    while pos < bytes.len() {
        // Determine how many bytes to copy as-is
        let cnt = bytes[pos..]
            .iter()
            .take_while(|c| is_valid_plaintext_char(**c, allow_whitespace))
            .count();

        // Copy those bytes as-is
        output.extend_from_slice(&bytes[pos..pos + cnt]);
        pos += cnt;

        // If some bytes remain, switch to base64 encoding
        if pos < bytes.len() {
            output.push(SWITCH64_SEPARATOR);
        } else {
            break;
        }

        // Count how many bytes to write as base64.
        // We stop at the first position where we find three consecutive
        // characters that could be encoded as-is.
        // NOTE: `saturating_sub` is required here — the previous
        // `bytes.len() - 3` underflowed when fewer than 3 bytes remained
        // (e.g. `encode(b",", true)`), panicking in debug builds and
        // indexing out of bounds in release builds.
        let mut b64end = bytes.len();
        for i in pos..bytes.len().saturating_sub(3) {
            if bytes[i..i + 3]
                .iter()
                .all(|c| is_valid_plaintext_char(*c, allow_whitespace))
            {
                b64end = i;
                break;
            }
        }

        output.extend_from_slice(
            base64::encode_config(&bytes[pos..b64end], base64::URL_SAFE_NO_PAD).as_bytes(),
        );
        pos = b64end;
        if pos < bytes.len() {
            output.push(SWITCH64_SEPARATOR);
        }
    }

    output
}
/// Decode a Switch64-encoded byte string.
///
/// Segments delimited by `\` alternate between plain text (copied
/// verbatim) and base64 (URL-safe, no padding), starting with a
/// plain-text segment.
pub fn decode(bytes: &[u8]) -> Result<Vec<u8>, base64::DecodeError> {
    let mut output = Vec::with_capacity(bytes.len());
    let mut pos = 0;
    let mut in_base64 = false;
    while pos < bytes.len() {
        // Take everything up to the next separator (or the end of input).
        let seg_len = bytes[pos..]
            .iter()
            .take_while(|c| **c != SWITCH64_SEPARATOR)
            .count();
        let segment = &bytes[pos..pos + seg_len];
        if in_base64 {
            output.extend_from_slice(&base64::decode_config(
                segment,
                base64::URL_SAFE_NO_PAD,
            )?);
        } else {
            output.extend_from_slice(segment);
        }
        // Step past the segment and its separator, and flip the
        // interpretation of the next segment.
        pos += seg_len + 1;
        in_base64 = !in_base64;
    }
    Ok(output)
}
/// Returns true if byte `c` may appear verbatim in the plain-text
/// segments of a Switch64 string.
#[inline]
fn is_valid_plaintext_char(c: u8, allow_whitespace: bool) -> bool {
    if c.is_ascii_alphanumeric() || SWITCH64_EXTRA_CHARS.contains(&c) {
        return true;
    }
    allow_whitespace && c.is_ascii_whitespace()
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::debug;

    #[test]
    fn test_encode() {
        // Pure plaintext passes through unchanged.
        assert_eq!(debug(&encode(&b"hello world"[..], true)), "hello world");
        // Mixed content: base64 runs are delimited by `\`.
        assert_eq!(
            debug(&encode(&b"hello, world!"[..], true)),
            "hello\\LA\\ world\\IQ"
        );
        // Input starting with non-plaintext bytes yields an empty leading
        // plain segment, so the output begins with a separator.
        assert_eq!(debug(&encode(&b",;,@$;8"[..], true)), "\\LDssQCQ7OA");
    }

    #[test]
    fn test_decode() {
        // Mirror images of the test_encode cases above.
        assert_eq!(debug(&decode(&b"hello world"[..]).unwrap()), "hello world");
        assert_eq!(
            debug(&decode(&b"hello\\LA\\ world\\IQ"[..]).unwrap()),
            "hello, world!"
        );
        assert_eq!(debug(&decode(&b"\\LDssQCQ7OA"[..]).unwrap()), ",;,@$;8");
    }

    #[test]
    fn test_encdec() {
        // Round-trip property: decode(encode(s)) == s, with and without
        // whitespace allowed in plain segments. Inputs include source-like
        // text, prose, and high-entropy binary (decoded from base64).
        for s in [
            br#"assert_eq!(debug(&decode(&b"hello\\LA\\ world\\IQ"[..]).unwrap()), "hello, world!");"#.to_vec(),
            br#"- a list, which may contain any number of any kind of terms (can be mixed)"#.to_vec(),
            base64::decode("dVcG5EzJqGP/2ZGkVu4ewzfAug1W96tb2KiBOVyPUXfw8uD34DEepW/PPqRzi0HL").unwrap()
        ] {
            assert_eq!(decode(&encode(&s, true)).unwrap(), s);
            assert_eq!(decode(&encode(&s, false)).unwrap(), s);
        }
    }
}