nettext/src/switch64.rs

132 lines
3.7 KiB
Rust
Raw Normal View History

//! The Switch64 encoding for text strings
//!
//! Allowed characters are encoded as-is.
//! Others are encoded using base64.
//! Plain parts and base64-encoded parts are separated by a backslash `\`
use crate::{SWITCH64_EXTRA_CHARS, SWITCH64_SEPARATOR};
pub fn encode(bytes: &[u8], allow_whitespace: bool) -> Vec<u8> {
let mut output = Vec::with_capacity(bytes.len());
let mut pos = 0;
while pos < bytes.len() {
// Determine how many bytes to copy as-is
let cnt = bytes[pos..]
.iter()
.take_while(|c| is_valid_plaintext_char(**c, allow_whitespace))
.count();
// Copy those bytes as-is
output.extend_from_slice(&bytes[pos..pos + cnt]);
pos += cnt;
// If some bytes remain, switch to base64 encoding
if pos < bytes.len() {
output.push(SWITCH64_SEPARATOR);
} else {
break;
}
// Count how many bytes to write as base64
// We stop at the first position where we find three consecutive
// characters to encode as-is
let mut b64end = bytes.len();
2022-12-15 16:03:12 +00:00
for i in pos..bytes.len() {
if i + 3 > bytes.len() {
break;
}
if bytes[i..i + 3]
.iter()
.all(|c| is_valid_plaintext_char(*c, allow_whitespace))
{
b64end = i;
break;
}
}
output.extend_from_slice(
base64::encode_config(&bytes[pos..b64end], base64::URL_SAFE_NO_PAD).as_bytes(),
);
pos = b64end;
if pos < bytes.len() {
output.push(SWITCH64_SEPARATOR);
}
}
output
}
/// Decodes a Switch64-encoded byte string.
///
/// The input is cut at every `SWITCH64_SEPARATOR`. Segments alternate,
/// starting with a plain one: plain segments are copied verbatim, and the
/// segments in between are decoded as URL-safe, unpadded base64.
///
/// # Errors
///
/// Returns the `base64::DecodeError` of the first base64 segment that fails
/// to decode.
pub fn decode(bytes: &[u8]) -> Result<Vec<u8>, base64::DecodeError> {
    let mut output = Vec::with_capacity(bytes.len());

    let segments = bytes.split(|c| *c == SWITCH64_SEPARATOR);
    for (index, segment) in segments.enumerate() {
        if index % 2 == 0 {
            // Even segments are plain text.
            output.extend_from_slice(segment);
        } else {
            // Odd segments sit after a separator and hold base64 data.
            let decoded = base64::decode_config(segment, base64::URL_SAFE_NO_PAD)?;
            output.extend_from_slice(&decoded);
        }
    }

    Ok(output)
}
/// Tells whether byte `c` may be written as-is in Switch64 output.
///
/// A byte qualifies when it is ASCII alphanumeric, when it is ASCII
/// whitespace and `allow_whitespace` is set, or when it appears in
/// `SWITCH64_EXTRA_CHARS`.
#[inline]
fn is_valid_plaintext_char(c: u8, allow_whitespace: bool) -> bool {
    if c.is_ascii_alphanumeric() {
        return true;
    }
    if allow_whitespace && c.is_ascii_whitespace() {
        return true;
    }
    SWITCH64_EXTRA_CHARS.contains(&c)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::debug;

    /// Checks the encoder output for fixed inputs with whitespace allowed.
    #[test]
    fn test_encode() {
        assert_eq!(debug(&encode(&b"hello world"[..], true)), "hello world");
        assert_eq!(debug(&encode(&b"hello, world!"[..], true)), "hello, world!");
    }

    /// Checks the decoder on plain input, mixed input, and input that starts
    /// directly with a base64 section.
    #[test]
    fn test_decode() {
        assert_eq!(debug(&decode(&b"hello world"[..]).unwrap()), "hello world");
        assert_eq!(
            debug(&decode(&b"hello\\LA\\ world\\IQ"[..]).unwrap()),
            "hello, world!"
        );
        assert_eq!(debug(&decode(&b"\\LDssQCQ7OA"[..]).unwrap()), ",;,@$;8");
    }

    /// Checks that decoding an encoded value returns the original bytes,
    /// both with and without whitespace allowed as plain text.
    #[test]
    fn test_encdec() {
        for s in [
            br#"assert_eq!(debug(&decode(&b"hello\\LA\\ world\\IQ"[..]).unwrap()), "hello, world!");"#.to_vec(),
            br#"- a list, which may contain any number of any kind of terms (can be mixed)"#.to_vec(),
            base64::decode("dVcG5EzJqGP/2ZGkVu4ewzfAug1W96tb2KiBOVyPUXfw8uD34DEepW/PPqRzi0HL").unwrap(),
            br#",;,@$;8"#.to_vec(),
            // Empty input must round-trip to empty output.
            Vec::new(),
        ] {
            assert_eq!(decode(&encode(&s, true)).unwrap(), s);
            assert_eq!(decode(&encode(&s, false)).unwrap(), s);
        }
    }
}