nettext/src/switch64.rs

//! The Switch64 encoding for text strings
//!
//! Allowed characters are encoded as-is.
//! Others are encoded using base64.
//! Plain parts and base64-encoded parts are separated by a backslash `\`

use crate::{SWITCH64_EXTRA_CHARS, SWITCH64_SEPARATOR};

/// Encodes `bytes` into the Switch64 representation.
///
/// The output alternates between plain runs and base64 runs, each switch
/// being marked by one `SWITCH64_SEPARATOR` byte. The output always starts
/// in plain mode, so an input whose first byte is not a plaintext character
/// yields an output that begins with the separator.
///
/// * `bytes` - the raw data to encode.
/// * `allow_whitespace` - forwarded to `is_valid_plaintext_char`; when `true`,
///   ASCII whitespace may be copied as-is instead of being base64-encoded.
///
/// Base64 runs use the URL-safe alphabet without padding.
pub fn encode(bytes: &[u8], allow_whitespace: bool) -> Vec<u8> {
    let is_plain = |c: &u8| is_valid_plaintext_char(*c, allow_whitespace);

    let mut output = Vec::with_capacity(bytes.len());
    let mut rest = bytes;

    loop {
        // Plain run: everything up to the first byte that must be escaped.
        let plain_len = rest
            .iter()
            .position(|c| !is_plain(c))
            .unwrap_or(rest.len());
        let (plain, tail) = rest.split_at(plain_len);
        output.extend_from_slice(plain);

        // Nothing left to escape: the output ends in plain mode.
        if tail.is_empty() {
            break;
        }
        output.push(SWITCH64_SEPARATOR);

        // Base64 run: it extends until the first offset at which three
        // consecutive plaintext bytes start. Shorter plaintext stretches are
        // folded into the base64 run rather than switching back and forth.
        // When no such offset exists, the run covers the rest of the input.
        let b64_len = tail
            .windows(3)
            .position(|w| w.iter().all(|c| is_plain(c)))
            .unwrap_or(tail.len());
        let (raw, remaining) = tail.split_at(b64_len);
        output.extend_from_slice(
            base64::encode_config(raw, base64::URL_SAFE_NO_PAD).as_bytes(),
        );

        // Only emit a closing separator when a plain run follows.
        if remaining.is_empty() {
            break;
        }
        output.push(SWITCH64_SEPARATOR);
        rest = remaining;
    }

    output
}

/// Decodes a Switch64-encoded byte string back into raw bytes.
///
/// The input is read as segments delimited by `SWITCH64_SEPARATOR`, starting
/// with a plain segment and alternating with base64 segments. Plain segments
/// are copied verbatim; base64 segments are decoded with the URL-safe,
/// unpadded alphabet.
///
/// # Errors
///
/// Returns the `base64::DecodeError` of the first base64 segment that fails
/// to decode.
pub fn decode(bytes: &[u8]) -> Result<Vec<u8>, base64::DecodeError> {
    /// Splits `input` at its first separator, returning the segment before it
    /// and whatever follows it. Without a separator, the whole input is the
    /// segment and the remainder is empty.
    fn take_segment(input: &[u8]) -> (&[u8], &[u8]) {
        match input.iter().position(|c| *c == SWITCH64_SEPARATOR) {
            Some(at) => (&input[..at], &input[at + 1..]),
            None => (input, &input[input.len()..]),
        }
    }

    let mut output = Vec::with_capacity(bytes.len());
    let mut rest = bytes;

    while !rest.is_empty() {
        // Plain segment, copied through untouched.
        let (plain, after_plain) = take_segment(rest);
        output.extend_from_slice(plain);

        // Either there was no separator, or nothing follows it: done.
        if after_plain.is_empty() {
            break;
        }

        // Base64 segment, running up to the next separator or end of input.
        let (encoded, after_encoded) = take_segment(after_plain);
        let decoded = base64::decode_config(encoded, base64::URL_SAFE_NO_PAD)?;
        output.extend_from_slice(&decoded);

        rest = after_encoded;
    }

    Ok(output)
}

/// Tells whether byte `c` may be written as-is in a plain segment.
///
/// A byte qualifies when it is an ASCII letter or digit, when it is ASCII
/// whitespace and `allow_whitespace` is set, or when it is listed in
/// `SWITCH64_EXTRA_CHARS`.
#[inline]
fn is_valid_plaintext_char(c: u8, allow_whitespace: bool) -> bool {
    if c.is_ascii_alphanumeric() {
        return true;
    }
    if allow_whitespace && c.is_ascii_whitespace() {
        return true;
    }
    SWITCH64_EXTRA_CHARS.contains(&c)
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::debug;

    /// Inputs that are expected to pass through `encode` unchanged when
    /// whitespace is allowed.
    #[test]
    fn test_encode() {
        let cases = [
            (&b"hello world"[..], "hello world"),
            (&b"hello, world!"[..], "hello, world!"),
        ];
        for (input, expected) in cases {
            assert_eq!(debug(&encode(input, true)), expected);
        }
    }

    /// Hand-written encoded strings and the text they must decode to:
    /// plain only, mixed plain/base64, and base64 only.
    #[test]
    fn test_decode() {
        let cases = [
            (&b"hello world"[..], "hello world"),
            (&b"hello\\LA\\ world\\IQ"[..], "hello, world!"),
            (&b"\\LDssQCQ7OA"[..], ",;,@$;8"),
        ];
        for (encoded, expected) in cases {
            assert_eq!(debug(&decode(encoded).unwrap()), expected);
        }
    }

    /// Round-trip check: decoding the encoding of a sample must give the
    /// sample back, both with and without whitespace allowed as plaintext.
    #[test]
    fn test_encdec() {
        let samples: Vec<Vec<u8>> = vec![
            br#"assert_eq!(debug(&decode(&b"hello\\LA\\ world\\IQ"[..]).unwrap()), "hello, world!");"#.to_vec(),
            br#"- a list, which may contain any number of any kind of terms (can be mixed)"#.to_vec(),
            base64::decode("dVcG5EzJqGP/2ZGkVu4ewzfAug1W96tb2KiBOVyPUXfw8uD34DEepW/PPqRzi0HL").unwrap(),
            br#",;,@$;8"#.to_vec(),
        ];

        for sample in &samples {
            for allow_whitespace in [true, false] {
                let roundtrip = decode(&encode(sample, allow_whitespace)).unwrap();
                assert_eq!(&roundtrip, sample);
            }
        }
    }
}
Switch64 encoding available in serde encoder (TODO decoder) 2022-12-15 15:47:04 +00:00			`//! The Switch64 encoding for text strings`
			`//!`
			`//! Allowed characters are encoded as-is.`
			`//! Others are encoded using base64.`
			//! Plain parts and base64-encoded parts are separated by a backslash `\`

			`use crate::{SWITCH64_EXTRA_CHARS, SWITCH64_SEPARATOR};`

			`pub fn encode(bytes: &[u8], allow_whitespace: bool) -> Vec<u8> {`
			`let mut output = Vec::with_capacity(bytes.len());`

			`let mut pos = 0;`
			`while pos < bytes.len() {`
			`// Determine how many bytes to copy as-is`
			`let cnt = bytes[pos..]`
			`.iter()`
			`.take_while(\|c\| is_valid_plaintext_char(**c, allow_whitespace))`
			`.count();`

			`// Copy those bytes as-is`
			`output.extend_from_slice(&bytes[pos..pos + cnt]);`
			`pos += cnt;`

			`// If some bytes remain, switch to base64 encoding`
			`if pos < bytes.len() {`
			`output.push(SWITCH64_SEPARATOR);`
			`} else {`
			`break;`
			`}`

			`// Count how many bytes to write as base64`
			`// We stop at the first position where we find three consecutive`
			`// characters to encode as-is`
			`let mut b64end = bytes.len();`
fix substraction overflow 2022-12-15 16:03:12 +00:00			`for i in pos..bytes.len() {`
			`if i + 3 > bytes.len() {`
			`break;`
			`}`
Switch64 encoding available in serde encoder (TODO decoder) 2022-12-15 15:47:04 +00:00			`if bytes[i..i + 3]`
			`.iter()`
			`.all(\|c\| is_valid_plaintext_char(*c, allow_whitespace))`
			`{`
			`b64end = i;`
			`break;`
			`}`
			`}`

			`output.extend_from_slice(`
			`base64::encode_config(&bytes[pos..b64end], base64::URL_SAFE_NO_PAD).as_bytes(),`
			`);`
			`pos = b64end;`

			`if pos < bytes.len() {`
			`output.push(SWITCH64_SEPARATOR);`
			`}`
			`}`

			`output`
			`}`

			`pub fn decode(bytes: &[u8]) -> Result<Vec<u8>, base64::DecodeError> {`
			`let mut output = Vec::with_capacity(bytes.len());`

			`let mut pos = 0;`
			`while pos < bytes.len() {`
			`let cnt = bytes[pos..]`
			`.iter()`
			`.take_while(\|c\| **c != SWITCH64_SEPARATOR)`
			`.count();`
			`output.extend_from_slice(&bytes[pos..pos + cnt]);`
			`pos += cnt + 1;`

			`if pos >= bytes.len() {`
			`break;`
			`}`

			`let cnt = bytes[pos..]`
			`.iter()`
			`.take_while(\|c\| **c != SWITCH64_SEPARATOR)`
			`.count();`
			`output.extend_from_slice(&base64::decode_config(`
			`&bytes[pos..pos + cnt],`
			`base64::URL_SAFE_NO_PAD,`
			`)?);`
			`pos += cnt + 1;`
			`}`

			`Ok(output)`
			`}`

			`#[inline]`
			`fn is_valid_plaintext_char(c: u8, allow_whitespace: bool) -> bool {`
			`c.is_ascii_alphanumeric()`
			`\|\| (allow_whitespace && c.is_ascii_whitespace())`
			`\|\| SWITCH64_EXTRA_CHARS.contains(&c)`
			`}`

			`#[cfg(test)]`
			`mod tests {`
			`use super::*;`
			`use crate::debug;`

			`#[test]`
			`fn test_encode() {`
			`assert_eq!(debug(&encode(&b"hello world"[..], true)), "hello world");`
start new parser 2023-05-10 14:21:49 +00:00			`assert_eq!(debug(&encode(&b"hello, world!"[..], true)), "hello, world!");`
Switch64 encoding available in serde encoder (TODO decoder) 2022-12-15 15:47:04 +00:00			`}`

			`#[test]`
			`fn test_decode() {`
			`assert_eq!(debug(&decode(&b"hello world"[..]).unwrap()), "hello world");`
			`assert_eq!(`
			`debug(&decode(&b"hello\\LA\\ world\\IQ"[..]).unwrap()),`
			`"hello, world!"`
			`);`
			`assert_eq!(debug(&decode(&b"\\LDssQCQ7OA"[..]).unwrap()), ",;,@$;8");`
			`}`

			`#[test]`
			`fn test_encdec() {`
			`for s in [`
			`br#"assert_eq!(debug(&decode(&b"hello\\LA\\ world\\IQ"[..]).unwrap()), "hello, world!");"#.to_vec(),`
			`br#"- a list, which may contain any number of any kind of terms (can be mixed)"#.to_vec(),`
v0.4.0 : change delimiter, etc 2023-05-10 10:05:25 +00:00			`base64::decode("dVcG5EzJqGP/2ZGkVu4ewzfAug1W96tb2KiBOVyPUXfw8uD34DEepW/PPqRzi0HL").unwrap(),`
			`br#",;,@$;8"#.to_vec()`
Switch64 encoding available in serde encoder (TODO decoder) 2022-12-15 15:47:04 +00:00			`] {`
			`assert_eq!(decode(&encode(&s, true)).unwrap(), s);`
			`assert_eq!(decode(&encode(&s, false)).unwrap(), s);`
			`}`
			`}`
			`}`