nettext/src/dec/mod.rs
2022-11-17 15:02:33 +01:00

430 lines
14 KiB
Rust

mod decode;
use std::collections::HashMap;
pub use decode::*;
/// A parsed NetText term.
///
/// This is the public wrapper around the crate-internal `AnyTerm`
/// representation.
///
/// Lifetime `'a` is the lifetime of the buffer containing the encoded data.
///
/// Lifetime `'b` is the lifetime of another `Term` from which this one is
/// borrowed, when it is returned by one of the helper functions, or `'static`
/// when first returned from `decode()`.
#[derive(Eq, PartialEq, Debug)]
pub struct Term<'a, 'b>(AnyTerm<'a, 'b>);
/// Internal representation of a term of any kind.
///
/// For every variant except `Str`, the first field is the raw byte slice
/// returned by `raw()`. The `*Ref` variants borrow their contents from another
/// term (lifetime `'b`) instead of owning them; `mkref()` produces them from
/// the owning variants.
#[derive(Eq, PartialEq, Clone)]
pub(crate) enum AnyTerm<'a, 'b> {
/// A single string; the slice is both its value and its raw form.
Str(&'a [u8]),
/// A dictionary owning its key/value map.
Dict(&'a [u8], HashMap<&'a [u8], AnyTerm<'a, 'b>>),
/// A dictionary whose key/value map is borrowed.
DictRef(&'a [u8], &'b HashMap<&'a [u8], AnyTerm<'a, 'b>>),
/// A list owning its items; items are never lists themselves.
List(&'a [u8], Vec<NonListTerm<'a, 'b>>),
/// A list whose items are borrowed.
ListRef(&'a [u8], &'b [NonListTerm<'a, 'b>]),
}
/// Internal representation of a term that is not a list: the element type
/// stored inside `AnyTerm::List` / `AnyTerm::ListRef`.
///
/// The variants mirror the `AnyTerm` variants of the same name.
#[derive(Eq, PartialEq, Clone)]
pub(crate) enum NonListTerm<'a, 'b> {
/// A single string; the slice is both its value and its raw form.
Str(&'a [u8]),
/// A dictionary owning its key/value map; the first field is its raw form.
Dict(&'a [u8], HashMap<&'a [u8], AnyTerm<'a, 'b>>),
/// A dictionary whose key/value map is borrowed; the first field is its raw form.
DictRef(&'a [u8], &'b HashMap<&'a [u8], AnyTerm<'a, 'b>>),
}
// Widening conversion: every non-list term is also a valid `AnyTerm` of the
// same kind, so this never fails.
impl<'a, 'b> From<NonListTerm<'a, 'b>> for AnyTerm<'a, 'b> {
    fn from(item: NonListTerm<'a, 'b>) -> Self {
        match item {
            NonListTerm::Str(bytes) => Self::Str(bytes),
            NonListTerm::Dict(raw, entries) => Self::Dict(raw, entries),
            NonListTerm::DictRef(raw, entries) => Self::DictRef(raw, entries),
        }
    }
}
// Narrowing conversion: strings and dictionaries map to the variant of the
// same name, while list terms have no non-list equivalent and yield `Err(())`.
impl<'a, 'b> TryFrom<AnyTerm<'a, 'b>> for NonListTerm<'a, 'b> {
    type Error = ();

    fn try_from(term: AnyTerm<'a, 'b>) -> Result<Self, ()> {
        let converted = match term {
            AnyTerm::Str(bytes) => Self::Str(bytes),
            AnyTerm::Dict(raw, entries) => Self::Dict(raw, entries),
            AnyTerm::DictRef(raw, entries) => Self::DictRef(raw, entries),
            AnyTerm::List(..) | AnyTerm::ListRef(..) => return Err(()),
        };
        Ok(converted)
    }
}
impl<'a> From<AnyTerm<'a, 'static>> for Term<'a, 'static> {
fn from(x: AnyTerm<'a, 'static>) -> Term<'a, 'static> {
Term(x)
}
}
// ---- PUBLIC IMPLS ----
/// Error returned by the typed accessors of `Term` when a term does not have
/// the shape the caller asked for.
#[derive(Debug, Clone)]
pub enum TypeError {
    /// The term is not of the requested kind; the payload names the kind that
    /// was expected (`"STR"`, `"STRING"` or `"DICT"`).
    WrongType(&'static str),
    /// The term has the wrong number of items: `WrongLength(actual, expected)`.
    WrongLength(usize, usize),
    /// A required dictionary key is absent; the payload is the key.
    MissingKey(String),
    /// The dictionary contains a key that is not accepted; the payload is the key.
    UnexpectedKey(String),
}

impl std::fmt::Display for TypeError {
    /// Writes a short human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            TypeError::WrongType(expected) => write!(f, "wrong type, expected {}", expected),
            TypeError::WrongLength(actual, expected) => {
                write!(f, "wrong length, expected {} but got {}", expected, actual)
            }
            TypeError::MissingKey(key) => write!(f, "missing key `{}`", key),
            TypeError::UnexpectedKey(key) => write!(f, "unexpected key `{}`", key),
        }
    }
}

// Lets `TypeError` be used with `?` into `Box<dyn Error>` and error-handling
// helpers that require the standard error trait.
impl std::error::Error for TypeError {}
impl<'a, 'b> Term<'a, 'b> {
    /// Get the term's raw representation
    ///
    /// Example:
    ///
    /// ```
    /// use nettext::dec::decode;
    ///
    /// let term = decode(b"hello world").unwrap();
    /// assert_eq!(term.raw(), b"hello world");
    /// ```
    pub fn raw(&self) -> &'a [u8] {
        self.0.raw()
    }

    /// If the term is a single string, get that string
    ///
    /// # Errors
    ///
    /// Returns `TypeError::WrongType("STR")` for any other kind of term.
    ///
    /// Example:
    ///
    /// ```
    /// use nettext::dec::decode;
    ///
    /// let term1 = decode(b" hello ").unwrap();
    /// assert_eq!(term1.str().unwrap(), b"hello");
    ///
    /// let term2 = decode(b"hello world").unwrap();
    /// assert!(term2.str().is_err());
    /// ```
    pub fn str(&self) -> Result<&'a [u8], TypeError> {
        match &self.0 {
            AnyTerm::Str(s) => Ok(s),
            _ => Err(TypeError::WrongType("STR")),
        }
    }

    /// If the term is a single string, or a list containing only strings,
    /// get its raw representation
    ///
    /// This works both on terms that own their list and on terms that borrow
    /// it from another term (such as the values handed out by `.dict()`,
    /// `.dict_of()` and `.dict_of_opt()`).
    ///
    /// # Errors
    ///
    /// Returns `TypeError::WrongType("STRING")` if the term is a dictionary or
    /// a list containing at least one dictionary.
    ///
    /// Example:
    ///
    /// ```
    /// use nettext::dec::decode;
    ///
    /// let term1 = decode(b" hello world ").unwrap();
    /// assert_eq!(term1.string().unwrap(), b"hello world");
    ///
    /// let term2 = decode(b"hello { a= 5}").unwrap();
    /// assert!(term2.string().is_err());
    /// ```
    pub fn string(&self) -> Result<&'a [u8], TypeError> {
        // `mkref()` normalises `List` into `ListRef`, so one arm covers both
        // the owned and the borrowed list representation. Matching only on
        // `List` would wrongly reject borrowed list terms.
        match self.0.mkref() {
            AnyTerm::Str(s) => Ok(s),
            AnyTerm::ListRef(r, l) if l.iter().all(|x| matches!(x, NonListTerm::Str(_))) => Ok(r),
            _ => Err(TypeError::WrongType("STRING")),
        }
    }

    /// Return a list of terms made from this term.
    /// If it is a str or a dict, returns a list of a single term.
    /// If it is a list, that's the list of terms we return.
    ///
    /// Example:
    ///
    /// ```
    /// use nettext::dec::decode;
    ///
    /// let term1 = decode(b" hello ").unwrap();
    /// let list1 = term1.list();
    /// assert_eq!(list1.len(), 1);
    /// assert_eq!(list1[0].str().unwrap(), b"hello");
    ///
    /// let term2 = decode(b" hello world ").unwrap();
    /// let list2 = term2.list();
    /// assert_eq!(list2.len(), 2);
    /// assert_eq!(list2[0].str().unwrap(), b"hello");
    /// assert_eq!(list2[1].str().unwrap(), b"world");
    /// ```
    pub fn list(&self) -> Vec<Term<'a, '_>> {
        match self.0.mkref() {
            AnyTerm::ListRef(_r, l) => l.iter().map(|x| Term(x.mkref().into())).collect::<Vec<_>>(),
            x => vec![Term(x)],
        }
    }

    /// Same as `.list()`, but deconstructs it in a const length array.
    /// This allows to directly bind the resulting list into discrete variables.
    ///
    /// # Errors
    ///
    /// Returns `TypeError::WrongLength(actual, N)` if the list does not contain
    /// exactly `N` terms.
    ///
    /// Example:
    ///
    /// ```
    /// use nettext::dec::decode;
    ///
    /// let term1 = decode(b" hello ").unwrap();
    /// let [s1] = term1.list_of().unwrap();
    /// assert_eq!(s1.str().unwrap(), b"hello");
    ///
    /// let term2 = decode(b" hello world ").unwrap();
    /// let [s2a, s2b] = term2.list_of().unwrap();
    /// assert_eq!(s2a.str().unwrap(), b"hello");
    /// assert_eq!(s2b.str().unwrap(), b"world");
    /// ```
    pub fn list_of<const N: usize>(&self) -> Result<[Term<'a, '_>; N], TypeError> {
        let list = self.list();
        let list_len = list.len();
        list.try_into()
            .map_err(|_| TypeError::WrongLength(list_len, N))
    }

    /// Same as `.list_of()`, but only binds the first N-1 terms.
    /// If there are exactly N terms, the last one is bound to the Nth return variable.
    /// If there are more than N terms, the remaining terms are bound to a new list term
    /// that is returned as the Nth return variable.
    ///
    /// # Errors
    ///
    /// Returns `TypeError::WrongLength(actual, N)` if there are fewer than `N`
    /// terms, or if `N` is zero while at least one term is present (there is
    /// then no slot to bind the remaining terms to).
    ///
    /// Example:
    ///
    /// ```
    /// use nettext::dec::decode;
    ///
    /// let term1 = decode(b" hello world ").unwrap();
    /// let [s1a, s1b] = term1.list_of_first().unwrap();
    /// assert_eq!(s1a.str().unwrap(), b"hello");
    /// assert_eq!(s1b.str().unwrap(), b"world");
    ///
    /// let term2 = decode(b" hello mighty world ").unwrap();
    /// let [s2a, s2b] = term2.list_of_first().unwrap();
    /// assert_eq!(s2a.str().unwrap(), b"hello");
    /// assert_eq!(s2b.list().len(), 2);
    /// assert_eq!(s2b.raw(), b"mighty world");
    /// ```
    pub fn list_of_first<const N: usize>(&self) -> Result<[Term<'a, '_>; N], TypeError> {
        match self.0.mkref() {
            AnyTerm::ListRef(raw, list) => match list.len().cmp(&N) {
                std::cmp::Ordering::Less => Err(TypeError::WrongLength(list.len(), N)),
                std::cmp::Ordering::Equal => Ok(list
                    .iter()
                    .map(|x| Term(x.mkref().into()))
                    .collect::<Vec<_>>()
                    .try_into()
                    // Cannot fail: exactly `N` items were collected.
                    .unwrap()),
                std::cmp::Ordering::Greater => {
                    // Index of the slot receiving the remaining terms. With
                    // `N == 0` there is no such slot; report a length error
                    // instead of underflowing `N - 1`.
                    let last = match N.checked_sub(1) {
                        Some(last) => last,
                        None => return Err(TypeError::WrongLength(list.len(), N)),
                    };
                    let mut ret = Vec::with_capacity(N);
                    for item in list[..last].iter() {
                        ret.push(Term(item.mkref().into()));
                    }
                    // The raw form of the remainder is the tail of the list's
                    // raw form, starting where the first remaining item
                    // starts; its offset is recovered from the two pointers.
                    let remaining_begin = list[last].raw().as_ptr() as usize;
                    let remaining_offset = remaining_begin - raw.as_ptr() as usize;
                    let remaining_raw = &raw[remaining_offset..];
                    let remaining = list[last..]
                        .iter()
                        .map(|x| x.mkref())
                        .collect::<Vec<NonListTerm<'a, '_>>>();
                    ret.push(Term(AnyTerm::List(remaining_raw, remaining)));
                    // Cannot fail: `N - 1` items plus the remainder were pushed.
                    Ok(ret.try_into().unwrap())
                }
            },
            // A non-list term counts as a list of exactly one term.
            x if N == 1 => Ok(vec![Term(x)].try_into().unwrap()),
            _ => Err(TypeError::WrongLength(1, N)),
        }
    }

    /// Checks term is a dictionary and returns hashmap of inner terms
    ///
    /// # Errors
    ///
    /// Returns `TypeError::WrongType("DICT")` if the term is not a dictionary.
    ///
    /// Example:
    ///
    /// ```
    /// use nettext::dec::decode;
    ///
    /// let term = decode(b"{ k1 = v1, k2 = v2 }").unwrap();
    /// let dict = term.dict().unwrap();
    /// assert_eq!(dict.get(&b"k1"[..]).unwrap().str().unwrap(), b"v1");
    /// assert_eq!(dict.get(&b"k2"[..]).unwrap().str().unwrap(), b"v2");
    /// ```
    pub fn dict(&self) -> Result<HashMap<&'a [u8], Term<'a, '_>>, TypeError> {
        match self.0.mkref() {
            AnyTerm::DictRef(_, d) => Ok(d.iter().map(|(k, t)| (*k, Term(t.mkref()))).collect()),
            _ => Err(TypeError::WrongType("DICT")),
        }
    }

    /// Checks term is a dictionary whose keys are exactly those supplied,
    /// and returns the associated values as a list.
    ///
    /// # Errors
    ///
    /// Returns `TypeError::WrongType("DICT")` if the term is not a dictionary,
    /// `TypeError::MissingKey` if one of `keys` is absent, and
    /// `TypeError::UnexpectedKey` if the dictionary has a key not in `keys`.
    ///
    /// Example:
    ///
    /// ```
    /// use nettext::dec::decode;
    ///
    /// let term = decode(b"{ k1 = v1, k2 = v2 }").unwrap();
    /// let [s1, s2] = term.dict_of([b"k1", b"k2"]).unwrap();
    /// assert_eq!(s1.str().unwrap(), b"v1");
    /// assert_eq!(s2.str().unwrap(), b"v2");
    /// ```
    pub fn dict_of<const N: usize>(
        &self,
        keys: [&'static [u8]; N],
    ) -> Result<[Term<'a, '_>; N], TypeError> {
        match self.0.mkref() {
            AnyTerm::DictRef(_, dict) => {
                // Check all required keys exist in dictionary
                for k in keys.iter() {
                    if !dict.contains_key(k) {
                        return Err(TypeError::MissingKey(debug(k).to_string()));
                    }
                }
                // Check that dictionary contains no extraneous keys
                for k in dict.keys() {
                    if !keys.contains(k) {
                        return Err(TypeError::UnexpectedKey(debug(k).to_string()));
                    }
                }
                // Cannot fail: every key was checked to be present above.
                Ok(keys.map(|k| Term(dict.get(k).unwrap().mkref())))
            }
            _ => Err(TypeError::WrongType("DICT")),
        }
    }

    /// Checks term is a dictionary whose keys are included in those supplied,
    /// and returns the associated values as a list of options.
    ///
    /// # Errors
    ///
    /// Returns `TypeError::WrongType("DICT")` if the term is not a dictionary
    /// and `TypeError::UnexpectedKey` if the dictionary has a key not in `keys`.
    ///
    /// Example:
    ///
    /// ```
    /// use nettext::dec::decode;
    ///
    /// let term = decode(b"{ k1 = v1, k2 = v2 }").unwrap();
    /// let [s1, s2, s3] = term.dict_of_opt([b"k1", b"k2", b"k3"]).unwrap();
    /// assert_eq!(s1.unwrap().str().unwrap(), b"v1");
    /// assert_eq!(s2.unwrap().str().unwrap(), b"v2");
    /// assert!(s3.is_none());
    /// ```
    pub fn dict_of_opt<const N: usize>(
        &self,
        keys: [&'static [u8]; N],
    ) -> Result<[Option<Term<'a, '_>>; N], TypeError> {
        match self.0.mkref() {
            AnyTerm::DictRef(_, dict) => {
                // Check that dictionary contains no extraneous keys
                for k in dict.keys() {
                    if !keys.contains(k) {
                        return Err(TypeError::UnexpectedKey(debug(k).to_string()));
                    }
                }
                Ok(keys.map(|k| dict.get(k).map(|x| Term(x.mkref()))))
            }
            _ => Err(TypeError::WrongType("DICT")),
        }
    }
}
// ---- INTERNAL IMPLS ----
impl<'a, 'b> AnyTerm<'a, 'b> {
    /// Raw bytes of the term: the string itself for `Str`, the stored raw
    /// slice for every other variant.
    fn raw(&self) -> &'a [u8] {
        match *self {
            AnyTerm::Str(bytes)
            | AnyTerm::Dict(bytes, _)
            | AnyTerm::DictRef(bytes, _)
            | AnyTerm::List(bytes, _)
            | AnyTerm::ListRef(bytes, _) => bytes,
        }
    }

    /// Builds a term of the same kind that borrows its contents from `self`.
    ///
    /// The result is never `Dict` or `List`: owning variants are turned into
    /// their `*Ref` counterpart, borrowed ones are copied as they are.
    fn mkref(&self) -> AnyTerm<'a, '_> {
        match self {
            AnyTerm::Str(bytes) => AnyTerm::Str(*bytes),
            AnyTerm::Dict(raw, entries) => AnyTerm::DictRef(*raw, entries),
            AnyTerm::DictRef(raw, entries) => AnyTerm::DictRef(*raw, *entries),
            AnyTerm::List(raw, items) => AnyTerm::ListRef(*raw, items.as_slice()),
            AnyTerm::ListRef(raw, items) => AnyTerm::ListRef(*raw, *items),
        }
    }
}
impl<'a, 'b> NonListTerm<'a, 'b> {
    /// Raw bytes of the term: the string itself for `Str`, the stored raw
    /// slice for dictionaries.
    fn raw(&self) -> &'a [u8] {
        match *self {
            NonListTerm::Str(bytes)
            | NonListTerm::Dict(bytes, _)
            | NonListTerm::DictRef(bytes, _) => bytes,
        }
    }

    /// Builds a term of the same kind that borrows its contents from `self`.
    ///
    /// The result is never `Dict`: an owned dictionary becomes a `DictRef`.
    fn mkref(&self) -> NonListTerm<'a, '_> {
        match self {
            NonListTerm::Str(bytes) => NonListTerm::Str(*bytes),
            NonListTerm::Dict(raw, entries) => NonListTerm::DictRef(*raw, entries),
            NonListTerm::DictRef(raw, entries) => NonListTerm::DictRef(*raw, *entries),
        }
    }
}
// ---- DISPLAY REPR = Raw NetText representation ----
impl<'a, 'b> std::fmt::Display for AnyTerm<'a, 'b> {
    /// Writes the raw bytes of the term as text.
    ///
    /// Fails with `std::fmt::Error` when the raw bytes are not valid UTF-8.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match std::str::from_utf8(self.raw()) {
            Ok(text) => f.write_str(text),
            Err(_) => Err(std::fmt::Error),
        }
    }
}
impl<'a, 'b> std::fmt::Display for Term<'a, 'b> {
    /// Displays the term exactly like the wrapped internal term.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(&self.0, f)
    }
}
// ---- DEBUG REPR ----
/// Renders a byte slice for diagnostic output.
///
/// Returns the bytes as a string slice when they are valid UTF-8, and the
/// fixed placeholder `<invalid ascii>` otherwise.
pub fn debug(x: &[u8]) -> &str {
    match std::str::from_utf8(x) {
        Ok(text) => text,
        Err(_) => "<invalid ascii>",
    }
}
impl<'a, 'b> std::fmt::Debug for AnyTerm<'a, 'b> {
    /// Writes a structural dump of the term: its kind, its raw form, and one
    /// line per dictionary entry or list item.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Going through `mkref()` leaves only three shapes to print.
        match self.mkref() {
            AnyTerm::Str(bytes) => write!(f, "Str(`{}`)", debug(bytes)),
            AnyTerm::DictRef(raw, entries) => {
                write!(f, "Dict<`{}`", debug(raw))?;
                entries
                    .iter()
                    .try_for_each(|(key, value)| write!(f, "\n `{}`={:?}", debug(key), value))?;
                f.write_str(">")
            }
            AnyTerm::ListRef(raw, items) => {
                write!(f, "List[`{}`", debug(raw))?;
                items
                    .iter()
                    .try_for_each(|item| write!(f, "\n {:?}", item))?;
                f.write_str("]")
            }
            // `mkref()` never returns an owning variant.
            AnyTerm::Dict(..) | AnyTerm::List(..) => unreachable!(),
        }
    }
}
impl<'a, 'b> std::fmt::Debug for NonListTerm<'a, 'b> {
    /// Writes a structural dump of the term: its kind, its raw form, and one
    /// line per dictionary entry.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Going through `mkref()` leaves only two shapes to print.
        match self.mkref() {
            NonListTerm::Str(bytes) => write!(f, "Str(`{}`)", debug(bytes)),
            NonListTerm::DictRef(raw, entries) => {
                write!(f, "Dict<`{}`", debug(raw))?;
                entries
                    .iter()
                    .try_for_each(|(key, value)| write!(f, "\n `{}`={:?}", debug(key), value))?;
                f.write_str(">")
            }
            // `mkref()` never returns the owning variant.
            NonListTerm::Dict(..) => unreachable!(),
        }
    }
}