use std::borrow::Cow; use crate::{ is_string_char, is_whitespace, DICT_ASSIGN, DICT_CLOSE, DICT_DELIM, DICT_OPEN, LIST_CLOSE, LIST_DELIM, LIST_OPEN, }; pub type Pos = u32; #[derive(Debug)] pub enum ParseError { IncompleteInput, DuplicateKey(Pos), Garbage(Pos), } #[derive(Clone, Copy, Debug)] pub struct ITerm(Pos); #[derive(Clone, Copy, Debug)] struct IRaw { start: Pos, end: Pos, } #[derive(Clone, Copy, Debug)] struct ISeq { seq_start: Pos, seq_end: Pos, } #[derive(Clone, Copy, Debug)] struct IDict { dict_start: Pos, dict_end: Pos, } #[derive(Debug)] enum TTerm { Str(IRaw), RawSeq(IRaw, ISeq), RawList(IRaw, ISeq), RawDict(IRaw, IDict), } #[derive(Debug)] pub struct Buf<'a> { bytes: Cow<'a, [u8]>, seqs: Vec, dicts: Vec<(IRaw, ITerm)>, terms: Vec, } pub fn decode(input: &[u8]) -> Result<(Buf<'_>, ITerm), ParseError> { let mut buf = Buf { bytes: input.into(), seqs: Vec::with_capacity(16), dicts: Vec::with_capacity(16), terms: Vec::with_capacity(16), }; let all_buf = IRaw { start: 0, end: input.len() as Pos, }; let mut stack = Vec::with_capacity(16); let (term, rest) = buf.decode(all_buf, &mut stack)?; assert!(stack.is_empty()); let rest = buf.take_whitespace(rest); if rest.start < all_buf.end { return Err(ParseError::Garbage(rest.start)); } Ok((buf, term)) } enum StackItem { Term(ITerm), KeyValue(IRaw, ITerm), } type Stack = Vec; impl StackItem { fn term(self) -> ITerm { match self { StackItem::Term(term) => term, _ => unreachable!(), } } fn kv(self) -> (IRaw, ITerm) { match self { StackItem::KeyValue(key, term) => (key, term), _ => unreachable!(), } } } impl<'a> Buf<'a> { // ============================================ PUBLIC ACCESS FUNCTIONS pub fn raw(&self, term: ITerm) -> Option<&[u8]> { match self.terms.get(term.0 as usize)? { TTerm::Str(r) | TTerm::RawSeq(r, _) | TTerm::RawList(r, _) | TTerm::RawDict(r, _) => { self.get_bytes(*r) } } } pub fn str(&self, term: ITerm) -> Option<&str> { match self.terms.get(term.0 as usize)? { TTerm::Str(r) => self .bytes .get(r.start as usize..r.end as usize) .map(|x| unsafe { std::str::from_utf8_unchecked(x) }), _ => None, } } pub fn seq<'x>(&'x self, term: &'x ITerm) -> Option<&'x [ITerm]> { match self.terms.get(term.0 as usize)? { TTerm::RawSeq(_, s) => self.seqs.get(s.seq_start as usize..s.seq_end as usize), _ => Some(std::slice::from_ref(term)), } } pub fn seq_of(&self, term: ITerm) -> Option<[ITerm; N]> { match self.terms.get(term.0 as usize)? { TTerm::RawSeq(_, s) => { if (s.seq_end - s.seq_start) as usize == N { let seq = self.seqs.get(s.seq_start as usize..s.seq_end as usize)?; Some(seq.try_into().unwrap()) } else { None } } _ => None, } } pub fn list(&self, term: ITerm) -> Option<&[ITerm]> { match self.terms.get(term.0 as usize)? { TTerm::RawList(_, s) => self.seqs.get(s.seq_start as usize..s.seq_end as usize), _ => None, } } pub fn list_of(&self, term: ITerm) -> Option<[ITerm; N]> { match self.terms.get(term.0 as usize)? { TTerm::RawList(_, s) if (s.seq_end - s.seq_start) as usize == N => { let seq = self.seqs.get(s.seq_start as usize..s.seq_end as usize)?; Some(seq.try_into().unwrap()) } _ => None, } } pub fn dict_get(&self, term: ITerm, key: &str) -> Option { match self.terms.get(term.0 as usize)? { TTerm::RawDict(_, d) => { let dict = self.dicts.get(d.dict_start as usize..d.dict_end as usize)?; let pos = dict.binary_search_by(|(k, _)| self.get_bytes(*k).unwrap().cmp(key.as_bytes())).ok()?; Some(dict[pos].1) } _ => None, } } pub fn dict_of(&self, term: ITerm, keys: [&str; N], allow_other: bool) -> Option<[ITerm; N]> { match self.terms.get(term.0 as usize)? { TTerm::RawDict(_, d) => { let dict = self.dicts.get(d.dict_start as usize..d.dict_end as usize)?; if dict.len() < N || (dict.len() > N && !allow_other) { return None; } let mut ret = [ITerm(0); N]; for i in 0..N { let pos = dict.binary_search_by(|(k, _)| self.get_bytes(*k).unwrap().cmp(keys[i].as_bytes())).ok()?; ret[i] = dict[pos].1; } Some(ret) } _ => None, } } pub fn dict_iter(&self, term: ITerm) -> Option + '_> { match self.terms.get(term.0 as usize)? { TTerm::RawDict(_, d) => { let dict = self.dicts.get(d.dict_start as usize..d.dict_end as usize)?; let iter = dict.iter().map(|(k, v)| (unsafe { std::str::from_utf8_unchecked(self.get_bytes(*k).unwrap()) }, *v)); Some(iter) } _ => None, } } // ============================================ NETTEXT PARSING fn decode(&mut self, raw: IRaw, stack: &mut Stack) -> Result<(ITerm, IRaw), ParseError> { let start = self.take_whitespace(raw); let stack_start = stack.len(); let mut cur_end = start; let mut next_start = start; loop { match self.decode_nonseq_term(next_start, stack) { Err(_) => break, Ok((term, rest)) => { stack.push(StackItem::Term(term)); cur_end = rest; next_start = self.take_whitespace(rest); } } } if stack.len() == stack_start { Err(self.error_at(next_start)) } else if stack.len() == stack_start + 1 { Ok((stack.pop().unwrap().term(), next_start)) } else { let seq_raw = IRaw { start: start.start, end: cur_end.start, }; let seq_start = self.seqs.len(); self.seqs .extend(stack.drain(stack_start..).map(StackItem::term)); let seq = TTerm::RawSeq( seq_raw, ISeq { seq_start: seq_start as Pos, seq_end: self.seqs.len() as Pos, }, ); Ok((self.push_term(seq), next_start)) } } fn decode_nonseq_term( &mut self, raw: IRaw, stack: &mut Stack, ) -> Result<(ITerm, IRaw), ParseError> { if let Ok((term, rest)) = self.decode_string(raw) { Ok((term, rest)) } else if let Ok((term, rest)) = self.decode_list(raw, stack) { Ok((term, rest)) } else if let Ok((term, rest)) = self.decode_dict(raw, stack) { Ok((term, rest)) } else { Err(self.error_at(raw)) } } fn decode_list(&mut self, raw: IRaw, stack: &mut Stack) -> Result<(ITerm, IRaw), ParseError> { let stack_start = stack.len(); let mut cur = self.take_whitespace(self.take_char(raw, LIST_OPEN)?); while let Ok((term, rest)) = self.decode(cur, stack) { stack.push(StackItem::Term(term)); cur = self.take_whitespace(rest); if let Ok(rest) = self.take_char(rest, LIST_DELIM) { cur = self.take_whitespace(rest); } else { break; } } if let Ok(rest) = self.take_char(cur, LIST_CLOSE) { let seq_raw = IRaw { start: raw.start, end: rest.start, }; let seq_start = self.seqs.len(); self.seqs .extend(stack.drain(stack_start..).map(StackItem::term)); let seq = TTerm::RawList( seq_raw, ISeq { seq_start: seq_start as Pos, seq_end: self.seqs.len() as Pos, }, ); Ok((self.push_term(seq), rest)) } else { stack.truncate(stack_start); Err(self.error_at(cur)) } } fn decode_dict(&mut self, raw: IRaw, stack: &mut Stack) -> Result<(ITerm, IRaw), ParseError> { let stack_start = stack.len(); match self.decode_dict_inner(raw, stack) { Ok(rest) => { let dict_raw = IRaw { start: raw.start, end: rest.start, }; let dict_start = self.dicts.len(); self.dicts .extend(stack.drain(stack_start..).map(StackItem::kv)); self.dicts[dict_start..] .sort_by_key(|(k, _)| (&self.bytes[k.start as usize..k.end as usize], k.start)); for ((k1, _), (k2, _)) in self.dicts[dict_start..] .iter() .zip(self.dicts[dict_start + 1..].iter()) { if self.get_bytes(*k1).unwrap() == self.get_bytes(*k2).unwrap() { return Err(ParseError::DuplicateKey(k1.start)); } } let dict = TTerm::RawDict( dict_raw, IDict { dict_start: dict_start as Pos, dict_end: self.dicts.len() as Pos, }, ); Ok((self.push_term(dict), rest)) } Err(e) => { stack.truncate(stack_start); Err(e) } } } fn decode_dict_inner(&mut self, raw: IRaw, stack: &mut Stack) -> Result { let mut cur = self.take_whitespace(self.take_char(raw, DICT_OPEN)?); while let Ok((key, rest)) = self.take_string(cur) { cur = self.take_whitespace(rest); cur = self.take_char(cur, DICT_ASSIGN)?; let (value, rest) = self.decode(cur, stack)?; cur = self.take_whitespace(rest); stack.push(StackItem::KeyValue(key, value)); if let Ok(rest) = self.take_char(cur, DICT_DELIM) { cur = self.take_whitespace(rest); } else { break; } } let rest = self.take_char(cur, DICT_CLOSE)?; Ok(rest) } fn decode_string(&mut self, raw: IRaw) -> Result<(ITerm, IRaw), ParseError> { let (string_raw, rest) = self.take_string(raw)?; let string = self.push_term(TTerm::Str(string_raw)); Ok((string, rest)) } fn take_string(&mut self, raw: IRaw) -> Result<(IRaw, IRaw), ParseError> { let mut rest = raw; while rest.start < rest.end { if is_string_char(self.bytes[rest.start as usize]) { rest.start += 1; } else { break; } } if rest.start > raw.start { let string_raw = IRaw { start: raw.start, end: rest.start, }; Ok((string_raw, rest)) } else { Err(self.error_at(rest)) } } #[inline] fn take_char(&self, raw: IRaw, c: u8) -> Result { if raw.start >= raw.end { Err(ParseError::IncompleteInput) } else if self.bytes[raw.start as usize] != c { Err(ParseError::Garbage(raw.start)) } else { Ok(IRaw { start: raw.start + 1, end: raw.end, }) } } #[inline] fn take_whitespace(&self, mut raw: IRaw) -> IRaw { while raw.start < raw.end { if is_whitespace(self.bytes[raw.start as usize]) { raw.start += 1; } else { break; } } raw } #[inline] fn get_bytes(&self, raw: IRaw) -> Option<&[u8]> { self.bytes.get(raw.start as usize..raw.end as usize) } #[inline] fn error_at(&self, raw: IRaw) -> ParseError { if raw.start < raw.end { ParseError::Garbage(raw.start) } else { ParseError::IncompleteInput } } #[inline] fn push_term(&mut self, term: TTerm) -> ITerm { let ret = ITerm(self.terms.len() as Pos); self.terms.push(term); ret } #[cfg(test)] fn debug(&self, i: ITerm) { use crate::debug as debug_str; let term = &self.terms[i.0 as usize]; match term { TTerm::Str(r) => { eprintln!( "{} -> {:?} = `{}`", i.0, term, debug_str(&self.bytes[r.start as usize..r.end as usize]) ); } TTerm::RawSeq(r, s) => { eprintln!( "{} -> {:?} = `{}` ((", i.0, term, debug_str(&self.bytes[r.start as usize..r.end as usize]) ); for j in self.seqs[s.seq_start as usize..s.seq_end as usize].iter() { self.debug(*j); } eprintln!("))"); } TTerm::RawList(r, l) => { eprintln!( "{} -> {:?} = `{}` [[", i.0, term, debug_str(&self.bytes[r.start as usize..r.end as usize]) ); for j in self.seqs[l.seq_start as usize..l.seq_end as usize].iter() { self.debug(*j); } eprintln!("]]"); } TTerm::RawDict(r, d) => { eprintln!( "{} -> {:?} = `{}` {{{{", i.0, term, debug_str(&self.bytes[r.start as usize..r.end as usize]) ); for (k, v) in self.dicts[d.dict_start as usize..d.dict_end as usize].iter() { eprint!( "[`{}`] = ", debug_str(&self.bytes[k.start as usize..k.end as usize]) ); self.debug(*v); } eprintln!("}}}}"); } } } } #[cfg(test)] mod tests { use super::*; #[test] fn list_and_seq() { let string = "[ [ h1; h2; h3 ]; hello world [ acc bii; cooj dlpa] ]"; eprintln!("{}", string); let (buf, term) = decode(string.as_bytes()).unwrap(); buf.debug(term); let [a, b] = buf.list_of(term).unwrap(); assert_eq!(buf.raw(a).unwrap(), b"[ h1; h2; h3 ]"); let l = buf.list(a).unwrap(); assert_eq!(l.len(), 3); assert_eq!(buf.str(l[0]).unwrap(), "h1"); assert_eq!(buf.str(l[1]).unwrap(), "h2"); assert_eq!(buf.str(l[2]).unwrap(), "h3"); assert_eq!(buf.raw(b).unwrap(), b"hello world [ acc bii; cooj dlpa]"); let [h, w, l] = buf.seq_of(b).unwrap(); assert_eq!(buf.str(h).unwrap(), "hello"); assert_eq!(buf.str(w).unwrap(), "world"); assert_eq!(buf.raw(l).unwrap(), b"[ acc bii; cooj dlpa]"); let [l1, l2] = buf.list_of(l).unwrap(); assert_eq!(buf.raw(l1).unwrap(), b"acc bii"); let s = buf.seq(&l1).unwrap(); assert_eq!(s.len(), 2); assert_eq!(buf.str(s[0]).unwrap(), "acc"); assert_eq!(buf.str(s[1]).unwrap(), "bii"); assert_eq!(buf.raw(l2).unwrap(), b"cooj dlpa"); let [s2a, s2b] = buf.seq_of(l2).unwrap(); assert_eq!(buf.str(s2a).unwrap(), "cooj"); assert_eq!(buf.str(s2b).unwrap(), "dlpa"); } #[test] fn dict() { let string = "[ { a = plop; b = hello world }; ploplop { e=15; d=12 ;c = {key=val;key2=val2}} ]"; eprintln!("{}", string); let (buf, term) = decode(string.as_bytes()).unwrap(); buf.debug(term); let [a, b] = buf.list_of(term).unwrap(); assert_eq!(buf.raw(a).unwrap(), b"{ a = plop; b = hello world }"); let [aa, ab] = buf.dict_of(a, ["a", "b"], false).unwrap(); assert_eq!(buf.raw(aa).unwrap(), b"plop"); assert_eq!(buf.raw(ab).unwrap(), b"hello world"); assert_eq!(buf.raw(b).unwrap(), b"ploplop { e=15; d=12 ;c = {key=val;key2=val2}}"); let [ba, bb] = buf.seq_of(b).unwrap(); assert_eq!(buf.str(ba).unwrap(), "ploplop"); assert_eq!(buf.str(buf.dict_get(bb, "e").unwrap()).unwrap(), "15"); let mut iter = buf.dict_iter(bb).unwrap(); let (k1, v1) = iter.next().unwrap(); assert_eq!(k1, "c"); assert_eq!(buf.raw(v1).unwrap(), b"{key=val;key2=val2}"); let (k2, v2) = iter.next().unwrap(); assert_eq!(k2, "d"); assert_eq!(buf.str(v2).unwrap(), "12"); let (k3, v3) = iter.next().unwrap(); assert_eq!(k3, "e"); assert_eq!(buf.str(v3).unwrap(), "15"); assert!(iter.next().is_none()); } }