nettext/src/buf/decode.rs
2023-05-10 19:12:29 +02:00

475 lines
15 KiB
Rust

use crate::{
is_string_char, is_whitespace, DICT_ASSIGN, DICT_CLOSE, DICT_DELIM, DICT_OPEN, LIST_CLOSE,
LIST_DELIM, LIST_OPEN,
};
use super::*;
/// Parses `input` as a single term.
///
/// A fresh [`Buf`] is built over `input` and the full range `0..input.len()`
/// is handed to `Buf::decode`. On success the buffer is returned together
/// with the index of the root term; any parse error is returned unchanged.
///
/// NOTE(review): `input.len() as Pos` is a plain `as` cast. If `Pos` is
/// narrower than `usize`, an oversized input would be truncated silently —
/// confirm callers cannot pass such inputs.
pub fn decode(input: &[u8]) -> Result<(Buf<'_>, ITerm), ParseError> {
    let whole_input = IRaw {
        start: 0,
        end: input.len() as Pos,
    };
    let mut buf = Buf {
        bytes: input.into(),
        seqs: Vec::with_capacity(16),
        dicts: Vec::with_capacity(16),
        terms: Vec::with_capacity(16),
    };
    buf.decode(whole_input).map(|root| (buf, root))
}
// ================
/// Entry on the decoder's work stack.
///
/// Items are pushed while a sequence, list or dictionary is being parsed and
/// are drained into `Buf::seqs` / `Buf::dicts` once the enclosing construct
/// has been read completely.
enum StackItem {
/// A decoded term (pushed by `decode_seq` and `decode_list`).
Term(ITerm),
/// A dictionary entry: the raw range of the key and the decoded value
/// (pushed by `decode_dict_inner`).
KeyValue(IRaw, ITerm),
}
/// Work stack threaded through the recursive `decode_*` methods.
type Stack = Vec<StackItem>;
impl StackItem {
    /// Extracts the term from a [`StackItem::Term`].
    ///
    /// # Panics
    /// Hits `unreachable!()` if the item is a [`StackItem::KeyValue`].
    fn term(self) -> ITerm {
        if let StackItem::Term(inner) = self {
            inner
        } else {
            unreachable!()
        }
    }

    /// Extracts the `(key, value)` pair from a [`StackItem::KeyValue`].
    ///
    /// # Panics
    /// Hits `unreachable!()` if the item is a [`StackItem::Term`].
    fn kv(self) -> (IRaw, ITerm) {
        if let StackItem::KeyValue(key, value) = self {
            (key, value)
        } else {
            unreachable!()
        }
    }
}
impl<'a> Buf<'a> {
pub(crate) fn decode(&mut self, raw: IRaw) -> Result<ITerm, ParseError> {
let mut stack = Vec::with_capacity(16);
let (term, rest) = self.decode_seq(raw, &mut stack)?;
assert!(stack.is_empty());
let rest = self.take_whitespace(rest);
if rest.start < raw.end {
return Err(ParseError::UnexpectedInput(rest.start as usize));
}
Ok(term)
}
/// Decodes a whitespace-separated run of non-sequence terms starting at `raw`.
///
/// Leading whitespace is skipped, then terms are read with
/// `decode_nonseq_term` until it fails. The outcome depends on how many
/// terms were read:
/// * none: an error for the position where parsing stopped;
/// * one: that term itself is returned (no sequence wrapper);
/// * several: the terms are moved from `stack` into `self.seqs` and a
///   `TTerm::RawSeq` covering the bytes from the first term to the end of
///   the last one is pushed and returned.
///
/// The returned `IRaw` is the remaining input with whitespace after the last
/// term already skipped. On return `stack` has the length it had on entry.
fn decode_seq(&mut self, raw: IRaw, stack: &mut Stack) -> Result<(ITerm, IRaw), ParseError> {
    let first = self.take_whitespace(raw);
    let base = stack.len();

    // `seq_end` is the offset just past the last term read (before trailing
    // whitespace); `cursor` is where the next term would start.
    let mut seq_end = first.start;
    let mut cursor = first;
    while let Ok((term, after_term)) = self.decode_nonseq_term(cursor, stack) {
        stack.push(StackItem::Term(term));
        seq_end = after_term.start;
        cursor = self.take_whitespace(after_term);
    }

    let depth = stack.len();
    if depth == base {
        return Err(self.error_at(cursor));
    }
    if depth == base + 1 {
        // A single term is returned as-is rather than as a one-element sequence.
        let only = stack.pop().unwrap().term();
        return Ok((only, cursor));
    }

    let first_child = self.seqs.len();
    self.seqs
        .extend(stack.drain(base..).map(StackItem::term));
    let children = ISeq {
        seq_start: first_child as Pos,
        seq_end: self.seqs.len() as Pos,
    };
    let span = IRaw {
        start: first.start,
        end: seq_end,
    };
    let id = self.push_term(TTerm::RawSeq(span, children));
    Ok((id, cursor))
}
/// Decodes one term that is not a sequence: a string, a list or a dictionary,
/// tried in that order at `raw.start`.
///
/// Returns the first alternative that parses, together with the remaining
/// input. If none parses, the individual errors are discarded and
/// `error_at(raw)` is returned instead.
///
/// NOTE(review): because the error of `decode_dict` is discarded here, a
/// `ParseError::DuplicateKey` produced there is replaced by the generic
/// error for `raw` — confirm this is intended.
fn decode_nonseq_term(
    &mut self,
    raw: IRaw,
    stack: &mut Stack,
) -> Result<(ITerm, IRaw), ParseError> {
    if let Ok(parsed) = self.decode_string(raw) {
        return Ok(parsed);
    }
    if let Ok(parsed) = self.decode_list(raw, stack) {
        return Ok(parsed);
    }
    if let Ok(parsed) = self.decode_dict(raw, stack) {
        return Ok(parsed);
    }
    Err(self.error_at(raw))
}
/// Decodes a list: `LIST_OPEN`, items separated by `LIST_DELIM`, `LIST_CLOSE`.
///
/// Each item is parsed with `decode_seq`. Item parsing stops at the first
/// item that fails to parse or that is not followed by `LIST_DELIM`; the
/// closing byte must come next. On success the items are moved from `stack`
/// into `self.seqs` and a `TTerm::RawList` spanning from `raw.start` to just
/// past the closing byte is pushed.
///
/// # Errors
/// Returns the `take_char` error if `raw` does not start with `LIST_OPEN`,
/// or `error_at` for the position where `LIST_CLOSE` was expected. In the
/// latter case `stack` is truncated back to its length on entry.
fn decode_list(&mut self, raw: IRaw, stack: &mut Stack) -> Result<(ITerm, IRaw), ParseError> {
    let base = stack.len();
    let after_open = self.take_char(raw, LIST_OPEN)?;
    let mut cursor = self.take_whitespace(after_open);

    loop {
        let (item, after_item) = match self.decode_seq(cursor, stack) {
            Ok(parsed) => parsed,
            Err(_) => break,
        };
        stack.push(StackItem::Term(item));
        cursor = self.take_whitespace(after_item);
        // The delimiter is looked for directly at `after_item`, as returned by `decode_seq`.
        match self.take_char(after_item, LIST_DELIM) {
            Ok(after_delim) => cursor = self.take_whitespace(after_delim),
            Err(_) => break,
        }
    }

    let after_close = match self.take_char(cursor, LIST_CLOSE) {
        Ok(after_close) => after_close,
        Err(_) => {
            stack.truncate(base);
            return Err(self.error_at(cursor));
        }
    };

    let first_child = self.seqs.len();
    self.seqs
        .extend(stack.drain(base..).map(StackItem::term));
    let children = ISeq {
        seq_start: first_child as Pos,
        seq_end: self.seqs.len() as Pos,
    };
    let span = IRaw {
        start: raw.start,
        end: after_close.start,
    };
    let id = self.push_term(TTerm::RawList(span, children));
    Ok((id, after_close))
}
/// Decodes a dictionary starting at `raw.start`.
///
/// The syntax is parsed by `decode_dict_inner`, which leaves one
/// `StackItem::KeyValue` per entry on `stack`. The entries are then moved
/// into `self.dicts`, sorted by key bytes (ties broken by key position), and
/// checked for duplicate keys. On success a `TTerm::RawDict` spanning from
/// `raw.start` to just past the closing byte is pushed and returned with the
/// remaining input.
///
/// # Errors
/// * Any error from `decode_dict_inner`; `stack` is truncated back to its
///   length on entry.
/// * `ParseError::DuplicateKey` if two entries have identical key bytes; the
///   entries added to `self.dicts` for this dictionary are removed again.
fn decode_dict(&mut self, raw: IRaw, stack: &mut Stack) -> Result<(ITerm, IRaw), ParseError> {
    let stack_start = stack.len();
    let rest = match self.decode_dict_inner(raw, stack) {
        Ok(rest) => rest,
        Err(e) => {
            stack.truncate(stack_start);
            return Err(e);
        }
    };

    let dict_raw = IRaw {
        start: raw.start,
        end: rest.start,
    };
    let dict_start = self.dicts.len();
    self.dicts
        .extend(stack.drain(stack_start..).map(StackItem::kv));
    self.dicts[dict_start..]
        .sort_by_key(|(k, _)| (&self.bytes[k.start as usize..k.end as usize], k.start));

    // After sorting, equal keys are adjacent. `windows(2)` yields nothing for
    // zero or one entries, so an empty dictionary no longer indexes past the
    // end of `self.dicts` (the previous `[dict_start + 1..]` slice panicked).
    let duplicate = self.dicts[dict_start..]
        .windows(2)
        .find(|pair| self.get_bytes(pair[0].0) == self.get_bytes(pair[1].0))
        // Lossy conversion: identical to the exact one for valid UTF-8 keys,
        // and cannot panic on other bytes.
        .map(|pair| String::from_utf8_lossy(self.get_bytes(pair[0].0)).into_owned());
    if let Some(key) = duplicate {
        // Do not leave the rejected dictionary's entries behind.
        self.dicts.truncate(dict_start);
        return Err(ParseError::DuplicateKey(key));
    }

    let dict = TTerm::RawDict(
        dict_raw,
        IDict {
            dict_start: dict_start as Pos,
            dict_end: self.dicts.len() as Pos,
        },
    );
    Ok((self.push_term(dict), rest))
}
/// Parses the syntax of a dictionary and pushes its entries onto `stack`.
///
/// Expects `DICT_OPEN`, then zero or more `key DICT_ASSIGN value` entries
/// separated by `DICT_DELIM`, then `DICT_CLOSE`. Keys are read with
/// `take_string`, values with `decode_seq`. Each entry is pushed as a
/// `StackItem::KeyValue`. Returns the input remaining after the closing byte.
///
/// # Errors
/// Propagates errors from `take_char` and `decode_seq`. Entries already
/// pushed are left on `stack`; the caller is expected to clean them up.
fn decode_dict_inner(&mut self, raw: IRaw, stack: &mut Stack) -> Result<IRaw, ParseError> {
    let after_open = self.take_char(raw, DICT_OPEN)?;
    let mut cursor = self.take_whitespace(after_open);

    loop {
        // No key at the cursor means the entries are over.
        let (key, after_key) = match self.take_string(cursor) {
            Ok(found) => found,
            Err(_) => break,
        };
        let at_assign = self.take_whitespace(after_key);
        let after_assign = self.take_char(at_assign, DICT_ASSIGN)?;
        let (value, after_value) = self.decode_seq(after_assign, stack)?;
        cursor = self.take_whitespace(after_value);
        stack.push(StackItem::KeyValue(key, value));

        match self.take_char(cursor, DICT_DELIM) {
            Ok(after_delim) => cursor = self.take_whitespace(after_delim),
            Err(_) => break,
        }
    }

    self.take_char(cursor, DICT_CLOSE)
}
/// Decodes a string term at `raw.start`.
///
/// The string's extent is found with `take_string`. Strings of at most
/// `STR_INLINE_MAX` bytes are copied into a zero-padded `TTerm::StrInline`;
/// longer ones are stored as `TTerm::Str` referencing their byte range.
/// Returns the index of the pushed term and the remaining input.
///
/// # Errors
/// Propagates the error from `take_string` when no string starts at `raw`.
fn decode_string(&mut self, raw: IRaw) -> Result<(ITerm, IRaw), ParseError> {
    let (span, rest) = self.take_string(raw)?;
    let span_len = (span.end - span.start) as usize;

    let term = if span_len > STR_INLINE_MAX {
        TTerm::Str(span)
    } else {
        let src = self.get_bytes(span);
        let mut inline = [0u8; STR_INLINE_MAX];
        inline[..src.len()].copy_from_slice(src);
        TTerm::StrInline(src.len() as u8, inline)
    };

    let id = self.push_term(term);
    Ok((id, rest))
}
/// Consumes the longest run of bytes accepted by `is_string_char` at the
/// start of `raw`.
///
/// Returns `(matched, rest)` where `matched` is the range of the run and
/// `rest` is the input after it.
///
/// # Errors
/// Returns `error_at` for the current position when the run is empty.
fn take_string(&mut self, raw: IRaw) -> Result<(IRaw, IRaw), ParseError> {
    let mut remaining = raw;
    while remaining.start < remaining.end
        && is_string_char(self.bytes[remaining.start as usize])
    {
        remaining.start += 1;
    }

    if remaining.start <= raw.start {
        return Err(self.error_at(remaining));
    }

    let matched = IRaw {
        start: raw.start,
        end: remaining.start,
    };
    Ok((matched, remaining))
}
/// Consumes the single byte `c` at the start of `raw`.
///
/// Returns the input after that byte.
///
/// # Errors
/// * `ParseError::IncompleteInput` if `raw` is empty.
/// * `ParseError::UnexpectedInput` (with the current offset) if the next
///   byte is not `c`.
#[inline]
fn take_char(&self, raw: IRaw, c: u8) -> Result<IRaw, ParseError> {
    if raw.start >= raw.end {
        return Err(ParseError::IncompleteInput);
    }
    if self.bytes[raw.start as usize] != c {
        return Err(ParseError::UnexpectedInput(raw.start as usize));
    }
    Ok(IRaw {
        start: raw.start + 1,
        end: raw.end,
    })
}
/// Skips bytes accepted by `is_whitespace` at the start of `raw` and returns
/// the remaining range (possibly empty, possibly unchanged).
#[inline]
fn take_whitespace(&self, mut raw: IRaw) -> IRaw {
    while raw.start < raw.end && is_whitespace(self.bytes[raw.start as usize]) {
        raw.start += 1;
    }
    raw
}
/// Returns the bytes of this buffer covered by `raw` (`raw.start..raw.end`).
///
/// # Panics
/// Panics like any slice indexing if the range is out of bounds.
#[inline]
pub(crate) fn get_bytes(&self, raw: IRaw) -> &[u8] {
    let (from, to) = (raw.start as usize, raw.end as usize);
    &self.bytes[from..to]
}
/// Builds the parse error for a failure at the start of `raw`:
/// `IncompleteInput` when `raw` is empty, otherwise `UnexpectedInput` with
/// the offset `raw.start`.
#[inline]
fn error_at(&self, raw: IRaw) -> ParseError {
    if raw.start >= raw.end {
        return ParseError::IncompleteInput;
    }
    ParseError::UnexpectedInput(raw.start as usize)
}
/// Test-only helper: prints term `i` and, recursively, all of its children
/// to stderr.
///
/// Variants carrying a raw range also print the bytes of that range.
/// Sequences are bracketed with `((`/`))`, lists with `[[`/`]]` and
/// dictionaries with `{{`/`}}`.
#[cfg(test)]
fn debug(&self, i: ITerm) {
    use crate::debug as debug_str;

    let term = &self.terms[i.0 as usize];
    match term {
        TTerm::StrInline(len, bytes) => {
            eprintln!(
                "{} -> {:?} = `{}`",
                i.0,
                term,
                debug_str(&bytes[..*len as usize])
            );
        }
        TTerm::Str(span) => {
            eprintln!(
                "{} -> {:?} = `{}`",
                i.0,
                term,
                debug_str(self.get_bytes(*span))
            );
        }
        TTerm::RawSeq(span, children) => {
            eprintln!(
                "{} -> {:?} = `{}` ((",
                i.0,
                term,
                debug_str(self.get_bytes(*span))
            );
            for &child in &self.seqs[children.seq_start as usize..children.seq_end as usize] {
                self.debug(child);
            }
            eprintln!("))");
        }
        TTerm::Seq(children) => {
            eprintln!("{} -> {:?} ((", i.0, term);
            for &child in &self.seqs[children.seq_start as usize..children.seq_end as usize] {
                self.debug(child);
            }
            eprintln!("))");
        }
        TTerm::RawList(span, items) => {
            eprintln!(
                "{} -> {:?} = `{}` [[",
                i.0,
                term,
                debug_str(self.get_bytes(*span))
            );
            for &item in &self.seqs[items.seq_start as usize..items.seq_end as usize] {
                self.debug(item);
            }
            eprintln!("]]");
        }
        TTerm::List(items) => {
            eprintln!("{} -> {:?} [[", i.0, term);
            for &item in &self.seqs[items.seq_start as usize..items.seq_end as usize] {
                self.debug(item);
            }
            eprintln!("]]");
        }
        TTerm::RawDict(span, entries) => {
            eprintln!(
                "{} -> {:?} = `{}` {{{{",
                i.0,
                term,
                debug_str(self.get_bytes(*span))
            );
            for &(key, value) in
                &self.dicts[entries.dict_start as usize..entries.dict_end as usize]
            {
                eprint!("[`{}`] = ", debug_str(self.get_bytes(key)));
                self.debug(value);
            }
            eprintln!("}}}}");
        }
        TTerm::Dict(entries) => {
            eprintln!("{} -> {:?} {{{{", i.0, term);
            for &(key, value) in
                &self.dicts[entries.dict_start as usize..entries.dict_end as usize]
            {
                eprint!("[`{}`] = ", debug_str(self.get_bytes(key)));
                self.debug(value);
            }
            eprintln!("}}}}");
        }
    }
}
}
#[cfg(test)]
mod tests {
use super::*;
// Decodes a list whose items are a nested list and a sequence that itself
// ends with a list, then checks raw spans, element counts and string values
// at every level.
#[test]
fn list_and_seq() {
let string = "[ [ h1; h2; h3 ]; hello world [ acc bii; persephonisumiolaodododo dlpa] ]";
eprintln!("{}", string);
let (buf, term) = decode(string.as_bytes()).unwrap();
buf.debug(term);
// Top level: a list with exactly two items.
let [a, b] = buf.list_of(term).unwrap();
// First item: a nested list of three short strings.
assert_eq!(buf.raw(a).unwrap(), b"[ h1; h2; h3 ]");
let l = buf.list(a).unwrap();
assert_eq!(l.len(), 3);
assert_eq!(buf.str(l[0]).unwrap(), "h1");
assert_eq!(buf.str(l[1]).unwrap(), "h2");
assert_eq!(buf.str(l[2]).unwrap(), "h3");
// Second item: a three-term sequence; its raw span excludes surrounding whitespace.
assert_eq!(
buf.raw(b).unwrap(),
b"hello world [ acc bii; persephonisumiolaodododo dlpa]"
);
let [h, w, l] = buf.seq_of(b).unwrap();
assert_eq!(buf.str(h).unwrap(), "hello");
assert_eq!(buf.str(w).unwrap(), "world");
assert_eq!(
buf.raw(l).unwrap(),
b"[ acc bii; persephonisumiolaodododo dlpa]"
);
// The inner list holds two sequences of two strings each.
let [l1, l2] = buf.list_of(l).unwrap();
assert_eq!(buf.raw(l1).unwrap(), b"acc bii");
let s = buf.seq(&l1).unwrap();
assert_eq!(s.len(), 2);
assert_eq!(buf.str(s[0]).unwrap(), "acc");
assert_eq!(buf.str(s[1]).unwrap(), "bii");
assert_eq!(buf.raw(l2).unwrap(), b"persephonisumiolaodododo dlpa");
let [s2a, s2b] = buf.seq_of(l2).unwrap();
assert_eq!(buf.str(s2a).unwrap(), "persephonisumiolaodododo");
assert_eq!(buf.str(s2b).unwrap(), "dlpa");
}
// Decodes a list containing a dictionary and a sequence ending in a
// dictionary with a nested dictionary value, then checks raw spans, keyed
// lookup and the order in which entries are iterated.
#[test]
fn dict() {
let string =
"[ { a = plop; b = hello world }; ploplop { e=15; d=12 ;c = {key=val;key2=azertyuiopazertyuiopazertyuiop}} ]";
eprintln!("{}", string);
let (buf, term) = decode(string.as_bytes()).unwrap();
buf.debug(term);
let [a, b] = buf.list_of(term).unwrap();
// First item: a dictionary with keys "a" and "b"; the value of "b" is a sequence.
assert_eq!(buf.raw(a).unwrap(), b"{ a = plop; b = hello world }");
let [aa, ab] = buf.dict_of(a, ["a", "b"], false).unwrap();
assert_eq!(buf.raw(aa).unwrap(), b"plop");
assert_eq!(buf.raw(ab).unwrap(), b"hello world");
// Second item: a sequence of a string followed by a dictionary.
assert_eq!(
buf.raw(b).unwrap(),
b"ploplop { e=15; d=12 ;c = {key=val;key2=azertyuiopazertyuiopazertyuiop}}"
);
let [ba, bb] = buf.seq_of(b).unwrap();
assert_eq!(buf.str(ba).unwrap(), "ploplop");
assert_eq!(
buf.str(buf.dict_get(bb, "e").unwrap().unwrap()).unwrap(),
"15"
);
// Entries were written as e, d, c but are iterated as c, d, e: the decoder
// sorts dictionary entries by key.
let mut iter = buf.dict_iter(bb).unwrap();
let (k1, v1) = iter.next().unwrap();
assert_eq!(k1, "c");
assert_eq!(
buf.raw(v1).unwrap(),
b"{key=val;key2=azertyuiopazertyuiopazertyuiop}"
);
let (k2, v2) = iter.next().unwrap();
assert_eq!(k2, "d");
assert_eq!(buf.str(v2).unwrap(), "12");
let (k3, v3) = iter.next().unwrap();
assert_eq!(k3, "e");
assert_eq!(buf.str(v3).unwrap(), "15");
assert!(iter.next().is_none());
}
}