Compare commits
5 commits
main
...
new-parser
Author | SHA1 | Date | |
---|---|---|---|
3be9d0fc5e | |||
81d8d7bfcc | |||
9abc7519d4 | |||
7cc2212786 | |||
d5b3f64dfe |
8 changed files with 913 additions and 9 deletions
|
@ -15,7 +15,7 @@ base64 = "0.13"
|
|||
hex = "0.4"
|
||||
err-derive = "0.3"
|
||||
|
||||
dryoc = { version = "0.4", optional = true }
|
||||
dryoc = { version = "0.5", optional = true }
|
||||
serde = { version = "1.0", optional = true, features = ["derive"] }
|
||||
|
||||
[features]
|
||||
|
|
474
src/buf/decode.rs
Normal file
474
src/buf/decode.rs
Normal file
|
@ -0,0 +1,474 @@
|
|||
use crate::{
|
||||
is_string_char, is_whitespace, DICT_ASSIGN, DICT_CLOSE, DICT_DELIM, DICT_OPEN, LIST_CLOSE,
|
||||
LIST_DELIM, LIST_OPEN,
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
||||
pub fn decode(input: &[u8]) -> Result<(Buf<'_>, ITerm), ParseError> {
|
||||
let mut buf = Buf {
|
||||
bytes: input.into(),
|
||||
seqs: Vec::with_capacity(16),
|
||||
dicts: Vec::with_capacity(16),
|
||||
terms: Vec::with_capacity(16),
|
||||
};
|
||||
let all_buf = IRaw {
|
||||
start: 0,
|
||||
end: input.len() as Pos,
|
||||
};
|
||||
|
||||
let term = buf.decode(all_buf)?;
|
||||
|
||||
Ok((buf, term))
|
||||
}
|
||||
|
||||
// ================
|
||||
|
||||
enum StackItem {
|
||||
Term(ITerm),
|
||||
KeyValue(IRaw, ITerm),
|
||||
}
|
||||
|
||||
type Stack = Vec<StackItem>;
|
||||
|
||||
impl StackItem {
|
||||
fn term(self) -> ITerm {
|
||||
match self {
|
||||
StackItem::Term(term) => term,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
fn kv(self) -> (IRaw, ITerm) {
|
||||
match self {
|
||||
StackItem::KeyValue(key, term) => (key, term),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Buf<'a> {
|
||||
pub(crate) fn decode(&mut self, raw: IRaw) -> Result<ITerm, ParseError> {
|
||||
let mut stack = Vec::with_capacity(16);
|
||||
|
||||
let (term, rest) = self.decode_seq(raw, &mut stack)?;
|
||||
assert!(stack.is_empty());
|
||||
|
||||
let rest = self.take_whitespace(rest);
|
||||
if rest.start < raw.end {
|
||||
return Err(ParseError::UnexpectedInput(rest.start as usize));
|
||||
}
|
||||
|
||||
Ok(term)
|
||||
}
|
||||
|
||||
fn decode_seq(&mut self, raw: IRaw, stack: &mut Stack) -> Result<(ITerm, IRaw), ParseError> {
|
||||
let start = self.take_whitespace(raw);
|
||||
let stack_start = stack.len();
|
||||
|
||||
let mut cur_end = start;
|
||||
let mut next_start = start;
|
||||
loop {
|
||||
match self.decode_nonseq_term(next_start, stack) {
|
||||
Err(_) => break,
|
||||
Ok((term, rest)) => {
|
||||
stack.push(StackItem::Term(term));
|
||||
cur_end = rest;
|
||||
next_start = self.take_whitespace(rest);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if stack.len() == stack_start {
|
||||
Err(self.error_at(next_start))
|
||||
} else if stack.len() == stack_start + 1 {
|
||||
Ok((stack.pop().unwrap().term(), next_start))
|
||||
} else {
|
||||
let seq_raw = IRaw {
|
||||
start: start.start,
|
||||
end: cur_end.start,
|
||||
};
|
||||
let seq_start = self.seqs.len();
|
||||
self.seqs
|
||||
.extend(stack.drain(stack_start..).map(StackItem::term));
|
||||
let seq = TTerm::RawSeq(
|
||||
seq_raw,
|
||||
ISeq {
|
||||
seq_start: seq_start as Pos,
|
||||
seq_end: self.seqs.len() as Pos,
|
||||
},
|
||||
);
|
||||
Ok((self.push_term(seq), next_start))
|
||||
}
|
||||
}
|
||||
|
||||
fn decode_nonseq_term(
|
||||
&mut self,
|
||||
raw: IRaw,
|
||||
stack: &mut Stack,
|
||||
) -> Result<(ITerm, IRaw), ParseError> {
|
||||
if let Ok((term, rest)) = self.decode_string(raw) {
|
||||
Ok((term, rest))
|
||||
} else if let Ok((term, rest)) = self.decode_list(raw, stack) {
|
||||
Ok((term, rest))
|
||||
} else if let Ok((term, rest)) = self.decode_dict(raw, stack) {
|
||||
Ok((term, rest))
|
||||
} else {
|
||||
Err(self.error_at(raw))
|
||||
}
|
||||
}
|
||||
|
||||
fn decode_list(&mut self, raw: IRaw, stack: &mut Stack) -> Result<(ITerm, IRaw), ParseError> {
|
||||
let stack_start = stack.len();
|
||||
|
||||
let mut cur = self.take_whitespace(self.take_char(raw, LIST_OPEN)?);
|
||||
|
||||
while let Ok((term, rest)) = self.decode_seq(cur, stack) {
|
||||
stack.push(StackItem::Term(term));
|
||||
cur = self.take_whitespace(rest);
|
||||
if let Ok(rest) = self.take_char(rest, LIST_DELIM) {
|
||||
cur = self.take_whitespace(rest);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if let Ok(rest) = self.take_char(cur, LIST_CLOSE) {
|
||||
let seq_raw = IRaw {
|
||||
start: raw.start,
|
||||
end: rest.start,
|
||||
};
|
||||
let seq_start = self.seqs.len();
|
||||
self.seqs
|
||||
.extend(stack.drain(stack_start..).map(StackItem::term));
|
||||
let seq = TTerm::RawList(
|
||||
seq_raw,
|
||||
ISeq {
|
||||
seq_start: seq_start as Pos,
|
||||
seq_end: self.seqs.len() as Pos,
|
||||
},
|
||||
);
|
||||
Ok((self.push_term(seq), rest))
|
||||
} else {
|
||||
stack.truncate(stack_start);
|
||||
Err(self.error_at(cur))
|
||||
}
|
||||
}
|
||||
|
||||
fn decode_dict(&mut self, raw: IRaw, stack: &mut Stack) -> Result<(ITerm, IRaw), ParseError> {
|
||||
let stack_start = stack.len();
|
||||
|
||||
match self.decode_dict_inner(raw, stack) {
|
||||
Ok(rest) => {
|
||||
let dict_raw = IRaw {
|
||||
start: raw.start,
|
||||
end: rest.start,
|
||||
};
|
||||
let dict_start = self.dicts.len();
|
||||
self.dicts
|
||||
.extend(stack.drain(stack_start..).map(StackItem::kv));
|
||||
self.dicts[dict_start..]
|
||||
.sort_by_key(|(k, _)| (&self.bytes[k.start as usize..k.end as usize], k.start));
|
||||
|
||||
for ((k1, _), (k2, _)) in self.dicts[dict_start..]
|
||||
.iter()
|
||||
.zip(self.dicts[dict_start + 1..].iter())
|
||||
{
|
||||
if self.get_bytes(*k1) == self.get_bytes(*k2) {
|
||||
return Err(ParseError::DuplicateKey(String::from_utf8(self.get_bytes(*k1).to_vec()).unwrap()));
|
||||
}
|
||||
}
|
||||
|
||||
let dict = TTerm::RawDict(
|
||||
dict_raw,
|
||||
IDict {
|
||||
dict_start: dict_start as Pos,
|
||||
dict_end: self.dicts.len() as Pos,
|
||||
},
|
||||
);
|
||||
Ok((self.push_term(dict), rest))
|
||||
}
|
||||
Err(e) => {
|
||||
stack.truncate(stack_start);
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn decode_dict_inner(&mut self, raw: IRaw, stack: &mut Stack) -> Result<IRaw, ParseError> {
|
||||
let mut cur = self.take_whitespace(self.take_char(raw, DICT_OPEN)?);
|
||||
|
||||
while let Ok((key, rest)) = self.take_string(cur) {
|
||||
cur = self.take_whitespace(rest);
|
||||
|
||||
cur = self.take_char(cur, DICT_ASSIGN)?;
|
||||
|
||||
let (value, rest) = self.decode_seq(cur, stack)?;
|
||||
cur = self.take_whitespace(rest);
|
||||
stack.push(StackItem::KeyValue(key, value));
|
||||
|
||||
if let Ok(rest) = self.take_char(cur, DICT_DELIM) {
|
||||
cur = self.take_whitespace(rest);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let rest = self.take_char(cur, DICT_CLOSE)?;
|
||||
Ok(rest)
|
||||
}
|
||||
|
||||
fn decode_string(&mut self, raw: IRaw) -> Result<(ITerm, IRaw), ParseError> {
|
||||
let (string_raw, rest) = self.take_string(raw)?;
|
||||
|
||||
let term = if (string_raw.end - string_raw.start) as usize <= STR_INLINE_MAX {
|
||||
let b = self.get_bytes(string_raw);
|
||||
let mut bytes = [0u8; STR_INLINE_MAX];
|
||||
bytes[..b.len()].copy_from_slice(b);
|
||||
TTerm::StrInline(b.len() as u8, bytes)
|
||||
} else {
|
||||
TTerm::Str(string_raw)
|
||||
};
|
||||
|
||||
Ok((self.push_term(term), rest))
|
||||
}
|
||||
|
||||
fn take_string(&mut self, raw: IRaw) -> Result<(IRaw, IRaw), ParseError> {
|
||||
let mut rest = raw;
|
||||
while rest.start < rest.end {
|
||||
if is_string_char(self.bytes[rest.start as usize]) {
|
||||
rest.start += 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if rest.start > raw.start {
|
||||
let string_raw = IRaw {
|
||||
start: raw.start,
|
||||
end: rest.start,
|
||||
};
|
||||
Ok((string_raw, rest))
|
||||
} else {
|
||||
Err(self.error_at(rest))
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn take_char(&self, raw: IRaw, c: u8) -> Result<IRaw, ParseError> {
|
||||
if raw.start >= raw.end {
|
||||
Err(ParseError::IncompleteInput)
|
||||
} else if self.bytes[raw.start as usize] != c {
|
||||
Err(ParseError::UnexpectedInput(raw.start as usize))
|
||||
} else {
|
||||
Ok(IRaw {
|
||||
start: raw.start + 1,
|
||||
end: raw.end,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn take_whitespace(&self, mut raw: IRaw) -> IRaw {
|
||||
while raw.start < raw.end {
|
||||
if is_whitespace(self.bytes[raw.start as usize]) {
|
||||
raw.start += 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
raw
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn get_bytes(&self, raw: IRaw) -> &[u8] {
|
||||
&self.bytes[raw.start as usize..raw.end as usize]
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn error_at(&self, raw: IRaw) -> ParseError {
|
||||
if raw.start < raw.end {
|
||||
ParseError::UnexpectedInput(raw.start as usize)
|
||||
} else {
|
||||
ParseError::IncompleteInput
|
||||
}
|
||||
}
|
||||
|
||||
/// Test helper: prints term `i` and, recursively, everything it contains to stderr.
///
/// # Panics
/// Panics if `i` (or any term reachable from it) is not a valid index.
#[cfg(test)]
fn debug(&self, i: ITerm) {
    use crate::debug as debug_str;

    let term = &self.terms[i.0 as usize];

    // Raw text covered by a span.
    let text = |r: IRaw| debug_str(&self.bytes[r.start as usize..r.end as usize]);
    // Recursively print the elements of a sequence or list.
    let dump_items = |s: ISeq| {
        for item in self.seqs[s.seq_start as usize..s.seq_end as usize].iter() {
            self.debug(*item);
        }
    };
    // Recursively print the entries of a dictionary, each prefixed by its key.
    let dump_entries = |d: IDict| {
        for (k, v) in self.dicts[d.dict_start as usize..d.dict_end as usize].iter() {
            eprint!("[`{}`] = ", text(*k));
            self.debug(*v);
        }
    };

    match term {
        TTerm::StrInline(len, bytes) => {
            eprintln!(
                "{} -> {:?} = `{}`",
                i.0,
                term,
                debug_str(&bytes[..*len as usize])
            );
        }
        TTerm::Str(r) => {
            eprintln!("{} -> {:?} = `{}`", i.0, term, text(*r));
        }
        TTerm::RawSeq(r, s) => {
            eprintln!("{} -> {:?} = `{}` ((", i.0, term, text(*r));
            dump_items(*s);
            eprintln!("))");
        }
        TTerm::Seq(s) => {
            eprintln!("{} -> {:?} ((", i.0, term);
            dump_items(*s);
            eprintln!("))");
        }
        TTerm::RawList(r, l) => {
            eprintln!("{} -> {:?} = `{}` [[", i.0, term, text(*r));
            dump_items(*l);
            eprintln!("]]");
        }
        TTerm::List(l) => {
            eprintln!("{} -> {:?} [[", i.0, term);
            dump_items(*l);
            eprintln!("]]");
        }
        TTerm::RawDict(r, d) => {
            eprintln!("{} -> {:?} = `{}` {{{{", i.0, term, text(*r));
            dump_entries(*d);
            eprintln!("}}}}");
        }
        TTerm::Dict(d) => {
            eprintln!("{} -> {:?} {{{{", i.0, term);
            dump_entries(*d);
            eprintln!("}}}}");
        }
    }
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Nested lists and sequences expose both their raw text and their structure.
    #[test]
    fn list_and_seq() {
        let input = "[ [ h1; h2; h3 ]; hello world [ acc bii; persephonisumiolaodododo dlpa] ]";
        eprintln!("{}", input);
        let (buf, root) = decode(input.as_bytes()).unwrap();
        buf.debug(root);

        let [first, second] = buf.list_of(root).unwrap();

        // First element: a plain list of three short strings.
        assert_eq!(buf.raw(first).unwrap(), b"[ h1; h2; h3 ]");
        let headers = buf.list(first).unwrap();
        assert_eq!(headers.len(), 3);
        assert_eq!(buf.str(headers[0]).unwrap(), "h1");
        assert_eq!(buf.str(headers[1]).unwrap(), "h2");
        assert_eq!(buf.str(headers[2]).unwrap(), "h3");

        // Second element: a sequence of two strings followed by a list.
        assert_eq!(
            buf.raw(second).unwrap(),
            b"hello world [ acc bii; persephonisumiolaodododo dlpa]"
        );
        let [hello, world, inner] = buf.seq_of(second).unwrap();
        assert_eq!(buf.str(hello).unwrap(), "hello");
        assert_eq!(buf.str(world).unwrap(), "world");

        assert_eq!(
            buf.raw(inner).unwrap(),
            b"[ acc bii; persephonisumiolaodododo dlpa]"
        );
        let [inner_a, inner_b] = buf.list_of(inner).unwrap();

        assert_eq!(buf.raw(inner_a).unwrap(), b"acc bii");
        let words = buf.seq(&inner_a).unwrap();
        assert_eq!(words.len(), 2);
        assert_eq!(buf.str(words[0]).unwrap(), "acc");
        assert_eq!(buf.str(words[1]).unwrap(), "bii");

        // One string longer than the inline limit, one shorter.
        assert_eq!(buf.raw(inner_b).unwrap(), b"persephonisumiolaodododo dlpa");
        let [long_word, short_word] = buf.seq_of(inner_b).unwrap();
        assert_eq!(buf.str(long_word).unwrap(), "persephonisumiolaodododo");
        assert_eq!(buf.str(short_word).unwrap(), "dlpa");
    }

    /// Dictionaries support keyed lookup and iterate in sorted key order.
    #[test]
    fn dict() {
        let input =
            "[ { a = plop; b = hello world }; ploplop { e=15; d=12 ;c = {key=val;key2=azertyuiopazertyuiopazertyuiop}} ]";
        eprintln!("{}", input);
        let (buf, root) = decode(input.as_bytes()).unwrap();
        buf.debug(root);

        let [first, second] = buf.list_of(root).unwrap();

        assert_eq!(buf.raw(first).unwrap(), b"{ a = plop; b = hello world }");
        let [val_a, val_b] = buf.dict_of(first, ["a", "b"], false).unwrap();
        assert_eq!(buf.raw(val_a).unwrap(), b"plop");
        assert_eq!(buf.raw(val_b).unwrap(), b"hello world");

        assert_eq!(
            buf.raw(second).unwrap(),
            b"ploplop { e=15; d=12 ;c = {key=val;key2=azertyuiopazertyuiopazertyuiop}}"
        );
        let [tag, table] = buf.seq_of(second).unwrap();
        assert_eq!(buf.str(tag).unwrap(), "ploplop");

        let e_value = buf.dict_get(table, "e").unwrap().unwrap();
        assert_eq!(buf.str(e_value).unwrap(), "15");

        // Entries were written as e, d, c but are yielded as c, d, e.
        let mut entries = buf.dict_iter(table).unwrap();

        let (key, value) = entries.next().unwrap();
        assert_eq!(key, "c");
        assert_eq!(
            buf.raw(value).unwrap(),
            b"{key=val;key2=azertyuiopazertyuiopazertyuiop}"
        );

        let (key, value) = entries.next().unwrap();
        assert_eq!(key, "d");
        assert_eq!(buf.str(value).unwrap(), "12");

        let (key, value) = entries.next().unwrap();
        assert_eq!(key, "e");
        assert_eq!(buf.str(value).unwrap(), "15");

        assert!(entries.next().is_none());
    }
}
|
39
src/buf/error.rs
Normal file
39
src/buf/error.rs
Normal file
|
@ -0,0 +1,39 @@
|
|||
use err_derive::Error;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum ParseError {
|
||||
#[error(display = "Incomplete input")]
|
||||
IncompleteInput,
|
||||
#[error(display = "UnexpectedInput at position {}", _0)]
|
||||
UnexpectedInput(usize),
|
||||
#[error(display = "Duplicate dictionnary key: {}", _0)]
|
||||
DuplicateKey(String),
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum TermError {
|
||||
#[error(display = "Invalid term index ({:?})", _0)]
|
||||
InvalidIndex(ITerm),
|
||||
#[error(display = "Wrong type, expected {}, got {}", _0, _1)]
|
||||
WrongType(&'static str, &'static str),
|
||||
#[error(display = "Wrong length, expected {}, got {}", _0, _1)]
|
||||
WrongLength(usize, usize),
|
||||
#[error(display = "Wrong dictionnary keys")]
|
||||
WrongKeys,
|
||||
#[error(display = "Term does not have a raw representation")]
|
||||
NoRawRepresentation,
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum ValueError {
|
||||
#[error(display = "Invalid term index ({:?})", _0)]
|
||||
InvalidIndex(ITerm),
|
||||
#[error(display = "Duplicate dictionary key")]
|
||||
DuplicateKey,
|
||||
#[error(display = "Bad string (contains invalid characters)")]
|
||||
BadString,
|
||||
#[error(display = "Nested sequence")]
|
||||
SeqInSeq,
|
||||
}
|
391
src/buf/mod.rs
Normal file
391
src/buf/mod.rs
Normal file
|
@ -0,0 +1,391 @@
|
|||
mod decode;
|
||||
mod error;
|
||||
|
||||
use std::borrow::Cow;
|
||||
|
||||
use crate::is_string_char;
|
||||
|
||||
pub use error::*;
|
||||
pub use decode::*;
|
||||
|
||||
/// Longest string, in bytes, that is stored inline as a `TTerm::StrInline`.
pub(crate) const STR_INLINE_MAX: usize = 18;

/// Integer type of every offset and index stored inside a `Buf`.
pub(crate) type Pos = u32;
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct ITerm(Pos);
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub(crate) struct IRaw {
|
||||
start: Pos,
|
||||
end: Pos,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub(crate) struct ISeq {
|
||||
seq_start: Pos,
|
||||
seq_end: Pos,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub(crate) struct IDict {
|
||||
dict_start: Pos,
|
||||
dict_end: Pos,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum TTerm {
|
||||
Str(IRaw),
|
||||
StrInline(u8, [u8; STR_INLINE_MAX]),
|
||||
RawSeq(IRaw, ISeq),
|
||||
RawList(IRaw, ISeq),
|
||||
RawDict(IRaw, IDict),
|
||||
Seq(ISeq),
|
||||
List(ISeq),
|
||||
Dict(IDict),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Buf<'a> {
|
||||
bytes: Cow<'a, [u8]>,
|
||||
seqs: Vec<ITerm>,
|
||||
dicts: Vec<(IRaw, ITerm)>,
|
||||
terms: Vec<TTerm>,
|
||||
}
|
||||
|
||||
impl<'a> Buf<'a> {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
bytes: Default::default(),
|
||||
seqs: Vec::with_capacity(16),
|
||||
dicts: Vec::with_capacity(16),
|
||||
terms: Vec::with_capacity(16),
|
||||
}
|
||||
}
|
||||
|
||||
// ================ READING FUNCTIONS ==================
|
||||
|
||||
pub fn raw(&self, term: ITerm) -> Result<&[u8], TermError> {
|
||||
match self.get_term(term)? {
|
||||
TTerm::StrInline(len, bytes) => Ok(&bytes[..*len as usize]),
|
||||
TTerm::Str(r) | TTerm::RawSeq(r, _) | TTerm::RawList(r, _) | TTerm::RawDict(r, _) => {
|
||||
Ok(self.get_bytes(*r))
|
||||
}
|
||||
_ => Err(TermError::NoRawRepresentation),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn str(&self, term: ITerm) -> Result<&str, TermError> {
|
||||
match self.get_term(term)? {
|
||||
TTerm::StrInline(len, bytes) => {
|
||||
let bytes = &bytes[..*len as usize];
|
||||
let s = unsafe { std::str::from_utf8_unchecked(bytes) };
|
||||
Ok(s)
|
||||
}
|
||||
TTerm::Str(r) => {
|
||||
let bytes = self.get_bytes(*r);
|
||||
let s = unsafe { std::str::from_utf8_unchecked(bytes) };
|
||||
Ok(s)
|
||||
}
|
||||
t => Err(TermError::WrongType("string", t.typename())),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn seq<'x>(&'x self, term: &'x ITerm) -> Result<&'x [ITerm], TermError> {
|
||||
match self.get_term(*term)? {
|
||||
TTerm::RawSeq(_, s) | TTerm::Seq(s) => {
|
||||
Ok(&self.seqs[s.seq_start as usize..s.seq_end as usize])
|
||||
}
|
||||
_ => Ok(std::slice::from_ref(term)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn seq_of<const N: usize>(&self, term: ITerm) -> Result<[ITerm; N], TermError> {
|
||||
match self.get_term(term)? {
|
||||
TTerm::RawSeq(_, s) | TTerm::Seq(s) => {
|
||||
let seq_len = (s.seq_end - s.seq_start) as usize;
|
||||
if seq_len == N {
|
||||
let seq = &self.seqs[s.seq_start as usize..s.seq_end as usize];
|
||||
Ok(seq.try_into().unwrap())
|
||||
} else {
|
||||
Err(TermError::WrongLength(N, seq_len))
|
||||
}
|
||||
}
|
||||
t => Err(TermError::WrongType("seq", t.typename())),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn list(&self, term: ITerm) -> Result<&[ITerm], TermError> {
|
||||
match self.get_term(term)? {
|
||||
TTerm::RawList(_, s) | TTerm::List(s) => {
|
||||
Ok(&self.seqs[s.seq_start as usize..s.seq_end as usize])
|
||||
}
|
||||
t => Err(TermError::WrongType("list", t.typename())),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn list_of<const N: usize>(&self, term: ITerm) -> Result<[ITerm; N], TermError> {
|
||||
match self.get_term(term)? {
|
||||
TTerm::RawList(_, s) | TTerm::List(s) => {
|
||||
let list_len = (s.seq_end - s.seq_start) as usize;
|
||||
if list_len == N {
|
||||
let seq = &self.seqs[s.seq_start as usize..s.seq_end as usize];
|
||||
Ok(seq.try_into().unwrap())
|
||||
} else {
|
||||
Err(TermError::WrongLength(N, list_len))
|
||||
}
|
||||
}
|
||||
t => Err(TermError::WrongType("list", t.typename())),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn dict_get(&self, term: ITerm, key: &str) -> Result<Option<ITerm>, TermError> {
|
||||
match self.get_term(term)? {
|
||||
TTerm::RawDict(_, d) | TTerm::Dict(d) => {
|
||||
let dict = &self.dicts[d.dict_start as usize..d.dict_end as usize];
|
||||
let pos_opt = dict
|
||||
.binary_search_by(|(k, _)| self.get_bytes(*k).cmp(key.as_bytes()))
|
||||
.ok();
|
||||
Ok(pos_opt.map(|pos| dict[pos].1))
|
||||
}
|
||||
t => Err(TermError::WrongType("dict", t.typename())),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn dict_of<const N: usize>(
|
||||
&self,
|
||||
term: ITerm,
|
||||
keys: [&str; N],
|
||||
allow_other: bool,
|
||||
) -> Result<[ITerm; N], TermError> {
|
||||
match self.get_term(term)? {
|
||||
TTerm::RawDict(_, d) | TTerm::Dict(d) => {
|
||||
let dict = &self.dicts[d.dict_start as usize..d.dict_end as usize];
|
||||
if dict.len() < N || (dict.len() > N && !allow_other) {
|
||||
return Err(TermError::WrongKeys);
|
||||
}
|
||||
|
||||
let mut ret = [ITerm(0); N];
|
||||
for i in 0..N {
|
||||
let pos = dict
|
||||
.binary_search_by(|(k, _)| self.get_bytes(*k).cmp(keys[i].as_bytes()))
|
||||
.map_err(|_| TermError::WrongKeys)?;
|
||||
ret[i] = dict[pos].1;
|
||||
}
|
||||
|
||||
Ok(ret)
|
||||
}
|
||||
t => Err(TermError::WrongType("dict", t.typename())),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn dict_iter(
|
||||
&self,
|
||||
term: ITerm,
|
||||
) -> Result<impl Iterator<Item = (&str, ITerm)> + '_, TermError> {
|
||||
match self.get_term(term)? {
|
||||
TTerm::RawDict(_, d) | TTerm::Dict(d) => {
|
||||
let dict = &self.dicts[d.dict_start as usize..d.dict_end as usize];
|
||||
let iter = dict.iter().map(|(k, v)| {
|
||||
(
|
||||
unsafe { std::str::from_utf8_unchecked(self.get_bytes(*k)) },
|
||||
*v,
|
||||
)
|
||||
});
|
||||
Ok(iter)
|
||||
}
|
||||
t => Err(TermError::WrongType("dict", t.typename())),
|
||||
}
|
||||
}
|
||||
|
||||
// ================= WRITING FUNCTIONS ================
|
||||
|
||||
pub fn push_str(&mut self, s: &str) -> Result<ITerm, ValueError> {
|
||||
let b = s.as_bytes();
|
||||
|
||||
if !b.iter().copied().all(is_string_char) {
|
||||
return Err(ValueError::BadString);
|
||||
}
|
||||
|
||||
let term = if b.len() <= STR_INLINE_MAX {
|
||||
let mut bytes = [0u8; STR_INLINE_MAX];
|
||||
bytes[..b.len()].copy_from_slice(b);
|
||||
TTerm::StrInline(b.len() as u8, bytes)
|
||||
} else {
|
||||
TTerm::Str(self.push_bytes(b))
|
||||
};
|
||||
|
||||
Ok(self.push_term(term))
|
||||
}
|
||||
|
||||
pub fn push_seq(&mut self, iterator: impl Iterator<Item = ITerm>) -> Result<ITerm, ValueError> {
|
||||
let seq_start = self.seqs.len();
|
||||
|
||||
for term in iterator {
|
||||
match self.terms.get(term.0 as usize) {
|
||||
None => {
|
||||
self.seqs.truncate(seq_start);
|
||||
return Err(ValueError::InvalidIndex(term));
|
||||
}
|
||||
Some(TTerm::RawSeq(_, _)) => {
|
||||
self.seqs.truncate(seq_start);
|
||||
return Err(ValueError::SeqInSeq);
|
||||
}
|
||||
_ => {
|
||||
self.seqs.push(term);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let seq = ISeq {
|
||||
seq_start: seq_start as Pos,
|
||||
seq_end: self.seqs.len() as Pos,
|
||||
};
|
||||
|
||||
Ok(self.push_term(TTerm::Seq(seq)))
|
||||
}
|
||||
|
||||
pub fn push_list(
|
||||
&mut self,
|
||||
iterator: impl Iterator<Item = ITerm>,
|
||||
) -> Result<ITerm, ValueError> {
|
||||
let list_start = self.seqs.len();
|
||||
|
||||
for term in iterator {
|
||||
match self.terms.get(term.0 as usize) {
|
||||
None => {
|
||||
self.seqs.truncate(list_start);
|
||||
return Err(ValueError::InvalidIndex(term));
|
||||
}
|
||||
_ => {
|
||||
self.seqs.push(term);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let list = ISeq {
|
||||
seq_start: list_start as Pos,
|
||||
seq_end: self.seqs.len() as Pos,
|
||||
};
|
||||
|
||||
Ok(self.push_term(TTerm::List(list)))
|
||||
}
|
||||
|
||||
pub fn push_dict<'k>(
|
||||
&mut self,
|
||||
iterator: impl Iterator<Item = (&'k str, ITerm)>,
|
||||
) -> Result<ITerm, ValueError> {
|
||||
let bytes_start = self.bytes.len();
|
||||
let dict_start = self.dicts.len();
|
||||
|
||||
for (key, term) in iterator {
|
||||
if !key.as_bytes().iter().copied().all(is_string_char) {
|
||||
return Err(ValueError::BadString);
|
||||
}
|
||||
let key = self.push_bytes(key.as_bytes());
|
||||
|
||||
match self.terms.get(term.0 as usize) {
|
||||
None => {
|
||||
self.bytes.to_mut().truncate(bytes_start);
|
||||
self.dicts.truncate(dict_start);
|
||||
return Err(ValueError::InvalidIndex(term));
|
||||
}
|
||||
_ => {
|
||||
self.dicts.push((key, term));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.dicts[dict_start..]
|
||||
.sort_by_key(|(k, _)| (&self.bytes[k.start as usize..k.end as usize], k.start));
|
||||
|
||||
for ((k1, _), (k2, _)) in self.dicts[dict_start..]
|
||||
.iter()
|
||||
.zip(self.dicts[dict_start + 1..].iter())
|
||||
{
|
||||
if self.get_bytes(*k1) == self.get_bytes(*k2) {
|
||||
self.bytes.to_mut().truncate(bytes_start);
|
||||
self.dicts.truncate(dict_start);
|
||||
return Err(ValueError::DuplicateKey);
|
||||
}
|
||||
}
|
||||
|
||||
let dict = IDict {
|
||||
dict_start: dict_start as Pos,
|
||||
dict_end: self.dicts.len() as Pos,
|
||||
};
|
||||
|
||||
Ok(self.push_term(TTerm::Dict(dict)))
|
||||
}
|
||||
|
||||
pub fn push_raw(&mut self, raw: &[u8]) -> Result<ITerm, ParseError> {
|
||||
let bytes_len = self.bytes.len();
|
||||
let seqs_len = self.seqs.len();
|
||||
let dicts_len = self.dicts.len();
|
||||
let terms_len = self.terms.len();
|
||||
|
||||
let raw = self.push_bytes(raw);
|
||||
let result = self.decode(raw);
|
||||
|
||||
if result.is_err() {
|
||||
// reset to initial state
|
||||
self.bytes.to_mut().truncate(bytes_len);
|
||||
self.seqs.truncate(seqs_len);
|
||||
self.dicts.truncate(dicts_len);
|
||||
self.terms.truncate(terms_len);
|
||||
}
|
||||
|
||||
if let Err(ParseError::UnexpectedInput(pos)) = result {
|
||||
Err(ParseError::UnexpectedInput(pos - bytes_len))
|
||||
} else {
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
// ==== Internal ====
|
||||
|
||||
#[inline]
|
||||
fn get_term(&self, term: ITerm) -> Result<&TTerm, TermError> {
|
||||
self.terms
|
||||
.get(term.0 as usize)
|
||||
.ok_or(TermError::InvalidIndex(term))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn push_term(&mut self, term: TTerm) -> ITerm {
|
||||
let ret = ITerm(self.terms.len() as Pos);
|
||||
self.terms.push(term);
|
||||
ret
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn push_bytes(&mut self, raw: &[u8]) -> IRaw {
|
||||
let bytes_start = self.bytes.len();
|
||||
self.bytes.to_mut().extend(raw);
|
||||
IRaw {
|
||||
start: bytes_start as Pos,
|
||||
end: self.bytes.len() as Pos,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TTerm {
|
||||
fn typename(&self) -> &'static str {
|
||||
match self {
|
||||
TTerm::Str(_) | TTerm::StrInline(_, _) => "string",
|
||||
TTerm::RawSeq(_, _) | TTerm::Seq(_) => "seq",
|
||||
TTerm::RawList(_, _) | TTerm::List(_) => "list",
|
||||
TTerm::RawDict(_, _) | TTerm::Dict(_) => "dict",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    pub use super::*;

    /// Guards the in-memory size of a stored term against accidental growth.
    #[test]
    fn test_sizeof() {
        let term_size = std::mem::size_of::<TTerm>();
        assert_eq!(term_size, 20);
    }
}
|
|
@ -38,8 +38,8 @@ pub fn compute_signature(message: &[u8], secret_key: &sign::SecretKey) -> sign::
|
|||
|
||||
/// Verify the ed25519 signature of a message using a public key
|
||||
pub fn verify_signature(
|
||||
signature: &sign::Signature,
|
||||
message: &[u8],
|
||||
signature: sign::Signature,
|
||||
message: Vec<u8>,
|
||||
public_key: &sign::PublicKey,
|
||||
) -> bool {
|
||||
sign::SignedMessage::from_parts(signature.clone(), message)
|
||||
|
|
|
@ -22,8 +22,8 @@ mod error;
|
|||
use std::borrow::{Borrow, Cow};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::*;
|
||||
use crate::dec::{self, decode};
|
||||
use crate::*;
|
||||
use crate::{is_string_char, is_whitespace, switch64, BytesEncoding};
|
||||
|
||||
pub use error::Error;
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
//! A text-based data format for cryptographic network protocols.
|
||||
//!
|
||||
//! ```
|
||||
//! #[cfg(feature = "dryoc")] {
|
||||
//! use nettext::enc::*;
|
||||
//! use nettext::dec::*;
|
||||
//! use nettext::crypto::*;
|
||||
|
@ -46,10 +47,11 @@
|
|||
//!
|
||||
//! let [verb, arg1, arg2, pubkey] = object2.seq_of().unwrap();
|
||||
//! let pubkey = pubkey.public_key().unwrap();
|
||||
//! assert!(verify_signature(&signature, payload.raw(), &pubkey));
|
||||
//! assert!(verify_signature(signature, payload.raw().to_vec(), &pubkey));
|
||||
//!
|
||||
//! assert_eq!(verb.string().unwrap(), "CALL");
|
||||
//! assert_eq!(arg1.string().unwrap(), "myfunction");
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! The value of `signed_payload` would be as follows:
|
||||
|
@ -84,6 +86,7 @@
|
|||
//! Note that the value of `text1` is embedded as-is inside `text2`. This is what allows us
|
||||
//! to check the hash and the signature: the raw representation of the term hasn't changed.
|
||||
|
||||
pub mod buf;
|
||||
pub mod dec;
|
||||
pub mod enc;
|
||||
pub mod switch64;
|
||||
|
|
|
@ -103,10 +103,7 @@ mod tests {
|
|||
#[test]
|
||||
fn test_encode() {
|
||||
assert_eq!(debug(&encode(&b"hello world"[..], true)), "hello world");
|
||||
assert_eq!(
|
||||
debug(&encode(&b"hello, world!"[..], true)),
|
||||
"hello, world!"
|
||||
);
|
||||
assert_eq!(debug(&encode(&b"hello, world!"[..], true)), "hello, world!");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
Loading…
Reference in a new issue