inline strings
This commit is contained in:
parent
7cc2212786
commit
9abc7519d4
2 changed files with 68 additions and 14 deletions
|
@ -226,8 +226,17 @@ impl<'a> Buf<'a> {
|
||||||
|
|
||||||
fn decode_string(&mut self, raw: IRaw) -> Result<(ITerm, IRaw), ParseError> {
|
fn decode_string(&mut self, raw: IRaw) -> Result<(ITerm, IRaw), ParseError> {
|
||||||
let (string_raw, rest) = self.take_string(raw)?;
|
let (string_raw, rest) = self.take_string(raw)?;
|
||||||
let string = self.push_term(TTerm::Str(string_raw));
|
|
||||||
Ok((string, rest))
|
let term = if (string_raw.end - string_raw.start) as usize <= STR_INLINE_MAX {
|
||||||
|
let b = self.get_bytes(string_raw);
|
||||||
|
let mut bytes = [0u8; STR_INLINE_MAX];
|
||||||
|
bytes[..b.len()].copy_from_slice(b);
|
||||||
|
TTerm::StrInline(b.len() as u8, bytes)
|
||||||
|
} else {
|
||||||
|
TTerm::Str(string_raw)
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok((self.push_term(term), rest))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn take_string(&mut self, raw: IRaw) -> Result<(IRaw, IRaw), ParseError> {
|
fn take_string(&mut self, raw: IRaw) -> Result<(IRaw, IRaw), ParseError> {
|
||||||
|
@ -297,6 +306,14 @@ impl<'a> Buf<'a> {
|
||||||
|
|
||||||
let term = &self.terms[i.0 as usize];
|
let term = &self.terms[i.0 as usize];
|
||||||
match term {
|
match term {
|
||||||
|
TTerm::StrInline(len, bytes) => {
|
||||||
|
eprintln!(
|
||||||
|
"{} -> {:?} = `{}`",
|
||||||
|
i.0,
|
||||||
|
term,
|
||||||
|
debug_str(&bytes[..*len as usize])
|
||||||
|
);
|
||||||
|
}
|
||||||
TTerm::Str(r) => {
|
TTerm::Str(r) => {
|
||||||
eprintln!(
|
eprintln!(
|
||||||
"{} -> {:?} = `{}`",
|
"{} -> {:?} = `{}`",
|
||||||
|
@ -380,7 +397,7 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn list_and_seq() {
|
fn list_and_seq() {
|
||||||
let string = "[ [ h1; h2; h3 ]; hello world [ acc bii; cooj dlpa] ]";
|
let string = "[ [ h1; h2; h3 ]; hello world [ acc bii; persephonisumiolaodododo dlpa] ]";
|
||||||
eprintln!("{}", string);
|
eprintln!("{}", string);
|
||||||
let (buf, term) = decode(string.as_bytes()).unwrap();
|
let (buf, term) = decode(string.as_bytes()).unwrap();
|
||||||
buf.debug(term);
|
buf.debug(term);
|
||||||
|
@ -394,12 +411,18 @@ mod tests {
|
||||||
assert_eq!(buf.str(l[1]).unwrap(), "h2");
|
assert_eq!(buf.str(l[1]).unwrap(), "h2");
|
||||||
assert_eq!(buf.str(l[2]).unwrap(), "h3");
|
assert_eq!(buf.str(l[2]).unwrap(), "h3");
|
||||||
|
|
||||||
assert_eq!(buf.raw(b).unwrap(), b"hello world [ acc bii; cooj dlpa]");
|
assert_eq!(
|
||||||
|
buf.raw(b).unwrap(),
|
||||||
|
b"hello world [ acc bii; persephonisumiolaodododo dlpa]"
|
||||||
|
);
|
||||||
let [h, w, l] = buf.seq_of(b).unwrap();
|
let [h, w, l] = buf.seq_of(b).unwrap();
|
||||||
assert_eq!(buf.str(h).unwrap(), "hello");
|
assert_eq!(buf.str(h).unwrap(), "hello");
|
||||||
assert_eq!(buf.str(w).unwrap(), "world");
|
assert_eq!(buf.str(w).unwrap(), "world");
|
||||||
|
|
||||||
assert_eq!(buf.raw(l).unwrap(), b"[ acc bii; cooj dlpa]");
|
assert_eq!(
|
||||||
|
buf.raw(l).unwrap(),
|
||||||
|
b"[ acc bii; persephonisumiolaodododo dlpa]"
|
||||||
|
);
|
||||||
let [l1, l2] = buf.list_of(l).unwrap();
|
let [l1, l2] = buf.list_of(l).unwrap();
|
||||||
|
|
||||||
assert_eq!(buf.raw(l1).unwrap(), b"acc bii");
|
assert_eq!(buf.raw(l1).unwrap(), b"acc bii");
|
||||||
|
@ -408,16 +431,16 @@ mod tests {
|
||||||
assert_eq!(buf.str(s[0]).unwrap(), "acc");
|
assert_eq!(buf.str(s[0]).unwrap(), "acc");
|
||||||
assert_eq!(buf.str(s[1]).unwrap(), "bii");
|
assert_eq!(buf.str(s[1]).unwrap(), "bii");
|
||||||
|
|
||||||
assert_eq!(buf.raw(l2).unwrap(), b"cooj dlpa");
|
assert_eq!(buf.raw(l2).unwrap(), b"persephonisumiolaodododo dlpa");
|
||||||
let [s2a, s2b] = buf.seq_of(l2).unwrap();
|
let [s2a, s2b] = buf.seq_of(l2).unwrap();
|
||||||
assert_eq!(buf.str(s2a).unwrap(), "cooj");
|
assert_eq!(buf.str(s2a).unwrap(), "persephonisumiolaodododo");
|
||||||
assert_eq!(buf.str(s2b).unwrap(), "dlpa");
|
assert_eq!(buf.str(s2b).unwrap(), "dlpa");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn dict() {
|
fn dict() {
|
||||||
let string =
|
let string =
|
||||||
"[ { a = plop; b = hello world }; ploplop { e=15; d=12 ;c = {key=val;key2=val2}} ]";
|
"[ { a = plop; b = hello world }; ploplop { e=15; d=12 ;c = {key=val;key2=azertyuiopazertyuiopazertyuiop}} ]";
|
||||||
eprintln!("{}", string);
|
eprintln!("{}", string);
|
||||||
let (buf, term) = decode(string.as_bytes()).unwrap();
|
let (buf, term) = decode(string.as_bytes()).unwrap();
|
||||||
buf.debug(term);
|
buf.debug(term);
|
||||||
|
@ -431,7 +454,7 @@ mod tests {
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
buf.raw(b).unwrap(),
|
buf.raw(b).unwrap(),
|
||||||
b"ploplop { e=15; d=12 ;c = {key=val;key2=val2}}"
|
b"ploplop { e=15; d=12 ;c = {key=val;key2=azertyuiopazertyuiopazertyuiop}}"
|
||||||
);
|
);
|
||||||
let [ba, bb] = buf.seq_of(b).unwrap();
|
let [ba, bb] = buf.seq_of(b).unwrap();
|
||||||
assert_eq!(buf.str(ba).unwrap(), "ploplop");
|
assert_eq!(buf.str(ba).unwrap(), "ploplop");
|
||||||
|
@ -443,7 +466,10 @@ mod tests {
|
||||||
let mut iter = buf.dict_iter(bb).unwrap();
|
let mut iter = buf.dict_iter(bb).unwrap();
|
||||||
let (k1, v1) = iter.next().unwrap();
|
let (k1, v1) = iter.next().unwrap();
|
||||||
assert_eq!(k1, "c");
|
assert_eq!(k1, "c");
|
||||||
assert_eq!(buf.raw(v1).unwrap(), b"{key=val;key2=val2}");
|
assert_eq!(
|
||||||
|
buf.raw(v1).unwrap(),
|
||||||
|
b"{key=val;key2=azertyuiopazertyuiopazertyuiop}"
|
||||||
|
);
|
||||||
let (k2, v2) = iter.next().unwrap();
|
let (k2, v2) = iter.next().unwrap();
|
||||||
assert_eq!(k2, "d");
|
assert_eq!(k2, "d");
|
||||||
assert_eq!(buf.str(v2).unwrap(), "12");
|
assert_eq!(buf.str(v2).unwrap(), "12");
|
||||||
|
|
|
@ -6,6 +6,8 @@ use crate::is_string_char;
|
||||||
|
|
||||||
pub use decode::*;
|
pub use decode::*;
|
||||||
|
|
||||||
|
/// Maximum number of bytes a string may have to be stored inline in a
/// `TTerm::StrInline`. The inline length is kept in a `u8`, so this value
/// must not exceed 255.
pub const STR_INLINE_MAX: usize = 18;
|
||||||
|
|
||||||
pub type Pos = u32;
|
pub type Pos = u32;
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug)]
|
#[derive(Clone, Copy, Debug)]
|
||||||
|
@ -32,6 +34,7 @@ pub(crate) struct IDict {
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub(crate) enum TTerm {
|
pub(crate) enum TTerm {
|
||||||
Str(IRaw),
|
Str(IRaw),
|
||||||
|
StrInline(u8, [u8; STR_INLINE_MAX]),
|
||||||
RawSeq(IRaw, ISeq),
|
RawSeq(IRaw, ISeq),
|
||||||
RawList(IRaw, ISeq),
|
RawList(IRaw, ISeq),
|
||||||
RawDict(IRaw, IDict),
|
RawDict(IRaw, IDict),
|
||||||
|
@ -79,6 +82,7 @@ impl<'a> Buf<'a> {
|
||||||
|
|
||||||
pub fn raw(&self, term: ITerm) -> Result<&[u8], TermError> {
|
pub fn raw(&self, term: ITerm) -> Result<&[u8], TermError> {
|
||||||
match self.get_term(term)? {
|
match self.get_term(term)? {
|
||||||
|
TTerm::StrInline(len, bytes) => Ok(&bytes[..*len as usize]),
|
||||||
TTerm::Str(r) | TTerm::RawSeq(r, _) | TTerm::RawList(r, _) | TTerm::RawDict(r, _) => {
|
TTerm::Str(r) | TTerm::RawSeq(r, _) | TTerm::RawList(r, _) | TTerm::RawDict(r, _) => {
|
||||||
Ok(self.get_bytes(*r))
|
Ok(self.get_bytes(*r))
|
||||||
}
|
}
|
||||||
|
@ -88,6 +92,11 @@ impl<'a> Buf<'a> {
|
||||||
|
|
||||||
pub fn str(&self, term: ITerm) -> Result<&str, TermError> {
|
pub fn str(&self, term: ITerm) -> Result<&str, TermError> {
|
||||||
match self.get_term(term)? {
|
match self.get_term(term)? {
|
||||||
|
TTerm::StrInline(len, bytes) => {
|
||||||
|
let bytes = &bytes[..*len as usize];
|
||||||
|
let s = unsafe { std::str::from_utf8_unchecked(bytes) };
|
||||||
|
Ok(s)
|
||||||
|
}
|
||||||
TTerm::Str(r) => {
|
TTerm::Str(r) => {
|
||||||
let bytes = self.get_bytes(*r);
|
let bytes = self.get_bytes(*r);
|
||||||
let s = unsafe { std::str::from_utf8_unchecked(bytes) };
|
let s = unsafe { std::str::from_utf8_unchecked(bytes) };
|
||||||
|
@ -207,11 +216,20 @@ impl<'a> Buf<'a> {
|
||||||
// ================= WRITING FUNCTIONS ================
|
// ================= WRITING FUNCTIONS ================
|
||||||
|
|
||||||
pub fn push_str(&mut self, s: &str) -> Result<ITerm, ValueError> {
|
pub fn push_str(&mut self, s: &str) -> Result<ITerm, ValueError> {
|
||||||
if !s.as_bytes().iter().copied().all(is_string_char) {
|
let b = s.as_bytes();
|
||||||
|
|
||||||
|
if !b.iter().copied().all(is_string_char) {
|
||||||
return Err(ValueError::BadString);
|
return Err(ValueError::BadString);
|
||||||
}
|
}
|
||||||
|
|
||||||
let term = TTerm::Str(self.push_bytes(s.as_bytes()));
|
let term = if b.len() <= STR_INLINE_MAX {
|
||||||
|
let mut bytes = [0u8; STR_INLINE_MAX];
|
||||||
|
bytes[..b.len()].copy_from_slice(b);
|
||||||
|
TTerm::StrInline(b.len() as u8, bytes)
|
||||||
|
} else {
|
||||||
|
TTerm::Str(self.push_bytes(b))
|
||||||
|
};
|
||||||
|
|
||||||
Ok(self.push_term(term))
|
Ok(self.push_term(term))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -355,7 +373,7 @@ impl<'a> Buf<'a> {
|
||||||
fn push_bytes(&mut self, raw: &[u8]) -> IRaw {
|
fn push_bytes(&mut self, raw: &[u8]) -> IRaw {
|
||||||
let bytes_start = self.bytes.len();
|
let bytes_start = self.bytes.len();
|
||||||
self.bytes.to_mut().extend(raw);
|
self.bytes.to_mut().extend(raw);
|
||||||
IRaw{
|
IRaw {
|
||||||
start: bytes_start as Pos,
|
start: bytes_start as Pos,
|
||||||
end: self.bytes.len() as Pos,
|
end: self.bytes.len() as Pos,
|
||||||
}
|
}
|
||||||
|
@ -365,10 +383,20 @@ impl<'a> Buf<'a> {
|
||||||
impl TTerm {
|
impl TTerm {
|
||||||
fn typename(&self) -> &'static str {
|
fn typename(&self) -> &'static str {
|
||||||
match self {
|
match self {
|
||||||
TTerm::Str(_) => "string",
|
TTerm::Str(_) | TTerm::StrInline(_, _) => "string",
|
||||||
TTerm::RawSeq(_, _) | TTerm::Seq(_) => "seq",
|
TTerm::RawSeq(_, _) | TTerm::Seq(_) => "seq",
|
||||||
TTerm::RawList(_, _) | TTerm::List(_) => "list",
|
TTerm::RawList(_, _) | TTerm::List(_) => "list",
|
||||||
TTerm::RawDict(_, _) | TTerm::Dict(_) => "dict",
|
TTerm::RawDict(_, _) | TTerm::Dict(_) => "dict",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    // A plain `use` is enough here: nothing needs to be re-exported from a
    // private, test-only module.
    use super::*;

    /// Pins the in-memory size of `TTerm` at 20 bytes, so that a change to
    /// `STR_INLINE_MAX` or to the enum's variants that alters the size is
    /// noticed when the tests run.
    #[test]
    fn test_sizeof() {
        assert_eq!(std::mem::size_of::<TTerm>(), 20);
    }
}
|
||||||
|
|
Loading…
Reference in a new issue