inline strings

This commit is contained in:
Alex 2023-05-10 18:30:16 +02:00
parent 7cc2212786
commit 9abc7519d4
2 changed files with 68 additions and 14 deletions

View file

@ -226,8 +226,17 @@ impl<'a> Buf<'a> {
fn decode_string(&mut self, raw: IRaw) -> Result<(ITerm, IRaw), ParseError> { fn decode_string(&mut self, raw: IRaw) -> Result<(ITerm, IRaw), ParseError> {
let (string_raw, rest) = self.take_string(raw)?; let (string_raw, rest) = self.take_string(raw)?;
let string = self.push_term(TTerm::Str(string_raw));
Ok((string, rest)) let term = if (string_raw.end - string_raw.start) as usize <= STR_INLINE_MAX {
let b = self.get_bytes(string_raw);
let mut bytes = [0u8; STR_INLINE_MAX];
bytes[..b.len()].copy_from_slice(b);
TTerm::StrInline(b.len() as u8, bytes)
} else {
TTerm::Str(string_raw)
};
Ok((self.push_term(term), rest))
} }
fn take_string(&mut self, raw: IRaw) -> Result<(IRaw, IRaw), ParseError> { fn take_string(&mut self, raw: IRaw) -> Result<(IRaw, IRaw), ParseError> {
@ -297,6 +306,14 @@ impl<'a> Buf<'a> {
let term = &self.terms[i.0 as usize]; let term = &self.terms[i.0 as usize];
match term { match term {
TTerm::StrInline(len, bytes) => {
eprintln!(
"{} -> {:?} = `{}`",
i.0,
term,
debug_str(&bytes[..*len as usize])
);
}
TTerm::Str(r) => { TTerm::Str(r) => {
eprintln!( eprintln!(
"{} -> {:?} = `{}`", "{} -> {:?} = `{}`",
@ -380,7 +397,7 @@ mod tests {
#[test] #[test]
fn list_and_seq() { fn list_and_seq() {
let string = "[ [ h1; h2; h3 ]; hello world [ acc bii; cooj dlpa] ]"; let string = "[ [ h1; h2; h3 ]; hello world [ acc bii; persephonisumiolaodododo dlpa] ]";
eprintln!("{}", string); eprintln!("{}", string);
let (buf, term) = decode(string.as_bytes()).unwrap(); let (buf, term) = decode(string.as_bytes()).unwrap();
buf.debug(term); buf.debug(term);
@ -394,12 +411,18 @@ mod tests {
assert_eq!(buf.str(l[1]).unwrap(), "h2"); assert_eq!(buf.str(l[1]).unwrap(), "h2");
assert_eq!(buf.str(l[2]).unwrap(), "h3"); assert_eq!(buf.str(l[2]).unwrap(), "h3");
assert_eq!(buf.raw(b).unwrap(), b"hello world [ acc bii; cooj dlpa]"); assert_eq!(
buf.raw(b).unwrap(),
b"hello world [ acc bii; persephonisumiolaodododo dlpa]"
);
let [h, w, l] = buf.seq_of(b).unwrap(); let [h, w, l] = buf.seq_of(b).unwrap();
assert_eq!(buf.str(h).unwrap(), "hello"); assert_eq!(buf.str(h).unwrap(), "hello");
assert_eq!(buf.str(w).unwrap(), "world"); assert_eq!(buf.str(w).unwrap(), "world");
assert_eq!(buf.raw(l).unwrap(), b"[ acc bii; cooj dlpa]"); assert_eq!(
buf.raw(l).unwrap(),
b"[ acc bii; persephonisumiolaodododo dlpa]"
);
let [l1, l2] = buf.list_of(l).unwrap(); let [l1, l2] = buf.list_of(l).unwrap();
assert_eq!(buf.raw(l1).unwrap(), b"acc bii"); assert_eq!(buf.raw(l1).unwrap(), b"acc bii");
@ -408,16 +431,16 @@ mod tests {
assert_eq!(buf.str(s[0]).unwrap(), "acc"); assert_eq!(buf.str(s[0]).unwrap(), "acc");
assert_eq!(buf.str(s[1]).unwrap(), "bii"); assert_eq!(buf.str(s[1]).unwrap(), "bii");
assert_eq!(buf.raw(l2).unwrap(), b"cooj dlpa"); assert_eq!(buf.raw(l2).unwrap(), b"persephonisumiolaodododo dlpa");
let [s2a, s2b] = buf.seq_of(l2).unwrap(); let [s2a, s2b] = buf.seq_of(l2).unwrap();
assert_eq!(buf.str(s2a).unwrap(), "cooj"); assert_eq!(buf.str(s2a).unwrap(), "persephonisumiolaodododo");
assert_eq!(buf.str(s2b).unwrap(), "dlpa"); assert_eq!(buf.str(s2b).unwrap(), "dlpa");
} }
#[test] #[test]
fn dict() { fn dict() {
let string = let string =
"[ { a = plop; b = hello world }; ploplop { e=15; d=12 ;c = {key=val;key2=val2}} ]"; "[ { a = plop; b = hello world }; ploplop { e=15; d=12 ;c = {key=val;key2=azertyuiopazertyuiopazertyuiop}} ]";
eprintln!("{}", string); eprintln!("{}", string);
let (buf, term) = decode(string.as_bytes()).unwrap(); let (buf, term) = decode(string.as_bytes()).unwrap();
buf.debug(term); buf.debug(term);
@ -431,7 +454,7 @@ mod tests {
assert_eq!( assert_eq!(
buf.raw(b).unwrap(), buf.raw(b).unwrap(),
b"ploplop { e=15; d=12 ;c = {key=val;key2=val2}}" b"ploplop { e=15; d=12 ;c = {key=val;key2=azertyuiopazertyuiopazertyuiop}}"
); );
let [ba, bb] = buf.seq_of(b).unwrap(); let [ba, bb] = buf.seq_of(b).unwrap();
assert_eq!(buf.str(ba).unwrap(), "ploplop"); assert_eq!(buf.str(ba).unwrap(), "ploplop");
@ -443,7 +466,10 @@ mod tests {
let mut iter = buf.dict_iter(bb).unwrap(); let mut iter = buf.dict_iter(bb).unwrap();
let (k1, v1) = iter.next().unwrap(); let (k1, v1) = iter.next().unwrap();
assert_eq!(k1, "c"); assert_eq!(k1, "c");
assert_eq!(buf.raw(v1).unwrap(), b"{key=val;key2=val2}"); assert_eq!(
buf.raw(v1).unwrap(),
b"{key=val;key2=azertyuiopazertyuiopazertyuiop}"
);
let (k2, v2) = iter.next().unwrap(); let (k2, v2) = iter.next().unwrap();
assert_eq!(k2, "d"); assert_eq!(k2, "d");
assert_eq!(buf.str(v2).unwrap(), "12"); assert_eq!(buf.str(v2).unwrap(), "12");

View file

@ -6,6 +6,8 @@ use crate::is_string_char;
pub use decode::*; pub use decode::*;
/// Maximum length, in bytes, of a string that is stored inline in
/// `TTerm::StrInline`; it is also the size of that variant's byte array.
pub const STR_INLINE_MAX: usize = 18;
pub type Pos = u32; pub type Pos = u32;
#[derive(Clone, Copy, Debug)] #[derive(Clone, Copy, Debug)]
@ -32,6 +34,7 @@ pub(crate) struct IDict {
#[derive(Debug)] #[derive(Debug)]
pub(crate) enum TTerm { pub(crate) enum TTerm {
Str(IRaw), Str(IRaw),
StrInline(u8, [u8; STR_INLINE_MAX]),
RawSeq(IRaw, ISeq), RawSeq(IRaw, ISeq),
RawList(IRaw, ISeq), RawList(IRaw, ISeq),
RawDict(IRaw, IDict), RawDict(IRaw, IDict),
@ -79,6 +82,7 @@ impl<'a> Buf<'a> {
pub fn raw(&self, term: ITerm) -> Result<&[u8], TermError> { pub fn raw(&self, term: ITerm) -> Result<&[u8], TermError> {
match self.get_term(term)? { match self.get_term(term)? {
TTerm::StrInline(len, bytes) => Ok(&bytes[..*len as usize]),
TTerm::Str(r) | TTerm::RawSeq(r, _) | TTerm::RawList(r, _) | TTerm::RawDict(r, _) => { TTerm::Str(r) | TTerm::RawSeq(r, _) | TTerm::RawList(r, _) | TTerm::RawDict(r, _) => {
Ok(self.get_bytes(*r)) Ok(self.get_bytes(*r))
} }
@ -88,6 +92,11 @@ impl<'a> Buf<'a> {
pub fn str(&self, term: ITerm) -> Result<&str, TermError> { pub fn str(&self, term: ITerm) -> Result<&str, TermError> {
match self.get_term(term)? { match self.get_term(term)? {
TTerm::StrInline(len, bytes) => {
let bytes = &bytes[..*len as usize];
let s = unsafe { std::str::from_utf8_unchecked(bytes) };
Ok(s)
}
TTerm::Str(r) => { TTerm::Str(r) => {
let bytes = self.get_bytes(*r); let bytes = self.get_bytes(*r);
let s = unsafe { std::str::from_utf8_unchecked(bytes) }; let s = unsafe { std::str::from_utf8_unchecked(bytes) };
@ -207,11 +216,20 @@ impl<'a> Buf<'a> {
// ================= WRITING FUNCTIONS ================ // ================= WRITING FUNCTIONS ================
pub fn push_str(&mut self, s: &str) -> Result<ITerm, ValueError> { pub fn push_str(&mut self, s: &str) -> Result<ITerm, ValueError> {
if !s.as_bytes().iter().copied().all(is_string_char) { let b = s.as_bytes();
if !b.iter().copied().all(is_string_char) {
return Err(ValueError::BadString); return Err(ValueError::BadString);
} }
let term = TTerm::Str(self.push_bytes(s.as_bytes())); let term = if b.len() <= STR_INLINE_MAX {
let mut bytes = [0u8; STR_INLINE_MAX];
bytes[..b.len()].copy_from_slice(b);
TTerm::StrInline(b.len() as u8, bytes)
} else {
TTerm::Str(self.push_bytes(b))
};
Ok(self.push_term(term)) Ok(self.push_term(term))
} }
@ -355,7 +373,7 @@ impl<'a> Buf<'a> {
fn push_bytes(&mut self, raw: &[u8]) -> IRaw { fn push_bytes(&mut self, raw: &[u8]) -> IRaw {
let bytes_start = self.bytes.len(); let bytes_start = self.bytes.len();
self.bytes.to_mut().extend(raw); self.bytes.to_mut().extend(raw);
IRaw{ IRaw {
start: bytes_start as Pos, start: bytes_start as Pos,
end: self.bytes.len() as Pos, end: self.bytes.len() as Pos,
} }
@ -365,10 +383,20 @@ impl<'a> Buf<'a> {
impl TTerm { impl TTerm {
fn typename(&self) -> &'static str { fn typename(&self) -> &'static str {
match self { match self {
TTerm::Str(_) => "string", TTerm::Str(_) | TTerm::StrInline(_, _) => "string",
TTerm::RawSeq(_, _) | TTerm::Seq(_) => "seq", TTerm::RawSeq(_, _) | TTerm::Seq(_) => "seq",
TTerm::RawList(_, _) | TTerm::List(_) => "list", TTerm::RawList(_, _) | TTerm::List(_) => "list",
TTerm::RawDict(_, _) | TTerm::Dict(_) => "dict", TTerm::RawDict(_, _) | TTerm::Dict(_) => "dict",
} }
} }
} }
#[cfg(test)]
mod tests {
    pub use super::*;

    /// Pins the in-memory size of `TTerm` at 20 bytes, so that a change to a
    /// variant (or to `STR_INLINE_MAX`) that grows the enum is caught.
    #[test]
    fn test_sizeof() {
        let term_size = std::mem::size_of::<TTerm>();
        assert_eq!(term_size, 20);
    }
}