diff --git a/src/buf/decode.rs b/src/buf/decode.rs index 8676f4f..39dd6c0 100644 --- a/src/buf/decode.rs +++ b/src/buf/decode.rs @@ -226,8 +226,17 @@ impl<'a> Buf<'a> { fn decode_string(&mut self, raw: IRaw) -> Result<(ITerm, IRaw), ParseError> { let (string_raw, rest) = self.take_string(raw)?; - let string = self.push_term(TTerm::Str(string_raw)); - Ok((string, rest)) + + let term = if (string_raw.end - string_raw.start) as usize <= STR_INLINE_MAX { + let b = self.get_bytes(string_raw); + let mut bytes = [0u8; STR_INLINE_MAX]; + bytes[..b.len()].copy_from_slice(b); + TTerm::StrInline(b.len() as u8, bytes) + } else { + TTerm::Str(string_raw) + }; + + Ok((self.push_term(term), rest)) } fn take_string(&mut self, raw: IRaw) -> Result<(IRaw, IRaw), ParseError> { @@ -297,6 +306,14 @@ impl<'a> Buf<'a> { let term = &self.terms[i.0 as usize]; match term { + TTerm::StrInline(len, bytes) => { + eprintln!( + "{} -> {:?} = `{}`", + i.0, + term, + debug_str(&bytes[..*len as usize]) + ); + } TTerm::Str(r) => { eprintln!( "{} -> {:?} = `{}`", @@ -380,7 +397,7 @@ mod tests { #[test] fn list_and_seq() { - let string = "[ [ h1; h2; h3 ]; hello world [ acc bii; cooj dlpa] ]"; + let string = "[ [ h1; h2; h3 ]; hello world [ acc bii; persephonisumiolaodododo dlpa] ]"; eprintln!("{}", string); let (buf, term) = decode(string.as_bytes()).unwrap(); buf.debug(term); @@ -394,12 +411,18 @@ mod tests { assert_eq!(buf.str(l[1]).unwrap(), "h2"); assert_eq!(buf.str(l[2]).unwrap(), "h3"); - assert_eq!(buf.raw(b).unwrap(), b"hello world [ acc bii; cooj dlpa]"); + assert_eq!( + buf.raw(b).unwrap(), + b"hello world [ acc bii; persephonisumiolaodododo dlpa]" + ); let [h, w, l] = buf.seq_of(b).unwrap(); assert_eq!(buf.str(h).unwrap(), "hello"); assert_eq!(buf.str(w).unwrap(), "world"); - assert_eq!(buf.raw(l).unwrap(), b"[ acc bii; cooj dlpa]"); + assert_eq!( + buf.raw(l).unwrap(), + b"[ acc bii; persephonisumiolaodododo dlpa]" + ); let [l1, l2] = buf.list_of(l).unwrap(); 
assert_eq!(buf.raw(l1).unwrap(), b"acc bii"); @@ -408,16 +431,16 @@ mod tests { assert_eq!(buf.str(s[0]).unwrap(), "acc"); assert_eq!(buf.str(s[1]).unwrap(), "bii"); - assert_eq!(buf.raw(l2).unwrap(), b"cooj dlpa"); + assert_eq!(buf.raw(l2).unwrap(), b"persephonisumiolaodododo dlpa"); let [s2a, s2b] = buf.seq_of(l2).unwrap(); - assert_eq!(buf.str(s2a).unwrap(), "cooj"); + assert_eq!(buf.str(s2a).unwrap(), "persephonisumiolaodododo"); assert_eq!(buf.str(s2b).unwrap(), "dlpa"); } #[test] fn dict() { let string = - "[ { a = plop; b = hello world }; ploplop { e=15; d=12 ;c = {key=val;key2=val2}} ]"; + "[ { a = plop; b = hello world }; ploplop { e=15; d=12 ;c = {key=val;key2=azertyuiopazertyuiopazertyuiop}} ]"; eprintln!("{}", string); let (buf, term) = decode(string.as_bytes()).unwrap(); buf.debug(term); @@ -431,7 +454,7 @@ mod tests { assert_eq!( buf.raw(b).unwrap(), - b"ploplop { e=15; d=12 ;c = {key=val;key2=val2}}" + b"ploplop { e=15; d=12 ;c = {key=val;key2=azertyuiopazertyuiopazertyuiop}}" ); let [ba, bb] = buf.seq_of(b).unwrap(); assert_eq!(buf.str(ba).unwrap(), "ploplop"); @@ -443,7 +466,10 @@ mod tests { let mut iter = buf.dict_iter(bb).unwrap(); let (k1, v1) = iter.next().unwrap(); assert_eq!(k1, "c"); - assert_eq!(buf.raw(v1).unwrap(), b"{key=val;key2=val2}"); + assert_eq!( + buf.raw(v1).unwrap(), + b"{key=val;key2=azertyuiopazertyuiopazertyuiop}" + ); let (k2, v2) = iter.next().unwrap(); assert_eq!(k2, "d"); assert_eq!(buf.str(v2).unwrap(), "12"); diff --git a/src/buf/mod.rs b/src/buf/mod.rs index 72c4ee1..ccfb755 100644 --- a/src/buf/mod.rs +++ b/src/buf/mod.rs @@ -6,6 +6,8 @@ use crate::is_string_char; pub use decode::*; +pub const STR_INLINE_MAX: usize = 18; + pub type Pos = u32; #[derive(Clone, Copy, Debug)] @@ -32,6 +34,7 @@ pub(crate) struct IDict { #[derive(Debug)] pub(crate) enum TTerm { Str(IRaw), + StrInline(u8, [u8; STR_INLINE_MAX]), RawSeq(IRaw, ISeq), RawList(IRaw, ISeq), RawDict(IRaw, IDict), @@ -79,6 +82,7 @@ impl<'a> Buf<'a> { pub fn 
raw(&self, term: ITerm) -> Result<&[u8], TermError> { match self.get_term(term)? { + TTerm::StrInline(len, bytes) => Ok(&bytes[..*len as usize]), TTerm::Str(r) | TTerm::RawSeq(r, _) | TTerm::RawList(r, _) | TTerm::RawDict(r, _) => { Ok(self.get_bytes(*r)) } @@ -88,6 +92,11 @@ impl<'a> Buf<'a> { pub fn str(&self, term: ITerm) -> Result<&str, TermError> { match self.get_term(term)? { + TTerm::StrInline(len, bytes) => { + let bytes = &bytes[..*len as usize]; + let s = unsafe { std::str::from_utf8_unchecked(bytes) }; + Ok(s) + } TTerm::Str(r) => { let bytes = self.get_bytes(*r); let s = unsafe { std::str::from_utf8_unchecked(bytes) }; @@ -207,11 +216,20 @@ impl<'a> Buf<'a> { // ================= WRITING FUNCTIONS ================ pub fn push_str(&mut self, s: &str) -> Result<ITerm, ValueError> { - if !s.as_bytes().iter().copied().all(is_string_char) { + let b = s.as_bytes(); + + if !b.iter().copied().all(is_string_char) { return Err(ValueError::BadString); } - let term = TTerm::Str(self.push_bytes(s.as_bytes())); + let term = if b.len() <= STR_INLINE_MAX { + let mut bytes = [0u8; STR_INLINE_MAX]; + bytes[..b.len()].copy_from_slice(b); + TTerm::StrInline(b.len() as u8, bytes) + } else { + TTerm::Str(self.push_bytes(b)) + }; + Ok(self.push_term(term)) } @@ -355,7 +373,7 @@ impl<'a> Buf<'a> { fn push_bytes(&mut self, raw: &[u8]) -> IRaw { let bytes_start = self.bytes.len(); self.bytes.to_mut().extend(raw); - IRaw{ + IRaw { start: bytes_start as Pos, end: self.bytes.len() as Pos, } @@ -365,10 +383,20 @@ impl<'a> Buf<'a> { impl TTerm { fn typename(&self) -> &'static str { match self { - TTerm::Str(_) => "string", + TTerm::Str(_) | TTerm::StrInline(_, _) => "string", TTerm::RawSeq(_, _) | TTerm::Seq(_) => "seq", TTerm::RawList(_, _) | TTerm::List(_) => "list", TTerm::RawDict(_, _) | TTerm::Dict(_) => "dict", } } } + +#[cfg(test)] +mod tests { + pub use super::*; + + #[test] + fn test_sizeof() { + assert_eq!(std::mem::size_of::<TTerm>(), 20); + } +}