diff --git a/src/lib.rs b/src/lib.rs index fa5440a..b133b9e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,6 +15,9 @@ pub mod header; /// Low-level email-specific text-based representation for data pub mod text; +/// Manipulate buffer of bytes +mod pointers; + use nom::{IResult, combinator::into}; /// Parse a whole email including its (MIME) body diff --git a/src/part/composite.rs b/src/part/composite.rs index c12cfa7..12c3dd1 100644 --- a/src/part/composite.rs +++ b/src/part/composite.rs @@ -5,19 +5,28 @@ use crate::imf; use crate::mime; use crate::part::{self, AnyPart, field::MixedField}; use crate::text::boundary::{boundary, Delimiter}; +use crate::pointers; //--- Multipart #[derive(Debug, PartialEq)] pub struct Multipart<'a> { pub mime: mime::MIME<'a, mime::r#type::Multipart>, pub children: Vec>, - pub preamble: &'a [u8], - pub epilogue: &'a [u8], + pub raw_part_inner: &'a [u8], + pub raw_part_outer: &'a [u8], } impl<'a> Multipart<'a> { - pub fn with_epilogue(mut self, e: &'a [u8]) -> Self { - self.epilogue = e; - self + pub fn preamble(&self) -> &'a [u8] { + pointers::parsed(self.raw_part_outer, self.raw_part_inner) + } + pub fn epilogue(&self) -> &'a [u8] { + pointers::rest(self.raw_part_outer, self.raw_part_inner) + } + pub fn preamble_and_body(&self) -> &'a [u8] { + pointers::with_preamble(self.raw_part_outer, self.raw_part_inner) + } + pub fn body_and_epilogue(&self) -> &'a [u8] { + pointers::with_epilogue(self.raw_part_outer, self.raw_part_inner) } } @@ -27,9 +36,15 @@ pub fn multipart<'a>( let m = m.clone(); move |input| { + // init + let outer_orig = input; let bound = m.interpreted_type.boundary.as_bytes(); - let (mut input_loop, preamble) = part::part_raw(bound)(input)?; let mut mparts: Vec = vec![]; + + // skip preamble + let (mut input_loop, _) = part::part_raw(bound)(input)?; + let inner_orig = input_loop; + loop { let input = match boundary(bound)(input_loop) { Err(_) => { @@ -38,8 +53,8 @@ pub fn multipart<'a>( Multipart { mime: m.clone(), children: mparts, - preamble, - epilogue: &[], + raw_part_inner: pointers::parsed(inner_orig, input_loop), + raw_part_outer: pointers::parsed(outer_orig, input_loop), }, )) } @@ -49,8 +64,8 @@ pub fn multipart<'a>( Multipart { mime: m.clone(), children: mparts, - preamble, - epilogue: &[], + raw_part_inner: pointers::parsed(inner_orig, inp), + raw_part_outer: pointers::parsed(outer_orig, &outer_orig[outer_orig.len()..]), }, )) } @@ -73,8 +88,10 @@ pub fn multipart<'a>( let (input, rpart) = part::part_raw(bound)(input)?; // parse mime body - mparts.push(part::to_anypart(mime, rpart)); - + // -- we do not keep the input as we are using the + // part_raw function as our cursor here. + let (_, part) = part::anypart(mime)(rpart)?; + mparts.push(part); input_loop = input; } @@ -88,23 +105,26 @@ pub struct Message<'a> { pub mime: mime::MIME<'a, mime::r#type::DeductibleMessage>, pub imf: imf::Imf<'a>, pub child: Box>, - pub epilogue: &'a [u8], + + pub raw_part: &'a [u8], + pub raw_headers: &'a [u8], + pub raw_body: &'a [u8], } -impl<'a> Message<'a> { - pub fn with_epilogue(mut self, e: &'a [u8]) -> Self { - self.epilogue = e; - self - } -} pub fn message<'a>( m: mime::MIME<'a, mime::r#type::DeductibleMessage>, ) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Message<'a>> { move |input: &[u8]| { + let orig = input; + // parse header fields let (input, (known, unknown, bad)): (_, (Vec::, Vec, Vec<&[u8]>)) = header(part::field::mixed_field)(input)?; + // extract raw parts 1/2 + let raw_headers = pointers::parsed(orig, input); + let body_orig = input; + // aggregate header fields let (naive_mime, imf) = part::field::sections(known); @@ -115,15 +135,19 @@ pub fn message<'a>( let in_mime = naive_mime.to_interpreted::().into(); // parse this mimetype - let part = part::to_anypart(in_mime, input); + let (input, part) = part::anypart(in_mime)(input)?; + + // extract raw parts 2/2 + let raw_body = pointers::parsed(body_orig, input); + let raw_part = pointers::parsed(orig, input); Ok(( - &[], + input, Message { mime: m.clone(), imf, + raw_part, raw_headers, raw_body, child: Box::new(part), - epilogue: &[], }, )) } @@ -149,8 +173,7 @@ mod tests { fields: mime::NaiveMIME::default(), }; - assert_eq!( - multipart(base_mime.clone())(b"This is the preamble. It is to be ignored, though it + let input = b"This is the preamble. It is to be ignored, though it is a handy place for composition agents to include an explanatory note to non-MIME conformant readers. @@ -167,12 +190,29 @@ It DOES end with a linebreak. --simple boundary-- This is the epilogue. It is also to be ignored. -"), +"; + + let inner = b" +--simple boundary + +This is implicitly typed plain US-ASCII text. +It does NOT end with a linebreak. +--simple boundary +Content-type: text/plain; charset=us-ascii + +This is explicitly typed plain US-ASCII text. +It DOES end with a linebreak. + +--simple boundary-- +"; + + assert_eq!( + multipart(base_mime.clone())(input), Ok((&b"\nThis is the epilogue. It is also to be ignored.\n"[..], Multipart { mime: base_mime, - preamble: &b"This is the preamble. It is to be ignored, though it\nis a handy place for composition agents to include an\nexplanatory note to non-MIME conformant readers.\n"[..], - epilogue: &b""[..], + raw_part_outer: input, + raw_part_inner: inner, children: vec![ AnyPart::Txt(Text { mime: mime::MIME { @@ -259,6 +299,80 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
"# .as_bytes(); + let hdrs = br#"Date: Sat, 8 Jul 2023 07:14:29 +0200 +From: Grrrnd Zero +To: John Doe +CC: =?ISO-8859-1?Q?Andr=E9?= Pirard +Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= + =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?= +X-Unknown: something something +Bad entry + on multiple lines +Message-ID: +MIME-Version: 1.0 +Content-Type: multipart/alternative; + boundary="b1_e376dc71bafc953c0b0fdeb9983a9956" +Content-Transfer-Encoding: 7bit + +"#; + + let body = br#"This is a multi-part message in MIME format. + +--b1_e376dc71bafc953c0b0fdeb9983a9956 +Content-Type: text/plain; charset=utf-8 +Content-Transfer-Encoding: quoted-printable + +GZ +OoOoO +oOoOoOoOo +oOoOoOoOoOoOoOoOo +oOoOoOoOoOoOoOoOoOoOoOo +oOoOoOoOoOoOoOoOoOoOoOoOoOoOo +OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO + +--b1_e376dc71bafc953c0b0fdeb9983a9956 +Content-Type: text/html; charset=us-ascii + +
GZ
+OoOoO
+oOoOoOoOo
+oOoOoOoOoOoOoOoOo
+oOoOoOoOoOoOoOoOoOoOoOo
+oOoOoOoOoOoOoOoOoOoOoOoOoOoOo
+OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
+
+ +--b1_e376dc71bafc953c0b0fdeb9983a9956-- +"#; + + let inner = br#" +--b1_e376dc71bafc953c0b0fdeb9983a9956 +Content-Type: text/plain; charset=utf-8 +Content-Transfer-Encoding: quoted-printable + +GZ +OoOoO +oOoOoOoOo +oOoOoOoOoOoOoOoOo +oOoOoOoOoOoOoOoOoOoOoOo +oOoOoOoOoOoOoOoOoOoOoOoOoOoOo +OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO + +--b1_e376dc71bafc953c0b0fdeb9983a9956 +Content-Type: text/html; charset=us-ascii + +
GZ
+OoOoO
+oOoOoOoOo
+oOoOoOoOoOoOoOoOo
+oOoOoOoOoOoOoOoOoOoOoOo
+oOoOoOoOoOoOoOoOoOoOoOoOoOoOo
+OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
+
+ +--b1_e376dc71bafc953c0b0fdeb9983a9956-- +"#; + let base_mime = mime::MIME::::default(); assert_eq!( message(base_mime.clone())(fullmail), @@ -266,7 +380,9 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
&[][..], Message { mime: base_mime, - epilogue: &b""[..], + raw_part: fullmail, + raw_headers: hdrs, + raw_body: body, imf: imf::Imf { date: Some(FixedOffset::east_opt(2 * 3600) .unwrap() @@ -361,8 +477,8 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
..mime::NaiveMIME::default() }, }, - preamble: &b"This is a multi-part message in MIME format.\n"[..], - epilogue: &b""[..], + raw_part_inner: inner, + raw_part_outer: body, children: vec![ AnyPart::Txt(Text { mime: mime::MIME { diff --git a/src/part/mod.rs b/src/part/mod.rs index 543cf2d..1d91f91 100644 --- a/src/part/mod.rs +++ b/src/part/mod.rs @@ -59,29 +59,51 @@ impl<'a> AnyPart<'a> { } } } +impl<'a> From> for AnyPart<'a> { + fn from(m: Multipart<'a>) -> Self { + Self::Mult(m) + } +} +impl<'a> From> for AnyPart<'a> { + fn from(m: Message<'a>) -> Self { + Self::Msg(m) + } +} -pub fn to_anypart<'a>(m: AnyMIME<'a>, rpart: &'a [u8]) -> AnyPart<'a> { - match m { - AnyMIME::Mult(a) => multipart(a)(rpart) - .map(|(rest, multi)| AnyPart::Mult(multi.with_epilogue(rest))) - .unwrap_or(AnyPart::Txt(Text { - mime: mime::MIME::::default(), - body: rpart, - })), - AnyMIME::Msg(a) => message(a)(rpart) - .map(|(rest, msg)| AnyPart::Msg(msg.with_epilogue(rest))) - .unwrap_or(AnyPart::Txt(Text { - mime: mime::MIME::::default(), - body: rpart, - })), - AnyMIME::Txt(a) => AnyPart::Txt(Text { - mime: a, - body: rpart, - }), - AnyMIME::Bin(a) => AnyPart::Bin(Binary { - mime: a, - body: rpart, - }), +/// Parse any type of part +/// +/// ## Note +/// +/// Multiparts are a bit special as they have a clearly delimited beginning +/// and end contrary to all the other parts that are going up to the end of the buffer +pub fn anypart<'a>(m: AnyMIME<'a>) -> impl FnOnce(&'a [u8]) -> IResult<&'a [u8], AnyPart<'a>> { + move |input| { + let part = match m { + AnyMIME::Mult(a) => multipart(a)(input) + .map(|(_, multi)| + multi.into()) + .unwrap_or(AnyPart::Txt(Text { + mime: mime::MIME::::default(), + body: input, + })), + AnyMIME::Msg(a) => message(a)(input) + .map(|(_, msg)| msg.into()) + .unwrap_or(AnyPart::Txt(Text { + mime: mime::MIME::::default(), + body: input, + })), + AnyMIME::Txt(a) => AnyPart::Txt(Text { + mime: a, + body: input, + }), + AnyMIME::Bin(a) => AnyPart::Bin(Binary { + mime: a, + body: input, + }), + }; + + // This function always consumes the whole input + Ok((&input[input.len()..], part)) } } diff --git a/src/pointers.rs b/src/pointers.rs new file mode 100644 index 0000000..6001bc7 --- /dev/null +++ b/src/pointers.rs @@ -0,0 +1,55 @@ +pub fn parsed<'a>(input: &'a [u8], rest: &'a [u8]) -> &'a [u8] { + let start = input.as_ptr(); + let offset = rest.as_ptr(); + let idx = (offset as usize - start as usize) / std::mem::size_of::(); + assert!(idx <= input.len()); + &input[..idx] +} + +pub fn rest<'a>(input: &'a [u8], parsed: &'a [u8]) -> &'a [u8] { + let start = input.as_ptr(); + let offset = (&parsed[parsed.len()..]).as_ptr(); + let idx = (offset as usize - start as usize) / std::mem::size_of::(); + assert!(idx <= input.len()); + &input[idx..] +} + +pub fn with_preamble<'a>(input: &'a [u8], parsed: &'a [u8]) -> &'a [u8] { + let start = input.as_ptr(); + let offset = (&parsed[parsed.len()..]).as_ptr(); + let idx = (offset as usize - start as usize) / std::mem::size_of::(); + assert!(idx <= input.len()); + &input[..idx] +} + +pub fn with_epilogue<'a>(input: &'a [u8], rest: &'a [u8]) -> &'a [u8] { + let start = input.as_ptr(); + let offset = rest.as_ptr(); + let idx = (offset as usize - start as usize) / std::mem::size_of::(); + assert!(idx <= input.len()); + &input[idx..] +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_all() { + let outer = b"aa bb cc"; + let inner = &outer[3..5]; + assert_eq!(inner, b"bb"); + + let p = parsed(outer, inner); + assert_eq!(p, b"aa "); + + let r = rest(outer, inner); + assert_eq!(r, b" cc"); + + let wp = with_preamble(outer, inner); + assert_eq!(wp, b"aa bb"); + + let we = with_epilogue(outer, inner); + assert_eq!(we, b"bb cc"); + } +}