collect raw stuff

This commit is contained in:
Quentin 2023-07-25 16:20:36 +02:00
parent 91fa0d38c3
commit 987024430b
Signed by: quentin
GPG key ID: E9602264D639FF68
4 changed files with 248 additions and 52 deletions

View file

@ -15,6 +15,9 @@ pub mod header;
/// Low-level email-specific text-based representation for data
pub mod text;
/// Manipulate buffer of bytes
mod pointers;
use nom::{IResult, combinator::into};
/// Parse a whole email including its (MIME) body

View file

@ -5,19 +5,28 @@ use crate::imf;
use crate::mime;
use crate::part::{self, AnyPart, field::MixedField};
use crate::text::boundary::{boundary, Delimiter};
use crate::pointers;
//--- Multipart
#[derive(Debug, PartialEq)]
pub struct Multipart<'a> {
pub mime: mime::MIME<'a, mime::r#type::Multipart>,
pub children: Vec<AnyPart<'a>>,
pub preamble: &'a [u8],
pub epilogue: &'a [u8],
pub raw_part_inner: &'a [u8],
pub raw_part_outer: &'a [u8],
}
impl<'a> Multipart<'a> {
pub fn with_epilogue(mut self, e: &'a [u8]) -> Self {
self.epilogue = e;
self
pub fn preamble(&self) -> &'a [u8] {
pointers::parsed(self.raw_part_outer, self.raw_part_inner)
}
pub fn epilogue(&self) -> &'a [u8] {
pointers::rest(self.raw_part_outer, self.raw_part_inner)
}
pub fn preamble_and_body(&self) -> &'a [u8] {
pointers::with_preamble(self.raw_part_outer, self.raw_part_inner)
}
pub fn body_and_epilogue(&self) -> &'a [u8] {
pointers::with_epilogue(self.raw_part_outer, self.raw_part_inner)
}
}
@ -27,9 +36,15 @@ pub fn multipart<'a>(
let m = m.clone();
move |input| {
// init
let outer_orig = input;
let bound = m.interpreted_type.boundary.as_bytes();
let (mut input_loop, preamble) = part::part_raw(bound)(input)?;
let mut mparts: Vec<AnyPart> = vec![];
// skip preamble
let (mut input_loop, _) = part::part_raw(bound)(input)?;
let inner_orig = input_loop;
loop {
let input = match boundary(bound)(input_loop) {
Err(_) => {
@ -38,8 +53,8 @@ pub fn multipart<'a>(
Multipart {
mime: m.clone(),
children: mparts,
preamble,
epilogue: &[],
raw_part_inner: pointers::parsed(inner_orig, input_loop),
raw_part_outer: pointers::parsed(outer_orig, input_loop),
},
))
}
@ -49,8 +64,8 @@ pub fn multipart<'a>(
Multipart {
mime: m.clone(),
children: mparts,
preamble,
epilogue: &[],
raw_part_inner: pointers::parsed(inner_orig, inp),
raw_part_outer: pointers::parsed(outer_orig, &outer_orig[outer_orig.len()..]),
},
))
}
@ -73,8 +88,10 @@ pub fn multipart<'a>(
let (input, rpart) = part::part_raw(bound)(input)?;
// parse mime body
mparts.push(part::to_anypart(mime, rpart));
// -- we do not keep the input as we are using the
// part_raw function as our cursor here.
let (_, part) = part::anypart(mime)(rpart)?;
mparts.push(part);
input_loop = input;
}
@ -88,23 +105,26 @@ pub struct Message<'a> {
pub mime: mime::MIME<'a, mime::r#type::DeductibleMessage>,
pub imf: imf::Imf<'a>,
pub child: Box<AnyPart<'a>>,
pub epilogue: &'a [u8],
}
impl<'a> Message<'a> {
pub fn with_epilogue(mut self, e: &'a [u8]) -> Self {
self.epilogue = e;
self
}
pub raw_part: &'a [u8],
pub raw_headers: &'a [u8],
pub raw_body: &'a [u8],
}
pub fn message<'a>(
m: mime::MIME<'a, mime::r#type::DeductibleMessage>,
) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Message<'a>> {
move |input: &[u8]| {
let orig = input;
// parse header fields
let (input, (known, unknown, bad)): (_, (Vec::<MixedField>, Vec<header::Kv>, Vec<&[u8]>)) =
header(part::field::mixed_field)(input)?;
// extract raw parts 1/2
let raw_headers = pointers::parsed(orig, input);
let body_orig = input;
// aggregate header fields
let (naive_mime, imf) = part::field::sections(known);
@ -115,15 +135,19 @@ pub fn message<'a>(
let in_mime = naive_mime.to_interpreted::<mime::WithGenericDefault>().into();
// parse this mimetype
let part = part::to_anypart(in_mime, input);
let (input, part) = part::anypart(in_mime)(input)?;
// extract raw parts 2/2
let raw_body = pointers::parsed(body_orig, input);
let raw_part = pointers::parsed(orig, input);
Ok((
&[],
input,
Message {
mime: m.clone(),
imf,
raw_part, raw_headers, raw_body,
child: Box::new(part),
epilogue: &[],
},
))
}
@ -149,8 +173,7 @@ mod tests {
fields: mime::NaiveMIME::default(),
};
assert_eq!(
multipart(base_mime.clone())(b"This is the preamble. It is to be ignored, though it
let input = b"This is the preamble. It is to be ignored, though it
is a handy place for composition agents to include an
explanatory note to non-MIME conformant readers.
@ -167,12 +190,29 @@ It DOES end with a linebreak.
--simple boundary--
This is the epilogue. It is also to be ignored.
"),
";
let inner = b"
--simple boundary
This is implicitly typed plain US-ASCII text.
It does NOT end with a linebreak.
--simple boundary
Content-type: text/plain; charset=us-ascii
This is explicitly typed plain US-ASCII text.
It DOES end with a linebreak.
--simple boundary--
";
assert_eq!(
multipart(base_mime.clone())(input),
Ok((&b"\nThis is the epilogue. It is also to be ignored.\n"[..],
Multipart {
mime: base_mime,
preamble: &b"This is the preamble. It is to be ignored, though it\nis a handy place for composition agents to include an\nexplanatory note to non-MIME conformant readers.\n"[..],
epilogue: &b""[..],
raw_part_outer: input,
raw_part_inner: inner,
children: vec![
AnyPart::Txt(Text {
mime: mime::MIME {
@ -259,6 +299,80 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
"#
.as_bytes();
let hdrs = br#"Date: Sat, 8 Jul 2023 07:14:29 +0200
From: Grrrnd Zero <grrrndzero@example.org>
To: John Doe <jdoe@machine.example>
CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>
Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
X-Unknown: something something
Bad entry
on multiple lines
Message-ID: <NTAxNzA2AC47634Y366BAMTY4ODc5MzQyODY0ODY5@www.grrrndzero.org>
MIME-Version: 1.0
Content-Type: multipart/alternative;
boundary="b1_e376dc71bafc953c0b0fdeb9983a9956"
Content-Transfer-Encoding: 7bit
"#;
let body = br#"This is a multi-part message in MIME format.
--b1_e376dc71bafc953c0b0fdeb9983a9956
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: quoted-printable
GZ
OoOoO
oOoOoOoOo
oOoOoOoOoOoOoOoOo
oOoOoOoOoOoOoOoOoOoOoOo
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
--b1_e376dc71bafc953c0b0fdeb9983a9956
Content-Type: text/html; charset=us-ascii
<div style="text-align: center;"><strong>GZ</strong><br />
OoOoO<br />
oOoOoOoOo<br />
oOoOoOoOoOoOoOoOo<br />
oOoOoOoOoOoOoOoOoOoOoOo<br />
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo<br />
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
</div>
--b1_e376dc71bafc953c0b0fdeb9983a9956--
"#;
let inner = br#"
--b1_e376dc71bafc953c0b0fdeb9983a9956
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: quoted-printable
GZ
OoOoO
oOoOoOoOo
oOoOoOoOoOoOoOoOo
oOoOoOoOoOoOoOoOoOoOoOo
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
--b1_e376dc71bafc953c0b0fdeb9983a9956
Content-Type: text/html; charset=us-ascii
<div style="text-align: center;"><strong>GZ</strong><br />
OoOoO<br />
oOoOoOoOo<br />
oOoOoOoOoOoOoOoOo<br />
oOoOoOoOoOoOoOoOoOoOoOo<br />
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo<br />
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
</div>
--b1_e376dc71bafc953c0b0fdeb9983a9956--
"#;
let base_mime = mime::MIME::<mime::r#type::DeductibleMessage>::default();
assert_eq!(
message(base_mime.clone())(fullmail),
@ -266,7 +380,9 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
&[][..],
Message {
mime: base_mime,
epilogue: &b""[..],
raw_part: fullmail,
raw_headers: hdrs,
raw_body: body,
imf: imf::Imf {
date: Some(FixedOffset::east_opt(2 * 3600)
.unwrap()
@ -361,8 +477,8 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
..mime::NaiveMIME::default()
},
},
preamble: &b"This is a multi-part message in MIME format.\n"[..],
epilogue: &b""[..],
raw_part_inner: inner,
raw_part_outer: body,
children: vec![
AnyPart::Txt(Text {
mime: mime::MIME {

View file

@ -59,29 +59,51 @@ impl<'a> AnyPart<'a> {
}
}
}
impl<'a> From<Multipart<'a>> for AnyPart<'a> {
fn from(m: Multipart<'a>) -> Self {
Self::Mult(m)
}
}
impl<'a> From<Message<'a>> for AnyPart<'a> {
fn from(m: Message<'a>) -> Self {
Self::Msg(m)
}
}
pub fn to_anypart<'a>(m: AnyMIME<'a>, rpart: &'a [u8]) -> AnyPart<'a> {
match m {
AnyMIME::Mult(a) => multipart(a)(rpart)
.map(|(rest, multi)| AnyPart::Mult(multi.with_epilogue(rest)))
.unwrap_or(AnyPart::Txt(Text {
mime: mime::MIME::<mime::r#type::DeductibleText>::default(),
body: rpart,
})),
AnyMIME::Msg(a) => message(a)(rpart)
.map(|(rest, msg)| AnyPart::Msg(msg.with_epilogue(rest)))
.unwrap_or(AnyPart::Txt(Text {
mime: mime::MIME::<mime::r#type::DeductibleText>::default(),
body: rpart,
})),
AnyMIME::Txt(a) => AnyPart::Txt(Text {
mime: a,
body: rpart,
}),
AnyMIME::Bin(a) => AnyPart::Bin(Binary {
mime: a,
body: rpart,
}),
/// Build a parser for a part whose kind is selected by the MIME value `m`.
///
/// Multipart and message parts are delegated to their dedicated parsers;
/// when such a parser fails, the whole input is kept as a text part
/// carrying a default text MIME. Text and binary parts simply wrap the
/// input as their body.
///
/// ## Note
///
/// Multiparts are a bit special: they have a clearly delimited beginning
/// and end, whereas every other kind of part extends to the end of the
/// buffer. Whatever the kind, the returned remaining input is always the
/// empty slice located at the end of `input`.
pub fn anypart<'a>(m: AnyMIME<'a>) -> impl FnOnce(&'a [u8]) -> IResult<&'a [u8], AnyPart<'a>> {
    move |input| {
        // Degraded representation used when a structured parser rejects the input.
        let as_plain_text = || {
            AnyPart::Txt(Text {
                mime: mime::MIME::<mime::r#type::DeductibleText>::default(),
                body: input,
            })
        };

        let part = match m {
            AnyMIME::Mult(a) => multipart(a)(input)
                .map(|(_, multi)| AnyPart::from(multi))
                .unwrap_or(as_plain_text()),
            AnyMIME::Msg(a) => message(a)(input)
                .map(|(_, msg)| AnyPart::from(msg))
                .unwrap_or(as_plain_text()),
            AnyMIME::Txt(a) => AnyPart::Txt(Text {
                mime: a,
                body: input,
            }),
            AnyMIME::Bin(a) => AnyPart::Bin(Binary {
                mime: a,
                body: input,
            }),
        };

        // This function always consumes the whole input
        let (_, nothing_left) = input.split_at(input.len());
        Ok((nothing_left, part))
    }
}

55
src/pointers.rs Normal file
View file

@ -0,0 +1,55 @@
/// Returns the leading part of `input` located before the start of `rest`.
///
/// The cut position is the distance between the start addresses of the two
/// slices, so `rest` is expected to point inside `input`.
///
/// # Panics
///
/// Panics when the computed position is greater than `input.len()`.
/// A `rest` starting before `input` makes the subtraction overflow.
pub fn parsed<'a>(input: &'a [u8], rest: &'a [u8]) -> &'a [u8] {
    let base = input.as_ptr() as usize;
    let cut = rest.as_ptr() as usize;
    // Elements are single bytes: the address distance is directly an index.
    let consumed = cut - base;
    assert!(consumed <= input.len());
    input.split_at(consumed).0
}
/// Returns the trailing part of `input` located after the end of `parsed`.
///
/// The cut position is the distance between the start address of `input`
/// and the end address of `parsed`, so `parsed` is expected to point
/// inside `input`.
///
/// # Panics
///
/// Panics when the computed position is greater than `input.len()`.
/// A `parsed` ending before the start of `input` makes the subtraction
/// overflow.
pub fn rest<'a>(input: &'a [u8], parsed: &'a [u8]) -> &'a [u8] {
    let base = input.as_ptr() as usize;
    let parsed_end = parsed.as_ptr_range().end as usize;
    // Elements are single bytes: the address distance is directly an index.
    let resume_at = parsed_end - base;
    assert!(resume_at <= input.len());
    input.split_at(resume_at).1
}
/// Returns `input` from its beginning up to the end of `parsed`, i.e.
/// `parsed` together with everything that precedes it in `input`.
///
/// The cut position is the distance between the start address of `input`
/// and the end address of `parsed`, so `parsed` is expected to point
/// inside `input`.
///
/// # Panics
///
/// Panics when the computed position is greater than `input.len()`.
/// A `parsed` ending before the start of `input` makes the subtraction
/// overflow.
pub fn with_preamble<'a>(input: &'a [u8], parsed: &'a [u8]) -> &'a [u8] {
    let base = input.as_ptr() as usize;
    let parsed_end = parsed.as_ptr_range().end as usize;
    // Elements are single bytes: the address distance is directly an index.
    let stop_at = parsed_end - base;
    assert!(stop_at <= input.len());
    input.split_at(stop_at).0
}
/// Returns `input` from the start of `rest` up to its end, i.e. `rest`
/// together with everything that follows it in `input`.
///
/// The cut position is the distance between the start addresses of the two
/// slices, so `rest` is expected to point inside `input`.
///
/// # Panics
///
/// Panics when the computed position is greater than `input.len()`.
/// A `rest` starting before `input` makes the subtraction overflow.
pub fn with_epilogue<'a>(input: &'a [u8], rest: &'a [u8]) -> &'a [u8] {
    let base = input.as_ptr() as usize;
    let begin = rest.as_ptr() as usize;
    // Elements are single bytes: the address distance is directly an index.
    let skip = begin - base;
    assert!(skip <= input.len());
    input.split_at(skip).1
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the four slicing helpers against one buffer and a sub-slice
    /// taken from its middle.
    #[test]
    fn test_all() {
        let buffer = b"aa bb cc";
        let middle = &buffer[3..5];
        assert_eq!(middle, b"bb");

        // what comes before / after the sub-slice
        assert_eq!(parsed(buffer, middle), b"aa ");
        assert_eq!(rest(buffer, middle), b" cc");

        // the sub-slice extended to the left / to the right
        assert_eq!(with_preamble(buffer, middle), b"aa bb");
        assert_eq!(with_epilogue(buffer, middle), b"bb cc");
    }
}