eml-codec/src/part/part.rs

224 lines
6.5 KiB
Rust
Raw Normal View History

2023-07-14 19:12:34 +02:00
use nom::{
IResult,
2023-07-17 17:14:08 +02:00
branch::alt,
2023-07-23 12:24:46 +02:00
bytes::complete::{is_not},
2023-07-17 11:44:55 +02:00
multi::many0,
2023-07-23 12:24:46 +02:00
sequence::{pair},
combinator::{map, not, recognize},
2023-07-14 19:12:34 +02:00
};
2023-07-23 12:24:46 +02:00
use crate::mime;
use crate::mime::mime::{AnyMIME};
2023-07-23 09:46:57 +02:00
use crate::rfc5322::{self as imf};
2023-07-23 12:24:46 +02:00
use crate::text::boundary::{Delimiter, boundary};
use crate::text::whitespace::obs_crlf;
use crate::text::ascii::CRLF;
use crate::header::{header, CompFieldList};
2023-07-23 09:46:57 +02:00
2023-07-23 12:24:46 +02:00
/// A parsed multipart entity: its MIME description plus the parts it contains.
pub struct Multipart<'a>(
    /// MIME description of the multipart entity itself.
    pub mime::mime::Multipart<'a>,
    /// Child parts, in the order they were pushed by the parser.
    pub Vec<AnyPart<'a>>,
);
/// A parsed embedded message: its MIME description, the IMF (RFC 5322) view of
/// its header section, and its body.
pub struct Message<'a>(
    /// MIME description of the entity that carries the message.
    pub mime::mime::Message<'a>,
    /// IMF message built from the IMF fields of the embedded header section.
    pub imf::message::Message<'a>,
    /// Body of the embedded message; boxed because `AnyPart` is recursive.
    pub Box<AnyPart<'a>>,
);
/// A textual leaf part: its MIME description and the body bytes.
pub struct Text<'a>(
    /// MIME description of the text part.
    pub mime::mime::Text<'a>,
    /// Body bytes, borrowed from the parsed input without further processing.
    pub &'a [u8],
);
/// A binary leaf part: its MIME description and the body bytes.
pub struct Binary<'a>(
    /// MIME description of the binary part.
    pub mime::mime::Binary<'a>,
    /// Body bytes, borrowed from the parsed input without further processing.
    pub &'a [u8],
);
2023-07-23 09:46:57 +02:00
2023-07-23 12:24:46 +02:00
pub enum AnyPart<'a> {
2023-07-23 09:46:57 +02:00
Mult(Multipart<'a>),
Msg(Message<'a>),
Txt(Text<'a>),
Bin(Binary<'a>),
2023-07-22 20:52:35 +02:00
}
2023-07-23 09:46:57 +02:00
pub enum MixedField<'a> {
2023-07-23 12:24:46 +02:00
MIME(mime::field::Content<'a>),
IMF(imf::field::Field<'a>),
2023-07-23 09:46:57 +02:00
}
impl<'a> MixedField<'a> {
2023-07-23 12:24:46 +02:00
pub fn mime(&self) -> Option<&mime::field::Content<'a>> {
2023-07-23 09:46:57 +02:00
match self {
2023-07-23 12:24:46 +02:00
Self::MIME(v) => Some(v),
2023-07-23 09:46:57 +02:00
_ => None,
}
}
2023-07-23 12:24:46 +02:00
pub fn to_mime(self) -> Option<mime::field::Content<'a>> {
2023-07-23 09:46:57 +02:00
match self {
2023-07-23 12:24:46 +02:00
Self::MIME(v) => Some(v),
_ => None,
}
}
pub fn imf(&self) -> Option<&imf::field::Field<'a>> {
match self {
Self::IMF(v) => Some(v),
_ => None,
}
}
pub fn to_imf(self) -> Option<imf::field::Field<'a>> {
match self {
Self::IMF(v) => Some(v),
2023-07-23 09:46:57 +02:00
_ => None,
}
}
}
2023-07-23 12:24:46 +02:00
impl<'a> CompFieldList<'a, MixedField<'a>> {
2023-07-23 09:46:57 +02:00
pub fn sections(self) -> (mime::mime::AnyMIME<'a>, imf::message::Message<'a>) {
let k = self.known();
2023-07-23 12:24:46 +02:00
let (v1, v2): (Vec<MixedField>, Vec<MixedField>) = k.into_iter().partition(|v| v.mime().is_some());
let mime = v1.into_iter().map(|v| v.to_mime()).flatten().collect::<mime::mime::AnyMIME>();
let imf = v2.into_iter().map(|v| v.to_imf()).flatten().collect::<imf::message::Message>();
2023-07-23 09:46:57 +02:00
(mime, imf)
}
}
pub fn mixed_field(input: &[u8]) -> IResult<&[u8], MixedField> {
alt((
2023-07-23 12:24:46 +02:00
map(mime::field::content, MixedField::MIME),
map(imf::field::field, MixedField::IMF),
2023-07-23 09:46:57 +02:00
))(input)
2023-07-22 14:38:43 +02:00
}
2023-07-23 12:24:46 +02:00
pub fn message<'a>(m: mime::mime::Message<'a>) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Message<'a>> {
2023-07-23 09:46:57 +02:00
move |input: &[u8]| {
let (input, fields) = header(mixed_field)(input)?;
let (in_mime, imf) = fields.sections();
let part = to_anypart(in_mime, input);
2023-07-23 12:24:46 +02:00
Ok((&[], Message(m.clone(), imf, Box::new(part))))
2023-07-23 09:46:57 +02:00
}
2023-07-22 14:38:43 +02:00
}
2023-07-14 19:12:34 +02:00
2023-07-23 12:24:46 +02:00
pub fn multipart<'a>(m: mime::mime::Multipart<'a>) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Multipart<'a>> {
let m = m.clone();
move |input| {
let bound = m.0.boundary.as_bytes();
let (mut input_loop, _) = part_raw(bound)(input)?;
2023-07-23 09:46:57 +02:00
let mut mparts: Vec<AnyPart> = vec![];
2023-07-22 16:46:36 +02:00
loop {
2023-07-23 12:24:46 +02:00
let input = match boundary(bound)(input_loop) {
Err(_) => return Ok((input_loop, Multipart(m.clone(), mparts))),
Ok((inp, Delimiter::Last)) => return Ok((inp, Multipart(m.clone(), mparts))),
2023-07-22 16:46:36 +02:00
Ok((inp, Delimiter::Next)) => inp,
};
2023-07-18 15:00:38 +02:00
2023-07-22 16:46:36 +02:00
// parse mime headers
2023-07-23 12:24:46 +02:00
let (input, fields) = header(mime::field::content)(input)?;
2023-07-22 17:40:32 +02:00
let mime = fields.to_mime();
2023-07-22 20:52:35 +02:00
2023-07-23 09:46:57 +02:00
// parse raw part
2023-07-23 12:24:46 +02:00
let (input, rpart) = part_raw(bound)(input)?;
2023-07-23 09:46:57 +02:00
2023-07-22 20:52:35 +02:00
// parse mime body
2023-07-23 12:24:46 +02:00
mparts.push(to_anypart(mime, rpart));
2023-07-14 19:12:34 +02:00
2023-07-22 16:46:36 +02:00
input_loop = input;
}
}
2023-07-18 15:00:38 +02:00
}
2023-07-23 12:24:46 +02:00
pub fn to_anypart<'a>(m: AnyMIME<'a>, rpart: &'a [u8]) -> AnyPart<'a> {
match m {
2023-07-23 09:46:57 +02:00
AnyMIME::Mult(a) => map(multipart(a), AnyPart::Mult)(rpart)
2023-07-23 12:24:46 +02:00
.map(|v| v.1)
.unwrap_or(AnyPart::Txt(Text(mime::mime::Text::default(), rpart))),
2023-07-23 09:46:57 +02:00
AnyMIME::Msg(a) => map(message(a), AnyPart::Msg)(rpart)
2023-07-23 12:24:46 +02:00
.map(|v| v.1)
.unwrap_or(AnyPart::Txt(Text(mime::mime::Text::default(), rpart))),
2023-07-23 09:46:57 +02:00
AnyMIME::Txt(a) => AnyPart::Txt(Text(a, rpart)),
AnyMIME::Bin(a) => AnyPart::Bin(Binary(a, rpart)),
}
2023-07-14 19:12:34 +02:00
}
2023-07-23 09:46:57 +02:00
2023-07-23 12:24:46 +02:00
pub fn part_raw<'a>(bound: &[u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> + '_ {
move |input| {
2023-07-17 17:14:08 +02:00
recognize(many0(pair(
not(boundary(bound)),
alt((is_not(CRLF), obs_crlf)),
)))(input)
}
2023-07-17 11:44:55 +02:00
}
2023-07-23 09:46:57 +02:00
/*
2023-07-17 11:44:55 +02:00
pub fn preamble<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
move |input: &[u8]| {
recognize(many0(tuple((
is_not(CRLF),
many0(pair(not(boundary(bound)), obs_crlf)),
))))(input)
}
2023-07-23 09:46:57 +02:00
}*/
2023-07-17 11:44:55 +02:00
2023-07-18 15:00:38 +02:00
// FIXME parse email here
2023-07-14 19:12:34 +02:00
#[cfg(test)]
mod tests {
    use super::*;

    // The inputs below are written with explicit `\n` escapes (one source line
    // per input line). `part_raw` returns a prefix of its input, so each input
    // must be exactly "consumed" followed by "remaining" from the expected
    // value; spelling out the line breaks keeps that visible and robust
    // against whitespace changes in this file.

    /// `part_raw` stops right before the line break that introduces the
    /// `--hello` boundary and ignores lines that merely look like delimiters
    /// for another boundary.
    #[test]
    fn test_preamble() {
        assert_eq!(
            part_raw(b"hello")(
                b"blip\n\
                  bloup\n\
                  \n\
                  blip\n\
                  bloup--\n\
                  --bim\n\
                  --bim--\n\
                  \n\
                  --hello\n\
                  Field: Body\n"
            ),
            Ok((
                &b"\n--hello\nField: Body\n"[..],
                &b"blip\nbloup\n\nblip\nbloup--\n--bim\n--bim--\n"[..],
            ))
        );
    }

    /// `part_raw` returns the headers and body of a part and leaves the
    /// closing delimiter in the remaining input.
    #[test]
    fn test_part_raw() {
        assert_eq!(
            part_raw(b"simple boundary")(
                b"Content-type: text/plain; charset=us-ascii\n\
                  \n\
                  This is explicitly typed plain US-ASCII text.\n\
                  It DOES end with a linebreak.\n\
                  \n\
                  --simple boundary--\n"
            ),
            Ok((
                &b"\n--simple boundary--\n"[..],
                &b"Content-type: text/plain; charset=us-ascii\n\nThis is explicitly typed plain US-ASCII text.\nIt DOES end with a linebreak.\n"[..],
            ))
        );
    }

    // NOTE(review): the test below is disabled. It calls `multipart` with a
    // byte-string boundary and expects a list of raw slices, whereas
    // `multipart` now takes a `mime::mime::Multipart` and returns a
    // `Multipart` value. It must be rewritten before it can be re-enabled;
    // the blank lines of the sample input also need to be checked against the
    // expected values when doing so.
    /*
    #[test]
    fn test_multipart() {
        assert_eq!(
            multipart(b"simple boundary")(b"This is the preamble. It is to be ignored, though it
is a handy place for composition agents to include an
explanatory note to non-MIME conformant readers.
--simple boundary
This is implicitly typed plain US-ASCII text.
It does NOT end with a linebreak.
--simple boundary
Content-type: text/plain; charset=us-ascii
This is explicitly typed plain US-ASCII text.
It DOES end with a linebreak.
--simple boundary--
This is the epilogue. It is also to be ignored.
"),
            Ok((&b"\nThis is the epilogue. It is also to be ignored.\n"[..],
                vec![
                    &b"\nThis is implicitly typed plain US-ASCII text.\nIt does NOT end with a linebreak."[..],
                    &b"Content-type: text/plain; charset=us-ascii\n\nThis is explicitly typed plain US-ASCII text.\nIt DOES end with a linebreak.\n"[..],
                ]
            )),
        );
    }
    */
}