diff --git a/src/fragments/part.rs b/src/fragments/part.rs index 1493db9..b01f52e 100644 --- a/src/fragments/part.rs +++ b/src/fragments/part.rs @@ -1,15 +1,16 @@ use nom::{ IResult, + branch::alt, bytes::complete::{is_not, tag}, multi::many0, - sequence::{pair, tuple}, + sequence::{pair, preceded, tuple}, combinator::{not, opt, recognize}, }; use crate::fragments::mime::{Mechanism, Type}; use crate::fragments::model::MessageId; use crate::fragments::misc_token::Unstructured; -use crate::fragments::whitespace::{CRLF, obs_crlf}; +use crate::fragments::whitespace::{CRLF, headers, line, obs_crlf}; #[derive(Debug, PartialEq, Default)] pub struct PartHeader<'a> { @@ -33,7 +34,7 @@ pub enum Delimiter { pub fn boundary<'a>(boundary: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Delimiter> { move |input: &[u8]| { - let (rest, (_, _, _, last, _)) = tuple((obs_crlf, tag(b"--"), tag(boundary), opt(tag(b"--")), obs_crlf))(input)?; + let (rest, (_, _, _, last, _)) = tuple((obs_crlf, tag(b"--"), tag(boundary), opt(tag(b"--")), opt(obs_crlf)))(input)?; match last { Some(_) => Ok((rest, Delimiter::Last)), None => Ok((rest, Delimiter::Next)), @@ -41,11 +42,13 @@ pub fn boundary<'a>(boundary: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8] } } -pub fn part(input: &[u8]) -> IResult<&[u8], (PartNode, Delimiter)> { - todo!(); - // parse headers up to CRLF - // parse body up to boundary - // returns (PartNode + Delimiter) +pub fn part<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> { + move |input: &[u8]| { + recognize(many0(pair( + not(boundary(bound)), + alt((is_not(CRLF), obs_crlf)), + )))(input) + } } pub fn preamble<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> { @@ -57,18 +60,30 @@ pub fn preamble<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], & } } - -pub fn multipart<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Vec>> { +// Returns Ok even if an error is encountered while parsing +// the different mimes. +pub fn multipart<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Vec<&'a [u8]>> { move |input: &[u8]| { - - todo!(); + let (mut input_loop, _) = preamble(bound)(input)?; + let mut parts: Vec<&[u8]> = vec![]; + loop { + let input2 = match boundary(bound)(input_loop) { + Err(_) => return Ok((input_loop, parts)), + Ok((inp, Delimiter::Last)) => return Ok((inp, parts)), + Ok((inp, Delimiter::Next)) => inp, + }; + let input3 = match part(bound)(input2) { + Err(_) => return Ok((input2, parts)), + Ok((inp, part)) => { + parts.push(part); + inp + } + }; + + input_loop = input3; + } } - // skip to boundary - // if boundary last stop - // do - // --parse part (return PartNode + Delimiter) - // while boundary not last } #[cfg(test)] @@ -111,4 +126,51 @@ Field: Body )) ); } + + #[test] + fn test_part() { + assert_eq!( + part(b"simple boundary")(b"Content-type: text/plain; charset=us-ascii + +This is explicitly typed plain US-ASCII text. +It DOES end with a linebreak. + +--simple boundary-- +"), + Ok(( + &b"\n--simple boundary--\n"[..], + &b"Content-type: text/plain; charset=us-ascii\n\nThis is explicitly typed plain US-ASCII text.\nIt DOES end with a linebreak.\n"[..], + )) + ); + } + + #[test] + fn test_multipart() { + assert_eq!( + multipart(b"simple boundary")(b"This is the preamble. It is to be ignored, though it +is a handy place for composition agents to include an +explanatory note to non-MIME conformant readers. + +--simple boundary + +This is implicitly typed plain US-ASCII text. +It does NOT end with a linebreak. +--simple boundary +Content-type: text/plain; charset=us-ascii + +This is explicitly typed plain US-ASCII text. +It DOES end with a linebreak. + +--simple boundary-- + +This is the epilogue. It is also to be ignored. +"), + Ok((&b"\nThis is the epilogue. It is also to be ignored.\n"[..], + vec![ + &b"\nThis is implicitly typed plain US-ASCII text.\nIt does NOT end with a linebreak."[..], + &b"Content-type: text/plain; charset=us-ascii\n\nThis is explicitly typed plain US-ASCII text.\nIt DOES end with a linebreak.\n"[..], + ] + )), + ); + } } diff --git a/src/fragments/whitespace.rs b/src/fragments/whitespace.rs index 53a8693..717fce9 100644 --- a/src/fragments/whitespace.rs +++ b/src/fragments/whitespace.rs @@ -5,16 +5,23 @@ use nom::{ character::complete::{crlf, satisfy, space0, space1}, combinator::{opt, recognize}, multi::{many0, many1}, - sequence::{pair, tuple}, + sequence::{pair, terminated, tuple}, IResult, }; use crate::fragments::encoding::encoded_word; +/// Whitespace (space, new line, tab) content and +/// delimited content (eg. comment, line, sections, etc.) + // Bytes CRLF const CR: u8 = 0x0D; const LF: u8 = 0x0A; pub const CRLF: &[u8] = &[CR, LF]; +pub fn headers(input: &[u8]) -> IResult<&[u8], &[u8]> { + terminated(recognize(many0(line)), obs_crlf)(input) +} + pub fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> { // is_not(CRLF) is a hack, it means "is not CR or LF" // and not "is not CRLF". In other words, it continues while diff --git a/src/multipass/segment.rs b/src/multipass/segment.rs index a6d87b8..71f3656 100644 --- a/src/multipass/segment.rs +++ b/src/multipass/segment.rs @@ -1,12 +1,6 @@ -use nom::{ - combinator::recognize, - multi::many0, - sequence::terminated, -}; - use crate::error::IMFError; use crate::multipass::guess_charset; -use crate::fragments::whitespace::{obs_crlf, line}; +use crate::fragments::whitespace::headers; #[derive(Debug, PartialEq)] pub struct Parsed<'a> { @@ -15,7 +9,7 @@ pub struct Parsed<'a> { } pub fn new<'a>(buffer: &'a [u8]) -> Result, IMFError<'a>> { - terminated(recognize(many0(line)), obs_crlf)(buffer) + headers(buffer) .map_err(|e| IMFError::Segment(e)) .map(|(body, header)| Parsed { header, body }) }