wip parts

2023-07-17 17:14:08 +02:00 · 2023-07-17 17:14:08 +02:00 · 16a5897f4e
commit 16a5897f4e
parent aa2c741921
3 changed files with 89 additions and 26 deletions
--- a/src/fragments/part.rs
+++ b/src/fragments/part.rs
@ -1,15 +1,16 @@
 use nom::{
    IResult,
    branch::alt,
    bytes::complete::{is_not, tag},
    multi::many0,
-    sequence::{pair, tuple},
+    sequence::{pair, preceded, tuple},
    combinator::{not, opt, recognize},
 };
 use crate::fragments::mime::{Mechanism, Type};
 use crate::fragments::model::MessageId;
 use crate::fragments::misc_token::Unstructured;
-use crate::fragments::whitespace::{CRLF, obs_crlf};
+use crate::fragments::whitespace::{CRLF, headers, line, obs_crlf};
 #[derive(Debug, PartialEq, Default)]
 pub struct PartHeader<'a> {
@ -33,7 +34,7 @@ pub enum Delimiter {
 pub fn boundary<'a>(boundary: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Delimiter> {
    move |input: &[u8]| {
-        let (rest, (_, _, _, last, _)) = tuple((obs_crlf, tag(b"--"), tag(boundary), opt(tag(b"--")), obs_crlf))(input)?;
+        let (rest, (_, _, _, last, _)) = tuple((obs_crlf, tag(b"--"), tag(boundary), opt(tag(b"--")), opt(obs_crlf)))(input)?;
        match last {
            Some(_) => Ok((rest, Delimiter::Last)),
            None => Ok((rest, Delimiter::Next)),
@ -41,11 +42,13 @@ pub fn boundary<'a>(boundary: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8]
    }
 }
-pub fn part(input: &[u8]) -> IResult<&[u8], (PartNode, Delimiter)> {
+pub fn part<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
-    todo!();
+    move |input: &[u8]| {
-    // parse headers up to CRLF
+        recognize(many0(pair(
-    // parse body up to boundary
+            not(boundary(bound)),
-    // returns (PartNode + Delimiter)
+            alt((is_not(CRLF), obs_crlf)),
        )))(input)
    }
 }
 pub fn preamble<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
@ -57,18 +60,30 @@ pub fn preamble<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &
    }
 }
-
+// Once the preamble has been skipped, returns Ok with the parts collected
-pub fn multipart<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Vec<PartNode<'a>>> {
+// so far, even if a boundary or a part fails to parse.
 pub fn multipart<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Vec<&'a [u8]>> {
    move |input: &[u8]| {
-        
+        let (mut input_loop, _) = preamble(bound)(input)?;
-        todo!();
+        let mut parts: Vec<&[u8]> = vec![];
        loop {
            let input2 = match boundary(bound)(input_loop) {
                Err(_) => return Ok((input_loop, parts)),
                Ok((inp, Delimiter::Last)) => return Ok((inp, parts)),
                Ok((inp, Delimiter::Next)) => inp,
            };
            let input3 = match part(bound)(input2) {
                Err(_) => return Ok((input2, parts)),
                Ok((inp, part)) => {
                    parts.push(part);
                    inp
                }
            };
            input_loop = input3;
        }
    }
    // Outline of the algorithm:
    // skip the preamble, up to the first boundary
    // if that boundary is the last one, stop
    // do
    // --parse part (returns the raw bytes of the part)
    // while the boundary is not the last one
 }
 #[cfg(test)]
@ -111,4 +126,51 @@ Field: Body
            ))
        );
    }
    // `part` must stop right before the line break that introduces the
    // closing delimiter: that line break and the delimiter are expected
    // to be left in the remaining input.
    #[test]
    fn test_part() {
        assert_eq!(
            part(b"simple boundary")(b"Content-type: text/plain; charset=us-ascii
 This is explicitly typed plain US-ASCII text.
 It DOES end with a linebreak.
 --simple boundary--
 "),
            Ok((
                &b"\n--simple boundary--\n"[..], 
                &b"Content-type: text/plain; charset=us-ascii\n\nThis is explicitly typed plain US-ASCII text.\nIt DOES end with a linebreak.\n"[..],
            ))
        );
    }
    // Neither the preamble nor the epilogue is returned as a part: exactly
    // two parts are expected, and the epilogue is expected to be left in
    // the remaining input.
    #[test]
    fn test_multipart() {
        assert_eq!(
            multipart(b"simple boundary")(b"This is the preamble.  It is to be ignored, though it
 is a handy place for composition agents to include an
 explanatory note to non-MIME conformant readers.
 --simple boundary
 This is implicitly typed plain US-ASCII text.
 It does NOT end with a linebreak.
 --simple boundary
 Content-type: text/plain; charset=us-ascii
 This is explicitly typed plain US-ASCII text.
 It DOES end with a linebreak.
 --simple boundary--
 This is the epilogue. It is also to be ignored.
 "),
            Ok((&b"\nThis is the epilogue. It is also to be ignored.\n"[..],
                vec![
                    &b"\nThis is implicitly typed plain US-ASCII text.\nIt does NOT end with a linebreak."[..],
                    &b"Content-type: text/plain; charset=us-ascii\n\nThis is explicitly typed plain US-ASCII text.\nIt DOES end with a linebreak.\n"[..],
                ]
            )),
        );
    }
 }
--- a/src/fragments/whitespace.rs
+++ b/src/fragments/whitespace.rs
@ -5,16 +5,23 @@ use nom::{
    character::complete::{crlf, satisfy, space0, space1},
    combinator::{opt, recognize},
    multi::{many0, many1},
-    sequence::{pair, tuple},
+    sequence::{pair, terminated, tuple},
    IResult,
 };
 use crate::fragments::encoding::encoded_word;
 /// Whitespace (space, newline, tab) content and
 /// delimited content (e.g. comments, lines, sections, etc.)
 // Bytes CRLF
 const CR: u8 = 0x0D;
 const LF: u8 = 0x0A;
 pub const CRLF: &[u8] = &[CR, LF];
 /// Recognizes a header section: zero or more `line`s followed by a
 /// terminating `obs_crlf`.
 ///
 /// On success, yields the input remaining after the terminator together
 /// with the bytes spanned by the lines; the terminator itself is consumed
 /// but is not part of the returned slice.
 pub fn headers(input: &[u8]) -> IResult<&[u8], &[u8]> {
    terminated(recognize(many0(line)), obs_crlf)(input)
 }
 pub fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
    // is_not(CRLF) is a hack, it means "is not CR or LF"
    // and not "is not CRLF". In other words, it continues while
--- a/src/multipass/segment.rs
+++ b/src/multipass/segment.rs
@ -1,12 +1,6 @@
 use nom::{
    combinator::recognize,
    multi::many0,
    sequence::terminated,
 };
 use crate::error::IMFError;
 use crate::multipass::guess_charset;
-use crate::fragments::whitespace::{obs_crlf, line};
+use crate::fragments::whitespace::headers;
 #[derive(Debug, PartialEq)]
 pub struct Parsed<'a> {
@ -15,7 +9,7 @@ pub struct Parsed<'a> {
 }
 pub fn new<'a>(buffer: &'a [u8]) -> Result<Parsed<'a>, IMFError<'a>> {
-    terminated(recognize(many0(line)), obs_crlf)(buffer)
+    headers(buffer)
        .map_err(|e| IMFError::Segment(e))
        .map(|(body, header)| Parsed { header, body })
 }