wip parts

2023-07-17 17:14:08 +02:00 · 2023-07-17 17:14:08 +02:00 · 16a5897f4e
commit 16a5897f4e
parent aa2c741921
3 changed files with 89 additions and 26 deletions
--- a/src/fragments/part.rs
+++ b/src/fragments/part.rs
@ -1,15 +1,16 @@
 use nom::{
    IResult,
+    branch::alt,
    bytes::complete::{is_not, tag},
    multi::many0,
-    sequence::{pair, tuple},
+    sequence::{pair, preceded, tuple},
    combinator::{not, opt, recognize},
 };

 use crate::fragments::mime::{Mechanism, Type};
 use crate::fragments::model::MessageId;
 use crate::fragments::misc_token::Unstructured;
-use crate::fragments::whitespace::{CRLF, obs_crlf};
+use crate::fragments::whitespace::{CRLF, headers, line, obs_crlf};

 #[derive(Debug, PartialEq, Default)]
 pub struct PartHeader<'a> {
@ -33,7 +34,7 @@ pub enum Delimiter {

 pub fn boundary<'a>(boundary: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Delimiter> {
    move |input: &[u8]| {
-        let (rest, (_, _, _, last, _)) = tuple((obs_crlf, tag(b"--"), tag(boundary), opt(tag(b"--")), obs_crlf))(input)?;
+        let (rest, (_, _, _, last, _)) = tuple((obs_crlf, tag(b"--"), tag(boundary), opt(tag(b"--")), opt(obs_crlf)))(input)?;
        match last {
            Some(_) => Ok((rest, Delimiter::Last)),
            None => Ok((rest, Delimiter::Next)),
@ -41,11 +42,13 @@ pub fn boundary<'a>(boundary: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8]
    }
 }

-pub fn part(input: &[u8]) -> IResult<&[u8], (PartNode, Delimiter)> {
-    todo!();
-    // parse headers up to CRLF
-    // parse body up to boundary
-    // returns (PartNode + Delimiter)
+pub fn part<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> { // builds a parser yielding the raw bytes of one part, up to the next `bound` delimiter
+    move |input: &[u8]| {
+        recognize(many0(pair( // `recognize` returns everything the repetition consumed as a single slice
+            not(boundary(bound)), // look-ahead only: the repetition stops as soon as a delimiter matches here
+            alt((is_not(CRLF), obs_crlf)), // otherwise consume a run of non-CR/LF bytes, or whatever `obs_crlf` (defined elsewhere) accepts
+        )))(input) // the line break that opens the delimiter is left in the remaining input
+    }
 }

 pub fn preamble<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
@ -57,18 +60,30 @@ pub fn preamble<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &
    }
 }

-
-pub fn multipart<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Vec<PartNode<'a>>> {
+// Splits a multipart body delimited by `bound` into the raw bytes of its parts. Only a `preamble`
+// error is propagated; any later failure returns Ok with the parts collected so far.
+pub fn multipart<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Vec<&'a [u8]>> {
    move |input: &[u8]| {
+        let (mut input_loop, _) = preamble(bound)(input)?; // drop the preamble's output, continue from what it left unconsumed
+        let mut parts: Vec<&[u8]> = vec![];
+        loop {
+            let input2 = match boundary(bound)(input_loop) {
+                Err(_) => return Ok((input_loop, parts)), // no delimiter here: stop and hand back the unconsumed input
+                Ok((inp, Delimiter::Last)) => return Ok((inp, parts)), // closing delimiter (`--bound--`): done
+                Ok((inp, Delimiter::Next)) => inp, // regular delimiter: a part follows
+            };

-        todo!();
-
+            let input3 = match part(bound)(input2) {
+                Err(_) => return Ok((input2, parts)), // part failed: return the input positioned just after the delimiter
+                Ok((inp, part)) => {
+                    parts.push(part);
+                    inp
+                }
+            };
+
+            input_loop = input3; // the next iteration expects a delimiter right after this part
+        }
    }
-    // skip to boundary
-    // if boundary last stop
-    // do
-    // --parse part (return PartNode + Delimiter)
-    // while boundary not last
 }

 #[cfg(test)]
@ -111,4 +126,51 @@ Field: Body
            ))
        );
    }
+
+    #[test]
+    fn test_part() { // `part` must stop right before the line break that introduces the delimiter
+        assert_eq!(
+            part(b"simple boundary")(b"Content-type: text/plain; charset=us-ascii
+
+This is explicitly typed plain US-ASCII text.
+It DOES end with a linebreak.
+
+--simple boundary--
+"),
+            Ok((
+                &b"\n--simple boundary--\n"[..], // remaining input: the delimiter together with its leading line break
+                &b"Content-type: text/plain; charset=us-ascii\n\nThis is explicitly typed plain US-ASCII text.\nIt DOES end with a linebreak.\n"[..],
+            ))
+        );
+    }
+
+    #[test]
+    fn test_multipart() { // preamble skipped, two parts collected, text after the closing delimiter left unparsed
+        assert_eq!(
+            multipart(b"simple boundary")(b"This is the preamble.  It is to be ignored, though it
+is a handy place for composition agents to include an
+explanatory note to non-MIME conformant readers.
+
+--simple boundary
+
+This is implicitly typed plain US-ASCII text.
+It does NOT end with a linebreak.
+--simple boundary
+Content-type: text/plain; charset=us-ascii
+
+This is explicitly typed plain US-ASCII text.
+It DOES end with a linebreak.
+
+--simple boundary--
+
+This is the epilogue. It is also to be ignored.
+"),
+            Ok((&b"\nThis is the epilogue. It is also to be ignored.\n"[..], // remaining input after the closing delimiter line
+                vec![
+                    &b"\nThis is implicitly typed plain US-ASCII text.\nIt does NOT end with a linebreak."[..], // first part: no header lines, so it begins with the blank line
+                    &b"Content-type: text/plain; charset=us-ascii\n\nThis is explicitly typed plain US-ASCII text.\nIt DOES end with a linebreak.\n"[..], // second part: starts with its own Content-type header
+                ]
+            )),
+        );
+    }
 }
--- a/src/fragments/whitespace.rs
+++ b/src/fragments/whitespace.rs
@ -5,16 +5,23 @@ use nom::{
    character::complete::{crlf, satisfy, space0, space1},
    combinator::{opt, recognize},
    multi::{many0, many1},
-    sequence::{pair, tuple},
+    sequence::{pair, terminated, tuple},
    IResult,
 };
 use crate::fragments::encoding::encoded_word;

+/// Whitespace (space, new line, tab) content and
+/// delimited content (e.g. comments, lines, sections).
+
 // Bytes CRLF
 const CR: u8 = 0x0D;
 const LF: u8 = 0x0A;
 pub const CRLF: &[u8] = &[CR, LF];

+pub fn headers(input: &[u8]) -> IResult<&[u8], &[u8]> { // recognizes a header section: zero or more `line`s followed by one `obs_crlf`
+    terminated(recognize(many0(line)), obs_crlf)(input) // output is the slice covered by the lines; the closing `obs_crlf` is consumed but not returned
+}
+
 pub fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
    // is_not(CRLF) is a hack, it means "is not CR or LF"
    // and not "is not CRLF". In other words, it continues while
--- a/src/multipass/segment.rs
+++ b/src/multipass/segment.rs
@ -1,12 +1,6 @@
-use nom::{
-    combinator::recognize,
-    multi::many0,
-    sequence::terminated,
-};
-
 use crate::error::IMFError;
 use crate::multipass::guess_charset;
-use crate::fragments::whitespace::{obs_crlf, line};
+use crate::fragments::whitespace::headers;

 #[derive(Debug, PartialEq)]
 pub struct Parsed<'a> {
@ -15,7 +9,7 @@ pub struct Parsed<'a> {
 }

 pub fn new<'a>(buffer: &'a [u8]) -> Result<Parsed<'a>, IMFError<'a>> { // splits `buffer` into its header section and the body that follows
-    terminated(recognize(many0(line)), obs_crlf)(buffer)
+    headers(buffer)
        .map_err(|e| IMFError::Segment(e)) // wrap the nom error in the crate's error type
        .map(|(body, header)| Parsed { header, body }) // nom yields (remaining, output): the remainder becomes the body
 }