wip parts

This commit is contained in:
Quentin 2023-07-17 17:14:08 +02:00
parent aa2c741921
commit 16a5897f4e
Signed by: quentin
GPG key ID: E9602264D639FF68
3 changed files with 89 additions and 26 deletions

View file

@ -1,15 +1,16 @@
use nom::{ use nom::{
IResult, IResult,
branch::alt,
bytes::complete::{is_not, tag}, bytes::complete::{is_not, tag},
multi::many0, multi::many0,
sequence::{pair, tuple}, sequence::{pair, preceded, tuple},
combinator::{not, opt, recognize}, combinator::{not, opt, recognize},
}; };
use crate::fragments::mime::{Mechanism, Type}; use crate::fragments::mime::{Mechanism, Type};
use crate::fragments::model::MessageId; use crate::fragments::model::MessageId;
use crate::fragments::misc_token::Unstructured; use crate::fragments::misc_token::Unstructured;
use crate::fragments::whitespace::{CRLF, obs_crlf}; use crate::fragments::whitespace::{CRLF, headers, line, obs_crlf};
#[derive(Debug, PartialEq, Default)] #[derive(Debug, PartialEq, Default)]
pub struct PartHeader<'a> { pub struct PartHeader<'a> {
@ -33,7 +34,7 @@ pub enum Delimiter {
pub fn boundary<'a>(boundary: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Delimiter> { pub fn boundary<'a>(boundary: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Delimiter> {
move |input: &[u8]| { move |input: &[u8]| {
let (rest, (_, _, _, last, _)) = tuple((obs_crlf, tag(b"--"), tag(boundary), opt(tag(b"--")), obs_crlf))(input)?; let (rest, (_, _, _, last, _)) = tuple((obs_crlf, tag(b"--"), tag(boundary), opt(tag(b"--")), opt(obs_crlf)))(input)?;
match last { match last {
Some(_) => Ok((rest, Delimiter::Last)), Some(_) => Ok((rest, Delimiter::Last)),
None => Ok((rest, Delimiter::Next)), None => Ok((rest, Delimiter::Next)),
@ -41,11 +42,13 @@ pub fn boundary<'a>(boundary: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8]
} }
} }
pub fn part(input: &[u8]) -> IResult<&[u8], (PartNode, Delimiter)> { pub fn part<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
todo!(); move |input: &[u8]| {
// parse headers up to CRLF recognize(many0(pair(
// parse body up to boundary not(boundary(bound)),
// returns (PartNode + Delimiter) alt((is_not(CRLF), obs_crlf)),
)))(input)
}
} }
pub fn preamble<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> { pub fn preamble<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
@ -57,18 +60,30 @@ pub fn preamble<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &
} }
} }
// Returns Ok even if an error is encountered while parsing
pub fn multipart<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Vec<PartNode<'a>>> { // the different mimes.
pub fn multipart<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Vec<&'a [u8]>> {
move |input: &[u8]| { move |input: &[u8]| {
let (mut input_loop, _) = preamble(bound)(input)?;
todo!(); let mut parts: Vec<&[u8]> = vec![];
loop {
let input2 = match boundary(bound)(input_loop) {
Err(_) => return Ok((input_loop, parts)),
Ok((inp, Delimiter::Last)) => return Ok((inp, parts)),
Ok((inp, Delimiter::Next)) => inp,
};
let input3 = match part(bound)(input2) {
Err(_) => return Ok((input2, parts)),
Ok((inp, part)) => {
parts.push(part);
inp
}
};
input_loop = input3;
}
} }
// skip to boundary
// if boundary last stop
// do
// --parse part (return PartNode + Delimiter)
// while boundary not last
} }
#[cfg(test)] #[cfg(test)]
@ -111,4 +126,51 @@ Field: Body
)) ))
); );
} }
#[test]
fn test_part() {
    // Line breaks are spelled out as escapes so that the empty lines (the
    // header/body separator and the line break that belongs to the closing
    // delimiter) are visible and cannot be lost; the expected values below
    // depend on them.
    let input: &[u8] = b"Content-type: text/plain; charset=us-ascii\n\
        \n\
        This is explicitly typed plain US-ASCII text.\n\
        It DOES end with a linebreak.\n\
        \n\
        --simple boundary--\n";

    assert_eq!(
        part(b"simple boundary")(input),
        Ok((
            // The delimiter, including its leading line break, is left unconsumed.
            &b"\n--simple boundary--\n"[..],
            &b"Content-type: text/plain; charset=us-ascii\n\nThis is explicitly typed plain US-ASCII text.\nIt DOES end with a linebreak.\n"[..],
        ))
    );
}
#[test]
fn test_multipart() {
    // Line breaks are spelled out as escapes so that the empty lines are
    // visible: the one after the first delimiter, the header/body separator
    // of the second part, the line break owned by the closing delimiter and
    // the one opening the epilogue. The expected values below depend on them.
    let input: &[u8] = b"This is the preamble. It is to be ignored, though it\n\
        is a handy place for composition agents to include an\n\
        explanatory note to non-MIME conformant readers.\n\
        --simple boundary\n\
        \n\
        This is implicitly typed plain US-ASCII text.\n\
        It does NOT end with a linebreak.\n\
        --simple boundary\n\
        Content-type: text/plain; charset=us-ascii\n\
        \n\
        This is explicitly typed plain US-ASCII text.\n\
        It DOES end with a linebreak.\n\
        \n\
        --simple boundary--\n\
        \n\
        This is the epilogue. It is also to be ignored.\n";

    assert_eq!(
        multipart(b"simple boundary")(input),
        Ok((
            // Everything after the closing delimiter is returned as remaining input.
            &b"\nThis is the epilogue. It is also to be ignored.\n"[..],
            vec![
                &b"\nThis is implicitly typed plain US-ASCII text.\nIt does NOT end with a linebreak."[..],
                &b"Content-type: text/plain; charset=us-ascii\n\nThis is explicitly typed plain US-ASCII text.\nIt DOES end with a linebreak.\n"[..],
            ]
        )),
    );
}
} }

View file

@ -5,16 +5,23 @@ use nom::{
character::complete::{crlf, satisfy, space0, space1}, character::complete::{crlf, satisfy, space0, space1},
combinator::{opt, recognize}, combinator::{opt, recognize},
multi::{many0, many1}, multi::{many0, many1},
sequence::{pair, tuple}, sequence::{pair, terminated, tuple},
IResult, IResult,
}; };
use crate::fragments::encoding::encoded_word; use crate::fragments::encoding::encoded_word;
/// Whitespace content (space, new line, tab) and
/// delimited content (e.g. comments, lines, sections, etc.)
// Bytes CRLF // Bytes CRLF
const CR: u8 = 0x0D; const CR: u8 = 0x0D;
const LF: u8 = 0x0A; const LF: u8 = 0x0A;
pub const CRLF: &[u8] = &[CR, LF]; pub const CRLF: &[u8] = &[CR, LF];
/// Splits the header section off the front of `input`.
///
/// Matches zero or more `line`s followed by one `obs_crlf`. The output is
/// the span covered by the matched lines as a single slice; the terminating
/// `obs_crlf` is consumed but not part of the output. The remaining input
/// starts right after that terminator.
pub fn headers(input: &[u8]) -> IResult<&[u8], &[u8]> {
    let header_lines = recognize(many0(line));
    terminated(header_lines, obs_crlf)(input)
}
pub fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> { pub fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
// is_not(CRLF) is a hack, it means "is not CR or LF" // is_not(CRLF) is a hack, it means "is not CR or LF"
// and not "is not CRLF". In other words, it continues while // and not "is not CRLF". In other words, it continues while

View file

@ -1,12 +1,6 @@
use nom::{
combinator::recognize,
multi::many0,
sequence::terminated,
};
use crate::error::IMFError; use crate::error::IMFError;
use crate::multipass::guess_charset; use crate::multipass::guess_charset;
use crate::fragments::whitespace::{obs_crlf, line}; use crate::fragments::whitespace::headers;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct Parsed<'a> { pub struct Parsed<'a> {
@ -15,7 +9,7 @@ pub struct Parsed<'a> {
} }
pub fn new<'a>(buffer: &'a [u8]) -> Result<Parsed<'a>, IMFError<'a>> { pub fn new<'a>(buffer: &'a [u8]) -> Result<Parsed<'a>, IMFError<'a>> {
terminated(recognize(many0(line)), obs_crlf)(buffer) headers(buffer)
.map_err(|e| IMFError::Segment(e)) .map_err(|e| IMFError::Segment(e))
.map(|(body, header)| Parsed { header, body }) .map(|(body, header)| Parsed { header, body })
} }