wip parts
This commit is contained in:
parent
aa2c741921
commit
16a5897f4e
3 changed files with 89 additions and 26 deletions
|
@ -1,15 +1,16 @@
|
|||
use nom::{
|
||||
IResult,
|
||||
branch::alt,
|
||||
bytes::complete::{is_not, tag},
|
||||
multi::many0,
|
||||
sequence::{pair, tuple},
|
||||
sequence::{pair, preceded, tuple},
|
||||
combinator::{not, opt, recognize},
|
||||
};
|
||||
|
||||
use crate::fragments::mime::{Mechanism, Type};
|
||||
use crate::fragments::model::MessageId;
|
||||
use crate::fragments::misc_token::Unstructured;
|
||||
use crate::fragments::whitespace::{CRLF, obs_crlf};
|
||||
use crate::fragments::whitespace::{CRLF, headers, line, obs_crlf};
|
||||
|
||||
#[derive(Debug, PartialEq, Default)]
|
||||
pub struct PartHeader<'a> {
|
||||
|
@ -33,7 +34,7 @@ pub enum Delimiter {
|
|||
|
||||
pub fn boundary<'a>(boundary: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Delimiter> {
|
||||
move |input: &[u8]| {
|
||||
let (rest, (_, _, _, last, _)) = tuple((obs_crlf, tag(b"--"), tag(boundary), opt(tag(b"--")), obs_crlf))(input)?;
|
||||
let (rest, (_, _, _, last, _)) = tuple((obs_crlf, tag(b"--"), tag(boundary), opt(tag(b"--")), opt(obs_crlf)))(input)?;
|
||||
match last {
|
||||
Some(_) => Ok((rest, Delimiter::Last)),
|
||||
None => Ok((rest, Delimiter::Next)),
|
||||
|
@ -41,11 +42,13 @@ pub fn boundary<'a>(boundary: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8]
|
|||
}
|
||||
}
|
||||
|
||||
pub fn part(input: &[u8]) -> IResult<&[u8], (PartNode, Delimiter)> {
|
||||
todo!();
|
||||
// parse headers up to CRLF
|
||||
// parse body up to boundary
|
||||
// returns (PartNode + Delimiter)
|
||||
/// Returns a parser that captures the raw bytes of one part: everything from
/// the current position up to, but not including, the next delimiter built
/// from `bound`.
///
/// The captured bytes are returned as a single slice of the input
/// (`recognize`); the remaining input starts at the delimiter, including the
/// line break that introduces it.
pub fn part<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
|
||||
move |input: &[u8]| {
|
||||
// Repeat zero or more times; zero repetitions yields an empty slice.
recognize(many0(pair(
|
||||
// Lookahead only: continue while no delimiter starts here.
not(boundary(bound)),
|
||||
// Consume either a run of bytes that are neither CR nor LF, or
// whatever `obs_crlf` accepts (presumably one line break; it is
// defined in another module).
alt((is_not(CRLF), obs_crlf)),
|
||||
)))(input)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn preamble<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
|
||||
|
@ -57,18 +60,30 @@ pub fn preamble<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
pub fn multipart<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Vec<PartNode<'a>>> {
|
||||
/// Returns a parser that splits a multipart body delimited by `bound` into
/// its raw parts, each collected as an unparsed byte slice.
///
/// Returns Ok even if an error is encountered while parsing
|
||||
/// the individual MIME parts: the parts collected so far are returned
/// together with the input left at that point. Only a failure of `preamble`
/// is propagated (through `?`).
|
||||
pub fn multipart<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Vec<&'a [u8]>> {
|
||||
move |input: &[u8]| {
|
||||
// Skip whatever precedes the first delimiter.
let (mut input_loop, _) = preamble(bound)(input)?;
|
||||
let mut parts: Vec<&[u8]> = vec![];
|
||||
loop {
|
||||
// A delimiter is expected here. No delimiter: stop with what has been
// collected. Closing delimiter: stop, leaving the input after it.
// Regular delimiter: go on with the part that follows.
let input2 = match boundary(bound)(input_loop) {
|
||||
Err(_) => return Ok((input_loop, parts)),
|
||||
Ok((inp, Delimiter::Last)) => return Ok((inp, parts)),
|
||||
Ok((inp, Delimiter::Next)) => inp,
|
||||
};
|
||||
|
||||
// NOTE(review): as written, this `todo!()` runs before the part-parsing
// code below, which would make that code unreachable. It looks like a
// leftover from the previous revision -- confirm and remove.
todo!();
|
||||
// Capture the raw bytes of the part that follows the delimiter.
let input3 = match part(bound)(input2) {
|
||||
Err(_) => return Ok((input2, parts)),
|
||||
Ok((inp, part)) => {
|
||||
parts.push(part);
|
||||
inp
|
||||
}
|
||||
};
|
||||
|
||||
input_loop = input3;
|
||||
}
|
||||
}
|
||||
// NOTE(review): the outline below mentions "PartNode + Delimiter", which the
// `part` parser used above does not return (it yields a byte slice) --
// it appears to predate the current body; confirm whether to keep it.
// skip to boundary
|
||||
// if boundary last stop
|
||||
// do
|
||||
// --parse part (return PartNode + Delimiter)
|
||||
// while boundary not last
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -111,4 +126,51 @@ Field: Body
|
|||
))
|
||||
);
|
||||
}
|
||||
|
||||
// `part` must return everything before the closing delimiter as the part and
// leave the delimiter, including the line break that introduces it, in the
// remaining input.
#[test]
|
||||
fn test_part() {
|
||||
assert_eq!(
|
||||
part(b"simple boundary")(b"Content-type: text/plain; charset=us-ascii
|
||||
|
||||
This is explicitly typed plain US-ASCII text.
|
||||
It DOES end with a linebreak.
|
||||
|
||||
--simple boundary--
|
||||
"),
|
||||
Ok((
|
||||
&b"\n--simple boundary--\n"[..],
|
||||
&b"Content-type: text/plain; charset=us-ascii\n\nThis is explicitly typed plain US-ASCII text.\nIt DOES end with a linebreak.\n"[..],
|
||||
))
|
||||
);
|
||||
}
|
||||
|
||||
// `multipart` must drop the preamble, return each part as a raw slice
// (the line break introducing a delimiter is not part of the preceding part),
// and leave the epilogue after the closing delimiter as the remaining input.
#[test]
|
||||
fn test_multipart() {
|
||||
assert_eq!(
|
||||
multipart(b"simple boundary")(b"This is the preamble. It is to be ignored, though it
|
||||
is a handy place for composition agents to include an
|
||||
explanatory note to non-MIME conformant readers.
|
||||
|
||||
--simple boundary
|
||||
|
||||
This is implicitly typed plain US-ASCII text.
|
||||
It does NOT end with a linebreak.
|
||||
--simple boundary
|
||||
Content-type: text/plain; charset=us-ascii
|
||||
|
||||
This is explicitly typed plain US-ASCII text.
|
||||
It DOES end with a linebreak.
|
||||
|
||||
--simple boundary--
|
||||
|
||||
This is the epilogue. It is also to be ignored.
|
||||
"),
|
||||
Ok((&b"\nThis is the epilogue. It is also to be ignored.\n"[..],
|
||||
vec![
|
||||
&b"\nThis is implicitly typed plain US-ASCII text.\nIt does NOT end with a linebreak."[..],
|
||||
&b"Content-type: text/plain; charset=us-ascii\n\nThis is explicitly typed plain US-ASCII text.\nIt DOES end with a linebreak.\n"[..],
|
||||
]
|
||||
)),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,16 +5,23 @@ use nom::{
|
|||
character::complete::{crlf, satisfy, space0, space1},
|
||||
combinator::{opt, recognize},
|
||||
multi::{many0, many1},
|
||||
sequence::{pair, tuple},
|
||||
sequence::{pair, terminated, tuple},
|
||||
IResult,
|
||||
};
|
||||
use crate::fragments::encoding::encoded_word;
|
||||
|
||||
/// Whitespace (space, new line, tab) content and
|
||||
/// delimited content (e.g. comments, lines, sections, etc.)
|
||||
|
||||
// Bytes CRLF
|
||||
const CR: u8 = 0x0D;
|
||||
const LF: u8 = 0x0A;
|
||||
pub const CRLF: &[u8] = &[CR, LF];
|
||||
|
||||
/// Recognizes a header section: zero or more `line`s followed by the
/// terminator matched by `obs_crlf`.
///
/// On success the output is the slice of `input` spanned by the matched
/// lines; the terminating `obs_crlf` is consumed but not included in the
/// output, and the remaining input starts right after it.
pub fn headers(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
||||
terminated(recognize(many0(line)), obs_crlf)(input)
|
||||
}
|
||||
|
||||
pub fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
|
||||
// is_not(CRLF) is a hack, it means "is not CR or LF"
|
||||
// and not "is not CRLF". In other words, it continues while
|
||||
|
|
|
@ -1,12 +1,6 @@
|
|||
use nom::{
|
||||
combinator::recognize,
|
||||
multi::many0,
|
||||
sequence::terminated,
|
||||
};
|
||||
|
||||
use crate::error::IMFError;
|
||||
use crate::multipass::guess_charset;
|
||||
use crate::fragments::whitespace::{obs_crlf, line};
|
||||
use crate::fragments::whitespace::headers;
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Parsed<'a> {
|
||||
|
@ -15,7 +9,7 @@ pub struct Parsed<'a> {
|
|||
}
|
||||
|
||||
/// Splits `buffer` into its header section and its body.
///
/// A parse failure is wrapped in `IMFError::Segment`. On success the slice
/// recognized by the header parser becomes `Parsed::header` and the input
/// left unconsumed becomes `Parsed::body`.
pub fn new<'a>(buffer: &'a [u8]) -> Result<Parsed<'a>, IMFError<'a>> {
|
||||
// NOTE(review): the inline expression below and the `headers(buffer)` call
// that follows it are the same parser spelled two ways; they look like the
// before/after lines of a diff -- confirm that only `headers(buffer)` remains.
terminated(recognize(many0(line)), obs_crlf)(buffer)
|
||||
headers(buffer)
|
||||
.map_err(|e| IMFError::Segment(e))
|
||||
// The parser returns (remaining input, recognized headers).
.map(|(body, header)| Parsed { header, body })
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue