wip parts
This commit is contained in:
parent
aa2c741921
commit
16a5897f4e
3 changed files with 89 additions and 26 deletions
|
@ -1,15 +1,16 @@
|
||||||
use nom::{
|
use nom::{
|
||||||
IResult,
|
IResult,
|
||||||
|
branch::alt,
|
||||||
bytes::complete::{is_not, tag},
|
bytes::complete::{is_not, tag},
|
||||||
multi::many0,
|
multi::many0,
|
||||||
sequence::{pair, tuple},
|
sequence::{pair, preceded, tuple},
|
||||||
combinator::{not, opt, recognize},
|
combinator::{not, opt, recognize},
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::fragments::mime::{Mechanism, Type};
|
use crate::fragments::mime::{Mechanism, Type};
|
||||||
use crate::fragments::model::MessageId;
|
use crate::fragments::model::MessageId;
|
||||||
use crate::fragments::misc_token::Unstructured;
|
use crate::fragments::misc_token::Unstructured;
|
||||||
use crate::fragments::whitespace::{CRLF, obs_crlf};
|
use crate::fragments::whitespace::{CRLF, headers, line, obs_crlf};
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Default)]
|
#[derive(Debug, PartialEq, Default)]
|
||||||
pub struct PartHeader<'a> {
|
pub struct PartHeader<'a> {
|
||||||
|
@ -33,7 +34,7 @@ pub enum Delimiter {
|
||||||
|
|
||||||
pub fn boundary<'a>(boundary: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Delimiter> {
|
pub fn boundary<'a>(boundary: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Delimiter> {
|
||||||
move |input: &[u8]| {
|
move |input: &[u8]| {
|
||||||
let (rest, (_, _, _, last, _)) = tuple((obs_crlf, tag(b"--"), tag(boundary), opt(tag(b"--")), obs_crlf))(input)?;
|
let (rest, (_, _, _, last, _)) = tuple((obs_crlf, tag(b"--"), tag(boundary), opt(tag(b"--")), opt(obs_crlf)))(input)?;
|
||||||
match last {
|
match last {
|
||||||
Some(_) => Ok((rest, Delimiter::Last)),
|
Some(_) => Ok((rest, Delimiter::Last)),
|
||||||
None => Ok((rest, Delimiter::Next)),
|
None => Ok((rest, Delimiter::Next)),
|
||||||
|
@ -41,11 +42,13 @@ pub fn boundary<'a>(boundary: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn part(input: &[u8]) -> IResult<&[u8], (PartNode, Delimiter)> {
|
pub fn part<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
|
||||||
todo!();
|
move |input: &[u8]| {
|
||||||
// parse headers up to CRLF
|
recognize(many0(pair(
|
||||||
// parse body up to boundary
|
not(boundary(bound)),
|
||||||
// returns (PartNode + Delimiter)
|
alt((is_not(CRLF), obs_crlf)),
|
||||||
|
)))(input)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn preamble<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
|
pub fn preamble<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
|
||||||
|
@ -57,18 +60,30 @@ pub fn preamble<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns Ok even if an error is encountered while parsing
|
||||||
pub fn multipart<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Vec<PartNode<'a>>> {
|
// the individual MIME parts.
|
||||||
|
pub fn multipart<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Vec<&'a [u8]>> {
|
||||||
move |input: &[u8]| {
|
move |input: &[u8]| {
|
||||||
|
let (mut input_loop, _) = preamble(bound)(input)?;
|
||||||
todo!();
|
let mut parts: Vec<&[u8]> = vec![];
|
||||||
|
loop {
|
||||||
|
let input2 = match boundary(bound)(input_loop) {
|
||||||
|
Err(_) => return Ok((input_loop, parts)),
|
||||||
|
Ok((inp, Delimiter::Last)) => return Ok((inp, parts)),
|
||||||
|
Ok((inp, Delimiter::Next)) => inp,
|
||||||
|
};
|
||||||
|
|
||||||
|
let input3 = match part(bound)(input2) {
|
||||||
|
Err(_) => return Ok((input2, parts)),
|
||||||
|
Ok((inp, part)) => {
|
||||||
|
parts.push(part);
|
||||||
|
inp
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
input_loop = input3;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// skip to boundary
|
|
||||||
// if boundary last stop
|
|
||||||
// do
|
|
||||||
// --parse part (return PartNode + Delimiter)
|
|
||||||
// while boundary not last
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
@ -111,4 +126,51 @@ Field: Body
|
||||||
))
|
))
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_part() {
|
||||||
|
assert_eq!(
|
||||||
|
part(b"simple boundary")(b"Content-type: text/plain; charset=us-ascii
|
||||||
|
|
||||||
|
This is explicitly typed plain US-ASCII text.
|
||||||
|
It DOES end with a linebreak.
|
||||||
|
|
||||||
|
--simple boundary--
|
||||||
|
"),
|
||||||
|
Ok((
|
||||||
|
&b"\n--simple boundary--\n"[..],
|
||||||
|
&b"Content-type: text/plain; charset=us-ascii\n\nThis is explicitly typed plain US-ASCII text.\nIt DOES end with a linebreak.\n"[..],
|
||||||
|
))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_multipart() {
|
||||||
|
assert_eq!(
|
||||||
|
multipart(b"simple boundary")(b"This is the preamble. It is to be ignored, though it
|
||||||
|
is a handy place for composition agents to include an
|
||||||
|
explanatory note to non-MIME conformant readers.
|
||||||
|
|
||||||
|
--simple boundary
|
||||||
|
|
||||||
|
This is implicitly typed plain US-ASCII text.
|
||||||
|
It does NOT end with a linebreak.
|
||||||
|
--simple boundary
|
||||||
|
Content-type: text/plain; charset=us-ascii
|
||||||
|
|
||||||
|
This is explicitly typed plain US-ASCII text.
|
||||||
|
It DOES end with a linebreak.
|
||||||
|
|
||||||
|
--simple boundary--
|
||||||
|
|
||||||
|
This is the epilogue. It is also to be ignored.
|
||||||
|
"),
|
||||||
|
Ok((&b"\nThis is the epilogue. It is also to be ignored.\n"[..],
|
||||||
|
vec![
|
||||||
|
&b"\nThis is implicitly typed plain US-ASCII text.\nIt does NOT end with a linebreak."[..],
|
||||||
|
&b"Content-type: text/plain; charset=us-ascii\n\nThis is explicitly typed plain US-ASCII text.\nIt DOES end with a linebreak.\n"[..],
|
||||||
|
]
|
||||||
|
)),
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,16 +5,23 @@ use nom::{
|
||||||
character::complete::{crlf, satisfy, space0, space1},
|
character::complete::{crlf, satisfy, space0, space1},
|
||||||
combinator::{opt, recognize},
|
combinator::{opt, recognize},
|
||||||
multi::{many0, many1},
|
multi::{many0, many1},
|
||||||
sequence::{pair, tuple},
|
sequence::{pair, terminated, tuple},
|
||||||
IResult,
|
IResult,
|
||||||
};
|
};
|
||||||
use crate::fragments::encoding::encoded_word;
|
use crate::fragments::encoding::encoded_word;
|
||||||
|
|
||||||
|
/// Whitespace (space, new line, tab) content and
|
||||||
|
/// delimited content (e.g. comments, lines, sections, etc.)
|
||||||
|
|
||||||
// Bytes CRLF
|
// Bytes CRLF
|
||||||
const CR: u8 = 0x0D;
|
const CR: u8 = 0x0D;
|
||||||
const LF: u8 = 0x0A;
|
const LF: u8 = 0x0A;
|
||||||
pub const CRLF: &[u8] = &[CR, LF];
|
pub const CRLF: &[u8] = &[CR, LF];
|
||||||
|
|
||||||
|
pub fn headers(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
||||||
|
terminated(recognize(many0(line)), obs_crlf)(input)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
|
pub fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
|
||||||
// is_not(CRLF) is a hack, it means "is not CR or LF"
|
// is_not(CRLF) is a hack, it means "is not CR or LF"
|
||||||
// and not "is not CRLF". In other words, it continues while
|
// and not "is not CRLF". In other words, it continues while
|
||||||
|
|
|
@ -1,12 +1,6 @@
|
||||||
use nom::{
|
|
||||||
combinator::recognize,
|
|
||||||
multi::many0,
|
|
||||||
sequence::terminated,
|
|
||||||
};
|
|
||||||
|
|
||||||
use crate::error::IMFError;
|
use crate::error::IMFError;
|
||||||
use crate::multipass::guess_charset;
|
use crate::multipass::guess_charset;
|
||||||
use crate::fragments::whitespace::{obs_crlf, line};
|
use crate::fragments::whitespace::headers;
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub struct Parsed<'a> {
|
pub struct Parsed<'a> {
|
||||||
|
@ -15,7 +9,7 @@ pub struct Parsed<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new<'a>(buffer: &'a [u8]) -> Result<Parsed<'a>, IMFError<'a>> {
|
pub fn new<'a>(buffer: &'a [u8]) -> Result<Parsed<'a>, IMFError<'a>> {
|
||||||
terminated(recognize(many0(line)), obs_crlf)(buffer)
|
headers(buffer)
|
||||||
.map_err(|e| IMFError::Segment(e))
|
.map_err(|e| IMFError::Segment(e))
|
||||||
.map(|(body, header)| Parsed { header, body })
|
.map(|(body, header)| Parsed { header, body })
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue