WIP: refactoring (broken)

This commit is contained in:
Quentin 2023-07-18 15:00:38 +02:00
parent 16a5897f4e
commit 23c663b943
Signed by: quentin
GPG key ID: E9602264D639FF68
10 changed files with 182 additions and 90 deletions

View file

@ -1,5 +1,5 @@
use crate::error::IMFError; use crate::error::IMFError;
use crate::fragments::lazy::Field as Lazy; use crate::fragments::lazy::{Field as Lazy, MIMEField as LazyMIME};
use crate::fragments::mime::{Mechanism, Type, Version}; use crate::fragments::mime::{Mechanism, Type, Version};
use crate::fragments::misc_token::{PhraseList, Unstructured}; use crate::fragments::misc_token::{PhraseList, Unstructured};
use crate::fragments::model::{AddressList, MailboxList, MailboxRef, MessageId, MessageIdList}; use crate::fragments::model::{AddressList, MailboxList, MailboxRef, MessageId, MessageIdList};
@ -38,10 +38,7 @@ pub enum Field<'a> {
// MIME RFC2045 // MIME RFC2045
MIMEVersion(Version), MIMEVersion(Version),
ContentType(Type<'a>), MIME(MIMEField<'a>),
ContentTransferEncoding(Mechanism<'a>),
ContentID(MessageId<'a>),
ContentDescription(Unstructured),
// 3.6.8. Optional Fields // 3.6.8. Optional Fields
Optional(&'a str, Unstructured), Optional(&'a str, Unstructured),
@ -49,12 +46,22 @@ pub enum Field<'a> {
// None // None
Rescue(&'a str), Rescue(&'a str),
} }
use Field::*;
/// Eagerly parsed MIME header field.
///
/// Values of this type are produced from their lazy counterpart through the
/// `TryFrom<&LazyMIME>` implementation found in this module.
#[derive(Debug, PartialEq)]
pub enum MIMEField<'a> {
/// Parsed `Content-Type` value.
ContentType(Type<'a>),
/// Parsed `Content-Transfer-Encoding` value.
ContentTransferEncoding(Mechanism<'a>),
/// Parsed `Content-ID` value.
ContentID(MessageId<'a>),
/// Parsed `Content-Description` value.
ContentDescription(Unstructured),
/// Any other field, kept as its name and its parsed unstructured value.
Optional(&'a str, Unstructured),
/// Raw text carried over unchanged from the lazy `Rescue` variant.
Rescue(&'a str),
}
impl<'a> TryFrom<&'a Lazy<'a>> for Field<'a> { impl<'a> TryFrom<&'a Lazy<'a>> for Field<'a> {
type Error = IMFError<'a>; type Error = IMFError<'a>;
fn try_from(l: &'a Lazy<'a>) -> Result<Self, Self::Error> { fn try_from(l: &'a Lazy<'a>) -> Result<Self, Self::Error> {
use Field::*;
match l { match l {
Lazy::Date(v) => v.try_into().map(|v| Date(v)), Lazy::Date(v) => v.try_into().map(|v| Date(v)),
Lazy::From(v) => v.try_into().map(|v| From(v)), Lazy::From(v) => v.try_into().map(|v| From(v)),
@ -72,12 +79,25 @@ impl<'a> TryFrom<&'a Lazy<'a>> for Field<'a> {
Lazy::Received(v) => v.try_into().map(|v| Received(v)), Lazy::Received(v) => v.try_into().map(|v| Received(v)),
Lazy::ReturnPath(v) => v.try_into().map(|v| ReturnPath(v)), Lazy::ReturnPath(v) => v.try_into().map(|v| ReturnPath(v)),
Lazy::MIMEVersion(v) => v.try_into().map(|v| MIMEVersion(v)), Lazy::MIMEVersion(v) => v.try_into().map(|v| MIMEVersion(v)),
Lazy::ContentType(v) => v.try_into().map(|v| ContentType(v)), Lazy::MIME(v) => v.try_into().map(|v| MIME(v)),
Lazy::ContentTransferEncoding(v) => v.try_into().map(|v| ContentTransferEncoding(v)),
Lazy::ContentID(v) => v.try_into().map(|v| ContentID(v)),
Lazy::ContentDescription(v) => v.try_into().map(|v| ContentDescription(v)),
Lazy::Optional(k, v) => v.try_into().map(|v| Optional(k, v)), Lazy::Optional(k, v) => v.try_into().map(|v| Optional(k, v)),
Lazy::Rescue(v) => Ok(Rescue(v)), Lazy::Rescue(v) => Ok(Rescue(v)),
} }
} }
} }
impl<'a> TryFrom<&'a LazyMIME<'a>> for MIMEField<'a> {
type Error = IMFError<'a>;
fn try_from(l: &'a LazyMIME<'a>) -> Result<Self, Self::Error> {
use MIMEField::*;
match l {
LazyMIME::ContentType(v) => v.try_into().map(|v| ContentType(v)),
LazyMIME::ContentTransferEncoding(v) => v.try_into().map(|v| ContentTransferEncoding(v)),
LazyMIME::ContentID(v) => v.try_into().map(|v| ContentID(v)),
LazyMIME::ContentDescription(v) => v.try_into().map(|v| ContentDescription(v)),
LazyMIME::Optional(k, v) => v.try_into().map(|v| Optional(k, v)),
LazyMIME::Rescue(v) => Ok(Rescue(v)),
}
}
}

View file

@ -1,4 +1,7 @@
use std::borrow::Cow; use std::borrow::Cow;
use chardetng::EncodingDetector;
use encoding_rs::Encoding;
use nom::{ use nom::{
IResult, IResult,
branch::alt, branch::alt,
@ -13,6 +16,21 @@ use base64::{Engine as _, engine::general_purpose};
use crate::fragments::mime; use crate::fragments::mime;
const IS_LAST_BUFFER: bool = true;
const ALLOW_UTF8: bool = true;
const NO_TLD: Option<&[u8]> = None;
/// Decodes raw header bytes into text using a guessed character encoding.
///
/// The whole input is fed to the detector as a single, final buffer. The
/// encoding it guesses (no TLD hint, UTF-8 allowed) is then used to decode
/// the same bytes. Only the decoded text is returned; the other two values
/// produced by `decode` are discarded.
pub fn header_decode(input: &[u8]) -> Cow<str> {
    let mut charset_detector = EncodingDetector::new();
    charset_detector.feed(input, IS_LAST_BUFFER);

    let guessed: &Encoding = charset_detector.guess(NO_TLD, ALLOW_UTF8);
    guessed.decode(input).0
}
pub fn encoded_word(input: &str) -> IResult<&str, String> { pub fn encoded_word(input: &str) -> IResult<&str, String> {
alt((encoded_word_quoted, encoded_word_base64))(input) alt((encoded_word_quoted, encoded_word_base64))(input)
} }

View file

@ -81,10 +81,7 @@ pub enum Field<'a> {
// MIME RFC 2045 // MIME RFC 2045
MIMEVersion(Version<'a>), MIMEVersion(Version<'a>),
ContentType(Type<'a>), MIME(MIMEField<'a>),
ContentTransferEncoding(Mechanism<'a>),
ContentID(Identifier<'a>),
ContentDescription(Unstructured<'a>),
// 3.6.8. Optional Fields // 3.6.8. Optional Fields
Optional(&'a str, Unstructured<'a>), Optional(&'a str, Unstructured<'a>),
@ -92,16 +89,35 @@ pub enum Field<'a> {
// None // None
Rescue(&'a str), Rescue(&'a str),
} }
use Field::*;
impl<'a> From<&'a str> for Field<'a> { impl<'a> From<&'a str> for Field<'a> {
fn from(input: &'a str) -> Self { fn from(input: &'a str) -> Self {
match correct_field(input) { match correct_field(input) {
Ok((_, field)) => field, Ok((_, field)) => field,
Err(_) => Rescue(input), Err(_) => Field::Rescue(input),
} }
} }
} }
/// Lazily captured MIME header field.
///
/// Each typed variant wraps the still-unparsed text that follows the field
/// name; the variant is selected from the lower-cased field name.
#[derive(Debug, PartialEq)]
pub enum MIMEField<'a> {
/// Text following a `content-type` field name.
ContentType(Type<'a>),
/// Text following a `content-transfer-encoding` field name.
ContentTransferEncoding(Mechanism<'a>),
/// Text following a `content-id` field name.
ContentID(Identifier<'a>),
/// Text following a `content-description` field name.
ContentDescription(Unstructured<'a>),
/// Any other field: its name as written in the input, and the text after it.
Optional(&'a str, Unstructured<'a>),
/// The whole input, kept verbatim when no field name could be extracted.
Rescue(&'a str),
}
/// Classifies one raw header line as a lazy MIME field.
///
/// This conversion never fails: when `correct_mime_field` returns an error,
/// the complete input is preserved in `MIMEField::Rescue`.
impl<'a> From<&'a str> for MIMEField<'a> {
    fn from(input: &'a str) -> Self {
        correct_mime_field(input)
            .map(|(_, parsed)| parsed)
            .unwrap_or_else(|_| MIMEField::Rescue(input))
    }
}
/// Optional field /// Optional field
/// ///
/// ```abnf /// ```abnf
@ -119,6 +135,7 @@ fn field_name(input: &str) -> IResult<&str, &str> {
} }
fn correct_field(input: &str) -> IResult<&str, Field> { fn correct_field(input: &str) -> IResult<&str, Field> {
use Field::*;
field_name(input).map(|(rest, name)| { field_name(input).map(|(rest, name)| {
( (
"", "",
@ -144,14 +161,30 @@ fn correct_field(input: &str) -> IResult<&str, Field> {
"return-path" => ReturnPath(Mailbox(rest)), "return-path" => ReturnPath(Mailbox(rest)),
"received" => Received(ReceivedLog(rest)), "received" => Received(ReceivedLog(rest)),
"mime-version" => MIMEVersion(Version(rest)), "content-type" => MIME(MIMEField::ContentType(Type(rest))),
"content-type" => ContentType(Type(rest)), "content-transfer-encoding" => MIME(MIMEField::ContentTransferEncoding(Mechanism(rest))),
"content-transfer-encoding" => ContentTransferEncoding(Mechanism(rest)), "content-id" => MIME(MIMEField::ContentID(Identifier(rest))),
"content-id" => ContentID(Identifier(rest)), "content-description" => MIME(MIMEField::ContentDescription(Unstructured(rest))),
"content-description" => ContentDescription(Unstructured(rest)),
"mime-version" => MIMEVersion(Version(rest)),
_ => Optional(name, Unstructured(rest)), _ => Optional(name, Unstructured(rest)),
}, },
) )
}) })
} }
fn correct_mime_field(input: &str) -> IResult<&str, MIMEField> {
use MIMEField::*;
field_name(input).map(|(rest, name)| {
(
"",
match name.to_lowercase().as_ref() {
"content-type" => ContentType(Type(rest)),
"content-transfer-encoding" => ContentTransferEncoding(Mechanism(rest)),
"content-id" => ContentID(Identifier(rest)),
"content-description" => ContentDescription(Unstructured(rest)),
_ => Optional(name, Unstructured(rest)),
}
)
})
}

View file

@ -11,19 +11,21 @@ use crate::fragments::mime::{Mechanism, Type};
use crate::fragments::model::MessageId; use crate::fragments::model::MessageId;
use crate::fragments::misc_token::Unstructured; use crate::fragments::misc_token::Unstructured;
use crate::fragments::whitespace::{CRLF, headers, line, obs_crlf}; use crate::fragments::whitespace::{CRLF, headers, line, obs_crlf};
use crate::fragments::{eager,lazy};
use crate::fragments::section::MIMESection;
#[derive(Debug, PartialEq, Default)]
pub struct PartHeader<'a> {
pub content_type: Option<&'a Type<'a>>, #[derive(Debug, PartialEq)]
pub content_transfer_encoding: Option<&'a Mechanism<'a>>, pub enum PartNodeLazy<'a>{
pub content_id: Option<&'a MessageId<'a>>, Discrete(MIMESection<'a>, &'a [u8]),
pub content_description: Option<&'a Unstructured>, Composite(MIMESection<'a>, &'a [u8]),
} }
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum PartNode<'a> { pub enum PartNode<'a> {
Discrete(PartHeader<'a>, &'a [u8]), Discrete(MIMESection<'a>, &'a [u8]),
Composite(PartHeader<'a>, Vec<PartNode<'a>>), Composite(MIMESection<'a>, Vec<PartNode<'a>>),
} }
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
@ -32,6 +34,14 @@ pub enum Delimiter {
Last Last
} }
const IS_LAST_BUFFER: bool = true;
const ALLOW_UTF8: bool = true;
const NO_TLD: Option<&[u8]> = None;
/// Placeholder for parsing one MIME part into a `PartNodeLazy`.
///
/// # Panics
///
/// Not implemented yet: the body is a bare `todo!()`, so every call panics.
fn part_node_lazy(input: &[u8]) -> IResult<&[u8], PartNodeLazy> {
// Work-in-progress sketch of the intended lazy -> eager header conversion:
//let mime = header.iter().map(|e| eager::MIMEField::from(lazy::MIMEField::from(e)));
todo!();
}
pub fn boundary<'a>(boundary: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Delimiter> { pub fn boundary<'a>(boundary: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Delimiter> {
move |input: &[u8]| { move |input: &[u8]| {
let (rest, (_, _, _, last, _)) = tuple((obs_crlf, tag(b"--"), tag(boundary), opt(tag(b"--")), opt(obs_crlf)))(input)?; let (rest, (_, _, _, last, _)) = tuple((obs_crlf, tag(b"--"), tag(boundary), opt(tag(b"--")), opt(obs_crlf)))(input)?;
@ -60,6 +70,9 @@ pub fn preamble<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &
} }
} }
// FIXME parse email here
// Returns Ok even if an error is encountered while parsing // Returns Ok even if an error is encountered while parsing
// the different mimes. // the different mimes.
pub fn multipart<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Vec<&'a [u8]>> { pub fn multipart<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Vec<&'a [u8]>> {
@ -67,21 +80,21 @@ pub fn multipart<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8],
let (mut input_loop, _) = preamble(bound)(input)?; let (mut input_loop, _) = preamble(bound)(input)?;
let mut parts: Vec<&[u8]> = vec![]; let mut parts: Vec<&[u8]> = vec![];
loop { loop {
let input2 = match boundary(bound)(input_loop) { let input = match boundary(bound)(input_loop) {
Err(_) => return Ok((input_loop, parts)), Err(_) => return Ok((input_loop, parts)),
Ok((inp, Delimiter::Last)) => return Ok((inp, parts)), Ok((inp, Delimiter::Last)) => return Ok((inp, parts)),
Ok((inp, Delimiter::Next)) => inp, Ok((inp, Delimiter::Next)) => inp,
}; };
let input3 = match part(bound)(input2) { let input = match part(bound)(input) {
Err(_) => return Ok((input2, parts)), Err(_) => return Ok((input, parts)),
Ok((inp, part)) => { Ok((inp, part)) => {
parts.push(part); parts.push(part);
inp inp
} }
}; };
input_loop = input3; input_loop = input;
} }
} }
} }

View file

@ -1,6 +1,6 @@
use std::collections::HashMap; use std::collections::HashMap;
use crate::fragments::eager::Field; use crate::fragments::eager::{Field, MIMEField};
use crate::fragments::lazy; use crate::fragments::lazy;
use crate::fragments::misc_token::{PhraseList, Unstructured}; use crate::fragments::misc_token::{PhraseList, Unstructured};
use crate::fragments::mime::{Version,Type,Mechanism}; use crate::fragments::mime::{Version,Type,Mechanism};
@ -43,13 +43,20 @@ pub struct Section<'a> {
// MIME // MIME
pub mime_version: Option<&'a Version>, pub mime_version: Option<&'a Version>,
pub mime: MIMESection<'a>,
// Recovery
pub bad_fields: Vec<&'a lazy::Field<'a>>,
pub unparsed: Vec<&'a str>,
}
#[derive(Debug, PartialEq, Default)]
pub struct MIMESection<'a> {
pub content_type: Option<&'a Type<'a>>, pub content_type: Option<&'a Type<'a>>,
pub content_transfer_encoding: Option<&'a Mechanism<'a>>, pub content_transfer_encoding: Option<&'a Mechanism<'a>>,
pub content_id: Option<&'a MessageId<'a>>, pub content_id: Option<&'a MessageId<'a>>,
pub content_description: Option<&'a Unstructured>, pub content_description: Option<&'a Unstructured>,
pub optional: HashMap<&'a str, &'a Unstructured>,
// Recovery
pub bad_fields: Vec<&'a lazy::Field<'a>>,
pub unparsed: Vec<&'a str>, pub unparsed: Vec<&'a str>,
} }
@ -80,12 +87,36 @@ impl<'a> FromIterator<&'a Field<'a>> for Section<'a> {
} }
Field::Rescue(v) => section.unparsed.push(v), Field::Rescue(v) => section.unparsed.push(v),
Field::MIMEVersion(v) => section.mime_version = Some(v), Field::MIMEVersion(v) => section.mime_version = Some(v),
Field::ContentType(v) => section.content_type = Some(v), Field::MIME(v) => match v {
Field::ContentTransferEncoding(v) => section.content_transfer_encoding = Some(v), MIMEField::ContentType(v) => section.mime.content_type = Some(v),
Field::ContentID(v) => section.content_id = Some(v), MIMEField::ContentTransferEncoding(v) => section.mime.content_transfer_encoding = Some(v),
Field::ContentDescription(v) => section.content_description = Some(v), MIMEField::ContentID(v) => section.mime.content_id = Some(v),
MIMEField::ContentDescription(v) => section.mime.content_description = Some(v),
MIMEField::Optional(k, v) => {
section.mime.optional.insert(k, v);
}
MIMEField::Rescue(v) => section.mime.unparsed.push(v),
},
} }
} }
section section
} }
} }
/// Collects borrowed MIME fields into a `MIMESection`.
///
/// Fields are applied in iteration order on top of `MIMESection::default()`.
/// For the single-valued slots, and for optional fields sharing the same
/// name, a later field overwrites an earlier one. `Rescue` entries are
/// appended to `unparsed`.
impl<'a> FromIterator<&'a MIMEField<'a>> for MIMESection<'a> {
    fn from_iter<I: IntoIterator<Item = &'a MIMEField<'a>>>(iter: I) -> Self {
        iter.into_iter()
            .fold(MIMESection::default(), |mut acc, field| {
                match field {
                    MIMEField::ContentType(value) => acc.content_type = Some(value),
                    MIMEField::ContentTransferEncoding(value) => {
                        acc.content_transfer_encoding = Some(value)
                    }
                    MIMEField::ContentID(value) => acc.content_id = Some(value),
                    MIMEField::ContentDescription(value) => {
                        acc.content_description = Some(value)
                    }
                    MIMEField::Optional(name, value) => {
                        acc.optional.insert(name, value);
                    }
                    MIMEField::Rescue(raw) => acc.unparsed.push(raw),
                }
                acc
            })
    }
}

View file

@ -22,6 +22,10 @@ pub fn headers(input: &[u8]) -> IResult<&[u8], &[u8]> {
terminated(recognize(many0(line)), obs_crlf)(input) terminated(recognize(many0(line)), obs_crlf)(input)
} }
/// Splits a header block into its foldable lines.
///
/// `foldable_line` is applied zero or more times and the parse fails unless
/// the whole input has been consumed.
pub fn fields(input: &str) -> IResult<&str, Vec<&str>> {
    let mut every_line = all_consuming(many0(foldable_line));
    every_line(input)
}
pub fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> { pub fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
// is_not(CRLF) is a hack, it means "is not CR or LF" // is_not(CRLF) is a hack, it means "is not CR or LF"
// and not "is not CRLF". In other words, it continues while // and not "is not CRLF". In other words, it continues while

View file

@ -8,7 +8,7 @@ use nom::{
}; };
use crate::error::IMFError; use crate::error::IMFError;
use crate::fragments::whitespace; use crate::fragments::fields;
use crate::multipass::field_lazy; use crate::multipass::field_lazy;
use crate::multipass::guess_charset; use crate::multipass::guess_charset;
@ -19,7 +19,7 @@ pub struct Parsed<'a> {
} }
pub fn new<'a>(gcha: &'a guess_charset::Parsed<'a>) -> Result<Parsed<'a>, IMFError<'a>> { pub fn new<'a>(gcha: &'a guess_charset::Parsed<'a>) -> Result<Parsed<'a>, IMFError<'a>> {
all_consuming(many0(foldable_line))(&gcha.header) fields(&gcha.header)
.map_err(|e| IMFError::ExtractFields(e)) .map_err(|e| IMFError::ExtractFields(e))
.map(|(_, fields)| Parsed { .map(|(_, fields)| Parsed {
fields, fields,
@ -33,20 +33,6 @@ impl<'a> Parsed<'a> {
} }
} }
/// Recognizes one header line together with its folded continuation lines.
///
/// ```abnf
/// fold_line = any *(1*(crlf WS) any) crlf
/// ```
///
/// The combinators are wrapped in `recognize`, so the output is the matched
/// span of the input (terminating line break included) rather than the
/// individual pieces.
fn foldable_line(input: &str) -> IResult<&str, &str> {
recognize(tuple((
// First physical line: a run of characters that are neither CR nor LF.
is_not("\r\n"),
// Zero or more continuations: line break(s) followed by whitespace, then more content.
many0(pair(
many1(pair(whitespace::perm_crlf, space1)),
is_not("\r\n"),
)),
// Line break that terminates the whole field.
whitespace::perm_crlf,
)))(input)
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;

View file

@ -63,10 +63,10 @@ mod tests {
Parsed { Parsed {
fields: vec![ fields: vec![
lazy::Field::MIMEVersion(lazy::Version("1.0 \r\n")), lazy::Field::MIMEVersion(lazy::Version("1.0 \r\n")),
lazy::Field::ContentType(lazy::Type("multipart/alternative; boundary=\"bound\"\r\n")), lazy::Field::MIME(lazy::MIMEField::ContentType(lazy::Type("multipart/alternative; boundary=\"bound\"\r\n"))),
lazy::Field::ContentTransferEncoding(lazy::Mechanism("7bit\r\n")), lazy::Field::MIME(lazy::MIMEField::ContentTransferEncoding(lazy::Mechanism("7bit\r\n"))),
lazy::Field::ContentID(lazy::Identifier("<foo4*foo1@bar.net>\r\n")), lazy::Field::MIME(lazy::MIMEField::ContentID(lazy::Identifier("<foo4*foo1@bar.net>\r\n"))),
lazy::Field::ContentDescription(lazy::Unstructured("hello world\r\n")), lazy::Field::MIME(lazy::MIMEField::ContentDescription(lazy::Unstructured("hello world\r\n"))),
], ],
body: b"Hello world!", body: b"Hello world!",
} }

View file

@ -1,35 +1,19 @@
use chardetng::EncodingDetector;
use encoding_rs::Encoding;
use std::borrow::Cow; use std::borrow::Cow;
use crate::error::IMFError; use crate::error::IMFError;
use crate::fragments::encoding;
use crate::multipass::extract_fields; use crate::multipass::extract_fields;
use crate::multipass::segment; use crate::multipass::segment;
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct Parsed<'a> { pub struct Parsed<'a> {
pub header: Cow<'a, str>, pub header: Cow<'a, str>,
pub encoding: &'static Encoding,
pub malformed: bool,
pub body: &'a [u8], pub body: &'a [u8],
} }
const IS_LAST_BUFFER: bool = true;
const ALLOW_UTF8: bool = true;
const NO_TLD: Option<&[u8]> = None;
pub fn new<'a>(seg: &'a segment::Parsed<'a>) -> Parsed<'a> { pub fn new<'a>(seg: &'a segment::Parsed<'a>) -> Parsed<'a> {
// Create detector
let mut detector = EncodingDetector::new();
detector.feed(&seg.header, IS_LAST_BUFFER);
// Get encoding
let enc: &Encoding = detector.guess(NO_TLD, ALLOW_UTF8);
let (header, encoding, malformed) = enc.decode(&seg.header);
Parsed { Parsed {
header, header: encoding::header_decode(&seg.header),
encoding,
malformed,
body: seg.body, body: seg.body,
} }
} }

View file

@ -230,6 +230,7 @@ for all folk to come=
subject: Some(&misc_token::Unstructured("If you can read this you understand the example.".into())), subject: Some(&misc_token::Unstructured("If you can read this you understand the example.".into())),
mime_version: Some(&mime::Version{ major: 1, minor: 0 }), mime_version: Some(&mime::Version{ major: 1, minor: 0 }),
mime: section::MIMESection {
content_type: Some(&mime::Type::Text(mime::TextDesc { content_type: Some(&mime::Type::Text(mime::TextDesc {
charset: Some(mime::EmailCharset::ISO_8859_1), charset: Some(mime::EmailCharset::ISO_8859_1),
subtype: mime::TextSubtype::Plain, subtype: mime::TextSubtype::Plain,
@ -241,6 +242,8 @@ for all folk to come=
right: "example.com" right: "example.com"
}), }),
content_description: Some(&misc_token::Unstructured("hello".into())), content_description: Some(&misc_token::Unstructured("hello".into())),
..section::MIMESection::default()
},
..section::Section::default() ..section::Section::default()
} }
); );