WIP: refactoring (broken)

This commit is contained in:
Quentin 2023-07-18 15:00:38 +02:00
parent 16a5897f4e
commit 23c663b943
Signed by: quentin
GPG key ID: E9602264D639FF68
10 changed files with 182 additions and 90 deletions

View file

@ -1,5 +1,5 @@
use crate::error::IMFError;
use crate::fragments::lazy::Field as Lazy;
use crate::fragments::lazy::{Field as Lazy, MIMEField as LazyMIME};
use crate::fragments::mime::{Mechanism, Type, Version};
use crate::fragments::misc_token::{PhraseList, Unstructured};
use crate::fragments::model::{AddressList, MailboxList, MailboxRef, MessageId, MessageIdList};
@ -38,10 +38,7 @@ pub enum Field<'a> {
// MIME RFC2045
MIMEVersion(Version),
ContentType(Type<'a>),
ContentTransferEncoding(Mechanism<'a>),
ContentID(MessageId<'a>),
ContentDescription(Unstructured),
MIME(MIMEField<'a>),
// 3.6.8. Optional Fields
Optional(&'a str, Unstructured),
@ -49,12 +46,22 @@ pub enum Field<'a> {
// None
Rescue(&'a str),
}
use Field::*;
/// A MIME header field in its parsed form.
///
/// Values of this type are produced from lazy MIME fields by the
/// `TryFrom<&LazyMIME>` implementation in this module.
#[derive(Debug, PartialEq)]
pub enum MIMEField<'a> {
/// Parsed `Content-Type` value.
ContentType(Type<'a>),
/// Parsed `Content-Transfer-Encoding` value.
ContentTransferEncoding(Mechanism<'a>),
/// Parsed `Content-ID` value.
ContentID(MessageId<'a>),
/// Parsed `Content-Description` value.
ContentDescription(Unstructured),
/// Any other field: the name it was given, then its unstructured value.
Optional(&'a str, Unstructured),
/// Raw text carried over unchanged from the lazy `Rescue` variant.
Rescue(&'a str),
}
impl<'a> TryFrom<&'a Lazy<'a>> for Field<'a> {
type Error = IMFError<'a>;
fn try_from(l: &'a Lazy<'a>) -> Result<Self, Self::Error> {
use Field::*;
match l {
Lazy::Date(v) => v.try_into().map(|v| Date(v)),
Lazy::From(v) => v.try_into().map(|v| From(v)),
@ -72,12 +79,25 @@ impl<'a> TryFrom<&'a Lazy<'a>> for Field<'a> {
Lazy::Received(v) => v.try_into().map(|v| Received(v)),
Lazy::ReturnPath(v) => v.try_into().map(|v| ReturnPath(v)),
Lazy::MIMEVersion(v) => v.try_into().map(|v| MIMEVersion(v)),
Lazy::ContentType(v) => v.try_into().map(|v| ContentType(v)),
Lazy::ContentTransferEncoding(v) => v.try_into().map(|v| ContentTransferEncoding(v)),
Lazy::ContentID(v) => v.try_into().map(|v| ContentID(v)),
Lazy::ContentDescription(v) => v.try_into().map(|v| ContentDescription(v)),
Lazy::MIME(v) => v.try_into().map(|v| MIME(v)),
Lazy::Optional(k, v) => v.try_into().map(|v| Optional(k, v)),
Lazy::Rescue(v) => Ok(Rescue(v)),
}
}
}
impl<'a> TryFrom<&'a LazyMIME<'a>> for MIMEField<'a> {
type Error = IMFError<'a>;
fn try_from(l: &'a LazyMIME<'a>) -> Result<Self, Self::Error> {
use MIMEField::*;
match l {
LazyMIME::ContentType(v) => v.try_into().map(|v| ContentType(v)),
LazyMIME::ContentTransferEncoding(v) => v.try_into().map(|v| ContentTransferEncoding(v)),
LazyMIME::ContentID(v) => v.try_into().map(|v| ContentID(v)),
LazyMIME::ContentDescription(v) => v.try_into().map(|v| ContentDescription(v)),
LazyMIME::Optional(k, v) => v.try_into().map(|v| Optional(k, v)),
LazyMIME::Rescue(v) => Ok(Rescue(v)),
}
}
}

View file

@ -1,4 +1,7 @@
use std::borrow::Cow;
use chardetng::EncodingDetector;
use encoding_rs::Encoding;
use nom::{
IResult,
branch::alt,
@ -13,6 +16,21 @@ use base64::{Engine as _, engine::general_purpose};
use crate::fragments::mime;
// Fixed arguments for the charset detector driven by `header_decode`.
// Second argument of `EncodingDetector::feed`.
const IS_LAST_BUFFER: bool = true;
// Second argument of `EncodingDetector::guess`.
const ALLOW_UTF8: bool = true;
// First argument of `EncodingDetector::guess`; no TLD hint is supplied.
const NO_TLD: Option<&[u8]> = None;
/// Decodes raw header bytes into text using a guessed character encoding.
///
/// The whole `input` is fed to an `EncodingDetector` in a single call, and
/// the encoding it guesses is then used to decode that same `input`. Only
/// the decoded text is returned; the two other members of the tuple
/// produced by `Encoding::decode` are discarded.
pub fn header_decode(input: &[u8]) -> Cow<str> {
    let mut charset_guesser = EncodingDetector::new();
    charset_guesser.feed(input, IS_LAST_BUFFER);

    let guessed: &Encoding = charset_guesser.guess(NO_TLD, ALLOW_UTF8);
    guessed.decode(input).0
}
/// Parses an encoded word by trying `encoded_word_quoted` and then
/// `encoded_word_base64`, in that order.
pub fn encoded_word(input: &str) -> IResult<&str, String> {
    let mut either_form = alt((encoded_word_quoted, encoded_word_base64));
    either_form(input)
}

View file

@ -81,10 +81,7 @@ pub enum Field<'a> {
// MIME RFC 2045
MIMEVersion(Version<'a>),
ContentType(Type<'a>),
ContentTransferEncoding(Mechanism<'a>),
ContentID(Identifier<'a>),
ContentDescription(Unstructured<'a>),
MIME(MIMEField<'a>),
// 3.6.8. Optional Fields
Optional(&'a str, Unstructured<'a>),
@ -92,16 +89,35 @@ pub enum Field<'a> {
// None
Rescue(&'a str),
}
use Field::*;
impl<'a> From<&'a str> for Field<'a> {
fn from(input: &'a str) -> Self {
match correct_field(input) {
Ok((_, field)) => field,
Err(_) => Rescue(input),
Err(_) => Field::Rescue(input),
}
}
}
/// A MIME header field that has been recognized by name but whose value
/// is still the raw, unparsed text.
///
/// `correct_mime_field` builds these values; the `From<&str>` impl falls
/// back to `Rescue` when that recognition fails.
#[derive(Debug, PartialEq)]
pub enum MIMEField<'a> {
/// Raw value of a `content-type` field.
ContentType(Type<'a>),
/// Raw value of a `content-transfer-encoding` field.
ContentTransferEncoding(Mechanism<'a>),
/// Raw value of a `content-id` field.
ContentID(Identifier<'a>),
/// Raw value of a `content-description` field.
ContentDescription(Unstructured<'a>),
/// Any other field: its name as written, then its raw value.
Optional(&'a str, Unstructured<'a>),
/// The complete input, kept when no field could be recognized.
Rescue(&'a str),
}
/// Builds a lazy MIME field from a raw string.
///
/// When `correct_mime_field` fails, the untouched input is preserved in
/// `MIMEField::Rescue` instead of reporting an error.
impl<'a> From<&'a str> for MIMEField<'a> {
    fn from(input: &'a str) -> Self {
        correct_mime_field(input)
            .map(|(_, field)| field)
            .unwrap_or_else(|_| Self::Rescue(input))
    }
}
/// Optional field
///
/// ```abnf
@ -119,6 +135,7 @@ fn field_name(input: &str) -> IResult<&str, &str> {
}
fn correct_field(input: &str) -> IResult<&str, Field> {
use Field::*;
field_name(input).map(|(rest, name)| {
(
"",
@ -144,14 +161,30 @@ fn correct_field(input: &str) -> IResult<&str, Field> {
"return-path" => ReturnPath(Mailbox(rest)),
"received" => Received(ReceivedLog(rest)),
"mime-version" => MIMEVersion(Version(rest)),
"content-type" => ContentType(Type(rest)),
"content-transfer-encoding" => ContentTransferEncoding(Mechanism(rest)),
"content-id" => ContentID(Identifier(rest)),
"content-description" => ContentDescription(Unstructured(rest)),
"content-type" => MIME(MIMEField::ContentType(Type(rest))),
"content-transfer-encoding" => MIME(MIMEField::ContentTransferEncoding(Mechanism(rest))),
"content-id" => MIME(MIMEField::ContentID(Identifier(rest))),
"content-description" => MIME(MIMEField::ContentDescription(Unstructured(rest))),
"mime-version" => MIMEVersion(Version(rest)),
_ => Optional(name, Unstructured(rest)),
},
)
})
}
fn correct_mime_field(input: &str) -> IResult<&str, MIMEField> {
use MIMEField::*;
field_name(input).map(|(rest, name)| {
(
"",
match name.to_lowercase().as_ref() {
"content-type" => ContentType(Type(rest)),
"content-transfer-encoding" => ContentTransferEncoding(Mechanism(rest)),
"content-id" => ContentID(Identifier(rest)),
"content-description" => ContentDescription(Unstructured(rest)),
_ => Optional(name, Unstructured(rest)),
}
)
})
}

View file

@ -11,19 +11,21 @@ use crate::fragments::mime::{Mechanism, Type};
use crate::fragments::model::MessageId;
use crate::fragments::misc_token::Unstructured;
use crate::fragments::whitespace::{CRLF, headers, line, obs_crlf};
use crate::fragments::{eager,lazy};
use crate::fragments::section::MIMESection;
#[derive(Debug, PartialEq, Default)]
pub struct PartHeader<'a> {
pub content_type: Option<&'a Type<'a>>,
pub content_transfer_encoding: Option<&'a Mechanism<'a>>,
pub content_id: Option<&'a MessageId<'a>>,
pub content_description: Option<&'a Unstructured>,
#[derive(Debug, PartialEq)]
pub enum PartNodeLazy<'a>{
Discrete(MIMESection<'a>, &'a [u8]),
Composite(MIMESection<'a>, &'a [u8]),
}
#[derive(Debug, PartialEq)]
pub enum PartNode<'a> {
Discrete(PartHeader<'a>, &'a [u8]),
Composite(PartHeader<'a>, Vec<PartNode<'a>>),
Discrete(MIMESection<'a>, &'a [u8]),
Composite(MIMESection<'a>, Vec<PartNode<'a>>),
}
#[derive(Debug, PartialEq)]
@ -32,6 +34,14 @@ pub enum Delimiter {
Last
}
// Charset-detection settings.
// NOTE(review): nothing visible in this section reads these three
// constants — confirm they are still needed in this module.
const IS_LAST_BUFFER: bool = true;
const ALLOW_UTF8: bool = true;
const NO_TLD: Option<&[u8]> = None;
/// Unimplemented stub: any call panics through `todo!()`.
///
/// `input` is currently unused.
fn part_node_lazy(input: &[u8]) -> IResult<&[u8], PartNodeLazy> {
// Draft of the lazy-to-eager MIME header conversion, kept until this is implemented:
//let mime = header.iter().map(|e| eager::MIMEField::from(lazy::MIMEField::from(e)));
todo!();
}
pub fn boundary<'a>(boundary: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Delimiter> {
move |input: &[u8]| {
let (rest, (_, _, _, last, _)) = tuple((obs_crlf, tag(b"--"), tag(boundary), opt(tag(b"--")), opt(obs_crlf)))(input)?;
@ -60,6 +70,9 @@ pub fn preamble<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &
}
}
// FIXME parse email here
// Returns Ok even if an error is encountered while parsing
// the different mimes.
pub fn multipart<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Vec<&'a [u8]>> {
@ -67,21 +80,21 @@ pub fn multipart<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8],
let (mut input_loop, _) = preamble(bound)(input)?;
let mut parts: Vec<&[u8]> = vec![];
loop {
let input2 = match boundary(bound)(input_loop) {
let input = match boundary(bound)(input_loop) {
Err(_) => return Ok((input_loop, parts)),
Ok((inp, Delimiter::Last)) => return Ok((inp, parts)),
Ok((inp, Delimiter::Next)) => inp,
};
let input3 = match part(bound)(input2) {
Err(_) => return Ok((input2, parts)),
let input = match part(bound)(input) {
Err(_) => return Ok((input, parts)),
Ok((inp, part)) => {
parts.push(part);
inp
}
};
input_loop = input3;
input_loop = input;
}
}
}

View file

@ -1,6 +1,6 @@
use std::collections::HashMap;
use crate::fragments::eager::Field;
use crate::fragments::eager::{Field, MIMEField};
use crate::fragments::lazy;
use crate::fragments::misc_token::{PhraseList, Unstructured};
use crate::fragments::mime::{Version,Type,Mechanism};
@ -43,13 +43,20 @@ pub struct Section<'a> {
// MIME
pub mime_version: Option<&'a Version>,
pub mime: MIMESection<'a>,
// Recovery
pub bad_fields: Vec<&'a lazy::Field<'a>>,
pub unparsed: Vec<&'a str>,
}
/// MIME-related header values gathered from parsed fields.
///
/// Every member only borrows its data. The derived `Default` yields a
/// section with no value set and empty collections.
#[derive(Debug, PartialEq, Default)]
pub struct MIMESection<'a> {
/// Payload of a `MIMEField::ContentType` field, if any.
pub content_type: Option<&'a Type<'a>>,
/// Payload of a `MIMEField::ContentTransferEncoding` field, if any.
pub content_transfer_encoding: Option<&'a Mechanism<'a>>,
/// Payload of a `MIMEField::ContentID` field, if any.
pub content_id: Option<&'a MessageId<'a>>,
/// Payload of a `MIMEField::ContentDescription` field, if any.
pub content_description: Option<&'a Unstructured>,
// Recovery
/// NOTE(review): the `FromIterator<&MIMEField>` impl in this file never
/// writes to this member — confirm it belongs here.
pub bad_fields: Vec<&'a lazy::Field<'a>>,
/// `MIMEField::Optional` values, keyed by the field name they carry.
pub optional: HashMap<&'a str, &'a Unstructured>,
/// Payloads of `MIMEField::Rescue` fields.
pub unparsed: Vec<&'a str>,
}
@ -80,12 +87,36 @@ impl<'a> FromIterator<&'a Field<'a>> for Section<'a> {
}
Field::Rescue(v) => section.unparsed.push(v),
Field::MIMEVersion(v) => section.mime_version = Some(v),
Field::ContentType(v) => section.content_type = Some(v),
Field::ContentTransferEncoding(v) => section.content_transfer_encoding = Some(v),
Field::ContentID(v) => section.content_id = Some(v),
Field::ContentDescription(v) => section.content_description = Some(v),
Field::MIME(v) => match v {
MIMEField::ContentType(v) => section.mime.content_type = Some(v),
MIMEField::ContentTransferEncoding(v) => section.mime.content_transfer_encoding = Some(v),
MIMEField::ContentID(v) => section.mime.content_id = Some(v),
MIMEField::ContentDescription(v) => section.mime.content_description = Some(v),
MIMEField::Optional(k, v) => {
section.mime.optional.insert(k, v);
}
MIMEField::Rescue(v) => section.mime.unparsed.push(v),
},
}
}
section
}
}
/// Collects borrowed MIME fields into a `MIMESection`.
///
/// Fields are applied in iteration order: when the same kind of field (or
/// the same optional name) occurs more than once, the last occurrence
/// wins. Every `Rescue` payload is appended to `unparsed`.
impl<'a> FromIterator<&'a MIMEField<'a>> for MIMESection<'a> {
    fn from_iter<I: IntoIterator<Item = &'a MIMEField<'a>>>(iter: I) -> Self {
        iter.into_iter().fold(Self::default(), |mut acc, field| {
            match field {
                MIMEField::ContentType(value) => acc.content_type = Some(value),
                MIMEField::ContentTransferEncoding(value) => {
                    acc.content_transfer_encoding = Some(value)
                }
                MIMEField::ContentID(value) => acc.content_id = Some(value),
                MIMEField::ContentDescription(value) => acc.content_description = Some(value),
                MIMEField::Optional(name, value) => {
                    acc.optional.insert(name, value);
                }
                MIMEField::Rescue(raw) => acc.unparsed.push(raw),
            }
            acc
        })
    }
}

View file

@ -22,6 +22,10 @@ pub fn headers(input: &[u8]) -> IResult<&[u8], &[u8]> {
terminated(recognize(many0(line)), obs_crlf)(input)
}
/// Splits `input` into a list of foldable lines.
///
/// `foldable_line` is applied zero or more times, and the parse fails
/// unless the whole input has been consumed.
pub fn fields(input: &str) -> IResult<&str, Vec<&str>> {
    let every_line = many0(foldable_line);
    all_consuming(every_line)(input)
}
pub fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
// is_not(CRLF) is a hack, it means "is not CR or LF"
// and not "is not CRLF". In other words, it continues while

View file

@ -8,7 +8,7 @@ use nom::{
};
use crate::error::IMFError;
use crate::fragments::whitespace;
use crate::fragments::fields;
use crate::multipass::field_lazy;
use crate::multipass::guess_charset;
@ -19,7 +19,7 @@ pub struct Parsed<'a> {
}
pub fn new<'a>(gcha: &'a guess_charset::Parsed<'a>) -> Result<Parsed<'a>, IMFError<'a>> {
all_consuming(many0(foldable_line))(&gcha.header)
fields(&gcha.header)
.map_err(|e| IMFError::ExtractFields(e))
.map(|(_, fields)| Parsed {
fields,
@ -33,20 +33,6 @@ impl<'a> Parsed<'a> {
}
}
/// Recognizes one line together with any folded continuation lines and
/// returns the matched slice, line terminator included.
///
/// ```abnf
/// fold_line = any *(1*(crlf WS) any) crlf
/// ```
fn foldable_line(input: &str) -> IResult<&str, &str> {
    // One or more "line break + whitespace" folds, followed by more
    // content that contains neither CR nor LF.
    let continuation = pair(
        many1(pair(whitespace::perm_crlf, space1)),
        is_not("\r\n"),
    );
    let folded = tuple((
        is_not("\r\n"),
        many0(continuation),
        whitespace::perm_crlf,
    ));
    recognize(folded)(input)
}
#[cfg(test)]
mod tests {
use super::*;

View file

@ -63,10 +63,10 @@ mod tests {
Parsed {
fields: vec![
lazy::Field::MIMEVersion(lazy::Version("1.0 \r\n")),
lazy::Field::ContentType(lazy::Type("multipart/alternative; boundary=\"bound\"\r\n")),
lazy::Field::ContentTransferEncoding(lazy::Mechanism("7bit\r\n")),
lazy::Field::ContentID(lazy::Identifier("<foo4*foo1@bar.net>\r\n")),
lazy::Field::ContentDescription(lazy::Unstructured("hello world\r\n")),
lazy::Field::MIME(lazy::MIMEField::ContentType(lazy::Type("multipart/alternative; boundary=\"bound\"\r\n"))),
lazy::Field::MIME(lazy::MIMEField::ContentTransferEncoding(lazy::Mechanism("7bit\r\n"))),
lazy::Field::MIME(lazy::MIMEField::ContentID(lazy::Identifier("<foo4*foo1@bar.net>\r\n"))),
lazy::Field::MIME(lazy::MIMEField::ContentDescription(lazy::Unstructured("hello world\r\n"))),
],
body: b"Hello world!",
}

View file

@ -1,35 +1,19 @@
use chardetng::EncodingDetector;
use encoding_rs::Encoding;
use std::borrow::Cow;
use crate::error::IMFError;
use crate::fragments::encoding;
use crate::multipass::extract_fields;
use crate::multipass::segment;
#[derive(Debug, PartialEq)]
pub struct Parsed<'a> {
pub header: Cow<'a, str>,
pub encoding: &'static Encoding,
pub malformed: bool,
pub body: &'a [u8],
}
const IS_LAST_BUFFER: bool = true;
const ALLOW_UTF8: bool = true;
const NO_TLD: Option<&[u8]> = None;
pub fn new<'a>(seg: &'a segment::Parsed<'a>) -> Parsed<'a> {
// Create detector
let mut detector = EncodingDetector::new();
detector.feed(&seg.header, IS_LAST_BUFFER);
// Get encoding
let enc: &Encoding = detector.guess(NO_TLD, ALLOW_UTF8);
let (header, encoding, malformed) = enc.decode(&seg.header);
Parsed {
header,
encoding,
malformed,
header: encoding::header_decode(&seg.header),
body: seg.body,
}
}

View file

@ -230,17 +230,20 @@ for all folk to come=
subject: Some(&misc_token::Unstructured("If you can read this you understand the example.".into())),
mime_version: Some(&mime::Version{ major: 1, minor: 0 }),
content_type: Some(&mime::Type::Text(mime::TextDesc {
charset: Some(mime::EmailCharset::ISO_8859_1),
subtype: mime::TextSubtype::Plain,
unknown_parameters: vec![]
})),
content_transfer_encoding: Some(&mime::Mechanism::QuotedPrintable),
content_id: Some(&model::MessageId {
left: "a",
right: "example.com"
}),
content_description: Some(&misc_token::Unstructured("hello".into())),
mime: section::MIMESection {
content_type: Some(&mime::Type::Text(mime::TextDesc {
charset: Some(mime::EmailCharset::ISO_8859_1),
subtype: mime::TextSubtype::Plain,
unknown_parameters: vec![]
})),
content_transfer_encoding: Some(&mime::Mechanism::QuotedPrintable),
content_id: Some(&model::MessageId {
left: "a",
right: "example.com"
}),
content_description: Some(&misc_token::Unstructured("hello".into())),
..section::MIMESection::default()
},
..section::Section::default()
}
);