WIP: refactoring in progress (build currently broken)
This commit is contained in:
parent
16a5897f4e
commit
23c663b943
10 changed files with 182 additions and 90 deletions
|
@ -1,5 +1,5 @@
|
|||
use crate::error::IMFError;
|
||||
use crate::fragments::lazy::Field as Lazy;
|
||||
use crate::fragments::lazy::{Field as Lazy, MIMEField as LazyMIME};
|
||||
use crate::fragments::mime::{Mechanism, Type, Version};
|
||||
use crate::fragments::misc_token::{PhraseList, Unstructured};
|
||||
use crate::fragments::model::{AddressList, MailboxList, MailboxRef, MessageId, MessageIdList};
|
||||
|
@ -38,10 +38,7 @@ pub enum Field<'a> {
|
|||
|
||||
// MIME RFC2045
|
||||
MIMEVersion(Version),
|
||||
ContentType(Type<'a>),
|
||||
ContentTransferEncoding(Mechanism<'a>),
|
||||
ContentID(MessageId<'a>),
|
||||
ContentDescription(Unstructured),
|
||||
MIME(MIMEField<'a>),
|
||||
|
||||
// 3.6.8. Optional Fields
|
||||
Optional(&'a str, Unstructured),
|
||||
|
@ -49,12 +46,22 @@ pub enum Field<'a> {
|
|||
// None
|
||||
Rescue(&'a str),
|
||||
}
|
||||
use Field::*;
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum MIMEField<'a> {
|
||||
ContentType(Type<'a>),
|
||||
ContentTransferEncoding(Mechanism<'a>),
|
||||
ContentID(MessageId<'a>),
|
||||
ContentDescription(Unstructured),
|
||||
Optional(&'a str, Unstructured),
|
||||
Rescue(&'a str),
|
||||
}
|
||||
|
||||
impl<'a> TryFrom<&'a Lazy<'a>> for Field<'a> {
|
||||
type Error = IMFError<'a>;
|
||||
|
||||
fn try_from(l: &'a Lazy<'a>) -> Result<Self, Self::Error> {
|
||||
use Field::*;
|
||||
match l {
|
||||
Lazy::Date(v) => v.try_into().map(|v| Date(v)),
|
||||
Lazy::From(v) => v.try_into().map(|v| From(v)),
|
||||
|
@ -72,12 +79,25 @@ impl<'a> TryFrom<&'a Lazy<'a>> for Field<'a> {
|
|||
Lazy::Received(v) => v.try_into().map(|v| Received(v)),
|
||||
Lazy::ReturnPath(v) => v.try_into().map(|v| ReturnPath(v)),
|
||||
Lazy::MIMEVersion(v) => v.try_into().map(|v| MIMEVersion(v)),
|
||||
Lazy::ContentType(v) => v.try_into().map(|v| ContentType(v)),
|
||||
Lazy::ContentTransferEncoding(v) => v.try_into().map(|v| ContentTransferEncoding(v)),
|
||||
Lazy::ContentID(v) => v.try_into().map(|v| ContentID(v)),
|
||||
Lazy::ContentDescription(v) => v.try_into().map(|v| ContentDescription(v)),
|
||||
Lazy::MIME(v) => v.try_into().map(|v| MIME(v)),
|
||||
Lazy::Optional(k, v) => v.try_into().map(|v| Optional(k, v)),
|
||||
Lazy::Rescue(v) => Ok(Rescue(v)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> TryFrom<&'a LazyMIME<'a>> for MIMEField<'a> {
|
||||
type Error = IMFError<'a>;
|
||||
|
||||
fn try_from(l: &'a LazyMIME<'a>) -> Result<Self, Self::Error> {
|
||||
use MIMEField::*;
|
||||
match l {
|
||||
LazyMIME::ContentType(v) => v.try_into().map(|v| ContentType(v)),
|
||||
LazyMIME::ContentTransferEncoding(v) => v.try_into().map(|v| ContentTransferEncoding(v)),
|
||||
LazyMIME::ContentID(v) => v.try_into().map(|v| ContentID(v)),
|
||||
LazyMIME::ContentDescription(v) => v.try_into().map(|v| ContentDescription(v)),
|
||||
LazyMIME::Optional(k, v) => v.try_into().map(|v| Optional(k, v)),
|
||||
LazyMIME::Rescue(v) => Ok(Rescue(v)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,4 +1,7 @@
|
|||
use std::borrow::Cow;
|
||||
use chardetng::EncodingDetector;
|
||||
use encoding_rs::Encoding;
|
||||
|
||||
use nom::{
|
||||
IResult,
|
||||
branch::alt,
|
||||
|
@ -13,6 +16,21 @@ use base64::{Engine as _, engine::general_purpose};
|
|||
|
||||
use crate::fragments::mime;
|
||||
|
||||
const IS_LAST_BUFFER: bool = true;
|
||||
const ALLOW_UTF8: bool = true;
|
||||
const NO_TLD: Option<&[u8]> = None;
|
||||
|
||||
pub fn header_decode(input: &[u8]) -> Cow<str> {
|
||||
// Create detector
|
||||
let mut detector = EncodingDetector::new();
|
||||
detector.feed(input, IS_LAST_BUFFER);
|
||||
|
||||
// Get encoding
|
||||
let enc: &Encoding = detector.guess(NO_TLD, ALLOW_UTF8);
|
||||
let (header, _, _) = enc.decode(input);
|
||||
header
|
||||
}
|
||||
|
||||
pub fn encoded_word(input: &str) -> IResult<&str, String> {
|
||||
alt((encoded_word_quoted, encoded_word_base64))(input)
|
||||
}
|
||||
|
|
|
@ -81,10 +81,7 @@ pub enum Field<'a> {
|
|||
|
||||
// MIME RFC 2045
|
||||
MIMEVersion(Version<'a>),
|
||||
ContentType(Type<'a>),
|
||||
ContentTransferEncoding(Mechanism<'a>),
|
||||
ContentID(Identifier<'a>),
|
||||
ContentDescription(Unstructured<'a>),
|
||||
MIME(MIMEField<'a>),
|
||||
|
||||
// 3.6.8. Optional Fields
|
||||
Optional(&'a str, Unstructured<'a>),
|
||||
|
@ -92,16 +89,35 @@ pub enum Field<'a> {
|
|||
// None
|
||||
Rescue(&'a str),
|
||||
}
|
||||
use Field::*;
|
||||
|
||||
impl<'a> From<&'a str> for Field<'a> {
|
||||
fn from(input: &'a str) -> Self {
|
||||
match correct_field(input) {
|
||||
Ok((_, field)) => field,
|
||||
Err(_) => Rescue(input),
|
||||
Err(_) => Field::Rescue(input),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum MIMEField<'a> {
|
||||
ContentType(Type<'a>),
|
||||
ContentTransferEncoding(Mechanism<'a>),
|
||||
ContentID(Identifier<'a>),
|
||||
ContentDescription(Unstructured<'a>),
|
||||
|
||||
Optional(&'a str, Unstructured<'a>),
|
||||
Rescue(&'a str),
|
||||
}
|
||||
impl<'a> From<&'a str> for MIMEField<'a> {
|
||||
fn from(input: &'a str) -> Self {
|
||||
match correct_mime_field(input) {
|
||||
Ok((_, field)) => field,
|
||||
Err(_) => MIMEField::Rescue(input),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Optional field
|
||||
///
|
||||
/// ```abnf
|
||||
|
@ -119,6 +135,7 @@ fn field_name(input: &str) -> IResult<&str, &str> {
|
|||
}
|
||||
|
||||
fn correct_field(input: &str) -> IResult<&str, Field> {
|
||||
use Field::*;
|
||||
field_name(input).map(|(rest, name)| {
|
||||
(
|
||||
"",
|
||||
|
@ -144,14 +161,30 @@ fn correct_field(input: &str) -> IResult<&str, Field> {
|
|||
"return-path" => ReturnPath(Mailbox(rest)),
|
||||
"received" => Received(ReceivedLog(rest)),
|
||||
|
||||
"mime-version" => MIMEVersion(Version(rest)),
|
||||
"content-type" => ContentType(Type(rest)),
|
||||
"content-transfer-encoding" => ContentTransferEncoding(Mechanism(rest)),
|
||||
"content-id" => ContentID(Identifier(rest)),
|
||||
"content-description" => ContentDescription(Unstructured(rest)),
|
||||
"content-type" => MIME(MIMEField::ContentType(Type(rest))),
|
||||
"content-transfer-encoding" => MIME(MIMEField::ContentTransferEncoding(Mechanism(rest))),
|
||||
"content-id" => MIME(MIMEField::ContentID(Identifier(rest))),
|
||||
"content-description" => MIME(MIMEField::ContentDescription(Unstructured(rest))),
|
||||
|
||||
"mime-version" => MIMEVersion(Version(rest)),
|
||||
_ => Optional(name, Unstructured(rest)),
|
||||
},
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
fn correct_mime_field(input: &str) -> IResult<&str, MIMEField> {
|
||||
use MIMEField::*;
|
||||
field_name(input).map(|(rest, name)| {
|
||||
(
|
||||
"",
|
||||
match name.to_lowercase().as_ref() {
|
||||
"content-type" => ContentType(Type(rest)),
|
||||
"content-transfer-encoding" => ContentTransferEncoding(Mechanism(rest)),
|
||||
"content-id" => ContentID(Identifier(rest)),
|
||||
"content-description" => ContentDescription(Unstructured(rest)),
|
||||
_ => Optional(name, Unstructured(rest)),
|
||||
}
|
||||
)
|
||||
})
|
||||
}
|
||||
|
|
|
@ -11,19 +11,21 @@ use crate::fragments::mime::{Mechanism, Type};
|
|||
use crate::fragments::model::MessageId;
|
||||
use crate::fragments::misc_token::Unstructured;
|
||||
use crate::fragments::whitespace::{CRLF, headers, line, obs_crlf};
|
||||
use crate::fragments::{eager,lazy};
|
||||
use crate::fragments::section::MIMESection;
|
||||
|
||||
#[derive(Debug, PartialEq, Default)]
|
||||
pub struct PartHeader<'a> {
|
||||
pub content_type: Option<&'a Type<'a>>,
|
||||
pub content_transfer_encoding: Option<&'a Mechanism<'a>>,
|
||||
pub content_id: Option<&'a MessageId<'a>>,
|
||||
pub content_description: Option<&'a Unstructured>,
|
||||
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum PartNodeLazy<'a>{
|
||||
Discrete(MIMESection<'a>, &'a [u8]),
|
||||
Composite(MIMESection<'a>, &'a [u8]),
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum PartNode<'a> {
|
||||
Discrete(PartHeader<'a>, &'a [u8]),
|
||||
Composite(PartHeader<'a>, Vec<PartNode<'a>>),
|
||||
Discrete(MIMESection<'a>, &'a [u8]),
|
||||
Composite(MIMESection<'a>, Vec<PartNode<'a>>),
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
|
@ -32,6 +34,14 @@ pub enum Delimiter {
|
|||
Last
|
||||
}
|
||||
|
||||
const IS_LAST_BUFFER: bool = true;
|
||||
const ALLOW_UTF8: bool = true;
|
||||
const NO_TLD: Option<&[u8]> = None;
|
||||
/// Placeholder for the parser that will produce a `PartNodeLazy`.
///
/// # Panics
///
/// Not implemented yet: every call panics through `todo!()`.
fn part_node_lazy(input: &[u8]) -> IResult<&[u8], PartNodeLazy> {
    // Sketch kept from the work in progress, showing the intended conversion chain:
    //let mime = header.iter().map(|e| eager::MIMEField::from(lazy::MIMEField::from(e)));
    todo!()
}
|
||||
|
||||
pub fn boundary<'a>(boundary: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Delimiter> {
|
||||
move |input: &[u8]| {
|
||||
let (rest, (_, _, _, last, _)) = tuple((obs_crlf, tag(b"--"), tag(boundary), opt(tag(b"--")), opt(obs_crlf)))(input)?;
|
||||
|
@ -60,6 +70,9 @@ pub fn preamble<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], &
|
|||
}
|
||||
}
|
||||
|
||||
// FIXME parse email here
|
||||
|
||||
|
||||
// Returns Ok even if an error is encountered while parsing
|
||||
// the different mimes.
|
||||
pub fn multipart<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Vec<&'a [u8]>> {
|
||||
|
@ -67,21 +80,21 @@ pub fn multipart<'a>(bound: &'a [u8]) -> impl Fn(&'a [u8]) -> IResult<&'a [u8],
|
|||
let (mut input_loop, _) = preamble(bound)(input)?;
|
||||
let mut parts: Vec<&[u8]> = vec![];
|
||||
loop {
|
||||
let input2 = match boundary(bound)(input_loop) {
|
||||
let input = match boundary(bound)(input_loop) {
|
||||
Err(_) => return Ok((input_loop, parts)),
|
||||
Ok((inp, Delimiter::Last)) => return Ok((inp, parts)),
|
||||
Ok((inp, Delimiter::Next)) => inp,
|
||||
};
|
||||
|
||||
let input3 = match part(bound)(input2) {
|
||||
Err(_) => return Ok((input2, parts)),
|
||||
let input = match part(bound)(input) {
|
||||
Err(_) => return Ok((input, parts)),
|
||||
Ok((inp, part)) => {
|
||||
parts.push(part);
|
||||
inp
|
||||
}
|
||||
};
|
||||
|
||||
input_loop = input3;
|
||||
input_loop = input;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
use crate::fragments::eager::Field;
|
||||
use crate::fragments::eager::{Field, MIMEField};
|
||||
use crate::fragments::lazy;
|
||||
use crate::fragments::misc_token::{PhraseList, Unstructured};
|
||||
use crate::fragments::mime::{Version,Type,Mechanism};
|
||||
|
@ -43,13 +43,20 @@ pub struct Section<'a> {
|
|||
|
||||
// MIME
|
||||
pub mime_version: Option<&'a Version>,
|
||||
pub mime: MIMESection<'a>,
|
||||
|
||||
// Recovery
|
||||
pub bad_fields: Vec<&'a lazy::Field<'a>>,
|
||||
pub unparsed: Vec<&'a str>,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Default)]
|
||||
pub struct MIMESection<'a> {
|
||||
pub content_type: Option<&'a Type<'a>>,
|
||||
pub content_transfer_encoding: Option<&'a Mechanism<'a>>,
|
||||
pub content_id: Option<&'a MessageId<'a>>,
|
||||
pub content_description: Option<&'a Unstructured>,
|
||||
|
||||
// Recovery
|
||||
pub bad_fields: Vec<&'a lazy::Field<'a>>,
|
||||
pub optional: HashMap<&'a str, &'a Unstructured>,
|
||||
pub unparsed: Vec<&'a str>,
|
||||
}
|
||||
|
||||
|
@ -80,12 +87,36 @@ impl<'a> FromIterator<&'a Field<'a>> for Section<'a> {
|
|||
}
|
||||
Field::Rescue(v) => section.unparsed.push(v),
|
||||
Field::MIMEVersion(v) => section.mime_version = Some(v),
|
||||
Field::ContentType(v) => section.content_type = Some(v),
|
||||
Field::ContentTransferEncoding(v) => section.content_transfer_encoding = Some(v),
|
||||
Field::ContentID(v) => section.content_id = Some(v),
|
||||
Field::ContentDescription(v) => section.content_description = Some(v),
|
||||
Field::MIME(v) => match v {
|
||||
MIMEField::ContentType(v) => section.mime.content_type = Some(v),
|
||||
MIMEField::ContentTransferEncoding(v) => section.mime.content_transfer_encoding = Some(v),
|
||||
MIMEField::ContentID(v) => section.mime.content_id = Some(v),
|
||||
MIMEField::ContentDescription(v) => section.mime.content_description = Some(v),
|
||||
MIMEField::Optional(k, v) => {
|
||||
section.mime.optional.insert(k, v);
|
||||
}
|
||||
MIMEField::Rescue(v) => section.mime.unparsed.push(v),
|
||||
|
||||
},
|
||||
}
|
||||
}
|
||||
section
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> FromIterator<&'a MIMEField<'a>> for MIMESection<'a> {
|
||||
fn from_iter<I: IntoIterator<Item = &'a MIMEField<'a>>>(iter: I) -> Self {
|
||||
let mut section = MIMESection::default();
|
||||
for field in iter {
|
||||
match field {
|
||||
MIMEField::ContentType(v) => section.content_type = Some(v),
|
||||
MIMEField::ContentTransferEncoding(v) => section.content_transfer_encoding = Some(v),
|
||||
MIMEField::ContentID(v) => section.content_id = Some(v),
|
||||
MIMEField::ContentDescription(v) => section.content_description = Some(v),
|
||||
MIMEField::Optional(k, v) => { section.optional.insert(k, v); },
|
||||
MIMEField::Rescue(v) => section.unparsed.push(v),
|
||||
};
|
||||
}
|
||||
section
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,6 +22,10 @@ pub fn headers(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
|||
terminated(recognize(many0(line)), obs_crlf)(input)
|
||||
}
|
||||
|
||||
pub fn fields(input: &str) -> IResult<&str, Vec<&str>> {
|
||||
all_consuming(many0(foldable_line))(input)
|
||||
}
|
||||
|
||||
pub fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
|
||||
// is_not(CRLF) is a hack, it means "is not CR or LF"
|
||||
// and not "is not CRLF". In other words, it continues while
|
||||
|
|
|
@ -8,7 +8,7 @@ use nom::{
|
|||
};
|
||||
|
||||
use crate::error::IMFError;
|
||||
use crate::fragments::whitespace;
|
||||
use crate::fragments::fields;
|
||||
use crate::multipass::field_lazy;
|
||||
use crate::multipass::guess_charset;
|
||||
|
||||
|
@ -19,7 +19,7 @@ pub struct Parsed<'a> {
|
|||
}
|
||||
|
||||
pub fn new<'a>(gcha: &'a guess_charset::Parsed<'a>) -> Result<Parsed<'a>, IMFError<'a>> {
|
||||
all_consuming(many0(foldable_line))(&gcha.header)
|
||||
fields(&gcha.header)
|
||||
.map_err(|e| IMFError::ExtractFields(e))
|
||||
.map(|(_, fields)| Parsed {
|
||||
fields,
|
||||
|
@ -33,20 +33,6 @@ impl<'a> Parsed<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// ```abnf
|
||||
/// fold_line = any *(1*(crlf WS) any) crlf
|
||||
/// ```
|
||||
fn foldable_line(input: &str) -> IResult<&str, &str> {
|
||||
recognize(tuple((
|
||||
is_not("\r\n"),
|
||||
many0(pair(
|
||||
many1(pair(whitespace::perm_crlf, space1)),
|
||||
is_not("\r\n"),
|
||||
)),
|
||||
whitespace::perm_crlf,
|
||||
)))(input)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
|
|
@ -63,10 +63,10 @@ mod tests {
|
|||
Parsed {
|
||||
fields: vec![
|
||||
lazy::Field::MIMEVersion(lazy::Version("1.0 \r\n")),
|
||||
lazy::Field::ContentType(lazy::Type("multipart/alternative; boundary=\"bound\"\r\n")),
|
||||
lazy::Field::ContentTransferEncoding(lazy::Mechanism("7bit\r\n")),
|
||||
lazy::Field::ContentID(lazy::Identifier("<foo4*foo1@bar.net>\r\n")),
|
||||
lazy::Field::ContentDescription(lazy::Unstructured("hello world\r\n")),
|
||||
lazy::Field::MIME(lazy::MIMEField::ContentType(lazy::Type("multipart/alternative; boundary=\"bound\"\r\n"))),
|
||||
lazy::Field::MIME(lazy::MIMEField::ContentTransferEncoding(lazy::Mechanism("7bit\r\n"))),
|
||||
lazy::Field::MIME(lazy::MIMEField::ContentID(lazy::Identifier("<foo4*foo1@bar.net>\r\n"))),
|
||||
lazy::Field::MIME(lazy::MIMEField::ContentDescription(lazy::Unstructured("hello world\r\n"))),
|
||||
],
|
||||
body: b"Hello world!",
|
||||
}
|
||||
|
|
|
@ -1,35 +1,19 @@
|
|||
use chardetng::EncodingDetector;
|
||||
use encoding_rs::Encoding;
|
||||
use std::borrow::Cow;
|
||||
|
||||
use crate::error::IMFError;
|
||||
use crate::fragments::encoding;
|
||||
use crate::multipass::extract_fields;
|
||||
use crate::multipass::segment;
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Parsed<'a> {
|
||||
pub header: Cow<'a, str>,
|
||||
pub encoding: &'static Encoding,
|
||||
pub malformed: bool,
|
||||
pub body: &'a [u8],
|
||||
}
|
||||
|
||||
const IS_LAST_BUFFER: bool = true;
|
||||
const ALLOW_UTF8: bool = true;
|
||||
const NO_TLD: Option<&[u8]> = None;
|
||||
|
||||
pub fn new<'a>(seg: &'a segment::Parsed<'a>) -> Parsed<'a> {
|
||||
// Create detector
|
||||
let mut detector = EncodingDetector::new();
|
||||
detector.feed(&seg.header, IS_LAST_BUFFER);
|
||||
|
||||
// Get encoding
|
||||
let enc: &Encoding = detector.guess(NO_TLD, ALLOW_UTF8);
|
||||
let (header, encoding, malformed) = enc.decode(&seg.header);
|
||||
Parsed {
|
||||
header,
|
||||
encoding,
|
||||
malformed,
|
||||
header: encoding::header_decode(&seg.header),
|
||||
body: seg.body,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -230,17 +230,20 @@ for all folk to come=
|
|||
|
||||
subject: Some(&misc_token::Unstructured("If you can read this you understand the example.".into())),
|
||||
mime_version: Some(&mime::Version{ major: 1, minor: 0 }),
|
||||
content_type: Some(&mime::Type::Text(mime::TextDesc {
|
||||
charset: Some(mime::EmailCharset::ISO_8859_1),
|
||||
subtype: mime::TextSubtype::Plain,
|
||||
unknown_parameters: vec![]
|
||||
})),
|
||||
content_transfer_encoding: Some(&mime::Mechanism::QuotedPrintable),
|
||||
content_id: Some(&model::MessageId {
|
||||
left: "a",
|
||||
right: "example.com"
|
||||
}),
|
||||
content_description: Some(&misc_token::Unstructured("hello".into())),
|
||||
mime: section::MIMESection {
|
||||
content_type: Some(&mime::Type::Text(mime::TextDesc {
|
||||
charset: Some(mime::EmailCharset::ISO_8859_1),
|
||||
subtype: mime::TextSubtype::Plain,
|
||||
unknown_parameters: vec![]
|
||||
})),
|
||||
content_transfer_encoding: Some(&mime::Mechanism::QuotedPrintable),
|
||||
content_id: Some(&model::MessageId {
|
||||
left: "a",
|
||||
right: "example.com"
|
||||
}),
|
||||
content_description: Some(&misc_token::Unstructured("hello".into())),
|
||||
..section::MIMESection::default()
|
||||
},
|
||||
..section::Section::default()
|
||||
}
|
||||
);
|
||||
|
|
Loading…
Add table
Reference in a new issue