implement mime headers
This commit is contained in:
parent
4e82941fd0
commit
6b3343f137
5 changed files with 131 additions and 24 deletions
|
@ -17,8 +17,8 @@ use crate::fragments::quoted::quoted_string;
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub struct Version {
|
pub struct Version {
|
||||||
major: u32,
|
pub major: u32,
|
||||||
minor: u32,
|
pub minor: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
|
@ -40,9 +40,9 @@ pub enum Type<'a> {
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub struct MultipartDesc<'a> {
|
pub struct MultipartDesc<'a> {
|
||||||
boundary: String,
|
pub boundary: String,
|
||||||
subtype: MultipartSubtype<'a>,
|
pub subtype: MultipartSubtype<'a>,
|
||||||
unknown_parameters: Vec<Parameter<'a>>,
|
pub unknown_parameters: Vec<Parameter<'a>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
|
@ -57,8 +57,8 @@ pub enum MultipartSubtype<'a> {
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub struct MessageDesc<'a> {
|
pub struct MessageDesc<'a> {
|
||||||
subtype: MessageSubtype<'a>,
|
pub subtype: MessageSubtype<'a>,
|
||||||
unknown_parameters: Vec<Parameter<'a>>,
|
pub unknown_parameters: Vec<Parameter<'a>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
|
@ -71,9 +71,9 @@ pub enum MessageSubtype<'a> {
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub struct TextDesc<'a> {
|
pub struct TextDesc<'a> {
|
||||||
charset: Option<EmailCharset<'a>>,
|
pub charset: Option<EmailCharset<'a>>,
|
||||||
subtype: TextSubtype<'a>,
|
pub subtype: TextSubtype<'a>,
|
||||||
unknown_parameters: Vec<Parameter<'a>>,
|
pub unknown_parameters: Vec<Parameter<'a>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
|
|
|
@ -2,7 +2,7 @@ use nom::{
|
||||||
branch::alt,
|
branch::alt,
|
||||||
bytes::complete::{tag, take_while1},
|
bytes::complete::{tag, take_while1},
|
||||||
character::complete::space0,
|
character::complete::space0,
|
||||||
combinator::{into, opt},
|
combinator::{into, map, opt},
|
||||||
multi::{many0, many1, separated_list1},
|
multi::{many0, many1, separated_list1},
|
||||||
sequence::tuple,
|
sequence::tuple,
|
||||||
IResult,
|
IResult,
|
||||||
|
@ -14,6 +14,7 @@ use crate::fragments::lazy;
|
||||||
use crate::fragments::quoted::quoted_string;
|
use crate::fragments::quoted::quoted_string;
|
||||||
use crate::fragments::whitespace::{fws, is_obs_no_ws_ctl};
|
use crate::fragments::whitespace::{fws, is_obs_no_ws_ctl};
|
||||||
use crate::fragments::words::{atom, is_vchar};
|
use crate::fragments::words::{atom, is_vchar};
|
||||||
|
use crate::fragments::encoding::encoded_word;
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Default)]
|
#[derive(Debug, PartialEq, Default)]
|
||||||
pub struct Unstructured(pub String);
|
pub struct Unstructured(pub String);
|
||||||
|
@ -47,7 +48,7 @@ impl<'a> TryFrom<&'a lazy::PhraseList<'a>> for PhraseList {
|
||||||
/// word = atom / quoted-string
|
/// word = atom / quoted-string
|
||||||
/// ```
|
/// ```
|
||||||
pub fn word(input: &str) -> IResult<&str, Cow<str>> {
|
pub fn word(input: &str) -> IResult<&str, Cow<str>> {
|
||||||
alt((into(quoted_string), into(atom)))(input)
|
alt((into(quoted_string), into(encoded_word), into(atom)))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Phrase
|
/// Phrase
|
||||||
|
@ -70,31 +71,46 @@ fn is_unstructured(c: char) -> bool {
|
||||||
is_vchar(c) || is_obs_no_ws_ctl(c) || c == '\x00'
|
is_vchar(c) || is_obs_no_ws_ctl(c) || c == '\x00'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
enum UnstrToken {
|
||||||
|
Init,
|
||||||
|
Encoded,
|
||||||
|
Plain,
|
||||||
|
}
|
||||||
|
|
||||||
/// Unstructured header field body
|
/// Unstructured header field body
|
||||||
///
|
///
|
||||||
/// ```abnf
|
/// ```abnf
|
||||||
/// unstructured = (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct
|
/// unstructured = (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct
|
||||||
/// ```
|
/// ```
|
||||||
pub fn unstructured(input: &str) -> IResult<&str, String> {
|
pub fn unstructured(input: &str) -> IResult<&str, String> {
|
||||||
let (input, r) = many0(tuple((opt(fws), take_while1(is_unstructured))))(input)?;
|
let (input, r) = many0(tuple((opt(fws), alt((
|
||||||
|
map(encoded_word, |v| (Cow::Owned(v), UnstrToken::Encoded)),
|
||||||
|
map(take_while1(is_unstructured), |v| (Cow::Borrowed(v), UnstrToken::Plain)),
|
||||||
|
)))))(input)?;
|
||||||
|
|
||||||
let (input, _) = space0(input)?;
|
let (input, _) = space0(input)?;
|
||||||
|
|
||||||
// Try to optimize for the most common cases
|
// Try to optimize for the most common cases
|
||||||
let body = match r.as_slice() {
|
let body = match r.as_slice() {
|
||||||
[(None, content)] => content.to_string(),
|
// Optimization when there is only one line
|
||||||
[(Some(_), content)] => " ".to_string() + content,
|
[(None, (content, _))] | [(_, (content, UnstrToken::Encoded))] => content.to_string(),
|
||||||
lines => lines.iter().fold(String::with_capacity(255), |acc, item| {
|
[(Some(_), (content, _))] => " ".to_string() + content,
|
||||||
let (may_ws, content) = item;
|
// Generic case, with multiple lines
|
||||||
match may_ws {
|
lines => lines.iter().fold(
|
||||||
Some(_) => acc + " " + content,
|
(&UnstrToken::Init, String::with_capacity(255)),
|
||||||
None => acc + content,
|
|(prev_token, result), (may_ws, (content, current_token))| {
|
||||||
}
|
let new_res = match (may_ws, prev_token, current_token) {
|
||||||
}),
|
(_, UnstrToken::Encoded, UnstrToken::Encoded) | (None, _, _) => result + content,
|
||||||
|
_ => result + " " + content,
|
||||||
|
};
|
||||||
|
(current_token, new_res)
|
||||||
|
}).1,
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok((input, body))
|
Ok((input, body))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
|
@ -3,6 +3,7 @@ use std::collections::HashMap;
|
||||||
use crate::fragments::eager::Field;
|
use crate::fragments::eager::Field;
|
||||||
use crate::fragments::lazy;
|
use crate::fragments::lazy;
|
||||||
use crate::fragments::misc_token::{PhraseList, Unstructured};
|
use crate::fragments::misc_token::{PhraseList, Unstructured};
|
||||||
|
use crate::fragments::mime::{Version,Type,Mechanism};
|
||||||
use crate::fragments::model::{AddressRef, MailboxRef, MessageId};
|
use crate::fragments::model::{AddressRef, MailboxRef, MessageId};
|
||||||
use crate::fragments::trace::ReceivedLog;
|
use crate::fragments::trace::ReceivedLog;
|
||||||
use chrono::{DateTime, FixedOffset};
|
use chrono::{DateTime, FixedOffset};
|
||||||
|
@ -40,6 +41,13 @@ pub struct Section<'a> {
|
||||||
// 3.6.8. Optional Fields
|
// 3.6.8. Optional Fields
|
||||||
pub optional: HashMap<&'a str, &'a Unstructured>,
|
pub optional: HashMap<&'a str, &'a Unstructured>,
|
||||||
|
|
||||||
|
// MIME
|
||||||
|
pub mime_version: Option<&'a Version>,
|
||||||
|
pub content_type: Option<&'a Type<'a>>,
|
||||||
|
pub content_transfer_encoding: Option<&'a Mechanism<'a>>,
|
||||||
|
pub content_id: Option<&'a MessageId<'a>>,
|
||||||
|
pub content_description: Option<&'a Unstructured>,
|
||||||
|
|
||||||
// Recovery
|
// Recovery
|
||||||
pub bad_fields: Vec<&'a lazy::Field<'a>>,
|
pub bad_fields: Vec<&'a lazy::Field<'a>>,
|
||||||
pub unparsed: Vec<&'a str>,
|
pub unparsed: Vec<&'a str>,
|
||||||
|
@ -71,7 +79,11 @@ impl<'a> FromIterator<&'a Field<'a>> for Section<'a> {
|
||||||
section.optional.insert(k, v);
|
section.optional.insert(k, v);
|
||||||
}
|
}
|
||||||
Field::Rescue(v) => section.unparsed.push(v),
|
Field::Rescue(v) => section.unparsed.push(v),
|
||||||
_ => todo!(),
|
Field::MIMEVersion(v) => section.mime_version = Some(v),
|
||||||
|
Field::ContentType(v) => section.content_type = Some(v),
|
||||||
|
Field::ContentTransferEncoding(v) => section.content_transfer_encoding = Some(v),
|
||||||
|
Field::ContentID(v) => section.content_id = Some(v),
|
||||||
|
Field::ContentDescription(v) => section.content_description = Some(v),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
section
|
section
|
||||||
|
|
|
@ -8,6 +8,7 @@ use nom::{
|
||||||
sequence::tuple,
|
sequence::tuple,
|
||||||
IResult,
|
IResult,
|
||||||
};
|
};
|
||||||
|
use crate::fragments::encoding::encoded_word;
|
||||||
|
|
||||||
// --- whitespaces and comments
|
// --- whitespaces and comments
|
||||||
|
|
||||||
|
@ -75,7 +76,7 @@ pub fn comment(input: &str) -> IResult<&str, ()> {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn ccontent(input: &str) -> IResult<&str, &str> {
|
pub fn ccontent(input: &str) -> IResult<&str, &str> {
|
||||||
alt((recognize(ctext), recognize(quoted_pair), recognize(comment)))(input)
|
alt((recognize(ctext), recognize(quoted_pair), recognize(encoded_word), recognize(comment)))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn ctext(input: &str) -> IResult<&str, char> {
|
pub fn ctext(input: &str) -> IResult<&str, char> {
|
||||||
|
@ -155,4 +156,12 @@ mod tests {
|
||||||
Ok(("wouch", "(double (comment) is fun) "))
|
Ok(("wouch", "(double (comment) is fun) "))
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_cfws_encoded_word() {
|
||||||
|
assert_eq!(
|
||||||
|
cfws("(=?US-ASCII?Q?Keith_Moore?=)"),
|
||||||
|
Ok(("", "(=?US-ASCII?Q?Keith_Moore?=)")),
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -172,7 +172,77 @@ This is a reply to your hello.
|
||||||
"Héron: Raté\n Raté raté\n",
|
"Héron: Raté\n Raté raté\n",
|
||||||
"Not a real header but should still recover\n",
|
"Not a real header but should still recover\n",
|
||||||
],
|
],
|
||||||
|
..section::Section::default()
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_headers_mime() {
|
||||||
|
use imf_codec::fragments::mime;
|
||||||
|
let fullmail: &[u8] = r#"From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>
|
||||||
|
To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>
|
||||||
|
CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>
|
||||||
|
Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
|
||||||
|
=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=ISO-8859-1
|
||||||
|
Content-Transfer-Encoding: quoted-printable
|
||||||
|
Content-ID: <a@example.com>
|
||||||
|
Content-Description: hello
|
||||||
|
|
||||||
|
Now's the time =
|
||||||
|
for all folk to come=
|
||||||
|
to the aid of their country.
|
||||||
|
"#
|
||||||
|
.as_bytes();
|
||||||
|
|
||||||
|
parser(fullmail, |parsed_section| {
|
||||||
|
assert_eq!(
|
||||||
|
parsed_section,
|
||||||
|
&section::Section {
|
||||||
|
from: vec![
|
||||||
|
&model::MailboxRef {
|
||||||
|
name: Some("Keith Moore".into()),
|
||||||
|
addrspec: model::AddrSpec {
|
||||||
|
local_part: "moore".into(),
|
||||||
|
domain: "cs.utk.edu".into(),
|
||||||
|
}
|
||||||
|
},
|
||||||
|
],
|
||||||
|
|
||||||
|
to: vec![&model::AddressRef::Single(model::MailboxRef {
|
||||||
|
name: Some("Keld Jørn Simonsen".into()),
|
||||||
|
addrspec: model::AddrSpec {
|
||||||
|
local_part: "keld".into(),
|
||||||
|
domain: "dkuug.dk".into(),
|
||||||
|
}
|
||||||
|
})],
|
||||||
|
|
||||||
|
cc: vec![&model::AddressRef::Single(model::MailboxRef {
|
||||||
|
name: Some("André Pirard".into()),
|
||||||
|
addrspec: model::AddrSpec {
|
||||||
|
local_part: "PIRARD".into(),
|
||||||
|
domain: "vm1.ulg.ac.be".into(),
|
||||||
|
}
|
||||||
|
})],
|
||||||
|
|
||||||
|
subject: Some(&misc_token::Unstructured("If you can read this you understand the example.".into())),
|
||||||
|
mime_version: Some(&mime::Version{ major: 1, minor: 0 }),
|
||||||
|
content_type: Some(&mime::Type::Text(mime::TextDesc {
|
||||||
|
charset: Some(mime::EmailCharset::ISO_8859_1),
|
||||||
|
subtype: mime::TextSubtype::Plain,
|
||||||
|
unknown_parameters: vec![]
|
||||||
|
})),
|
||||||
|
content_transfer_encoding: Some(&mime::Mechanism::QuotedPrintable),
|
||||||
|
content_id: Some(&model::MessageId {
|
||||||
|
left: "a",
|
||||||
|
right: "example.com"
|
||||||
|
}),
|
||||||
|
content_description: Some(&misc_token::Unstructured("hello".into())),
|
||||||
|
..section::Section::default()
|
||||||
|
}
|
||||||
|
);
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue