wip refactor
This commit is contained in:
parent
23c663b943
commit
a503eb1de6
35 changed files with 746 additions and 1038 deletions
|
@ -1,129 +0,0 @@
|
||||||
use nom::{
|
|
||||||
branch::alt,
|
|
||||||
bytes::complete::{tag, take_while1},
|
|
||||||
character::complete::space0,
|
|
||||||
combinator::{into, map, opt},
|
|
||||||
multi::{many0, many1, separated_list1},
|
|
||||||
sequence::tuple,
|
|
||||||
IResult,
|
|
||||||
};
|
|
||||||
use std::borrow::Cow;
|
|
||||||
|
|
||||||
use crate::error::IMFError;
|
|
||||||
use crate::fragments::lazy;
|
|
||||||
use crate::fragments::quoted::quoted_string;
|
|
||||||
use crate::fragments::whitespace::{fws, is_obs_no_ws_ctl};
|
|
||||||
use crate::fragments::words::{atom, is_vchar};
|
|
||||||
use crate::fragments::encoding::encoded_word;
|
|
||||||
|
|
||||||
/// Eagerly parsed body of an unstructured header field, stored as an owned
/// string.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct Unstructured(pub String);
|
|
||||||
|
|
||||||
/// Eagerly parsed list of phrases, one owned string per phrase.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct PhraseList(pub Vec<String>);
|
|
||||||
|
|
||||||
impl<'a> TryFrom<&'a lazy::Unstructured<'a>> for Unstructured {
|
|
||||||
type Error = IMFError<'a>;
|
|
||||||
|
|
||||||
fn try_from(input: &'a lazy::Unstructured<'a>) -> Result<Self, Self::Error> {
|
|
||||||
unstructured(input.0)
|
|
||||||
.map(|(_, v)| Unstructured(v))
|
|
||||||
.map_err(|e| IMFError::Unstructured(e))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> TryFrom<&'a lazy::PhraseList<'a>> for PhraseList {
|
|
||||||
type Error = IMFError<'a>;
|
|
||||||
|
|
||||||
fn try_from(p: &'a lazy::PhraseList<'a>) -> Result<Self, Self::Error> {
|
|
||||||
separated_list1(tag(","), phrase)(p.0)
|
|
||||||
.map(|(_, q)| PhraseList(q))
|
|
||||||
.map_err(|e| IMFError::PhraseList(e))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Word
|
|
||||||
///
|
|
||||||
/// ```abnf
|
|
||||||
/// word = atom / quoted-string
|
|
||||||
/// ```
|
|
||||||
pub fn word(input: &str) -> IResult<&str, Cow<str>> {
|
|
||||||
alt((into(quoted_string), into(encoded_word), into(atom)))(input)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Phrase
|
|
||||||
///
|
|
||||||
/// ```abnf
|
|
||||||
/// phrase = 1*word / obs-phrase
|
|
||||||
/// ```
|
|
||||||
pub fn phrase(input: &str) -> IResult<&str, String> {
|
|
||||||
let (input, words) = many1(word)(input)?;
|
|
||||||
let phrase = words.join(" ");
|
|
||||||
Ok((input, phrase))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Compatible unstructured input
|
|
||||||
///
|
|
||||||
/// ```abnf
|
|
||||||
/// obs-utext = %d0 / obs-NO-WS-CTL / VCHAR
|
|
||||||
/// ```
|
|
||||||
fn is_unstructured(c: char) -> bool {
|
|
||||||
is_vchar(c) || is_obs_no_ws_ctl(c) || c == '\x00'
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Kind of a token met while an unstructured body is being reassembled; the
/// kind of the previous token decides whether a separating space is emitted.
enum UnstrToken {
|
|
||||||
/// Seed value of the fold: no token has been emitted yet.
Init,
|
|
||||||
/// The token was produced by the `encoded_word` parser.
Encoded,
|
|
||||||
/// The token is a raw run of characters accepted by `is_unstructured`.
Plain,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Unstructured header field body
|
|
||||||
///
|
|
||||||
/// ```abnf
|
|
||||||
/// unstructured = (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct
|
|
||||||
/// ```
///
/// Parses zero or more tokens, each optionally preceded by folding white
/// space, then skips trailing spaces and tabs, and returns the tokens glued
/// into one `String`. A token is either an encoded word (already decoded by
/// `encoded_word`) or a run of characters accepted by `is_unstructured`.
///
/// Joining rule of the generic path: one space is inserted before a token
/// only when white space preceded it in the input, and never between two
/// consecutive encoded words.
|
|
||||||
pub fn unstructured(input: &str) -> IResult<&str, String> {
|
|
||||||
// Collect (optional leading white space, (text, token kind)) tuples.
let (input, r) = many0(tuple((opt(fws), alt((
|
|
||||||
// Encoded words come first so that they are not swallowed as plain text.
map(encoded_word, |v| (Cow::Owned(v), UnstrToken::Encoded)),
|
|
||||||
map(take_while1(is_unstructured), |v| (Cow::Borrowed(v), UnstrToken::Plain)),
|
|
||||||
)))))(input)?;
|
|
||||||
|
|
||||||
// Trailing spaces/tabs are consumed and never reach the output.
let (input, _) = space0(input)?;
|
|
||||||
|
|
||||||
// Try to optimize for the most common cases
|
|
||||||
let body = match r.as_slice() {
|
|
||||||
// Optimization when there is only one line
// NOTE(review): a single encoded word preceded by white space yields no
// leading space here, whereas the generic fold below would emit one
// (previous token is `Init`) -- confirm this asymmetry is intended.
|
|
||||||
[(None, (content, _))] | [(_, (content, UnstrToken::Encoded))] => content.to_string(),
|
|
||||||
[(Some(_), (content, _))] => " ".to_string() + content,
|
|
||||||
// Generic case, with multiple lines
// (also reached when no token at all was parsed; the result is then empty)
|
|
||||||
lines => lines.iter().fold(
|
|
||||||
(&UnstrToken::Init, String::with_capacity(255)),
|
|
||||||
|(prev_token, result), (may_ws, (content, current_token))| {
|
|
||||||
let new_res = match (may_ws, prev_token, current_token) {
|
|
||||||
// No separator between adjacent encoded words, nor when no white
// space was present in the input.
(_, UnstrToken::Encoded, UnstrToken::Encoded) | (None, _, _) => result + content,
|
|
||||||
// Any amount of folding white space collapses to one space.
_ => result + " " + content,
|
|
||||||
};
|
|
||||||
(current_token, new_res)
|
|
||||||
}).1,
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok((input, body))
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#[cfg(test)]
mod tests {
    use super::*;

    /// `phrase` joins words with one space, unquotes quoted strings, and
    /// stops at a CRLF that is not followed by white space.
    #[test]
    fn test_phrase() {
        // (input, expected remainder, expected phrase)
        let cases = [
            ("hello world", "", "hello world"),
            ("salut \"le\" monde", "", "salut le monde"),
            ("fin\r\n du\r\nmonde", "\r\nmonde", "fin du"),
        ];
        for (input, rest, parsed) in cases.iter().copied() {
            assert_eq!(phrase(input), Ok((rest, parsed.into())));
        }
    }
}
|
|
|
@ -1,23 +0,0 @@
|
||||||
// Model
|
|
||||||
pub mod model;
|
|
||||||
|
|
||||||
// Generic
|
|
||||||
pub mod misc_token;
|
|
||||||
mod quoted;
|
|
||||||
pub mod whitespace;
|
|
||||||
mod words;
|
|
||||||
|
|
||||||
// Header specific
|
|
||||||
mod address;
|
|
||||||
mod datetime;
|
|
||||||
pub mod eager;
|
|
||||||
mod identification;
|
|
||||||
pub mod lazy;
|
|
||||||
mod mailbox;
|
|
||||||
pub mod section;
|
|
||||||
pub mod trace;
|
|
||||||
|
|
||||||
// MIME related
|
|
||||||
pub mod mime;
|
|
||||||
pub mod encoding;
|
|
||||||
pub mod part;
|
|
|
@ -1,146 +0,0 @@
|
||||||
use chrono::{DateTime, FixedOffset};
|
|
||||||
use std::collections::HashMap;
|
|
||||||
|
|
||||||
/// An e-mail address split around its `@` separator.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct AddrSpec {
    /// Part rendered before the `@`.
    pub local_part: String,
    /// Part rendered after the `@`.
    pub domain: String,
}

impl AddrSpec {
    /// Returns the address rendered as `local_part@domain`.
    pub fn fully_qualified(&self) -> String {
        format!("{}@{}", self.local_part, self.domain)
    }
}
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
|
||||||
pub struct MailboxRef {
|
|
||||||
// The actual "email address" like hello@example.com
|
|
||||||
pub addrspec: AddrSpec,
|
|
||||||
pub name: Option<String>,
|
|
||||||
}
|
|
||||||
impl From<AddrSpec> for MailboxRef {
|
|
||||||
fn from(addr: AddrSpec) -> Self {
|
|
||||||
MailboxRef {
|
|
||||||
name: None,
|
|
||||||
addrspec: addr,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pub type MailboxList = Vec<MailboxRef>;
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
|
||||||
pub struct GroupRef {
|
|
||||||
pub name: String,
|
|
||||||
pub participants: Vec<MailboxRef>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
|
||||||
pub enum AddressRef {
|
|
||||||
Single(MailboxRef),
|
|
||||||
Many(GroupRef),
|
|
||||||
}
|
|
||||||
impl From<MailboxRef> for AddressRef {
|
|
||||||
fn from(mx: MailboxRef) -> Self {
|
|
||||||
AddressRef::Single(mx)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
impl From<GroupRef> for AddressRef {
|
|
||||||
fn from(grp: GroupRef) -> Self {
|
|
||||||
AddressRef::Many(grp)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pub type AddressList = Vec<AddressRef>;
|
|
||||||
|
|
||||||
/// A message identifier made of two borrowed halves.
///
/// NOTE(review): presumably `left`/`right` are the `id-left`/`id-right` parts
/// of an RFC 5322 `msg-id` -- confirm against the parser that builds it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct MessageId<'a> {
    /// First half of the identifier.
    pub left: &'a str,
    /// Second half of the identifier.
    pub right: &'a str,
}

/// A sequence of message identifiers.
pub type MessageIdList<'a> = Vec<MessageId<'a>>;
|
|
||||||
|
|
||||||
/// Outcome of parsing the body of a header field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FieldBody<'a, T> {
    /// The body was parsed into a value of type `T`.
    Correct(T),
    /// The body could not be parsed; carries a borrowed string slice
    /// (presumably the raw body text -- confirm against the caller).
    Failed(&'a str),
}
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
|
||||||
pub enum Field<'a> {
|
|
||||||
// 3.6.1. The Origination Date Field
|
|
||||||
Date(FieldBody<'a, Option<DateTime<FixedOffset>>>),
|
|
||||||
|
|
||||||
// 3.6.2. Originator Fields
|
|
||||||
From(FieldBody<'a, Vec<MailboxRef>>),
|
|
||||||
Sender(FieldBody<'a, MailboxRef>),
|
|
||||||
ReplyTo(FieldBody<'a, Vec<AddressRef>>),
|
|
||||||
|
|
||||||
// 3.6.3. Destination Address Fields
|
|
||||||
To(FieldBody<'a, Vec<AddressRef>>),
|
|
||||||
Cc(FieldBody<'a, Vec<AddressRef>>),
|
|
||||||
Bcc(FieldBody<'a, Vec<AddressRef>>),
|
|
||||||
|
|
||||||
// 3.6.4. Identification Fields
|
|
||||||
MessageID(FieldBody<'a, MessageId<'a>>),
|
|
||||||
InReplyTo(FieldBody<'a, Vec<MessageId<'a>>>),
|
|
||||||
References(FieldBody<'a, Vec<MessageId<'a>>>),
|
|
||||||
|
|
||||||
// 3.6.5. Informational Fields
|
|
||||||
Subject(FieldBody<'a, String>),
|
|
||||||
Comments(FieldBody<'a, String>),
|
|
||||||
Keywords(FieldBody<'a, Vec<String>>),
|
|
||||||
|
|
||||||
// 3.6.6 Resent Fields (not implemented)
|
|
||||||
// 3.6.7 Trace Fields
|
|
||||||
Received(FieldBody<'a, &'a str>),
|
|
||||||
ReturnPath(FieldBody<'a, Option<MailboxRef>>),
|
|
||||||
|
|
||||||
// 3.6.8. Optional Fields
|
|
||||||
Optional(&'a str, String),
|
|
||||||
|
|
||||||
// None
|
|
||||||
Rescue(&'a str),
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Permissive Header Section
|
|
||||||
///
|
|
||||||
/// This is a structure intended for parsing/decoding,
|
|
||||||
/// hence it's support cases where the email is considered
|
|
||||||
/// as invalid according to RFC5322 but for which we can
|
|
||||||
/// still extract some data.
|
|
||||||
#[derive(Debug, PartialEq, Default)]
|
|
||||||
pub struct HeaderSection<'a> {
|
|
||||||
// 3.6.1. The Origination Date Field
|
|
||||||
pub date: Option<DateTime<FixedOffset>>,
|
|
||||||
|
|
||||||
// 3.6.2. Originator Fields
|
|
||||||
pub from: Vec<MailboxRef>,
|
|
||||||
pub sender: Option<MailboxRef>,
|
|
||||||
pub reply_to: Vec<AddressRef>,
|
|
||||||
|
|
||||||
// 3.6.3. Destination Address Fields
|
|
||||||
pub to: Vec<AddressRef>,
|
|
||||||
pub cc: Vec<AddressRef>,
|
|
||||||
pub bcc: Vec<AddressRef>,
|
|
||||||
|
|
||||||
// 3.6.4. Identification Fields
|
|
||||||
pub msg_id: Option<MessageId<'a>>,
|
|
||||||
pub in_reply_to: Vec<MessageId<'a>>,
|
|
||||||
pub references: Vec<MessageId<'a>>,
|
|
||||||
|
|
||||||
// 3.6.5. Informational Fields
|
|
||||||
pub subject: Option<String>,
|
|
||||||
pub comments: Vec<String>,
|
|
||||||
pub keywords: Vec<String>,
|
|
||||||
|
|
||||||
// 3.6.6 Not implemented
|
|
||||||
// 3.6.7 Trace Fields
|
|
||||||
pub return_path: Vec<MailboxRef>,
|
|
||||||
pub received: Vec<&'a str>,
|
|
||||||
|
|
||||||
// 3.6.8. Optional Fields
|
|
||||||
pub optional: HashMap<&'a str, String>,
|
|
||||||
|
|
||||||
// Recovery
|
|
||||||
pub bad_fields: Vec<Field<'a>>,
|
|
||||||
pub unparsed: Vec<&'a str>,
|
|
||||||
}
|
|
|
@ -1,116 +0,0 @@
|
||||||
use crate::fragments::whitespace::cfws;
|
|
||||||
use nom::{
|
|
||||||
bytes::complete::{tag, take_while1},
|
|
||||||
combinator::{opt, recognize},
|
|
||||||
multi::many0,
|
|
||||||
sequence::{delimited, pair},
|
|
||||||
IResult,
|
|
||||||
};
|
|
||||||
|
|
||||||
/// VCHAR definition
///
/// Returns `true` for the visible ASCII characters (`%x21-7E`) and, as a
/// UTF-8 extension, for every non-ASCII character.
pub fn is_vchar(c: char) -> bool {
    // `is_ascii_graphic` covers exactly U+0021 ('!') ..= U+007E ('~').
    c.is_ascii_graphic() || !c.is_ascii()
}
|
|
||||||
|
|
||||||
/// Sequence of visible chars with the UTF-8 extension
|
|
||||||
///
|
|
||||||
/// ```abnf
|
|
||||||
/// VCHAR = %x21-7E
|
|
||||||
/// ; visible (printing) characters
|
|
||||||
/// VCHAR =/ UTF8-non-ascii
|
|
||||||
/// SEQ = 1*VCHAR
|
|
||||||
///```
|
|
||||||
#[allow(dead_code)]
|
|
||||||
pub fn vchar_seq(input: &str) -> IResult<&str, &str> {
|
|
||||||
take_while1(is_vchar)(input)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Atom allowed characters
///
/// Accepts ASCII letters and digits, the punctuation set
/// ``!#$%&'*+-/=?^_`{|}~``, and every non-ASCII character.
fn is_atext(c: char) -> bool {
    if !c.is_ascii() {
        return true;
    }
    c.is_ascii_alphanumeric()
        || matches!(
            c,
            '!' | '#' | '$' | '%' | '&' | '\'' | '*' | '+' | '-' | '/' | '=' | '?' | '^' | '_'
                | '`' | '{' | '|' | '}' | '~'
        )
}
|
|
||||||
|
|
||||||
/// Atom
|
|
||||||
///
|
|
||||||
/// `[CFWS] 1*atext [CFWS]`
|
|
||||||
pub fn atom(input: &str) -> IResult<&str, &str> {
|
|
||||||
delimited(opt(cfws), take_while1(is_atext), opt(cfws))(input)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// dot-atom-text
|
|
||||||
///
|
|
||||||
/// `1*atext *("." 1*atext)`
|
|
||||||
pub fn dot_atom_text(input: &str) -> IResult<&str, &str> {
|
|
||||||
recognize(pair(
|
|
||||||
take_while1(is_atext),
|
|
||||||
many0(pair(tag("."), take_while1(is_atext))),
|
|
||||||
))(input)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// dot-atom
|
|
||||||
///
|
|
||||||
/// `[CFWS] dot-atom-text [CFWS]`
|
|
||||||
pub fn dot_atom(input: &str) -> IResult<&str, &str> {
|
|
||||||
delimited(opt(cfws), dot_atom_text, opt(cfws))(input)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns `true` when `c` is one of the characters
/// `( ) < > [ ] : ; @ \ , . "`.
#[allow(dead_code)]
pub fn is_special(c: char) -> bool {
    matches!(
        c,
        '(' | ')' | '<' | '>' | '[' | ']' | ':' | ';' | '@' | '\\' | ',' | '.' | '"'
    )
}
|
|
||||||
|
|
||||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A VCHAR sequence stops at the first space, and accepts non-ASCII.
    #[test]
    fn test_vchar_seq() {
        let cases = [("hello world", "hello"), ("hello👋 world", "hello👋")];
        for (input, seq) in cases.iter().copied() {
            assert_eq!(vchar_seq(input), Ok((" world", seq)));
        }
    }

    #[test]
    fn test_atext() {
        for accepted in ['=', '5', 'Q'].iter().copied() {
            assert!(is_atext(accepted));
        }
        assert!(!is_atext(' '));
        assert!(is_atext('É')); // support utf8
    }

    /// Comments and white space around the atom are skipped.
    #[test]
    fn test_atom() {
        let parsed = atom("(skip)  imf_codec (hidden) aerogramme");
        assert_eq!(parsed, Ok(("aerogramme", "imf_codec")));
    }

    /// The trailing white space is left in the remainder.
    #[test]
    fn test_dot_atom_text() {
        let parsed = dot_atom_text("quentin.dufour.io abcdef");
        assert_eq!(parsed, Ok((" abcdef", "quentin.dufour.io")));
    }

    /// Surrounding comments and white space are consumed.
    #[test]
    fn test_dot_atom() {
        let parsed = dot_atom("   (skip) quentin.dufour.io abcdef");
        assert_eq!(parsed, Ok(("abcdef", "quentin.dufour.io")));
    }
}
|
|
27
src/headers.rs
Normal file
27
src/headers.rs
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
use nom::{
|
||||||
|
self,
|
||||||
|
combinator::{all_consuming, recognize},
|
||||||
|
multi::many0,
|
||||||
|
sequence::terminated,
|
||||||
|
IResult,
|
||||||
|
};
|
||||||
|
|
||||||
|
use crate::text::whitespace::{foldable_line, line, obs_crlf};
|
||||||
|
|
||||||
|
pub fn headers(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
|
||||||
|
let (body, hdrs) = segment(input)?;
|
||||||
|
let (_, fields) = fields(hdrs)?;
|
||||||
|
Ok((body, fields))
|
||||||
|
}
|
||||||
|
|
||||||
|
// -- part 1, segment
|
||||||
|
fn segment(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
||||||
|
terminated(recognize(many0(line)), obs_crlf)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
// -- part 2, isolate fields
|
||||||
|
fn fields(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
|
||||||
|
let (rest, parsed) = all_consuming(many0(foldable_line))(input)?;
|
||||||
|
Ok((rest, parsed))
|
||||||
|
}
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
pub mod error;
|
pub mod error;
|
||||||
pub mod fragments;
|
//pub mod mime;
|
||||||
pub mod multipass;
|
//pub mod message;
|
||||||
|
pub mod headers;
|
||||||
|
pub mod text;
|
||||||
|
|
|
@ -292,18 +292,6 @@ pub fn version(input: &str) -> IResult<&str, Version> {
|
||||||
Ok((rest, Version { major, minor }))
|
Ok((rest, Version { major, minor }))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Token allowed characters
///
/// Accepts every visible ASCII character (no space, no control character)
/// except ``( ) < > @ , ; : \ " / [ ] ? =``.
fn is_token_text(c: char) -> bool {
    let excluded = matches!(
        c,
        '(' | ')' | '<' | '>' | '@' | ',' | ';' | ':' | '\\' | '"' | '/' | '[' | ']' | '?' | '='
    );
    c.is_ascii_graphic() && !excluded
}
|
|
||||||
|
|
||||||
/// Token
|
|
||||||
///
|
|
||||||
/// `[CFWS] 1*token_text [CFWS]`
|
|
||||||
pub fn token(input: &str) -> IResult<&str, &str> {
|
|
||||||
delimited(opt(cfws), take_while1(is_token_text), opt(cfws))(input)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn parameter(input: &str) -> IResult<&str, Parameter> {
|
pub fn parameter(input: &str) -> IResult<&str, Parameter> {
|
||||||
let (rest, (pname, _, pvalue)) = tuple((
|
let (rest, (pname, _, pvalue)) = tuple((
|
||||||
token,
|
token,
|
10
src/parse.rs
10
src/parse.rs
|
@ -1,8 +1,9 @@
|
||||||
use imf_codec::fragments::section::Section;
|
//use imf_codec::fragments::section::Section;
|
||||||
use imf_codec::multipass::segment;
|
//use imf_codec::multipass::segment;
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::io::Read;
|
use std::io::Read;
|
||||||
|
|
||||||
|
/*
|
||||||
fn parser<'a, F>(input: &'a [u8], func: F) -> ()
|
fn parser<'a, F>(input: &'a [u8], func: F) -> ()
|
||||||
where
|
where
|
||||||
F: FnOnce(&Section) -> (),
|
F: FnOnce(&Section) -> (),
|
||||||
|
@ -15,9 +16,10 @@ where
|
||||||
let section = field_body.section();
|
let section = field_body.section();
|
||||||
|
|
||||||
func(&section.fields);
|
func(&section.fields);
|
||||||
}
|
}*/
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
|
/*
|
||||||
// Read full mail in memory
|
// Read full mail in memory
|
||||||
let mut rawmail = Vec::new();
|
let mut rawmail = Vec::new();
|
||||||
io::stdin().lock().read_to_end(&mut rawmail).unwrap();
|
io::stdin().lock().read_to_end(&mut rawmail).unwrap();
|
||||||
|
@ -30,4 +32,6 @@ fn main() {
|
||||||
assert!(section.from.len() > 0);
|
assert!(section.from.len() > 0);
|
||||||
assert!(section.bad_fields.len() == 0);
|
assert!(section.bad_fields.len() == 0);
|
||||||
});
|
});
|
||||||
|
*/
|
||||||
|
println!("hello world");
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,9 +11,32 @@ use crate::error::IMFError;
|
||||||
use crate::fragments::lazy;
|
use crate::fragments::lazy;
|
||||||
use crate::fragments::mailbox::mailbox;
|
use crate::fragments::mailbox::mailbox;
|
||||||
use crate::fragments::misc_token::phrase;
|
use crate::fragments::misc_token::phrase;
|
||||||
use crate::fragments::model::{AddressList, AddressRef, GroupRef, MailboxList, MailboxRef};
|
//use crate::fragments::model::{AddressList, AddressRef, GroupRef, MailboxList, MailboxRef};
|
||||||
use crate::fragments::whitespace::cfws;
|
use crate::fragments::whitespace::cfws;
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
pub struct GroupRef {
|
||||||
|
pub name: String,
|
||||||
|
pub participants: Vec<MailboxRef>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
pub enum AddressRef {
|
||||||
|
Single(MailboxRef),
|
||||||
|
Many(GroupRef),
|
||||||
|
}
|
||||||
|
impl From<MailboxRef> for AddressRef {
|
||||||
|
fn from(mx: MailboxRef) -> Self {
|
||||||
|
AddressRef::Single(mx)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl From<GroupRef> for AddressRef {
|
||||||
|
fn from(grp: GroupRef) -> Self {
|
||||||
|
AddressRef::Many(grp)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub type AddressList = Vec<AddressRef>;
|
||||||
|
|
||||||
impl<'a> TryFrom<&'a lazy::Mailbox<'a>> for MailboxRef {
|
impl<'a> TryFrom<&'a lazy::Mailbox<'a>> for MailboxRef {
|
||||||
type Error = IMFError<'a>;
|
type Error = IMFError<'a>;
|
||||||
|
|
|
@ -14,6 +14,14 @@ use crate::fragments::model::{MessageId, MessageIdList};
|
||||||
use crate::fragments::whitespace::cfws;
|
use crate::fragments::whitespace::cfws;
|
||||||
use crate::fragments::words::dot_atom_text;
|
use crate::fragments::words::dot_atom_text;
|
||||||
|
|
||||||
|
|
||||||
|
/// A message identifier made of two borrowed halves.
///
/// NOTE(review): presumably `left`/`right` are the `id-left`/`id-right` parts
/// of an RFC 5322 `msg-id` -- confirm against the parser that builds it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct MessageId<'a> {
    /// First half of the identifier.
    pub left: &'a str,
    /// Second half of the identifier.
    pub right: &'a str,
}

/// A sequence of message identifiers.
pub type MessageIdList<'a> = Vec<MessageId<'a>>;
|
||||||
|
|
||||||
impl<'a> TryFrom<&'a lazy::Identifier<'a>> for MessageId<'a> {
|
impl<'a> TryFrom<&'a lazy::Identifier<'a>> for MessageId<'a> {
|
||||||
type Error = IMFError<'a>;
|
type Error = IMFError<'a>;
|
||||||
|
|
|
@ -10,11 +10,37 @@ use nom::{
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
|
||||||
use crate::fragments::misc_token::{phrase, word};
|
use crate::fragments::misc_token::{phrase, word};
|
||||||
use crate::fragments::model::{AddrSpec, MailboxRef};
|
|
||||||
use crate::fragments::quoted::quoted_string;
|
use crate::fragments::quoted::quoted_string;
|
||||||
use crate::fragments::whitespace::{cfws, fws, is_obs_no_ws_ctl};
|
use crate::fragments::whitespace::{cfws, fws, is_obs_no_ws_ctl};
|
||||||
use crate::fragments::words::{atom, dot_atom};
|
use crate::fragments::words::{atom, dot_atom};
|
||||||
|
|
||||||
|
/// An e-mail address split around its `@` separator.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct AddrSpec {
    /// Part rendered before the `@`.
    pub local_part: String,
    /// Part rendered after the `@`.
    pub domain: String,
}

impl AddrSpec {
    /// Returns the address rendered as `local_part@domain`.
    pub fn fully_qualified(&self) -> String {
        format!("{}@{}", self.local_part, self.domain)
    }
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
pub struct MailboxRef {
|
||||||
|
// The actual "email address" like hello@example.com
|
||||||
|
pub addrspec: AddrSpec,
|
||||||
|
pub name: Option<String>,
|
||||||
|
}
|
||||||
|
impl From<AddrSpec> for MailboxRef {
|
||||||
|
fn from(addr: AddrSpec) -> Self {
|
||||||
|
MailboxRef {
|
||||||
|
name: None,
|
||||||
|
addrspec: addr,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub type MailboxList = Vec<MailboxRef>;
|
||||||
|
|
||||||
/// Mailbox
|
/// Mailbox
|
||||||
///
|
///
|
||||||
/// ```abnf
|
/// ```abnf
|
142
src/text/ascii.rs
Normal file
142
src/text/ascii.rs
Normal file
|
@ -0,0 +1,142 @@
|
||||||
|
// ASCII
|
||||||
|
// -- CONTROL CHARACTERS
pub const NULL: u8 = 0x00; // NULL
pub const SOH: u8 = 0x01; // START OF HEADER
pub const STX: u8 = 0x02; // START OF TEXT
pub const ETX: u8 = 0x03; // END OF TEXT
pub const EOT: u8 = 0x04; // END OF TRANSMISSION
pub const ENQ: u8 = 0x05; // ENQUIRY
pub const ANQ: u8 = ENQ; // misspelled name of ENQ, kept so existing users still compile
pub const ACK: u8 = 0x06; // ACKNOWLEDGE
pub const BEL: u8 = 0x07; // BELL
pub const BS: u8 = 0x08; // BACKSPACE
pub const HT: u8 = 0x09; // horizontal tab
pub const LF: u8 = 0x0A; // LINE FEED
pub const VT: u8 = 0x0B; // VERTICAL TAB
pub const FF: u8 = 0x0C; // FORM FEED
pub const CR: u8 = 0x0D; // CARRIAGE RETURN
pub const SO: u8 = 0x0E; // SHIFT OUT
pub const SI: u8 = 0x0F; // SHIFT IN
pub const DLE: u8 = 0x10; // DATA LINK ESCAPE
pub const DC1: u8 = 0x11; // DEVICE CONTROL 1
pub const DC2: u8 = 0x12; // DEVICE CONTROL 2
pub const DC3: u8 = 0x13; // DEVICE CONTROL 3
pub const DC4: u8 = 0x14; // DEVICE CONTROL 4
pub const NAK: u8 = 0x15; // NEGATIVE ACKNOWLEDGE
pub const SYN: u8 = 0x16; // SYNCHRONOUS IDLE
pub const ETB: u8 = 0x17; // END OF TRANSMISSION BLOCK
pub const CAN: u8 = 0x18; // CANCEL
pub const EM: u8 = 0x19; // END OF MEDIUM
pub const SUB: u8 = 0x1A; // SUBSTITUTE
pub const ESC: u8 = 0x1B; // ESCAPE
pub const FS: u8 = 0x1C; // FILE SEPARATOR
pub const GS: u8 = 0x1D; // GROUP SEPARATOR
pub const RS: u8 = 0x1E; // RECORD SEPARATOR
pub const US: u8 = 0x1F; // UNIT SEPARATOR
pub const DEL: u8 = 0x7F; // DELETE
|
||||||
|
|
||||||
|
// -- GRAPHIC CHARACTERS
// Naming: `N<d>` is the digit <d>; `LC<x>` is the upper-case letter <x> and
// `LS<x>` the lower-case one (presumably "letter capital" / "letter small").
pub const SP: u8 = 0x20; // space
pub const EXCLAMATION: u8 = 0x21; // !
pub const DQUOTE: u8 = 0x22; // "
pub const NUM: u8 = 0x23; // #
pub const DOLLAR: u8 = 0x24; // $
pub const PERCENT: u8 = 0x25; // %
pub const AMPERSAND: u8 = 0x26; // &
pub const SQUOTE: u8 = 0x27; // '
pub const LEFT_PAR: u8 = 0x28; // (
pub const RIGHT_PAR: u8 = 0x29; // )
pub const ASTERISK: u8 = 0x2A; // *
pub const PLUS: u8 = 0x2B; // +
pub const COMMA: u8 = 0x2C; // ,
pub const MINUS: u8 = 0x2D; // -
pub const PERIOD: u8 = 0x2E; // .
pub const SLASH: u8 = 0x2F; // /
pub const N0: u8 = 0x30; // 0
pub const N1: u8 = 0x31; // 1
pub const N2: u8 = 0x32; // 2
pub const N3: u8 = 0x33; // 3
pub const N4: u8 = 0x34; // 4
pub const N5: u8 = 0x35; // 5
pub const N6: u8 = 0x36; // 6
pub const N7: u8 = 0x37; // 7
pub const N8: u8 = 0x38; // 8
pub const N9: u8 = 0x39; // 9
pub const COL: u8 = 0x3A; // :
pub const SEM_COL: u8 = 0x3B; // ;
pub const LT: u8 = 0x3C; // <
pub const EQ: u8 = 0x3D; // =
pub const GT: u8 = 0x3E; // >
pub const QUESTION: u8 = 0x3F; // ?
pub const AT: u8 = 0x40; // @
pub const LCA: u8 = 0x41; // A
pub const LCB: u8 = 0x42; // B
pub const LCC: u8 = 0x43; // C
pub const LCD: u8 = 0x44; // D
pub const LCE: u8 = 0x45; // E
pub const LCF: u8 = 0x46; // F
pub const LCG: u8 = 0x47; // G
pub const LCH: u8 = 0x48; // H
pub const LCI: u8 = 0x49; // I
pub const LCJ: u8 = 0x4A; // J
pub const LCK: u8 = 0x4B; // K
pub const LCL: u8 = 0x4C; // L
pub const LCM: u8 = 0x4D; // M
pub const LCN: u8 = 0x4E; // N
pub const LCO: u8 = 0x4F; // O
pub const LCP: u8 = 0x50; // P
pub const LCQ: u8 = 0x51; // Q
pub const LCR: u8 = 0x52; // R
pub const LCS: u8 = 0x53; // S
pub const LCT: u8 = 0x54; // T
pub const LCU: u8 = 0x55; // U
pub const LCV: u8 = 0x56; // V
pub const LCW: u8 = 0x57; // W
pub const LCX: u8 = 0x58; // X
pub const LCY: u8 = 0x59; // Y
pub const LCZ: u8 = 0x5A; // Z
pub const LEFT_BRACKET: u8 = 0x5B; // [
pub const BACKSLASH: u8 = 0x5C; // \
pub const RIGHT_BRACKET: u8 = 0x5D; // ]
pub const CARET: u8 = 0x5E; // ^
pub const CARRET: u8 = CARET; // misspelled name of CARET, kept so existing users still compile
pub const UNDERSCORE: u8 = 0x5F; // _
pub const GRAVE: u8 = 0x60; // `
pub const LSA: u8 = 0x61; // a
pub const LSB: u8 = 0x62; // b
pub const LSC: u8 = 0x63; // c
pub const LSD: u8 = 0x64; // d
pub const LSE: u8 = 0x65; // e
pub const LSF: u8 = 0x66; // f
pub const LSG: u8 = 0x67; // g
pub const LSH: u8 = 0x68; // h
pub const LSI: u8 = 0x69; // i
pub const LSJ: u8 = 0x6A; // j
pub const LSK: u8 = 0x6B; // k
pub const LSL: u8 = 0x6C; // l
pub const LSM: u8 = 0x6D; // m
pub const LSN: u8 = 0x6E; // n
pub const LSO: u8 = 0x6F; // o
pub const LSP: u8 = 0x70; // p
pub const LSQ: u8 = 0x71; // q
pub const LSR: u8 = 0x72; // r
pub const LSS: u8 = 0x73; // s
pub const LST: u8 = 0x74; // t
pub const LSU: u8 = 0x75; // u
pub const LSV: u8 = 0x76; // v
pub const LSW: u8 = 0x77; // w
pub const LSX: u8 = 0x78; // x
pub const LSY: u8 = 0x79; // y
pub const LSZ: u8 = 0x7A; // z
pub const LEFT_CURLY: u8 = 0x7B; // {
pub const PIPE: u8 = 0x7C; // |
pub const RIGHT_CURLY: u8 = 0x7D; // }
pub const TILDE: u8 = 0x7E; // ~
|
||||||
|
|
||||||
|
// GROUP OF CHARACTERS
|
||||||
|
// -- CRLF
// Line terminator: carriage return followed by line feed.
|
||||||
|
pub const CRLF: &[u8] = &[CR, LF];
|
||||||
|
|
||||||
|
// -- WHITESPACE
// Horizontal white space: tab and space.
|
||||||
|
pub const WS: &[u8] = &[HT, SP];
|
||||||
|
|
||||||
|
// Bounds of the graphic range, both inclusive when used as a range.
// NOTE(review): the lower bound is the space character, so this range is
// 0x20..=0x7E -- confirm that space is meant to count as graphic.
pub const GRAPHIC_BEGIN: u8 = SP;
|
||||||
|
pub const GRAPHIC_END: u8 = TILDE;
|
43
src/text/buffer.rs
Normal file
43
src/text/buffer.rs
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
use encoding_rs::Encoding;
|
||||||
|
|
||||||
|
/// A text assembled from borrowed byte chunks.
///
/// Only references are stored; decoding into a `String` happens on demand.
#[derive(Debug, PartialEq, Default)]
pub struct Text<'a> {
    /// Borrowed chunks, in the order they were pushed.
    parts: Vec<&'a [u8]>,
}
|
||||||
|
|
||||||
|
impl<'a> Text<'a> {
|
||||||
|
pub fn push(&mut self, e: &[u8]) {
|
||||||
|
self.parts.push(e)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn to_string(&self) -> String {
|
||||||
|
let enc = encoding_rs::UTF_8;
|
||||||
|
let size = self.parts.iter().fold(0, |acc, v| acc + v.len());
|
||||||
|
|
||||||
|
self.parts.iter().fold(
|
||||||
|
String::with_capacity(size),
|
||||||
|
|mut acc, v| {
|
||||||
|
let (content, _) = enc.decode_without_bom_handling(v);
|
||||||
|
acc.push_str(content.as_ref());
|
||||||
|
acc
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    // `ascii::SP` is used below; the parent module does not import `ascii`.
    use crate::text::ascii;

    /// Chunks are decoded and concatenated in push order.
    #[test]
    fn test_text() {
        let mut text = Text::default();
        text.push(b"hello");
        text.push(&[ascii::SP]);
        text.push(b"world");
        assert_eq!(
            text.to_string(),
            "hello world".to_string(),
        );
    }
}
|
|
@ -1,5 +1,3 @@
|
||||||
use std::borrow::Cow;
|
|
||||||
use chardetng::EncodingDetector;
|
|
||||||
use encoding_rs::Encoding;
|
use encoding_rs::Encoding;
|
||||||
|
|
||||||
use nom::{
|
use nom::{
|
||||||
|
@ -7,92 +5,107 @@ use nom::{
|
||||||
branch::alt,
|
branch::alt,
|
||||||
bytes::complete::{tag, take, take_while1, take_while},
|
bytes::complete::{tag, take, take_while1, take_while},
|
||||||
character::complete::{one_of},
|
character::complete::{one_of},
|
||||||
|
character::is_alphanumeric,
|
||||||
combinator::map,
|
combinator::map,
|
||||||
sequence::{preceded, terminated, tuple},
|
sequence::{preceded, terminated, tuple},
|
||||||
multi::many0,
|
multi::many0,
|
||||||
};
|
};
|
||||||
use encoding_rs::Encoding;
|
|
||||||
use base64::{Engine as _, engine::general_purpose};
|
use base64::{Engine as _, engine::general_purpose};
|
||||||
|
|
||||||
use crate::fragments::mime;
|
use crate::text::words;
|
||||||
|
use crate::text::ascii;
|
||||||
|
|
||||||
const IS_LAST_BUFFER: bool = true;
|
pub fn encoded_word(input: &[u8]) -> IResult<&[u8], EncodedWord> {
|
||||||
const ALLOW_UTF8: bool = true;
|
|
||||||
const NO_TLD: Option<&[u8]> = None;
|
|
||||||
|
|
||||||
pub fn header_decode(input: &[u8]) -> Cow<str> {
|
|
||||||
// Create detector
|
|
||||||
let mut detector = EncodingDetector::new();
|
|
||||||
detector.feed(input, IS_LAST_BUFFER);
|
|
||||||
|
|
||||||
// Get encoding
|
|
||||||
let enc: &Encoding = detector.guess(NO_TLD, ALLOW_UTF8);
|
|
||||||
let (header, _, _) = enc.decode(input);
|
|
||||||
header
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn encoded_word(input: &str) -> IResult<&str, String> {
|
|
||||||
alt((encoded_word_quoted, encoded_word_base64))(input)
|
alt((encoded_word_quoted, encoded_word_base64))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn encoded_word_quoted(input: &str) -> IResult<&str, String> {
|
pub fn encoded_word_quoted(input: &[u8]) -> IResult<&[u8], EncodedWord> {
|
||||||
let (rest, (_, charset, _, _, _, txt, _)) = tuple((
|
let (rest, (_, charset, _, _, _, txt, _)) = tuple((
|
||||||
tag("=?"), mime::token,
|
tag("=?"), words::mime_token,
|
||||||
tag("?"), one_of("Qq"),
|
tag("?"), one_of("Qq"),
|
||||||
tag("?"), ptext,
|
tag("?"), ptext,
|
||||||
tag("?=")))(input)?;
|
tag("?=")))(input)?;
|
||||||
|
|
||||||
let renc = Encoding::for_label(charset.as_bytes()).unwrap_or(encoding_rs::WINDOWS_1252);
|
let renc = Encoding::for_label(charset).unwrap_or(encoding_rs::WINDOWS_1252);
|
||||||
let parsed = decode_quoted_encoding(renc, txt.iter());
|
let parsed = EncodedWord::Quoted(QuotedWord { enc: renc, chunks: txt });
|
||||||
Ok((rest, parsed))
|
Ok((rest, parsed))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn encoded_word_base64(input: &str) -> IResult<&str, String> {
|
pub fn encoded_word_base64(input: &[u8]) -> IResult<&[u8], EncodedWord> {
|
||||||
let (rest, (_, charset, _, _, _, txt, _)) = tuple((
|
let (rest, (_, charset, _, _, _, txt, _)) = tuple((
|
||||||
tag("=?"), mime::token,
|
tag("=?"), words::mime_token,
|
||||||
tag("?"), one_of("Bb"),
|
tag("?"), one_of("Bb"),
|
||||||
tag("?"), btext,
|
tag("?"), btext,
|
||||||
tag("?=")))(input)?;
|
tag("?=")))(input)?;
|
||||||
|
|
||||||
let renc = Encoding::for_label(charset.as_bytes()).unwrap_or(encoding_rs::WINDOWS_1252);
|
let renc = Encoding::for_label(charset).unwrap_or(encoding_rs::WINDOWS_1252);
|
||||||
let parsed = general_purpose::STANDARD_NO_PAD.decode(txt).map(|d| renc.decode(d.as_slice()).0.to_string()).unwrap_or("".into());
|
let parsed = EncodedWord::Base64(Base64Word { enc: renc, content: txt });
|
||||||
|
|
||||||
Ok((rest, parsed))
|
Ok((rest, parsed))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn decode_quoted_encoding<'a>(enc: &'static Encoding, q: impl Iterator<Item = &'a QuotedChunk<'a>>) -> String {
|
#[derive(PartialEq,Debug)]
|
||||||
q.fold(
|
pub enum EncodedWord<'a> {
|
||||||
|
Quoted(QuotedWord<'a>),
|
||||||
|
Base64(Base64Word<'a>),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(PartialEq,Debug)]
|
||||||
|
pub struct Base64Word<'a> {
|
||||||
|
pub enc: &'static Encoding,
|
||||||
|
pub content: &'a [u8],
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Base64Word<'a> {
|
||||||
|
pub fn to_string(&self) -> String {
|
||||||
|
general_purpose::STANDARD_NO_PAD
|
||||||
|
.decode(self.content)
|
||||||
|
.map(|d| self.enc.decode(d.as_slice()).0.to_string())
|
||||||
|
.unwrap_or("".into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(PartialEq,Debug)]
|
||||||
|
pub struct QuotedWord<'a> {
|
||||||
|
pub enc: &'static Encoding,
|
||||||
|
pub chunks: Vec<QuotedChunk<'a>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> QuotedWord<'a> {
|
||||||
|
pub fn to_string(&self) -> String {
|
||||||
|
self.chunks.iter().fold(
|
||||||
String::new(),
|
String::new(),
|
||||||
|mut acc, c| {
|
|mut acc, c| {
|
||||||
let dec = match c {
|
match c {
|
||||||
QuotedChunk::Safe(v) => Cow::Borrowed(*v),
|
QuotedChunk::Safe(v) => {
|
||||||
QuotedChunk::Space => Cow::Borrowed(" "),
|
let (content, _) = encoding_rs::UTF_8.decode_without_bom_handling(v);
|
||||||
|
acc.push_str(content.as_ref());
|
||||||
|
}
|
||||||
|
QuotedChunk::Space => acc.push(' '),
|
||||||
QuotedChunk::Encoded(v) => {
|
QuotedChunk::Encoded(v) => {
|
||||||
let w = &[*v];
|
let w = &[*v];
|
||||||
let (d, _, _) = enc.decode(w);
|
let (d, _) = self.enc.decode_without_bom_handling(w);
|
||||||
Cow::Owned(d.into_owned())
|
acc.push_str(d.as_ref());
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
acc.push_str(dec.as_ref());
|
|
||||||
acc
|
acc
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// One element of the payload of a `Q`-encoded word.
#[derive(Debug, PartialEq)]
pub enum QuotedChunk<'a> {
    /// A run of bytes that were written literally.
    Safe(&'a [u8]),
    /// A single octet written as `=XX`.
    Encoded(u8),
    /// A space written as `_`.
    Space,
}
|
||||||
|
|
||||||
//quoted_printable
|
//quoted_printable
|
||||||
pub fn ptext(input: &str) -> IResult<&str, Vec<QuotedChunk>> {
|
pub fn ptext(input: &[u8]) -> IResult<&[u8], Vec<QuotedChunk>> {
|
||||||
many0(alt((safe_char2, encoded_space, hex_octet)))(input)
|
many0(alt((safe_char2, encoded_space, hex_octet)))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
fn safe_char2(input: &str) -> IResult<&str, QuotedChunk> {
|
fn safe_char2(input: &[u8]) -> IResult<&[u8], QuotedChunk> {
|
||||||
map(take_while1(is_safe_char2), |v| QuotedChunk::Safe(v))(input)
|
map(take_while1(is_safe_char2), |v| QuotedChunk::Safe(v))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -101,8 +114,8 @@ fn safe_char2(input: &str) -> IResult<&str, QuotedChunk> {
|
||||||
/// 8-bit values which correspond to printable ASCII characters other
/// than "=", "?", and "_" (underscore), MAY be represented as those
/// characters.
///
/// Accepts the space and the visible ASCII range (0x20..=0x7E) minus the
/// three reserved characters. DEL and bytes >= 0x80 are rejected, as they
/// were before the switch from `char` to `u8`.
fn is_safe_char2(c: u8) -> bool {
    (c == b' ' || c.is_ascii_graphic()) && !matches!(c, b'_' | b'?' | b'=')
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -111,28 +124,30 @@ fn is_safe_char(c: char) -> bool {
|
||||||
(c >= '\x3e' && c <= '\x7e')
|
(c >= '\x3e' && c <= '\x7e')
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
fn encoded_space(input: &str) -> IResult<&str, QuotedChunk> {
|
fn encoded_space(input: &[u8]) -> IResult<&[u8], QuotedChunk> {
|
||||||
map(tag("_"), |_| QuotedChunk::Space)(input)
|
map(tag("_"), |_| QuotedChunk::Space)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn hex_octet(input: &str) -> IResult<&str, QuotedChunk> {
|
fn hex_octet(input: &[u8]) -> IResult<&[u8], QuotedChunk> {
|
||||||
use nom::error::*;
|
use nom::error::*;
|
||||||
|
|
||||||
let (rest, hstr) = preceded(tag("="), take(2usize))(input)?;
|
let (rest, hbytes) = preceded(tag("="), take(2usize))(input)?;
|
||||||
|
|
||||||
let parsed = u8::from_str_radix(hstr, 16)
|
let (hstr, _) = encoding_rs::UTF_8.decode_without_bom_handling(hbytes);
|
||||||
|
|
||||||
|
let parsed = u8::from_str_radix(hstr.as_ref(), 16)
|
||||||
.map_err(|_| nom::Err::Error(Error::new(input, ErrorKind::Verify)))?;
|
.map_err(|_| nom::Err::Error(Error::new(input, ErrorKind::Verify)))?;
|
||||||
|
|
||||||
Ok((rest, QuotedChunk::Encoded(parsed)))
|
Ok((rest, QuotedChunk::Encoded(parsed)))
|
||||||
}
|
}
|
||||||
|
|
||||||
//base64 (maybe use a crate)
|
//base64 (maybe use a crate)
|
||||||
pub fn btext(input: &str) -> IResult<&str, &str> {
|
pub fn btext(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
||||||
terminated(take_while(is_bchar), many0(tag("=")))(input)
|
terminated(take_while(is_bchar), many0(tag("=")))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_bchar(c: char) -> bool {
|
fn is_bchar(c: u8) -> bool {
|
||||||
c.is_ascii_alphanumeric() || c == '+' || c == '/'
|
is_alphanumeric(c) || c == ascii::PLUS || c == ascii::SLASH
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
166
src/text/misc_token.rs
Normal file
166
src/text/misc_token.rs
Normal file
|
@ -0,0 +1,166 @@
|
||||||
|
use nom::{
|
||||||
|
branch::alt,
|
||||||
|
bytes::complete::take_while1,
|
||||||
|
character::complete::space0,
|
||||||
|
combinator::{into, map, opt},
|
||||||
|
multi::{many0, many1},
|
||||||
|
sequence::{preceded, tuple},
|
||||||
|
IResult,
|
||||||
|
};
|
||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
|
use crate::text::{
|
||||||
|
quoted::quoted_string,
|
||||||
|
whitespace::{fws, is_obs_no_ws_ctl},
|
||||||
|
words::{atom, is_vchar},
|
||||||
|
encoding::{self, encoded_word},
|
||||||
|
buffer,
|
||||||
|
ascii,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// A list of phrases, each stored as an already decoded string.
#[derive(Debug, Default, PartialEq)]
pub struct PhraseList(pub Vec<String>);
|
||||||
|
|
||||||
|
/*
|
||||||
|
impl<'a> TryFrom<&'a lazy::Unstructured<'a>> for Unstructured {
|
||||||
|
type Error = IMFError<'a>;
|
||||||
|
|
||||||
|
fn try_from(input: &'a lazy::Unstructured<'a>) -> Result<Self, Self::Error> {
|
||||||
|
unstructured(input.0)
|
||||||
|
.map(|(_, v)| Unstructured(v))
|
||||||
|
.map_err(|e| IMFError::Unstructured(e))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> TryFrom<&'a lazy::PhraseList<'a>> for PhraseList {
|
||||||
|
type Error = IMFError<'a>;
|
||||||
|
|
||||||
|
fn try_from(p: &'a lazy::PhraseList<'a>) -> Result<Self, Self::Error> {
|
||||||
|
separated_list1(tag(","), phrase)(p.0)
|
||||||
|
.map(|(_, q)| PhraseList(q))
|
||||||
|
.map_err(|e| IMFError::PhraseList(e))
|
||||||
|
}
|
||||||
|
}*/
|
||||||
|
|
||||||
|
pub enum Word<'a> {
|
||||||
|
Quoted(buffer::Text<'a>),
|
||||||
|
Encoded(encoding::EncodedWord<'a>),
|
||||||
|
Atom(&'a [u8]),
|
||||||
|
}
|
||||||
|
impl<'a> Word<'a> {
|
||||||
|
pub fn to_string(&self) -> String {
|
||||||
|
match self {
|
||||||
|
Word::Quoted(v) => v.to_string(),
|
||||||
|
Word::Encoded(v) => v.to_string(),
|
||||||
|
Word::Atom(v) => v.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Word
|
||||||
|
///
|
||||||
|
/// ```abnf
|
||||||
|
/// word = atom / quoted-string
|
||||||
|
/// ```
|
||||||
|
pub fn word(input: &[u8]) -> IResult<&[u8], Word> {
|
||||||
|
alt((
|
||||||
|
map(quoted_string, |v| Word::Quoted(v)),
|
||||||
|
map(encoded_word, |v| Word::Encoded(v)),
|
||||||
|
map(atom, |v| Word::Atom(v))
|
||||||
|
))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Phrase<'a>(pub Vec<Word<'a>>);
|
||||||
|
impl<'a> Phrase<'a> {
|
||||||
|
pub fn to_string(&self) -> String {
|
||||||
|
self.0.join(" ")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Phrase
|
||||||
|
///
|
||||||
|
/// ```abnf
|
||||||
|
/// phrase = 1*word / obs-phrase
|
||||||
|
/// ```
|
||||||
|
pub fn phrase(input: &[u8]) -> IResult<&[u8], Phrase> {
|
||||||
|
let (input, phrase) = map(many1(word), |v| Phrase(v))(input)?;
|
||||||
|
Ok((input, phrase))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compatible unstructured input
|
||||||
|
///
|
||||||
|
/// ```abnf
|
||||||
|
/// obs-utext = %d0 / obs-NO-WS-CTL / VCHAR
|
||||||
|
/// ```
|
||||||
|
fn is_unstructured(c: u8) -> bool {
|
||||||
|
is_vchar(c) || is_obs_no_ws_ctl(c) || c == ascii::NULL
|
||||||
|
}
|
||||||
|
|
||||||
|
enum UnstrToken<'a> {
|
||||||
|
Init,
|
||||||
|
Encoded(encoding::EncodedWord<'a>),
|
||||||
|
Plain(&'a [u8]),
|
||||||
|
}
|
||||||
|
impl<'a> UnstrToken<'a> {
|
||||||
|
pub fn to_string(&self) -> String {
|
||||||
|
match self {
|
||||||
|
UnstrToken::Init => "".into(),
|
||||||
|
UnstrToken::Encoded(e) => e.to_string(),
|
||||||
|
UnstrToken::Plain(e) => encoding_rs::UTF_8.decode_without_bom_handling(e).into_owned(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Unstructured<'a>(pub Vec<UnstrToken<'a>>);
|
||||||
|
impl<'a> Unstructured<'a> {
|
||||||
|
pub fn to_string(&self) -> String {
|
||||||
|
self.0.iter().fold(
|
||||||
|
(&UnstrToken::Init, String::new()),
|
||||||
|
|(prev_token, result), current_token| {
|
||||||
|
match (prev_token, current_token) {
|
||||||
|
(UnstrToken::Init, v) => result.push_str(v.to_string().as_ref()),
|
||||||
|
(UnstrToken::EncodedWord(_), UnstrToken::EncodedWord(v)) => result.push_str(v.to_string()).as_ref(),
|
||||||
|
(_, v) => {
|
||||||
|
result.push(' ');
|
||||||
|
result.push_str(v.to_string().as_ref())
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
result
|
||||||
|
}
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Unstructured header field body
|
||||||
|
///
|
||||||
|
/// ```abnf
|
||||||
|
/// unstructured = (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct
|
||||||
|
/// ```
|
||||||
|
pub fn unstructured(input: &[u8]) -> IResult<&[u8], Unstructured> {
|
||||||
|
let (input, r) = many0(preceded(opt(fws), alt((
|
||||||
|
map(encoded_word, |v| UnstrToken::Encoded(v)),
|
||||||
|
map(take_while1(is_unstructured), |v| UnstrToken::Plain(v)),
|
||||||
|
))))(input)?;
|
||||||
|
|
||||||
|
let (input, _) = space0(input)?;
|
||||||
|
Ok((input, Unstructured(r)))
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    // `phrase` works on bytes and returns a `Phrase`, so each case compares
    // the unparsed remainder and the rendered text separately.
    #[test]
    fn test_phrase() {
        let (rest, parsed) = phrase(b"hello world").unwrap();
        assert_eq!(rest, &b""[..]);
        assert_eq!(parsed.to_string(), "hello world");

        let (rest, parsed) = phrase(b"salut \"le\" monde").unwrap();
        assert_eq!(rest, &b""[..]);
        assert_eq!(parsed.to_string(), "salut le monde");

        let (rest, parsed) = phrase(b"fin\r\n du\r\nmonde").unwrap();
        assert_eq!(rest, &b"\r\nmonde"[..]);
        assert_eq!(parsed.to_string(), "fin du");
    }
}
|
7
src/text/mod.rs
Normal file
7
src/text/mod.rs
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
pub mod ascii;
|
||||||
|
pub mod encoding;
|
||||||
|
pub mod misc_token;
|
||||||
|
pub mod quoted;
|
||||||
|
pub mod whitespace;
|
||||||
|
pub mod words;
|
||||||
|
pub mod buffer;
|
|
@ -1,14 +1,16 @@
|
||||||
use nom::{
|
use nom::{
|
||||||
branch::alt,
|
branch::alt,
|
||||||
bytes::complete::tag,
|
bytes::complete::{take_while1, tag},
|
||||||
character::complete::{anychar, satisfy},
|
character::complete::anychar,
|
||||||
combinator::opt,
|
combinator::{recognize, opt},
|
||||||
multi::many0,
|
multi::many0,
|
||||||
sequence::{pair, preceded},
|
sequence::{pair, preceded},
|
||||||
IResult,
|
IResult,
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::fragments::whitespace::{cfws, fws, is_obs_no_ws_ctl};
|
use crate::text::whitespace::{cfws, fws, is_obs_no_ws_ctl};
|
||||||
|
use crate::text::ascii;
|
||||||
|
use crate::text::buffer;
|
||||||
|
|
||||||
/// Quoted pair
|
/// Quoted pair
|
||||||
///
|
///
|
||||||
|
@ -16,8 +18,8 @@ use crate::fragments::whitespace::{cfws, fws, is_obs_no_ws_ctl};
|
||||||
/// quoted-pair = ("\" (VCHAR / WSP)) / obs-qp
|
/// quoted-pair = ("\" (VCHAR / WSP)) / obs-qp
|
||||||
/// obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR)
|
/// obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR)
|
||||||
/// ```
|
/// ```
|
||||||
pub fn quoted_pair(input: &str) -> IResult<&str, char> {
|
pub fn quoted_pair(input: &[u8]) -> IResult<&[u8], u8> {
|
||||||
preceded(tag("\\"), anychar)(input)
|
preceded(tag(&[ascii::SLASH]), anychar)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Allowed characters in quote
|
/// Allowed characters in quote
|
||||||
|
@ -28,11 +30,11 @@ pub fn quoted_pair(input: &str) -> IResult<&str, char> {
|
||||||
/// %d93-126 / ; "\" or the quote character
|
/// %d93-126 / ; "\" or the quote character
|
||||||
/// obs-qtext
|
/// obs-qtext
|
||||||
/// ```
|
/// ```
|
||||||
fn is_restr_qtext(c: char) -> bool {
|
fn is_restr_qtext(c: u8) -> bool {
|
||||||
c == '\x21' || (c >= '\x23' && c <= '\x5B') || (c >= '\x5D' && c <= '\x7E')
|
c == ascii::EXCLAMATION || (c >= ascii::NUM && c <= ascii::LEFT_BRACKET) || (c >= ascii::RIGHT_BRACKET && c <= ascii::TILDE)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_qtext(c: char) -> bool {
|
fn is_qtext(c: u8) -> bool {
|
||||||
is_restr_qtext(c) || is_obs_no_ws_ctl(c)
|
is_restr_qtext(c) || is_obs_no_ws_ctl(c)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -41,8 +43,8 @@ fn is_qtext(c: char) -> bool {
|
||||||
/// ```abnf
|
/// ```abnf
|
||||||
/// qcontent = qtext / quoted-pair
|
/// qcontent = qtext / quoted-pair
|
||||||
/// ```
|
/// ```
|
||||||
fn qcontent(input: &str) -> IResult<&str, char> {
|
fn qcontent(input: &u8) -> IResult<&[u8], &[u8]> {
|
||||||
alt((satisfy(is_qtext), quoted_pair))(input)
|
alt((take_while1(is_qtext), recognize(quoted_pair)))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Quoted string
|
/// Quoted string
|
||||||
|
@ -52,7 +54,7 @@ fn qcontent(input: &str) -> IResult<&str, char> {
|
||||||
/// DQUOTE *([FWS] qcontent) [FWS] DQUOTE
|
/// DQUOTE *([FWS] qcontent) [FWS] DQUOTE
|
||||||
/// [CFWS]
|
/// [CFWS]
|
||||||
/// ```
|
/// ```
|
||||||
pub fn quoted_string(input: &str) -> IResult<&str, String> {
|
pub fn quoted_string(input: &[u8]) -> IResult<&[u8], buffer::Text> {
|
||||||
let (input, _) = opt(cfws)(input)?;
|
let (input, _) = opt(cfws)(input)?;
|
||||||
let (input, _) = tag("\"")(input)?;
|
let (input, _) = tag("\"")(input)?;
|
||||||
let (input, content) = many0(pair(opt(fws), qcontent))(input)?;
|
let (input, content) = many0(pair(opt(fws), qcontent))(input)?;
|
||||||
|
@ -60,11 +62,11 @@ pub fn quoted_string(input: &str) -> IResult<&str, String> {
|
||||||
// Rebuild string
|
// Rebuild string
|
||||||
let mut qstring = content
|
let mut qstring = content
|
||||||
.iter()
|
.iter()
|
||||||
.fold(String::with_capacity(16), |mut acc, (maybe_wsp, c)| {
|
.fold(buffer::Text::default(), |mut acc, (maybe_wsp, c)| {
|
||||||
if let Some(wsp) = maybe_wsp {
|
if let Some(wsp) = maybe_wsp {
|
||||||
acc.push(*wsp);
|
acc.push(&[ascii::SP]);
|
||||||
}
|
}
|
||||||
acc.push(*c);
|
acc.push(c);
|
||||||
acc
|
acc
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -84,13 +86,22 @@ mod tests {
|
||||||
|
|
||||||
#[test]
fn test_quoted_string() {
    // The parser works on bytes, so the expected remainder is an empty byte slice.
    let mut text = buffer::Text::default();
    text.push(b"hello");
    text.push(&[ascii::DQUOTE]);
    text.push(b"world");
    assert_eq!(
        quoted_string(b" \"hello\\\"world\" "),
        Ok((&b""[..], text))
    );

    let mut text = buffer::Text::default();
    text.push(b"hello");
    text.push(&[ascii::SP]);
    text.push(b"world");
    assert_eq!(
        quoted_string(b"\"hello\r\n world\""),
        Ok((&b""[..], text))
    );
}
|
||||||
}
|
}
|
|
@ -1,71 +1,68 @@
|
||||||
use crate::fragments::quoted::quoted_pair;
|
|
||||||
use nom::{
|
use nom::{
|
||||||
branch::alt,
|
branch::alt,
|
||||||
bytes::complete::{is_not, tag},
|
bytes::complete::{is_not, tag, take_while1},
|
||||||
character::complete::{crlf, satisfy, space0, space1},
|
character::complete::{space0, space1},
|
||||||
combinator::{opt, recognize},
|
combinator::{opt, recognize},
|
||||||
multi::{many0, many1},
|
multi::{many0, many1},
|
||||||
sequence::{pair, terminated, tuple},
|
sequence::{pair, tuple},
|
||||||
IResult,
|
IResult,
|
||||||
};
|
};
|
||||||
use crate::fragments::encoding::encoded_word;
|
use crate::text::encoding::encoded_word;
|
||||||
|
use crate::text::quoted::quoted_pair;
|
||||||
|
use crate::text::ascii;
|
||||||
|
|
||||||
/// Whitespace (space, new line, tab) content and
|
/// Whitespace (space, new line, tab) content and
|
||||||
/// delimited content (eg. comment, line, sections, etc.)
|
/// delimited content (eg. comment, line, sections, etc.)
|
||||||
|
|
||||||
// Bytes CRLF
|
/// Obsolete/Compatible CRLF
|
||||||
const CR: u8 = 0x0D;
|
///
|
||||||
const LF: u8 = 0x0A;
|
/// Theoretically, all lines must end with \r\n
|
||||||
pub const CRLF: &[u8] = &[CR, LF];
|
/// but some mail servers like Dovecot support malformated emails,
|
||||||
|
/// for example with only \n eol. It works because
|
||||||
|
/// \r or \n is allowed nowhere else, so we also add this support.
|
||||||
|
|
||||||
pub fn headers(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
pub fn obs_crlf(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
||||||
terminated(recognize(many0(line)), obs_crlf)(input)
|
alt((tag(ascii::CRLF), tag(&[ascii::CR]), tag(&[ascii::LF])))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn fields(input: &str) -> IResult<&str, Vec<&str>> {
|
|
||||||
all_consuming(many0(foldable_line))(input)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
|
pub fn line(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
|
||||||
// is_not(CRLF) is a hack, it means "is not CR or LF"
|
// is_not(CRLF) is a hack, it means "is not CR or LF"
|
||||||
// and not "is not CRLF". In other words, it continues while
|
// and not "is not CRLF". In other words, it continues while
|
||||||
// it does not encounter 0x0D or 0x0A.
|
// it does not encounter 0x0D or 0x0A.
|
||||||
pair(is_not(CRLF), obs_crlf)(input)
|
pair(is_not(ascii::CRLF), obs_crlf)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn obs_crlf(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
/// ```abnf
|
||||||
alt((tag(CRLF), tag(&[CR]), tag(&[LF])))(input)
|
/// fold_line = any *(1*(crlf WS) any) crlf
|
||||||
|
/// ```
|
||||||
|
pub fn foldable_line(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
||||||
|
recognize(tuple((
|
||||||
|
is_not(ascii::CRLF),
|
||||||
|
many0(pair(
|
||||||
|
many1(pair(obs_crlf, space1)),
|
||||||
|
is_not(ascii::CRLF),
|
||||||
|
)),
|
||||||
|
obs_crlf,
|
||||||
|
)))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// --- whitespaces and comments
|
// --- whitespaces and comments
|
||||||
|
|
||||||
// Note: WSP = SP / HTAB = %x20 / %x09
|
// Note: WSP = SP / HTAB = %x20 / %x09
|
||||||
// nom::*::space0 = *WSP
|
// nom::*::space0 = *WSP
|
||||||
// nom::*::space1 = 1*WSP
|
// nom::*::space1 = 1*WSP
|
||||||
|
|
||||||
/// Permissive CRLF
|
|
||||||
///
|
|
||||||
/// Theoretically, all lines must end with \r\n
|
|
||||||
/// but some mail servers like Dovecot support malformated emails,
|
|
||||||
/// for example with only \n eol. It works because
|
|
||||||
/// \r or \n is allowed nowhere else, so we also add this support.
|
|
||||||
pub fn perm_crlf(input: &str) -> IResult<&str, &str> {
|
|
||||||
alt((crlf, tag("\r"), tag("\n")))(input)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Permissive foldable white space
|
/// Permissive foldable white space
|
||||||
///
|
///
|
||||||
/// Folding white space are used for long headers splitted on multiple lines.
|
/// Folding white space are used for long headers splitted on multiple lines.
|
||||||
/// The obsolete syntax allowes multiple lines without content; implemented for compatibility
|
/// The obsolete syntax allowes multiple lines without content; implemented for compatibility
|
||||||
/// reasons
|
/// reasons
|
||||||
pub fn fws(input: &str) -> IResult<&str, char> {
|
pub fn fws(input: &[u8]) -> IResult<&[u8], u8> {
|
||||||
let (input, _) = alt((recognize(many1(fold_marker)), space1))(input)?;
|
let (input, _) = alt((recognize(many1(fold_marker)), space1))(input)?;
|
||||||
Ok((input, ' '))
|
Ok((input, ascii::SP))
|
||||||
}
|
}
|
||||||
fn fold_marker(input: &str) -> IResult<&str, &str> {
|
fn fold_marker(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
||||||
let (input, _) = space0(input)?;
|
let (input, _) = space0(input)?;
|
||||||
let (input, _) = perm_crlf(input)?;
|
let (input, _) = obs_crlf(input)?;
|
||||||
space1(input)
|
space1(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -85,17 +82,17 @@ fn fold_marker(input: &str) -> IResult<&str, &str> {
|
||||||
///
|
///
|
||||||
/// CFWS = (1*([FWS] comment) [FWS]) / FWS
|
/// CFWS = (1*([FWS] comment) [FWS]) / FWS
|
||||||
/// ```
|
/// ```
|
||||||
pub fn cfws(input: &str) -> IResult<&str, &str> {
|
pub fn cfws(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
||||||
alt((recognize(comments), recognize(fws)))(input)
|
alt((recognize(comments), recognize(fws)))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn comments(input: &str) -> IResult<&str, ()> {
|
pub fn comments(input: &[u8]) -> IResult<&[u8], ()> {
|
||||||
let (input, _) = many1(tuple((opt(fws), comment)))(input)?;
|
let (input, _) = many1(tuple((opt(fws), comment)))(input)?;
|
||||||
let (input, _) = opt(fws)(input)?;
|
let (input, _) = opt(fws)(input)?;
|
||||||
Ok((input, ()))
|
Ok((input, ()))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn comment(input: &str) -> IResult<&str, ()> {
|
pub fn comment(input: &[u8]) -> IResult<&[u8], ()> {
|
||||||
let (input, _) = tag("(")(input)?;
|
let (input, _) = tag("(")(input)?;
|
||||||
let (input, _) = many0(tuple((opt(fws), ccontent)))(input)?;
|
let (input, _) = many0(tuple((opt(fws), ccontent)))(input)?;
|
||||||
let (input, _) = opt(fws)(input)?;
|
let (input, _) = opt(fws)(input)?;
|
||||||
|
@ -103,12 +100,16 @@ pub fn comment(input: &str) -> IResult<&str, ()> {
|
||||||
Ok((input, ()))
|
Ok((input, ()))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn ccontent(input: &str) -> IResult<&str, &str> {
|
pub fn ccontent(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
||||||
alt((recognize(ctext), recognize(quoted_pair), recognize(encoded_word), recognize(comment)))(input)
|
alt((ctext, recognize(quoted_pair), recognize(encoded_word), recognize(comment)))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn ctext(input: &str) -> IResult<&str, char> {
|
pub fn ctext(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
||||||
satisfy(is_ctext)(input)
|
take_while1(is_ctext)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_ctext(c: u8) -> bool {
|
||||||
|
is_restr_ctext(c) || is_obs_no_ws_ctl(c)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check if it's a comment text character
|
/// Check if it's a comment text character
|
||||||
|
@ -119,15 +120,10 @@ pub fn ctext(input: &str) -> IResult<&str, char> {
|
||||||
/// %d93-126 / ; "(", ")", or "\"
|
/// %d93-126 / ; "(", ")", or "\"
|
||||||
/// obs-ctext
|
/// obs-ctext
|
||||||
///```
|
///```
|
||||||
pub fn is_restr_ctext(c: char) -> bool {
|
pub fn is_restr_ctext(c: u8) -> bool {
|
||||||
(c >= '\x21' && c <= '\x27')
|
(c >= ascii::EXCLAMATION && c <= ascii::SQUOTE)
|
||||||
|| (c >= '\x2A' && c <= '\x5B')
|
|| (c >= ascii::ASTERISK && c <= ascii::LEFT_BRACKET)
|
||||||
|| (c >= '\x5D' && c <= '\x7E')
|
|| (c >= ascii::RIGHT_BRACKET && c <= ascii::TILDE)
|
||||||
|| !c.is_ascii()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn is_ctext(c: char) -> bool {
|
|
||||||
is_restr_ctext(c) || is_obs_no_ws_ctl(c)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// US ASCII control characters without effect
|
/// US ASCII control characters without effect
|
||||||
|
@ -139,12 +135,12 @@ pub fn is_ctext(c: char) -> bool {
|
||||||
/// %d14-31 / ; return, line feed, and
|
/// %d14-31 / ; return, line feed, and
|
||||||
/// %d127 ; white space characters
|
/// %d127 ; white space characters
|
||||||
/// ```
|
/// ```
|
||||||
pub fn is_obs_no_ws_ctl(c: char) -> bool {
|
pub fn is_obs_no_ws_ctl(c: u8) -> bool {
|
||||||
(c >= '\x01' && c <= '\x08')
|
(c >= ascii::SOH && c <= ascii::BS)
|
||||||
|| c == '\x0b'
|
|| c == ascii::VT
|
||||||
|| c == '\x0b'
|
|| c == ascii::FF
|
||||||
|| (c >= '\x0e' && c <= '\x1f')
|
|| (c >= ascii::SO && c <= ascii::US)
|
||||||
|| c == '\x7F'
|
|| c == ascii::DEL
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
@ -152,10 +148,10 @@ mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
#[test]
fn test_obs_crlf() {
    // `obs_crlf` parses bytes, so inputs and expected values are byte slices.
    assert_eq!(obs_crlf(b"\rworld"), Ok((&b"world"[..], &b"\r"[..])));
    assert_eq!(obs_crlf(b"\r\nworld"), Ok((&b"world"[..], &b"\r\n"[..])));
    assert_eq!(obs_crlf(b"\nworld"), Ok((&b"world"[..], &b"\n"[..])));
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
133
src/text/words.rs
Normal file
133
src/text/words.rs
Normal file
|
@ -0,0 +1,133 @@
|
||||||
|
use crate::text::whitespace::cfws;
|
||||||
|
use crate::text::ascii;
|
||||||
|
use nom::{
|
||||||
|
bytes::complete::{tag, take_while1},
|
||||||
|
character::is_alphanumeric,
|
||||||
|
combinator::{opt, recognize},
|
||||||
|
multi::many0,
|
||||||
|
sequence::{delimited, pair},
|
||||||
|
IResult,
|
||||||
|
};
|
||||||
|
|
||||||
|
pub fn is_vchar(c: u8) -> bool {
|
||||||
|
c >= ascii::EXCLAMATION && c <= ascii::TILDE
|
||||||
|
}
|
||||||
|
|
||||||
|
/// MIME Token allowed characters
|
||||||
|
///
|
||||||
|
/// forbidden: ()<>@,;:\"/[]?=
|
||||||
|
fn is_mime_token_text(c: u8) -> bool {
|
||||||
|
is_alphanumeric(c)
|
||||||
|
|| c == ascii::EXCLAMATION
|
||||||
|
|| c == ascii::NUM
|
||||||
|
|| c == ascii::DOLLAR
|
||||||
|
|| c == ascii::PERCENT
|
||||||
|
|| c == ascii::AMPERSAND
|
||||||
|
|| c == ascii::SQUOTE
|
||||||
|
|| c == ascii::ASTERISK
|
||||||
|
|| c == ascii::PLUS
|
||||||
|
|| c == ascii::MINUS
|
||||||
|
|| c == ascii::PERIOD
|
||||||
|
|| c == ascii::CARRET
|
||||||
|
|| c == ascii::UNDERSCORE
|
||||||
|
|| c == ascii::GRAVE
|
||||||
|
|| c == ascii::LEFT_CURLY
|
||||||
|
|| c == ascii::PIPE
|
||||||
|
|| c == ascii::RIGHT_CURLY
|
||||||
|
|| c == ascii::TILDE
|
||||||
|
}
|
||||||
|
|
||||||
|
/// MIME Token
|
||||||
|
///
|
||||||
|
/// `[CFWS] 1*token_text [CFWS]`
|
||||||
|
pub fn mime_token(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
||||||
|
delimited(opt(cfws), take_while1(is_mime_token_text), opt(cfws))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Atom allowed characters
|
||||||
|
///
|
||||||
|
/// authorized: !#$%&'*+-/=?^_`{|}~
|
||||||
|
fn is_atext(c: u8) -> bool {
|
||||||
|
is_alphanumeric(c)
|
||||||
|
|| c == ascii::EXCLAMATION
|
||||||
|
|| c == ascii::NUM
|
||||||
|
|| c == ascii::DOLLAR
|
||||||
|
|| c == ascii::PERCENT
|
||||||
|
|| c == ascii::AMPERSAND
|
||||||
|
|| c == ascii::SQUOTE
|
||||||
|
|| c == ascii::ASTERISK
|
||||||
|
|| c == ascii::PLUS
|
||||||
|
|| c == ascii::MINUS
|
||||||
|
|| c == ascii::SLASH
|
||||||
|
|| c == ascii::EQ
|
||||||
|
|| c == ascii::QUESTION
|
||||||
|
|| c == ascii::CARRET
|
||||||
|
|| c == ascii::UNDERSCORE
|
||||||
|
|| c == ascii::GRAVE
|
||||||
|
|| c == ascii::LEFT_CURLY
|
||||||
|
|| c == ascii::PIPE
|
||||||
|
|| c == ascii::RIGHT_CURLY
|
||||||
|
|| c == ascii::TILDE
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Atom
|
||||||
|
///
|
||||||
|
/// `[CFWS] 1*atext [CFWS]`
|
||||||
|
pub fn atom(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
||||||
|
delimited(opt(cfws), take_while1(is_atext), opt(cfws))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// dot-atom-text
|
||||||
|
///
|
||||||
|
/// `1*atext *("." 1*atext)`
|
||||||
|
pub fn dot_atom_text(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
||||||
|
recognize(pair(
|
||||||
|
take_while1(is_atext),
|
||||||
|
many0(pair(tag("."), take_while1(is_atext))),
|
||||||
|
))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// dot-atom
|
||||||
|
///
|
||||||
|
/// `[CFWS] dot-atom-text [CFWS]`
|
||||||
|
pub fn dot_atom(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
||||||
|
delimited(opt(cfws), dot_atom_text, opt(cfws))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_atext() {
        assert!(is_atext('=' as u8));
        assert!(is_atext('5' as u8));
        assert!(is_atext('Q' as u8));
        assert!(!is_atext(' ' as u8));
        //assert!(is_atext('É')); // support utf8
    }

    #[test]
    fn test_atom() {
        assert_eq!(
            atom(b"(skip)  imf_codec (hidden) aerogramme"),
            Ok((&b"aerogramme"[..], &b"imf_codec"[..]))
        );
    }

    // The parsers take bytes, so inputs and expected values are byte slices.
    #[test]
    fn test_dot_atom_text() {
        assert_eq!(
            dot_atom_text(b"quentin.dufour.io abcdef"),
            Ok((&b" abcdef"[..], &b"quentin.dufour.io"[..]))
        );
    }

    #[test]
    fn test_dot_atom() {
        assert_eq!(
            dot_atom(b"   (skip) quentin.dufour.io abcdef"),
            Ok((&b"abcdef"[..], &b"quentin.dufour.io"[..]))
        );
    }
}
|
129
tests/enron.rs
129
tests/enron.rs
|
@ -1,129 +0,0 @@
|
||||||
use imf_codec::fragments::section;
|
|
||||||
use imf_codec::multipass;
|
|
||||||
use std::collections::HashSet;
|
|
||||||
use std::fs::File;
|
|
||||||
use std::io::Read;
|
|
||||||
use std::path::PathBuf;
|
|
||||||
use walkdir::WalkDir;
|
|
||||||
|
|
||||||
fn parser<'a, F>(input: &'a [u8], func: F) -> ()
|
|
||||||
where
|
|
||||||
F: FnOnce(§ion::Section) -> (),
|
|
||||||
{
|
|
||||||
let seg = multipass::segment::new(input).unwrap();
|
|
||||||
let charset = seg.charset();
|
|
||||||
let fields = charset.fields().unwrap();
|
|
||||||
let field_names = fields.names();
|
|
||||||
let field_body = field_names.body();
|
|
||||||
let section = field_body.section();
|
|
||||||
|
|
||||||
func(§ion.fields);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parses every file found under `resources/enron/maildir/` (relative to the
/// crate root) and checks the resulting header section.
///
/// For each mail the test requires a parsed date, at least one `From`
/// mailbox (unless the mail is listed in `known_bad_from`) and no bad field
/// (unless the mail is listed in `known_bad_fields`). Mails are identified by
/// their path relative to the maildir root.
///
/// Marked `#[ignore]`, so it only runs when explicitly requested.
#[test]
#[ignore]
fn test_enron500k() {
    let mut d = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    d.push("resources/enron/maildir/");
    // Length of the maildir root: stripping that many bytes from a mail path
    // yields the relative key used in the exception sets below.
    let prefix_sz = d.as_path().to_str().unwrap().len();
    //d.push("williams-w3/");

    let known_bad_fields = HashSet::from([
        "white-s/calendar/113.", // To: east <7..>
        "skilling-j/inbox/223.", // From: pep <performance.>
        "jones-t/all_documents/9806.", // To: <"tibor.vizkelety":@enron.com>
        "jones-t/notes_inbox/3303.", // To: <"tibor.vizkelety":@enron.com>
        "lokey-t/calendar/33.", // A second Date entry for the calendar containing
                                // Date: Monday, March 12
        "zipper-a/inbox/199.", // To: e-mail <mari.>
        "dasovich-j/deleted_items/128.", // To: f62489 <g>
        "dasovich-j/all_documents/677.", // To: w/assts <govt.>
        "dasovich-j/all_documents/8984.", // To: <"ft.com.users":@enron.com>
        "dasovich-j/all_documents/3514.", // To: <"ft.com.users":@enron.com>
        "dasovich-j/all_documents/4467.", // To: <"ft.com.users":@enron.com>
        "dasovich-j/all_documents/578.", // To: w/assts <govt.>
        "dasovich-j/all_documents/3148.", // To: <"economist.com.readers":@enron.com>
        "dasovich-j/all_documents/9953.", // To: <"economist.com.reader":@enron.com>
        "dasovich-j/risk_analytics/3.", // To: w/assts <govt.>
        "dasovich-j/notes_inbox/5391.", // To: <"ft.com.users":@enron.com>
        "dasovich-j/notes_inbox/4952.", // To: <"economist.com.reader":@enron.com>
        "dasovich-j/notes_inbox/2386.", // To: <"ft.com.users":@enron.com>
        "dasovich-j/notes_inbox/1706.", // To: <"ft.com.users":@enron.com>
        "dasovich-j/notes_inbox/1489.", // To: <"economist.com.readers":@enron.com>
        "dasovich-j/notes_inbox/5.", // To: w/assts <govt.>
        "kaminski-v/sites/19.", // To: <"the.desk":@enron.com>
        "kaminski-v/sites/1.", // To: <"the.desk":@enron.com>
        "kaminski-v/discussion_threads/5082.", // To: <"ft.com.users":@enron.com>
        "kaminski-v/discussion_threads/4046.", // To: <"the.desk":@enron.com>
        "kaminski-v/discussion_threads/4187.", // To: <"the.desk":@enron.com>
        "kaminski-v/discussion_threads/8068.", // To: cats <breaktkhrough.>, risk <breakthrough.>, leaders <breaktkhrough.>
        "kaminski-v/discussion_threads/7980.", // To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
        "kaminski-v/all_documents/5970.", // To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
        "kaminski-v/all_documents/5838.", // To + Cc: dogs <breakthrough.>, breakthrough.adm@enron.com, breakthrough.adm@enron.com,\r\n\tbreakthrough.adm@enron.com
        "kaminski-v/all_documents/10070.", // To: <"ft.com.users":@enron.com>
        "kaminski-v/all_documents/92.", // To: <"the.desk":@enron.com>
        "kaminski-v/all_documents/276.", // To: <"the.desk":@enron.com>
        "kaminski-v/technical/1.", // To: <"the.desk":@enron.com>
        "kaminski-v/technical/7.", // To: <"the.desk":@enron.com>
        "kaminski-v/notes_inbox/140.", // To: dogs <breakthrough.>, cats <breaktkhrough.>, risk <breakthrough.>,\r\n\tleaders <breaktkhrough.>
        "kaminski-v/notes_inbox/95.", // To + CC failed: cats <breaktkhrough.>, risk <breakthrough.>, leaders <breaktkhrough.>
        "kean-s/archiving/untitled/1232.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
        "kean-s/archiving/untitled/1688.", // To: w/assts <govt.>
        "kean-s/sent/198.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
        "kean-s/reg_risk/9.", // To: w/assts <govt.>
        "kean-s/discussion_threads/950.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
        "kean-s/discussion_threads/577.", // To: w/assts <govt.>
        "kean-s/calendar/untitled/1096.", // To: w/assts <govt.>, mark.palmer@enron.com, karen.denne@enron.com
        "kean-s/calendar/untitled/640.", // To: w/assts <govt.>
        "kean-s/all_documents/640.", // To: w/assts <govt.>
        "kean-s/all_documents/1095.", // To: w/assts <govt.>
        "kean-s/attachments/2030.", // To: w/assts <govt.>
        "williams-w3/operations_committee_isas/10.", // To: z34655 <m>
    ]);

    let known_bad_from = HashSet::from([
        "skilling-j/inbox/223.", // From: pep <performance.>
    ]);

    let mut i = 0;
    for entry in WalkDir::new(d.as_path())
        .into_iter()
        .filter_map(|file| file.ok())
    {
        // Only files are parsed; every other kind of entry is skipped.
        if !entry.metadata().unwrap().is_file() {
            continue;
        }

        let mail_path = entry.path();
        let suffix = &mail_path.to_str().unwrap()[prefix_sz..];

        // read file
        let mut raw = Vec::new();
        let mut f = File::open(mail_path).unwrap();
        f.read_to_end(&mut raw).unwrap();

        // parse
        parser(&raw, |hdrs| {
            let ok_date = hdrs.date.is_some();
            let ok_from = !hdrs.from.is_empty();
            let ok_fields = hdrs.bad_fields.is_empty();

            if !ok_date || !ok_from || !ok_fields {
                println!("Issue with: {}", suffix);
            }

            // The mail is named in every assertion so that a failure can be
            // traced back without scanning the captured output.
            assert!(ok_date, "no date parsed in {}", suffix);

            if !known_bad_from.contains(suffix) {
                assert!(ok_from, "no From mailbox parsed in {}", suffix);
            }

            if !known_bad_fields.contains(suffix) {
                assert!(ok_fields, "bad fields found in {}", suffix);
            }

            i += 1;
            if i % 1000 == 0 {
                println!("Analyzed emails: {}", i);
            }
        })
    }
}
|
|
340
tests/known.rs
340
tests/known.rs
|
@ -1,340 +0,0 @@
|
||||||
use chrono::{FixedOffset, TimeZone};
|
|
||||||
use imf_codec::fragments::{misc_token, model, section, part, trace};
|
|
||||||
use imf_codec::multipass;
|
|
||||||
use std::collections::HashMap;
|
|
||||||
|
|
||||||
fn parser<'a, F>(input: &'a [u8], func: F) -> ()
|
|
||||||
where
|
|
||||||
F: FnOnce(§ion::Section) -> (),
|
|
||||||
{
|
|
||||||
let seg = multipass::segment::new(input).unwrap();
|
|
||||||
let charset = seg.charset();
|
|
||||||
let fields = charset.fields().unwrap();
|
|
||||||
let field_names = fields.names();
|
|
||||||
let field_body = field_names.body();
|
|
||||||
let section = field_body.section();
|
|
||||||
|
|
||||||
func(§ion.fields);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parses a message whose header block mixes regular fields, folded fields,
/// a field name followed by a space (`comments :`), a non-ASCII field name
/// and a line that is not a header at all, then compares the resulting
/// section with the expected one.
///
/// NOTE(review): continuation lines of folded fields are written with a
/// single leading space, both in the fixture and in the expected `received`
/// and `unparsed` values. The folding whitespace must be present for the
/// lines to be continuations, but its exact original amount should be
/// confirmed against the repository history.
#[test]
fn test_headers() {
    let fullmail: &[u8] = r#"Return-Path: <gitlab@example.com>
Delivered-To: quentin@example.com
Received: from smtp.example.com ([10.83.2.2])
 by doradille with LMTP
 id xyzabcd
 (envelope-from <gitlab@example.com>)
 for <quentin@example.com>; Tue, 13 Jun 2023 19:01:08 +0000
Date: Tue, 13 Jun 2023 10:01:10 +0200
From: Mary Smith
 <mary@example.net>, "A\lan" <alan@example>
Sender: imf@example.com
Reply-To: "Mary Smith: Personal Account" <smith@home.example>
To: John Doe <jdoe@machine.example>
Cc: imf2@example.com
Bcc: (hidden)
Subject: Re: Saying Hello
Comments: A simple message
Comments: Not that complicated
comments : not valid header name but should be accepted
 by the parser.
Keywords: hello, world
Héron: Raté
 Raté raté
Keywords: salut, le, monde
Not a real header but should still recover
Message-ID: <3456@example.net>
In-Reply-To: <1234@local.machine.example>
References: <1234@local.machine.example>
Unknown: unknown

This is a reply to your hello.
"#
    .as_bytes();

    parser(fullmail, |parsed_section| {
        assert_eq!(
            parsed_section,
            &section::Section {
                date: Some(
                    &FixedOffset::east_opt(2 * 3600)
                        .unwrap()
                        .with_ymd_and_hms(2023, 6, 13, 10, 1, 10)
                        .unwrap()
                ),

                from: vec![
                    &model::MailboxRef {
                        name: Some("Mary Smith".into()),
                        addrspec: model::AddrSpec {
                            local_part: "mary".into(),
                            domain: "example.net".into(),
                        }
                    },
                    &model::MailboxRef {
                        name: Some("Alan".into()),
                        addrspec: model::AddrSpec {
                            local_part: "alan".into(),
                            domain: "example".into(),
                        }
                    }
                ],

                sender: Some(&model::MailboxRef {
                    name: None,
                    addrspec: model::AddrSpec {
                        local_part: "imf".into(),
                        domain: "example.com".into(),
                    }
                }),

                reply_to: vec![&model::AddressRef::Single(model::MailboxRef {
                    name: Some("Mary Smith: Personal Account".into()),
                    addrspec: model::AddrSpec {
                        local_part: "smith".into(),
                        domain: "home.example".into(),
                    }
                })],

                to: vec![&model::AddressRef::Single(model::MailboxRef {
                    name: Some("John Doe".into()),
                    addrspec: model::AddrSpec {
                        local_part: "jdoe".into(),
                        domain: "machine.example".into(),
                    }
                })],

                cc: vec![&model::AddressRef::Single(model::MailboxRef {
                    name: None,
                    addrspec: model::AddrSpec {
                        local_part: "imf2".into(),
                        domain: "example.com".into(),
                    }
                })],

                // `Bcc: (hidden)` only carries a comment, hence no address.
                bcc: vec![],

                msg_id: Some(&model::MessageId {
                    left: "3456",
                    right: "example.net"
                }),
                in_reply_to: vec![&model::MessageId {
                    left: "1234",
                    right: "local.machine.example"
                }],
                references: vec![&model::MessageId {
                    left: "1234",
                    right: "local.machine.example"
                }],

                subject: Some(&misc_token::Unstructured("Re: Saying Hello".into())),

                comments: vec![
                    &misc_token::Unstructured("A simple message".into()),
                    &misc_token::Unstructured("Not that complicated".into()),
                    &misc_token::Unstructured(
                        "not valid header name but should be accepted by the parser.".into()
                    ),
                ],

                keywords: vec![
                    &misc_token::PhraseList(vec!["hello".into(), "world".into(),]),
                    &misc_token::PhraseList(vec!["salut".into(), "le".into(), "monde".into(),]),
                ],

                received: vec![&trace::ReceivedLog(
                    r#"from smtp.example.com ([10.83.2.2])
 by doradille with LMTP
 id xyzabcd
 (envelope-from <gitlab@example.com>)
 for <quentin@example.com>"#
                )],

                return_path: vec![&model::MailboxRef {
                    name: None,
                    addrspec: model::AddrSpec {
                        local_part: "gitlab".into(),
                        domain: "example.com".into(),
                    }
                }],

                optional: HashMap::from([
                    (
                        "Delivered-To",
                        &misc_token::Unstructured("quentin@example.com".into())
                    ),
                    ("Unknown", &misc_token::Unstructured("unknown".into())),
                ]),

                bad_fields: vec![],

                unparsed: vec![
                    "Héron: Raté\n Raté raté\n",
                    "Not a real header but should still recover\n",
                ],
                ..section::Section::default()
            }
        );
    });
}
|
|
||||||
|
|
||||||
/// Parses a message using MIME encoded-words in its address and subject
/// fields together with MIME content headers, then compares the resulting
/// section (including its `mime` sub-section) with the expected one.
///
/// NOTE(review): the second encoded-word of `Subject` sits on a continuation
/// line, written here with a single leading space. The folding whitespace
/// must be present for the line to be a continuation, but its exact original
/// amount should be confirmed against the repository history.
#[test]
fn test_headers_mime() {
    use imf_codec::fragments::mime;
    let fullmail: &[u8] = r#"From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>
To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>
CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>
Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
 =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
MIME-Version: 1.0
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: quoted-printable
Content-ID: <a@example.com>
Content-Description: hello

Now's the time =
for all folk to come=
to the aid of their country.
"#
    .as_bytes();

    parser(fullmail, |parsed_section| {
        assert_eq!(
            parsed_section,
            &section::Section {
                from: vec![
                    &model::MailboxRef {
                        name: Some("Keith Moore".into()),
                        addrspec: model::AddrSpec {
                            local_part: "moore".into(),
                            domain: "cs.utk.edu".into(),
                        }
                    },
                ],

                to: vec![&model::AddressRef::Single(model::MailboxRef {
                    name: Some("Keld Jørn Simonsen".into()),
                    addrspec: model::AddrSpec {
                        local_part: "keld".into(),
                        domain: "dkuug.dk".into(),
                    }
                })],

                cc: vec![&model::AddressRef::Single(model::MailboxRef {
                    name: Some("André Pirard".into()),
                    addrspec: model::AddrSpec {
                        local_part: "PIRARD".into(),
                        domain: "vm1.ulg.ac.be".into(),
                    }
                })],

                // The two encoded-words of the subject are expected to be
                // joined without any separator between them.
                subject: Some(&misc_token::Unstructured(
                    "If you can read this you understand the example.".into()
                )),
                mime_version: Some(&mime::Version { major: 1, minor: 0 }),
                mime: section::MIMESection {
                    content_type: Some(&mime::Type::Text(mime::TextDesc {
                        charset: Some(mime::EmailCharset::ISO_8859_1),
                        subtype: mime::TextSubtype::Plain,
                        unknown_parameters: vec![]
                    })),
                    content_transfer_encoding: Some(&mime::Mechanism::QuotedPrintable),
                    content_id: Some(&model::MessageId {
                        left: "a",
                        right: "example.com"
                    }),
                    content_description: Some(&misc_token::Unstructured("hello".into())),
                    ..section::MIMESection::default()
                },
                ..section::Section::default()
            }
        );
    });
}
|
|
||||||
|
|
||||||
fn parser_bodystruct<'a, F>(input: &'a [u8], func: F) -> ()
|
|
||||||
where
|
|
||||||
F: FnOnce(&part::PartNode) -> (),
|
|
||||||
{
|
|
||||||
let seg = multipass::segment::new(input).unwrap();
|
|
||||||
let charset = seg.charset();
|
|
||||||
let fields = charset.fields().unwrap();
|
|
||||||
let field_names = fields.names();
|
|
||||||
let field_body = field_names.body();
|
|
||||||
let section = field_body.section();
|
|
||||||
let bodystruct = section.body_structure();
|
|
||||||
|
|
||||||
func(&bodystruct.body);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parses a `multipart/alternative` message made of a `text/plain` and a
/// `text/html` part and checks the resulting part tree: one composite node
/// holding two discrete nodes whose bodies are the text found between the
/// boundaries, without the blank line preceding each boundary.
///
/// NOTE(review): the `boundary=` parameter sits on a continuation line of
/// `Content-Type`, written here with a single leading space. The folding
/// whitespace must be present for the line to be a continuation, but its
/// exact original amount should be confirmed against the repository history.
#[test]
fn test_multipart() {
    let fullmail: &[u8] = r#"Date: Sat, 8 Jul 2023 07:14:29 +0200
From: Grrrnd Zero <grrrndzero@example.org>
To: John Doe <jdoe@machine.example>
Subject: Re: Saying Hello
Message-ID: <NTAxNzA2AC47634Y366BAMTY4ODc5MzQyODY0ODY5@www.grrrndzero.org>
MIME-Version: 1.0
Content-Type: multipart/alternative;
 boundary="b1_e376dc71bafc953c0b0fdeb9983a9956"
Content-Transfer-Encoding: 7bit

This is a multi-part message in MIME format.

--b1_e376dc71bafc953c0b0fdeb9983a9956
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: quoted-printable

GZ
OoOoO
oOoOoOoOo
oOoOoOoOoOoOoOoOo
oOoOoOoOoOoOoOoOoOoOoOo
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO

--b1_e376dc71bafc953c0b0fdeb9983a9956
Content-Type: text/html; charset=us-ascii

<div style="text-align: center;"><strong>GZ</strong><br />
OoOoO<br />
oOoOoOoOo<br />
oOoOoOoOoOoOoOoOo<br />
oOoOoOoOoOoOoOoOoOoOoOo<br />
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo<br />
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />

--b1_e376dc71bafc953c0b0fdeb9983a9956--
"#
    .as_bytes();

    parser_bodystruct(fullmail, |part| {
        assert_eq!(
            part,
            &part::PartNode::Composite(
                part::PartHeader::default(),
                vec![
                    part::PartNode::Discrete(
                        part::PartHeader::default(),
                        r#"GZ
OoOoO
oOoOoOoOo
oOoOoOoOoOoOoOoOo
oOoOoOoOoOoOoOoOoOoOoOo
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO"#
                            .as_bytes()
                    ),
                    part::PartNode::Discrete(
                        part::PartHeader::default(),
                        r#"<div style="text-align: center;"><strong>GZ</strong><br />
OoOoO<br />
oOoOoOoOo<br />
oOoOoOoOoOoOoOoOo<br />
oOoOoOoOoOoOoOoOoOoOoOo<br />
oOoOoOoOoOoOoOoOoOoOoOoOoOoOo<br />
OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />"#
                            .as_bytes()
                    ),
                ]
            )
        );
    });
}
|
|
Loading…
Reference in a new issue