Fix envelope + rfc822 message size+line number

This commit is contained in:
Quentin 2022-07-06 18:42:37 +02:00
parent cba06269ab
commit 82c7f5909f
Signed by: quentin
GPG key ID: E9602264D639FF68
6 changed files with 134 additions and 36 deletions

3
Cargo.lock generated
View file

@ -1335,8 +1335,7 @@ dependencies = [
[[package]] [[package]]
name = "mail-parser" name = "mail-parser"
version = "0.4.8" version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "git+https://github.com/superboum/mail-parser?branch=feature/last_part_offset#c227527d2613d33ea1342f85c635134134222736"
checksum = "c46a841ae5276aba5218ade7bb76896358f9f95a925c7b3deea6a0ec0fb8e2a7"
dependencies = [ dependencies = [
"encoding_rs", "encoding_rs",
"serde", "serde",

View file

@ -20,7 +20,7 @@ itertools = "0.10"
lazy_static = "1.4" lazy_static = "1.4"
ldap3 = { version = "0.10", default-features = false, features = ["tls"] } ldap3 = { version = "0.10", default-features = false, features = ["tls"] }
log = "0.4" log = "0.4"
mail-parser = "0.4.8" mail-parser = { git = "https://github.com/superboum/mail-parser", branch = "feature/last_part_offset" }
pretty_env_logger = "0.4" pretty_env_logger = "0.4"
rusoto_core = "0.48.0" rusoto_core = "0.48.0"
rusoto_credential = "0.48.0" rusoto_credential = "0.48.0"

View file

@ -1,5 +1,6 @@
use std::borrow::{Borrow, Cow}; use std::borrow::{Borrow, Cow};
use std::collections::HashMap; use std::collections::HashMap;
use std::io::{BufRead, Cursor};
use std::num::NonZeroU32; use std::num::NonZeroU32;
use std::sync::Arc; use std::sync::Arc;
@ -422,7 +423,30 @@ fn string_to_flag(f: &str) -> Option<Flag> {
} }
} }
/// Envelope rules are defined in RFC 3501, section 7.4.2
/// https://datatracker.ietf.org/doc/html/rfc3501#section-7.4.2
///
/// Some important notes:
///
/// If the Sender or Reply-To lines are absent in the [RFC-2822]
/// header, or are present but empty, the server sets the
/// corresponding member of the envelope to be the same value as
/// the from member (the client is not expected to know to do
/// this). Note: [RFC-2822] requires that all messages have a valid
/// From header. Therefore, the from, sender, and reply-to
/// members in the envelope can not be NIL.
///
/// If the Date, Subject, In-Reply-To, and Message-ID header lines
/// are absent in the [RFC-2822] header, the corresponding member
/// of the envelope is NIL; if these header lines are present but
/// empty the corresponding member of the envelope is the empty
/// string.
//@FIXME return an error if the envelope is invalid instead of panicking
//@FIXME some fields must be defaulted if they are not set.
fn message_envelope(msg: &mail_parser::Message<'_>) -> Envelope { fn message_envelope(msg: &mail_parser::Message<'_>) -> Envelope {
let from = convert_addresses(msg.get_from()).unwrap_or(vec![]);
Envelope { Envelope {
date: NString( date: NString(
msg.get_date() msg.get_date()
@ -432,13 +456,13 @@ fn message_envelope(msg: &mail_parser::Message<'_>) -> Envelope {
msg.get_subject() msg.get_subject()
.map(|d| IString::try_from(d.to_string()).unwrap()), .map(|d| IString::try_from(d.to_string()).unwrap()),
), ),
from: convert_addresses(msg.get_from()), from: from.clone(),
sender: convert_addresses(msg.get_sender()), sender: convert_addresses(msg.get_sender()).unwrap_or(from.clone()),
reply_to: convert_addresses(msg.get_reply_to()), reply_to: convert_addresses(msg.get_reply_to()).unwrap_or(from.clone()),
to: convert_addresses(msg.get_to()), to: convert_addresses(msg.get_to()).unwrap_or(vec![]),
cc: convert_addresses(msg.get_cc()), cc: convert_addresses(msg.get_cc()).unwrap_or(vec![]),
bcc: convert_addresses(msg.get_bcc()), bcc: convert_addresses(msg.get_bcc()).unwrap_or(vec![]),
in_reply_to: NString(None), // TODO in_reply_to: NString(None), // @TODO
message_id: NString( message_id: NString(
msg.get_message_id() msg.get_message_id()
.map(|d| IString::try_from(d.to_string()).unwrap()), .map(|d| IString::try_from(d.to_string()).unwrap()),
@ -446,28 +470,27 @@ fn message_envelope(msg: &mail_parser::Message<'_>) -> Envelope {
} }
} }
fn convert_addresses(a: &mail_parser::HeaderValue<'_>) -> Vec<Address> { fn convert_addresses(a: &mail_parser::HeaderValue<'_>) -> Option<Vec<Address>> {
match a { match a {
mail_parser::HeaderValue::Address(a) => vec![convert_address(a)], mail_parser::HeaderValue::Address(a) => Some(vec![convert_address(a)]),
mail_parser::HeaderValue::AddressList(a) => { mail_parser::HeaderValue::AddressList(l) => {
let mut ret = vec![]; Some(l.iter().map(|a| convert_address(a)).collect())
for aa in a {
ret.push(convert_address(aa));
}
ret
} }
mail_parser::HeaderValue::Empty => vec![], mail_parser::HeaderValue::Empty => None,
mail_parser::HeaderValue::Collection(c) => { mail_parser::HeaderValue::Collection(c) => Some(
let mut ret = vec![]; c.iter()
for cc in c.iter() { .map(|l| convert_addresses(l).unwrap_or(vec![]))
ret.extend(convert_addresses(cc).into_iter()); .flatten()
} .collect(),
ret ),
_ => {
tracing::warn!("Invalid address header");
None
} }
_ => panic!("Invalid address header"),
} }
} }
//@FIXME Remove unwrap
fn convert_address(a: &mail_parser::Addr<'_>) -> Address { fn convert_address(a: &mail_parser::Addr<'_>) -> Address {
let (user, host) = match &a.address { let (user, host) = match &a.address {
None => (None, None), None => (None, None),
@ -483,6 +506,8 @@ fn convert_address(a: &mail_parser::Addr<'_>) -> Address {
.as_ref() .as_ref()
.map(|x| IString::try_from(x.to_string()).unwrap()), .map(|x| IString::try_from(x.to_string()).unwrap()),
), ),
// SMTP at-domain-list (source route) seems to have been obsolete since at least 1991
// https://www.mhonarc.org/archive/html/ietf-822/1991-06/msg00060.html
NString(None), NString(None),
NString(user.map(|x| IString::try_from(x).unwrap())), NString(user.map(|x| IString::try_from(x).unwrap())),
NString(host.map(|x| IString::try_from(x).unwrap())), NString(host.map(|x| IString::try_from(x).unwrap())),
@ -550,15 +575,82 @@ fn build_imap_email_struct<'a>(
extension: None, extension: None,
}) })
} }
MessagePart::Binary(_) | MessagePart::InlineBinary(_) => { MessagePart::Binary(bp) | MessagePart::InlineBinary(bp) => {
/* let (_, mut basic) = headers_to_basic_fields(bp)?;
* Note also that a subtype specification is MANDATORY -- it may not be
* omitted from a Content-Type header field. As such, there are no let ct = msg
* default subtypes. .get_content_type()
*/ .ok_or(anyhow!("Content-Type is missing but required here."))?;
todo!()
let type_ =
IString::try_from(ct.c_type.as_ref().to_string()).map_err(|_| {
anyhow!("Unable to build IString from given Content-Type type given")
})?;
let subtype = IString::try_from(
ct.c_subtype
.as_ref()
.ok_or(anyhow!("Content-Type invalid, missing subtype"))?
.to_string(),
)
.map_err(|_| {
anyhow!("Unable to build IString from given Content-Type subtype given")
})?;
Ok(BodyStructure::Single {
body: FetchBody {
basic,
specific: SpecificFields::Basic { type_, subtype },
},
extension: None,
})
}
MessagePart::Message(bp) => {
let (_, mut basic) = headers_to_basic_fields(bp)?;
// @NOTE in some cases mail-parser does not parse the MessageAttachment but
// provides it as a raw body. Using `as_ref()` masks this fact: if the message is
// not parsed, as_ref() will return None. But by looking quickly at the code, it
// seems that the attachment is not parsed when mail-parser encounters some
// encoding problems, so it might be better to trust mail-parser.
let inner = bp
.get_body()
.as_ref()
.ok_or(anyhow!("Unable to parse inner message."))?;
// @NOTE mail-parser does not provide enough information to compute the end of the
// message. The offset_end value wrongly includes the multipart delimiter,
// which leads to an incorrect line count and body size.
// I have patched the lib to add a new offset type named last_part that takes
// this fact into account. After that, we need to do some maths...
let len = inner.offset_last_part - inner.offset_header;
let raw_msg = &inner.raw_message[..len];
basic.size = u32::try_from(len)?;
Ok(BodyStructure::Single {
body: FetchBody {
basic,
specific: SpecificFields::Message {
envelope: message_envelope(inner),
body_structure: Box::new(build_imap_email_struct(
inner,
&inner.structure,
)?),
// @FIXME This solution is bad for 2 reasons:
// - RFC2045 says line endings are CRLF but we accept LF alone with
// this method. It could be a feature (be liberal in what you
// accept) but we must be sure that we don't break things.
// - It should be done during parsing; we are iterating twice over
// the same data, which is wasteful.
number_of_lines: u32::try_from(
Cursor::new(raw_msg.as_ref()).lines().count(),
)?,
},
},
extension: None,
})
} }
MessagePart::Message(_) => todo!(),
} }
} }
MessageStructure::List(lp) => { MessageStructure::List(lp) => {
@ -746,6 +838,8 @@ mod tests {
"tests/emails/dxflrs/0001_simple", "tests/emails/dxflrs/0001_simple",
"tests/emails/dxflrs/0002_mime", "tests/emails/dxflrs/0002_mime",
"tests/emails/dxflrs/0003_mime-in-mime", "tests/emails/dxflrs/0003_mime-in-mime",
"tests/emails/dxflrs/0004_msg-in-msg", // broken
//"tests/emails/dxflrs/0005_mail-parser-readme", // broken
]; ];
for pref in prefixes.iter() { for pref in prefixes.iter() {

View file

@ -1 +1 @@
(BODY (("message" "rfc822" NIL NIL NIL "7bit" 129 (NIL "Welcome to Aerogramme!!" (("Garage team" NIL "garagehq" "deuxfleurs.fr")) (("Garage team" NIL "garagehq" "deuxfleurs.fr")) (("Garage team" NIL "garagehq" "deuxfleurs.fr")) NIL NIL NIL NIL NIL) ("text" "plain" ("charset" "us-ascii") NIL NIL "7bit" 49 1) 4) "mixed")) (BODY (("text" "plain" ("charset" "us-ascii") NIL NIL "7bit" 9 1)("message" "rfc822" NIL NIL NIL "7bit" 129 (NIL "Welcome to Aerogramme!!" (("Garage team" NIL "garagehq" "deuxfleurs.fr")) (("Garage team" NIL "garagehq" "deuxfleurs.fr")) (("Garage team" NIL "garagehq" "deuxfleurs.fr")) NIL NIL NIL NIL NIL) ("text" "plain" ("charset" "us-ascii") NIL NIL "7bit" 49 1) 4) "mixed"))

View file

@ -1 +1 @@
(BODYSTRUCTURE (("message" "rfc822" NIL NIL NIL "7bit" 129 (NIL "Welcome to Aerogramme!!" (("Garage team" NIL "garagehq" "deuxfleurs.fr")) (("Garage team" NIL "garagehq" "deuxfleurs.fr")) (("Garage team" NIL "garagehq" "deuxfleurs.fr")) NIL NIL NIL NIL NIL) ("text" "plain" ("charset" "us-ascii") NIL NIL "7bit" 49 1 NIL NIL NIL NIL) 4 NIL NIL NIL NIL) "mixed" ("boundary" "delim") NIL NIL NIL)) (BODYSTRUCTURE (("text" "plain" ("charset" "us-ascii") NIL NIL "7bit" 9 1 NIL NIL NIL NIL)("message" "rfc822" NIL NIL NIL "7bit" 129 (NIL "Welcome to Aerogramme!!" (("Garage team" NIL "garagehq" "deuxfleurs.fr")) (("Garage team" NIL "garagehq" "deuxfleurs.fr")) (("Garage team" NIL "garagehq" "deuxfleurs.fr")) NIL NIL NIL NIL NIL) ("text" "plain" ("charset" "us-ascii") NIL NIL "7bit" 49 1 NIL NIL NIL NIL) 4 NIL NIL NIL NIL) "mixed" ("boundary" "delim") NIL NIL NIL))

View file

@ -2,6 +2,11 @@ From: Garage team <garagehq@deuxfleurs.fr>
Content-Type: multipart/mixed; boundary="delim"; Content-Type: multipart/mixed; boundary="delim";
Subject: Welcome to Aerogramme!! Subject: Welcome to Aerogramme!!
--delim
Content-Type: text/plain; charset="us-ascii"
Hello 1
--delim --delim
Content-Type: message/rfc822 Content-Type: message/rfc822