From 82c7f5909f0c3e07d0dbd4395d54121091aab267 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Wed, 6 Jul 2022 18:42:37 +0200 Subject: [PATCH] Fix envelope + rfc822 message size+line number --- Cargo.lock | 3 +- Cargo.toml | 2 +- src/imap/mailbox_view.rs | 156 ++++++++++++++---- tests/emails/dxflrs/0004_msg-in-msg.body | 2 +- .../dxflrs/0004_msg-in-msg.bodystructure | 2 +- tests/emails/dxflrs/0004_msg-in-msg.eml | 5 + 6 files changed, 134 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bf39121..d21e88b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1335,8 +1335,7 @@ dependencies = [ [[package]] name = "mail-parser" version = "0.4.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c46a841ae5276aba5218ade7bb76896358f9f95a925c7b3deea6a0ec0fb8e2a7" +source = "git+https://github.com/superboum/mail-parser?branch=feature/last_part_offset#c227527d2613d33ea1342f85c635134134222736" dependencies = [ "encoding_rs", "serde", diff --git a/Cargo.toml b/Cargo.toml index ac3e641..3f16605 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,7 @@ itertools = "0.10" lazy_static = "1.4" ldap3 = { version = "0.10", default-features = false, features = ["tls"] } log = "0.4" -mail-parser = "0.4.8" +mail-parser = { git = "https://github.com/superboum/mail-parser", branch = "feature/last_part_offset" } pretty_env_logger = "0.4" rusoto_core = "0.48.0" rusoto_credential = "0.48.0" diff --git a/src/imap/mailbox_view.rs b/src/imap/mailbox_view.rs index f1d3df7..7ce10b3 100644 --- a/src/imap/mailbox_view.rs +++ b/src/imap/mailbox_view.rs @@ -1,5 +1,6 @@ use std::borrow::{Borrow, Cow}; use std::collections::HashMap; +use std::io::{BufRead, Cursor}; use std::num::NonZeroU32; use std::sync::Arc; @@ -422,7 +423,30 @@ fn string_to_flag(f: &str) -> Option { } } +/// Envelope rules are defined in RFC 3501, section 7.4.2 +/// https://datatracker.ietf.org/doc/html/rfc3501#section-7.4.2 +/// +/// Some important notes: +/// +/// If the Sender or Reply-To lines are absent in the [RFC-2822] +/// header, or are present but empty, the server sets the +/// corresponding member of the envelope to be the same value as +/// the from member (the client is not expected to know to do +/// this). Note: [RFC-2822] requires that all messages have a valid +/// From header. Therefore, the from, sender, and reply-to +/// members in the envelope can not be NIL. +/// +/// If the Date, Subject, In-Reply-To, and Message-ID header lines +/// are absent in the [RFC-2822] header, the corresponding member +/// of the envelope is NIL; if these header lines are present but +/// empty the corresponding member of the envelope is the empty +/// string. + +//@FIXME return an error if the envelope is invalid instead of panicking +//@FIXME some fields must be defaulted if there are not set. fn message_envelope(msg: &mail_parser::Message<'_>) -> Envelope { + let from = convert_addresses(msg.get_from()).unwrap_or(vec![]); + Envelope { date: NString( msg.get_date() @@ -432,13 +456,13 @@ fn message_envelope(msg: &mail_parser::Message<'_>) -> Envelope { msg.get_subject() .map(|d| IString::try_from(d.to_string()).unwrap()), ), - from: convert_addresses(msg.get_from()), - sender: convert_addresses(msg.get_sender()), - reply_to: convert_addresses(msg.get_reply_to()), - to: convert_addresses(msg.get_to()), - cc: convert_addresses(msg.get_cc()), - bcc: convert_addresses(msg.get_bcc()), - in_reply_to: NString(None), // TODO + from: from.clone(), + sender: convert_addresses(msg.get_sender()).unwrap_or(from.clone()), + reply_to: convert_addresses(msg.get_reply_to()).unwrap_or(from.clone()), + to: convert_addresses(msg.get_to()).unwrap_or(vec![]), + cc: convert_addresses(msg.get_cc()).unwrap_or(vec![]), + bcc: convert_addresses(msg.get_bcc()).unwrap_or(vec![]), + in_reply_to: NString(None), // @TODO message_id: NString( msg.get_message_id() .map(|d| IString::try_from(d.to_string()).unwrap()), @@ -446,28 +470,27 @@ fn message_envelope(msg: &mail_parser::Message<'_>) -> Envelope { } } -fn convert_addresses(a: &mail_parser::HeaderValue<'_>) -> Vec
{ +fn convert_addresses(a: &mail_parser::HeaderValue<'_>) -> Option> { match a { - mail_parser::HeaderValue::Address(a) => vec![convert_address(a)], - mail_parser::HeaderValue::AddressList(a) => { - let mut ret = vec![]; - for aa in a { - ret.push(convert_address(aa)); - } - ret + mail_parser::HeaderValue::Address(a) => Some(vec![convert_address(a)]), + mail_parser::HeaderValue::AddressList(l) => { + Some(l.iter().map(|a| convert_address(a)).collect()) } - mail_parser::HeaderValue::Empty => vec![], - mail_parser::HeaderValue::Collection(c) => { - let mut ret = vec![]; - for cc in c.iter() { - ret.extend(convert_addresses(cc).into_iter()); - } - ret + mail_parser::HeaderValue::Empty => None, + mail_parser::HeaderValue::Collection(c) => Some( + c.iter() + .map(|l| convert_addresses(l).unwrap_or(vec![])) + .flatten() + .collect(), + ), + _ => { + tracing::warn!("Invalid address header"); + None } - _ => panic!("Invalid address header"), } } +//@FIXME Remove unwrap fn convert_address(a: &mail_parser::Addr<'_>) -> Address { let (user, host) = match &a.address { None => (None, None), @@ -483,6 +506,8 @@ fn convert_address(a: &mail_parser::Addr<'_>) -> Address { .as_ref() .map(|x| IString::try_from(x.to_string()).unwrap()), ), + // SMTP at-domain-list (source route) seems obsolete since at least 1991 + // https://www.mhonarc.org/archive/html/ietf-822/1991-06/msg00060.html NString(None), NString(user.map(|x| IString::try_from(x).unwrap())), NString(host.map(|x| IString::try_from(x).unwrap())), @@ -550,15 +575,82 @@ fn build_imap_email_struct<'a>( extension: None, }) } - MessagePart::Binary(_) | MessagePart::InlineBinary(_) => { - /* - * Note also that a subtype specification is MANDATORY -- it may not be - * omitted from a Content-Type header field. As such, there are no - * default subtypes. - */ - todo!() + MessagePart::Binary(bp) | MessagePart::InlineBinary(bp) => { + let (_, mut basic) = headers_to_basic_fields(bp)?; + + let ct = msg + .get_content_type() + .ok_or(anyhow!("Content-Type is missing but required here."))?; + + let type_ = + IString::try_from(ct.c_type.as_ref().to_string()).map_err(|_| { + anyhow!("Unable to build IString from given Content-Type type given") + })?; + + let subtype = IString::try_from( + ct.c_subtype + .as_ref() + .ok_or(anyhow!("Content-Type invalid, missing subtype"))? + .to_string(), + ) + .map_err(|_| { + anyhow!("Unable to build IString from given Content-Type subtype given") + })?; + + Ok(BodyStructure::Single { + body: FetchBody { + basic, + specific: SpecificFields::Basic { type_, subtype }, + }, + extension: None, + }) + } + MessagePart::Message(bp) => { + let (_, mut basic) = headers_to_basic_fields(bp)?; + + // @NOTE in some cases mail-parser does not parse the MessageAttachment but + // provide it as raw body. Using `as_ref()` masks this fact: if the message is + // parsed, as_ref() will return None. But by looking quickly at the code, it + // seems that the attachment is not parsed when mail-parser encounters some + // encoding problems, so it might be better to trust mail-parser. + let inner = bp + .get_body() + .as_ref() + .ok_or(anyhow!("Unable to parse inner message."))?; + + // @NOTE mail-parser does not provide enough information to compute the end of the + // message. The offset_end value wrongly includes the multipart delimiter, + // which lead to incorrect line count and body size. + // I have patched the lib to add a new offset type named last_part that take + // into account this fact. After that, we need to do some maths... + let len = inner.offset_last_part - inner.offset_header; + let raw_msg = &inner.raw_message[..len]; + basic.size = u32::try_from(len)?; + + Ok(BodyStructure::Single { + body: FetchBody { + basic, + specific: SpecificFields::Message { + envelope: message_envelope(inner), + body_structure: Box::new(build_imap_email_struct( + inner, + &inner.structure, + )?), + + // @FIXME This solution is bad for 2 reasons: + // - RFC2045 says line endings are CRLF but we accept LF alone with + // this method. It could be a feature (be liberal in what you + // accept) but we must be sure that we don't break things. + // - It should be done during parsing, we are iterating twice on + // the same data which results in some wastes. + number_of_lines: u32::try_from( + Cursor::new(raw_msg.as_ref()).lines().count(), + )?, + }, + }, + extension: None, + }) } - MessagePart::Message(_) => todo!(), } } MessageStructure::List(lp) => { @@ -746,6 +838,8 @@ mod tests { "tests/emails/dxflrs/0001_simple", "tests/emails/dxflrs/0002_mime", "tests/emails/dxflrs/0003_mime-in-mime", + "tests/emails/dxflrs/0004_msg-in-msg", // broken + //"tests/emails/dxflrs/0005_mail-parser-readme", // broken ]; for pref in prefixes.iter() { diff --git a/tests/emails/dxflrs/0004_msg-in-msg.body b/tests/emails/dxflrs/0004_msg-in-msg.body index 8149cfd..9ff3336 100644 --- a/tests/emails/dxflrs/0004_msg-in-msg.body +++ b/tests/emails/dxflrs/0004_msg-in-msg.body @@ -1 +1 @@ -(BODY (("message" "rfc822" NIL NIL NIL "7bit" 129 (NIL "Welcome to Aerogramme!!" (("Garage team" NIL "garagehq" "deuxfleurs.fr")) (("Garage team" NIL "garagehq" "deuxfleurs.fr")) (("Garage team" NIL "garagehq" "deuxfleurs.fr")) NIL NIL NIL NIL NIL) ("text" "plain" ("charset" "us-ascii") NIL NIL "7bit" 49 1) 4) "mixed")) \ No newline at end of file +(BODY (("text" "plain" ("charset" "us-ascii") NIL NIL "7bit" 9 1)("message" "rfc822" NIL NIL NIL "7bit" 129 (NIL "Welcome to Aerogramme!!" (("Garage team" NIL "garagehq" "deuxfleurs.fr")) (("Garage team" NIL "garagehq" "deuxfleurs.fr")) (("Garage team" NIL "garagehq" "deuxfleurs.fr")) NIL NIL NIL NIL NIL) ("text" "plain" ("charset" "us-ascii") NIL NIL "7bit" 49 1) 4) "mixed")) \ No newline at end of file diff --git a/tests/emails/dxflrs/0004_msg-in-msg.bodystructure b/tests/emails/dxflrs/0004_msg-in-msg.bodystructure index 22644b8..a0d751a 100644 --- a/tests/emails/dxflrs/0004_msg-in-msg.bodystructure +++ b/tests/emails/dxflrs/0004_msg-in-msg.bodystructure @@ -1 +1 @@ -(BODYSTRUCTURE (("message" "rfc822" NIL NIL NIL "7bit" 129 (NIL "Welcome to Aerogramme!!" (("Garage team" NIL "garagehq" "deuxfleurs.fr")) (("Garage team" NIL "garagehq" "deuxfleurs.fr")) (("Garage team" NIL "garagehq" "deuxfleurs.fr")) NIL NIL NIL NIL NIL) ("text" "plain" ("charset" "us-ascii") NIL NIL "7bit" 49 1 NIL NIL NIL NIL) 4 NIL NIL NIL NIL) "mixed" ("boundary" "delim") NIL NIL NIL)) \ No newline at end of file +(BODYSTRUCTURE (("text" "plain" ("charset" "us-ascii") NIL NIL "7bit" 9 1 NIL NIL NIL NIL)("message" "rfc822" NIL NIL NIL "7bit" 129 (NIL "Welcome to Aerogramme!!" (("Garage team" NIL "garagehq" "deuxfleurs.fr")) (("Garage team" NIL "garagehq" "deuxfleurs.fr")) (("Garage team" NIL "garagehq" "deuxfleurs.fr")) NIL NIL NIL NIL NIL) ("text" "plain" ("charset" "us-ascii") NIL NIL "7bit" 49 1 NIL NIL NIL NIL) 4 NIL NIL NIL NIL) "mixed" ("boundary" "delim") NIL NIL NIL)) \ No newline at end of file diff --git a/tests/emails/dxflrs/0004_msg-in-msg.eml b/tests/emails/dxflrs/0004_msg-in-msg.eml index b1602cb..f581b59 100644 --- a/tests/emails/dxflrs/0004_msg-in-msg.eml +++ b/tests/emails/dxflrs/0004_msg-in-msg.eml @@ -2,6 +2,11 @@ From: Garage team Content-Type: multipart/mixed; boundary="delim"; Subject: Welcome to Aerogramme!! +--delim +Content-Type: text/plain; charset="us-ascii" + +Hello 1 + --delim Content-Type: message/rfc822