From 3e3821682c0009acb9651799b961f41ed3027e61 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 20 Jul 2022 12:27:49 +0200 Subject: [PATCH] Move back to mainline mail-parser - this removes the bug introduced in 0fe5fe071 - but adds some bugs where the body structure is not returned properly --- Cargo.lock | 3 +- Cargo.toml | 2 +- src/imap/mailbox_view.rs | 136 ++++++++++++++++------------ src/main.rs | 3 - tests/parsing-crates/mail_parser.rs | 7 +- 5 files changed, 84 insertions(+), 67 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0724fb0..f54e742 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1366,7 +1366,8 @@ dependencies = [ [[package]] name = "mail-parser" version = "0.4.8" -source = "git+https://github.com/superboum/mail-parser?rev=db61a03#db61a0364c00d3cf115f20c6d195bfa4594c53ff" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c46a841ae5276aba5218ade7bb76896358f9f95a925c7b3deea6a0ec0fb8e2a7" dependencies = [ "encoding_rs", "serde", diff --git a/Cargo.toml b/Cargo.toml index b713401..4130ef1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ itertools = "0.10" lazy_static = "1.4" ldap3 = { version = "0.10", default-features = false, features = ["tls"] } log = "0.4" -mail-parser = { git = "https://github.com/superboum/mail-parser", rev = "db61a03" } +mail-parser = "0.4.8" pretty_env_logger = "0.4" rusoto_core = "0.48.0" rusoto_credential = "0.48.0" diff --git a/src/imap/mailbox_view.rs b/src/imap/mailbox_view.rs index 8844b3f..31ccd9f 100644 --- a/src/imap/mailbox_view.rs +++ b/src/imap/mailbox_view.rs @@ -675,7 +675,7 @@ fn build_imap_email_struct<'a>( unreachable!("A multipart entry can not be found here.") } MessagePart::Text(bp) | MessagePart::Html(bp) => { - let (attrs, mut basic) = headers_to_basic_fields(bp)?; + let (attrs, mut basic) = headers_to_basic_fields(bp, bp.body.len())?; // If the charset is not defined, set it to "us-ascii" if attrs.charset.is_none() { @@ -704,7 +704,10 @@ fn build_imap_email_struct<'a>( // We do not count the number of lines but the number of line // feeds to have the same behavior as Dovecot and Cyrus. // 2 lines = 1 line feed. - bp.body_raw.as_ref().iter().filter(|&c| c == &b'\n').count(), + // @FIXME+BUG: if the body is base64-encoded, this returns the + // number of lines in the decoded body, however we should + // instead return the number of raw base64 lines + bp.body.as_ref().chars().filter(|&c| c == '\n').count(), )?, }, }, @@ -712,7 +715,7 @@ fn build_imap_email_struct<'a>( }) } MessagePart::Binary(bp) | MessagePart::InlineBinary(bp) => { - let (_, basic) = headers_to_basic_fields(bp)?; + let (_, basic) = headers_to_basic_fields(bp, bp.body.len())?; let ct = bp .get_content_type() @@ -742,63 +745,77 @@ fn build_imap_email_struct<'a>( }) } MessagePart::Message(bp) => { - let (_, mut basic) = headers_to_basic_fields(bp)?; - // @NOTE in some cases mail-parser does not parse the MessageAttachment but - // provide it as raw body. Using `as_ref()` masks this fact: if the message is - // parsed, as_ref() will return None. But by looking quickly at the code, it - // seems that the attachment is not parsed when mail-parser encounters some - // encoding problems, so it might be better to trust mail-parser. - let inner = bp - .get_body() - .as_ref() - .ok_or(anyhow!("Unable to parse inner message."))?; + // provide it as raw body. By looking quickly at the code, it seems that the + // attachment is not parsed when mail-parser encounters some encoding problems. + match &bp.body { + MessageAttachment::Parsed(inner) => { + // @FIXME+BUG mail-parser does not handle ways when a MIME message contains + // a raw email and wrongly take its delimiter. The size and number of + // lines returned in that case are wrong. A patch to mail-parser is + // needed to fix this. + let (_, basic) = headers_to_basic_fields(bp, inner.raw_message.len())?; - // @FIXME mail-parser does not handle ways when a MIME message contains - // a raw email and wrongly take its delimiter. We thus test the headers to - // learn if it is a RFC822 email (raw) or RFC5322 (MIME) message. - // The correct way would be to patch mail-parser. - let raw_msg = match part.unwrap_message().get_content_type() { - Some(ContentType { - attributes: Some(_), - .. - }) => { - //println!("has a content type {:?}", bp); - &inner.raw_message[..] + // We do not count the number of lines but the number of line + // feeds to have the same behavior as Dovecot and Cyrus. + // 2 lines = 1 line feed. + let nol = inner.raw_message.iter().filter(|&c| c == &b'\n').count(); + + Ok(BodyStructure::Single { + body: FetchBody { + basic, + specific: SpecificFields::Message { + envelope: message_envelope(inner), + body_structure: Box::new(build_imap_email_struct( + inner, + &inner.structure, + )?), + + // @FIXME This solution is bad for 2 reasons: + // - RFC2045 says line endings are CRLF but we accept LF alone with + // this method. It could be a feature (be liberal in what you + // accept) but we must be sure that we don't break things. + // - It should be done during parsing, we are iterating twice on + // the same data which results in some wastes. + number_of_lines: u32::try_from(nol)?, + }, + }, + extension: None, + }) } - _ => { - //println!("has no content type {:?}", bp); - &inner.raw_message[..(inner.offset_last_part - inner.offset_header)] + MessageAttachment::Raw(raw_msg) => { + let (_, basic) = headers_to_basic_fields(bp, raw_msg.len())?; + + let ct = bp + .get_content_type() + .ok_or(anyhow!("Content-Type is missing but required here."))?; + + let type_ = + IString::try_from(ct.c_type.as_ref().to_string()).map_err(|_| { + anyhow!("Unable to build IString from given Content-Type type given") + })?; + + let subtype = IString::try_from( + ct.c_subtype + .as_ref() + .ok_or(anyhow!("Content-Type invalid, missing subtype"))? + .to_string(), + ) + .map_err(|_| { + anyhow!( + "Unable to build IString from given Content-Type subtype given" + ) + })?; + + Ok(BodyStructure::Single { + body: FetchBody { + basic, + specific: SpecificFields::Basic { type_, subtype }, + }, + extension: None, + }) } - }; - basic.size = u32::try_from(raw_msg.len())?; - - // We do not count the number of lines but the number of line - // feeds to have the same behavior as Dovecot and Cyrus. - // 2 lines = 1 line feed. - let nol = raw_msg.iter().filter(|&c| c == &b'\n').count(); - - Ok(BodyStructure::Single { - body: FetchBody { - basic, - specific: SpecificFields::Message { - envelope: message_envelope(inner), - body_structure: Box::new(build_imap_email_struct( - inner, - &inner.structure, - )?), - - // @FIXME This solution is bad for 2 reasons: - // - RFC2045 says line endings are CRLF but we accept LF alone with - // this method. It could be a feature (be liberal in what you - // accept) but we must be sure that we don't break things. - // - It should be done during parsing, we are iterating twice on - // the same data which results in some wastes. - number_of_lines: u32::try_from(nol)?, - }, - }, - extension: None, - }) + } } } } @@ -934,7 +951,10 @@ fn attrs_to_params<'a>(bp: &impl MimeHeaders<'a>) -> (SpecialAttrs, Vec<(IString /// Takes mail-parser headers and build imap-codec BasicFields /// Return some special informations too -fn headers_to_basic_fields<'a, T>(bp: &'a Part) -> Result<(SpecialAttrs<'a>, BasicFields)> { +fn headers_to_basic_fields<'a, T>( + bp: &'a Part, + size: usize, +) -> Result<(SpecialAttrs<'a>, BasicFields)> { let (attrs, parameter_list) = attrs_to_params(bp); let bf = BasicFields { @@ -963,7 +983,7 @@ fn headers_to_basic_fields<'a, T>(bp: &'a Part) -> Result<(SpecialAttrs<'a>, .flatten() .unwrap_or(unchecked_istring("7bit")), - size: u32::try_from(bp.body_raw.len())?, + size: u32::try_from(size)?, }; Ok((attrs, bf)) diff --git a/src/main.rs b/src/main.rs index 50eb2ed..4886679 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,9 +9,6 @@ mod mail; mod server; mod time; -#[cfg(test)] -mod mail_parser_tests; - use std::path::PathBuf; use anyhow::{bail, Result}; diff --git a/tests/parsing-crates/mail_parser.rs b/tests/parsing-crates/mail_parser.rs index 165bace..8b277a3 100644 --- a/tests/parsing-crates/mail_parser.rs +++ b/tests/parsing-crates/mail_parser.rs @@ -1,10 +1,10 @@ -use mail_parser_superboum::Message; // FAIL +use mail_parser_superboum::Message; // FAIL + //use mail_parser_048::Message; // PASS //use mail_parser_05::Message; // PASS //use mail_parser_main::Message; // PASS //use mail_parser_db61a03::Message; // PASS - #[test] fn test1() { let input = br#"Content-Type: multipart/mixed; boundary="1234567890123456789012345678901234567890123456789012345678901234567890123456789012" @@ -37,7 +37,6 @@ Content-Type: text/plain dbg!(message); } - #[test] fn test2() { let input = br#"Content-Type: message/rfc822 @@ -54,7 +53,7 @@ Content-Type: text/plain #[test] fn test3() { -let input = br#"Content-Type: multipart/mixed; boundary=":foo" + let input = br#"Content-Type: multipart/mixed; boundary=":foo" --:foo --:foo