Move back to mainline mail-parser

- this removes the bug introduced in 0fe5fe071
- but adds some bugs where the body structure is not returned properly
This commit is contained in:
Alex 2022-07-20 12:27:49 +02:00
parent 24d9f192e4
commit 3e3821682c
Signed by: lx
GPG key ID: 0E496D15096376BE
5 changed files with 84 additions and 67 deletions

3
Cargo.lock generated
View file

@ -1366,7 +1366,8 @@ dependencies = [
[[package]] [[package]]
name = "mail-parser" name = "mail-parser"
version = "0.4.8" version = "0.4.8"
source = "git+https://github.com/superboum/mail-parser?rev=db61a03#db61a0364c00d3cf115f20c6d195bfa4594c53ff" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c46a841ae5276aba5218ade7bb76896358f9f95a925c7b3deea6a0ec0fb8e2a7"
dependencies = [ dependencies = [
"encoding_rs", "encoding_rs",
"serde", "serde",

View file

@ -21,7 +21,7 @@ itertools = "0.10"
lazy_static = "1.4" lazy_static = "1.4"
ldap3 = { version = "0.10", default-features = false, features = ["tls"] } ldap3 = { version = "0.10", default-features = false, features = ["tls"] }
log = "0.4" log = "0.4"
mail-parser = { git = "https://github.com/superboum/mail-parser", rev = "db61a03" } mail-parser = "0.4.8"
pretty_env_logger = "0.4" pretty_env_logger = "0.4"
rusoto_core = "0.48.0" rusoto_core = "0.48.0"
rusoto_credential = "0.48.0" rusoto_credential = "0.48.0"

View file

@ -675,7 +675,7 @@ fn build_imap_email_struct<'a>(
unreachable!("A multipart entry can not be found here.") unreachable!("A multipart entry can not be found here.")
} }
MessagePart::Text(bp) | MessagePart::Html(bp) => { MessagePart::Text(bp) | MessagePart::Html(bp) => {
let (attrs, mut basic) = headers_to_basic_fields(bp)?; let (attrs, mut basic) = headers_to_basic_fields(bp, bp.body.len())?;
// If the charset is not defined, set it to "us-ascii" // If the charset is not defined, set it to "us-ascii"
if attrs.charset.is_none() { if attrs.charset.is_none() {
@ -704,7 +704,10 @@ fn build_imap_email_struct<'a>(
// We do not count the number of lines but the number of line // We do not count the number of lines but the number of line
// feeds to have the same behavior as Dovecot and Cyrus. // feeds to have the same behavior as Dovecot and Cyrus.
// 2 lines = 1 line feed. // 2 lines = 1 line feed.
bp.body_raw.as_ref().iter().filter(|&c| c == &b'\n').count(), // @FIXME+BUG: if the body is base64-encoded, this returns the
// number of lines in the decoded body, however we should
// instead return the number of raw base64 lines
bp.body.as_ref().chars().filter(|&c| c == '\n').count(),
)?, )?,
}, },
}, },
@ -712,7 +715,7 @@ fn build_imap_email_struct<'a>(
}) })
} }
MessagePart::Binary(bp) | MessagePart::InlineBinary(bp) => { MessagePart::Binary(bp) | MessagePart::InlineBinary(bp) => {
let (_, basic) = headers_to_basic_fields(bp)?; let (_, basic) = headers_to_basic_fields(bp, bp.body.len())?;
let ct = bp let ct = bp
.get_content_type() .get_content_type()
@ -742,41 +745,21 @@ fn build_imap_email_struct<'a>(
}) })
} }
MessagePart::Message(bp) => { MessagePart::Message(bp) => {
let (_, mut basic) = headers_to_basic_fields(bp)?;
// @NOTE in some cases mail-parser does not parse the MessageAttachment but // @NOTE in some cases mail-parser does not parse the MessageAttachment but
// provide it as raw body. Using `as_ref()` masks this fact: if the message is // provide it as raw body. By looking quickly at the code, it seems that the
// parsed, as_ref() will return None. But by looking quickly at the code, it // attachment is not parsed when mail-parser encounters some encoding problems.
// seems that the attachment is not parsed when mail-parser encounters some match &bp.body {
// encoding problems, so it might be better to trust mail-parser. MessageAttachment::Parsed(inner) => {
let inner = bp // @FIXME+BUG mail-parser does not handle ways when a MIME message contains
.get_body() // a raw email and wrongly take its delimiter. The size and number of
.as_ref() // lines returned in that case are wrong. A patch to mail-parser is
.ok_or(anyhow!("Unable to parse inner message."))?; // needed to fix this.
let (_, basic) = headers_to_basic_fields(bp, inner.raw_message.len())?;
// @FIXME mail-parser does not handle ways when a MIME message contains
// a raw email and wrongly take its delimiter. We thus test the headers to
// learn if it is a RFC822 email (raw) or RFC5322 (MIME) message.
// The correct way would be to patch mail-parser.
let raw_msg = match part.unwrap_message().get_content_type() {
Some(ContentType {
attributes: Some(_),
..
}) => {
//println!("has a content type {:?}", bp);
&inner.raw_message[..]
}
_ => {
//println!("has no content type {:?}", bp);
&inner.raw_message[..(inner.offset_last_part - inner.offset_header)]
}
};
basic.size = u32::try_from(raw_msg.len())?;
// We do not count the number of lines but the number of line // We do not count the number of lines but the number of line
// feeds to have the same behavior as Dovecot and Cyrus. // feeds to have the same behavior as Dovecot and Cyrus.
// 2 lines = 1 line feed. // 2 lines = 1 line feed.
let nol = raw_msg.iter().filter(|&c| c == &b'\n').count(); let nol = inner.raw_message.iter().filter(|&c| c == &b'\n').count();
Ok(BodyStructure::Single { Ok(BodyStructure::Single {
body: FetchBody { body: FetchBody {
@ -800,6 +783,40 @@ fn build_imap_email_struct<'a>(
extension: None, extension: None,
}) })
} }
MessageAttachment::Raw(raw_msg) => {
let (_, basic) = headers_to_basic_fields(bp, raw_msg.len())?;
let ct = bp
.get_content_type()
.ok_or(anyhow!("Content-Type is missing but required here."))?;
let type_ =
IString::try_from(ct.c_type.as_ref().to_string()).map_err(|_| {
anyhow!("Unable to build IString from given Content-Type type given")
})?;
let subtype = IString::try_from(
ct.c_subtype
.as_ref()
.ok_or(anyhow!("Content-Type invalid, missing subtype"))?
.to_string(),
)
.map_err(|_| {
anyhow!(
"Unable to build IString from given Content-Type subtype given"
)
})?;
Ok(BodyStructure::Single {
body: FetchBody {
basic,
specific: SpecificFields::Basic { type_, subtype },
},
extension: None,
})
}
}
}
} }
} }
MessageStructure::List(lp) => { MessageStructure::List(lp) => {
@ -934,7 +951,10 @@ fn attrs_to_params<'a>(bp: &impl MimeHeaders<'a>) -> (SpecialAttrs, Vec<(IString
/// Takes mail-parser headers and build imap-codec BasicFields /// Takes mail-parser headers and build imap-codec BasicFields
/// Return some special informations too /// Return some special informations too
fn headers_to_basic_fields<'a, T>(bp: &'a Part<T>) -> Result<(SpecialAttrs<'a>, BasicFields)> { fn headers_to_basic_fields<'a, T>(
bp: &'a Part<T>,
size: usize,
) -> Result<(SpecialAttrs<'a>, BasicFields)> {
let (attrs, parameter_list) = attrs_to_params(bp); let (attrs, parameter_list) = attrs_to_params(bp);
let bf = BasicFields { let bf = BasicFields {
@ -963,7 +983,7 @@ fn headers_to_basic_fields<'a, T>(bp: &'a Part<T>) -> Result<(SpecialAttrs<'a>,
.flatten() .flatten()
.unwrap_or(unchecked_istring("7bit")), .unwrap_or(unchecked_istring("7bit")),
size: u32::try_from(bp.body_raw.len())?, size: u32::try_from(size)?,
}; };
Ok((attrs, bf)) Ok((attrs, bf))

View file

@ -9,9 +9,6 @@ mod mail;
mod server; mod server;
mod time; mod time;
#[cfg(test)]
mod mail_parser_tests;
use std::path::PathBuf; use std::path::PathBuf;
use anyhow::{bail, Result}; use anyhow::{bail, Result};

View file

@ -1,10 +1,10 @@
use mail_parser_superboum::Message; // FAIL use mail_parser_superboum::Message; // FAIL
//use mail_parser_048::Message; // PASS //use mail_parser_048::Message; // PASS
//use mail_parser_05::Message; // PASS //use mail_parser_05::Message; // PASS
//use mail_parser_main::Message; // PASS //use mail_parser_main::Message; // PASS
//use mail_parser_db61a03::Message; // PASS //use mail_parser_db61a03::Message; // PASS
#[test] #[test]
fn test1() { fn test1() {
let input = br#"Content-Type: multipart/mixed; boundary="1234567890123456789012345678901234567890123456789012345678901234567890123456789012" let input = br#"Content-Type: multipart/mixed; boundary="1234567890123456789012345678901234567890123456789012345678901234567890123456789012"
@ -37,7 +37,6 @@ Content-Type: text/plain
dbg!(message); dbg!(message);
} }
#[test] #[test]
fn test2() { fn test2() {
let input = br#"Content-Type: message/rfc822 let input = br#"Content-Type: message/rfc822
@ -54,7 +53,7 @@ Content-Type: text/plain
#[test] #[test]
fn test3() { fn test3() {
let input = br#"Content-Type: multipart/mixed; boundary=":foo" let input = br#"Content-Type: multipart/mixed; boundary=":foo"
--:foo --:foo
--:foo --:foo