Add a workaround for mail-parser bug

This commit is contained in:
Quentin 2022-07-08 17:39:23 +02:00
parent 8bfe61d767
commit e0322a7761
Signed by: quentin
GPG key ID: E9602264D639FF68

View file

@ -581,7 +581,7 @@ fn build_imap_email_struct<'a>(
MessagePart::Binary(bp) | MessagePart::InlineBinary(bp) => { MessagePart::Binary(bp) | MessagePart::InlineBinary(bp) => {
let (_, mut basic) = headers_to_basic_fields(bp)?; let (_, mut basic) = headers_to_basic_fields(bp)?;
let ct = msg let ct = bp
.get_content_type() .get_content_type()
.ok_or(anyhow!("Content-Type is missing but required here."))?; .ok_or(anyhow!("Content-Type is missing but required here."))?;
@ -621,14 +621,29 @@ fn build_imap_email_struct<'a>(
.as_ref() .as_ref()
.ok_or(anyhow!("Unable to parse inner message."))?; .ok_or(anyhow!("Unable to parse inner message."))?;
// @NOTE mail-parser does not provide enough information to compute the end of the // @FIXME mail-parser does not handle the case where a MIME message contains
// message. The offset_end value wrongly includes the multipart delimiter, // a raw email, and wrongly takes its delimiter. We thus test the headers to
// which leads to incorrect line count and body size. // learn if it is an RFC822 email (raw) or an RFC5322 (MIME) message.
// I have patched the lib to add a new offset type named last_part that takes // The correct way would be to patch mail-parser.
// into account this fact. After that, we need to do some maths... let raw_msg = match part.unwrap_message().get_content_type() {
let len = inner.offset_last_part - inner.offset_header; Some(ContentType {
let raw_msg = &inner.raw_message[..len]; attributes: Some(_),
basic.size = u32::try_from(len)?; ..
}) => {
//println!("has a content type {:?}", bp);
&inner.raw_message[..]
}
_ => {
//println!("has no content type {:?}", bp);
&inner.raw_message[..(inner.offset_last_part - inner.offset_header)]
}
};
basic.size = u32::try_from(raw_msg.len())?;
// We do not count the number of lines but the number of line
// feeds to have the same behavior as Dovecot and Cyrus.
// 2 lines = 1 line feed.
let nol = raw_msg.iter().filter(|&c| c == &b'\n').count();
Ok(BodyStructure::Single { Ok(BodyStructure::Single {
body: FetchBody { body: FetchBody {
@ -646,9 +661,7 @@ fn build_imap_email_struct<'a>(
// accept) but we must be sure that we don't break things. // accept) but we must be sure that we don't break things.
// - It should be done during parsing; we are iterating twice over // - It should be done during parsing; we are iterating twice over
// the same data, which wastes some work. // the same data, which wastes some work.
number_of_lines: u32::try_from( number_of_lines: u32::try_from(nol)?,
Cursor::new(raw_msg.as_ref()).lines().count(),
)?,
}, },
}, },
extension: None, extension: None,
@ -840,15 +853,16 @@ mod tests {
"tests/emails/dxflrs/0002_mime", "tests/emails/dxflrs/0002_mime",
"tests/emails/dxflrs/0003_mime-in-mime", "tests/emails/dxflrs/0003_mime-in-mime",
"tests/emails/dxflrs/0004_msg-in-msg", "tests/emails/dxflrs/0004_msg-in-msg",
//"tests/emails/dxflrs/0005_mail-parser-readme", // no consensus on how to parse "tests/emails/dxflrs/0005_mail-parser-readme",
//"tests/emails/rfc/000", // broken /*"tests/emails/rfc/000", // broken
//"tests/emails/rfc/001", // broken "tests/emails/rfc/001", // broken
//"tests/emails/rfc/002", // broken: dovecot adds \r when it is missing and counts it as "tests/emails/rfc/002", // broken: dovecot adds \r when it is missing and counts it as
// a character. Difference on how lines are counted too. // a character. Difference on how lines are counted too.
//"tests/emails/rfc/003", // broken for the same reason "tests/emails/rfc/003", // broken for the same reason
//"tests/emails/thirdparty/000", "tests/emails/thirdparty/000",
//"tests/emails/thirdparty/001", "tests/emails/thirdparty/001",
//"tests/emails/thirdparty/002", "tests/emails/thirdparty/002",
*/
]; ];
for pref in prefixes.iter() { for pref in prefixes.iter() {