Move back to mainline mail-parser
- This removes the bug introduced in 0fe5fe071, but introduces some bugs where the body structure is not returned properly.
This commit is contained in:
parent
24d9f192e4
commit
3e3821682c
5 changed files with 84 additions and 67 deletions
3
Cargo.lock
generated
3
Cargo.lock
generated
|
@ -1366,7 +1366,8 @@ dependencies = [
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mail-parser"
|
name = "mail-parser"
|
||||||
version = "0.4.8"
|
version = "0.4.8"
|
||||||
source = "git+https://github.com/superboum/mail-parser?rev=db61a03#db61a0364c00d3cf115f20c6d195bfa4594c53ff"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c46a841ae5276aba5218ade7bb76896358f9f95a925c7b3deea6a0ec0fb8e2a7"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"encoding_rs",
|
"encoding_rs",
|
||||||
"serde",
|
"serde",
|
||||||
|
|
|
@ -21,7 +21,7 @@ itertools = "0.10"
|
||||||
lazy_static = "1.4"
|
lazy_static = "1.4"
|
||||||
ldap3 = { version = "0.10", default-features = false, features = ["tls"] }
|
ldap3 = { version = "0.10", default-features = false, features = ["tls"] }
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
mail-parser = { git = "https://github.com/superboum/mail-parser", rev = "db61a03" }
|
mail-parser = "0.4.8"
|
||||||
pretty_env_logger = "0.4"
|
pretty_env_logger = "0.4"
|
||||||
rusoto_core = "0.48.0"
|
rusoto_core = "0.48.0"
|
||||||
rusoto_credential = "0.48.0"
|
rusoto_credential = "0.48.0"
|
||||||
|
|
|
@ -675,7 +675,7 @@ fn build_imap_email_struct<'a>(
|
||||||
unreachable!("A multipart entry can not be found here.")
|
unreachable!("A multipart entry can not be found here.")
|
||||||
}
|
}
|
||||||
MessagePart::Text(bp) | MessagePart::Html(bp) => {
|
MessagePart::Text(bp) | MessagePart::Html(bp) => {
|
||||||
let (attrs, mut basic) = headers_to_basic_fields(bp)?;
|
let (attrs, mut basic) = headers_to_basic_fields(bp, bp.body.len())?;
|
||||||
|
|
||||||
// If the charset is not defined, set it to "us-ascii"
|
// If the charset is not defined, set it to "us-ascii"
|
||||||
if attrs.charset.is_none() {
|
if attrs.charset.is_none() {
|
||||||
|
@ -704,7 +704,10 @@ fn build_imap_email_struct<'a>(
|
||||||
// We do not count the number of lines but the number of line
|
// We do not count the number of lines but the number of line
|
||||||
// feeds to have the same behavior as Dovecot and Cyrus.
|
// feeds to have the same behavior as Dovecot and Cyrus.
|
||||||
// 2 lines = 1 line feed.
|
// 2 lines = 1 line feed.
|
||||||
bp.body_raw.as_ref().iter().filter(|&c| c == &b'\n').count(),
|
// @FIXME+BUG: if the body is base64-encoded, this returns the
|
||||||
|
// number of lines in the decoded body, however we should
|
||||||
|
// instead return the number of raw base64 lines
|
||||||
|
bp.body.as_ref().chars().filter(|&c| c == '\n').count(),
|
||||||
)?,
|
)?,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
@ -712,7 +715,7 @@ fn build_imap_email_struct<'a>(
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
MessagePart::Binary(bp) | MessagePart::InlineBinary(bp) => {
|
MessagePart::Binary(bp) | MessagePart::InlineBinary(bp) => {
|
||||||
let (_, basic) = headers_to_basic_fields(bp)?;
|
let (_, basic) = headers_to_basic_fields(bp, bp.body.len())?;
|
||||||
|
|
||||||
let ct = bp
|
let ct = bp
|
||||||
.get_content_type()
|
.get_content_type()
|
||||||
|
@ -742,63 +745,77 @@ fn build_imap_email_struct<'a>(
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
MessagePart::Message(bp) => {
|
MessagePart::Message(bp) => {
|
||||||
let (_, mut basic) = headers_to_basic_fields(bp)?;
|
|
||||||
|
|
||||||
// @NOTE in some cases mail-parser does not parse the MessageAttachment but
|
// @NOTE in some cases mail-parser does not parse the MessageAttachment but
|
||||||
// provide it as raw body. Using `as_ref()` masks this fact: if the message is
|
// provides it as raw body. By looking quickly at the code, it seems that the
|
||||||
// parsed, as_ref() will return None. But by looking quickly at the code, it
|
// attachment is not parsed when mail-parser encounters some encoding problems.
|
||||||
// seems that the attachment is not parsed when mail-parser encounters some
|
match &bp.body {
|
||||||
// encoding problems, so it might be better to trust mail-parser.
|
MessageAttachment::Parsed(inner) => {
|
||||||
let inner = bp
|
// @FIXME+BUG mail-parser does not handle cases where a MIME message contains
|
||||||
.get_body()
|
// a raw email and wrongly takes its delimiter. The size and number of
|
||||||
.as_ref()
|
// lines returned in that case are wrong. A patch to mail-parser is
|
||||||
.ok_or(anyhow!("Unable to parse inner message."))?;
|
// needed to fix this.
|
||||||
|
let (_, basic) = headers_to_basic_fields(bp, inner.raw_message.len())?;
|
||||||
|
|
||||||
// @FIXME mail-parser does not handle ways when a MIME message contains
|
// We do not count the number of lines but the number of line
|
||||||
// a raw email and wrongly take its delimiter. We thus test the headers to
|
// feeds to have the same behavior as Dovecot and Cyrus.
|
||||||
// learn if it is a RFC822 email (raw) or RFC5322 (MIME) message.
|
// 2 lines = 1 line feed.
|
||||||
// The correct way would be to patch mail-parser.
|
let nol = inner.raw_message.iter().filter(|&c| c == &b'\n').count();
|
||||||
let raw_msg = match part.unwrap_message().get_content_type() {
|
|
||||||
Some(ContentType {
|
Ok(BodyStructure::Single {
|
||||||
attributes: Some(_),
|
body: FetchBody {
|
||||||
..
|
basic,
|
||||||
}) => {
|
specific: SpecificFields::Message {
|
||||||
//println!("has a content type {:?}", bp);
|
envelope: message_envelope(inner),
|
||||||
&inner.raw_message[..]
|
body_structure: Box::new(build_imap_email_struct(
|
||||||
|
inner,
|
||||||
|
&inner.structure,
|
||||||
|
)?),
|
||||||
|
|
||||||
|
// @FIXME This solution is bad for 2 reasons:
|
||||||
|
// - RFC2045 says line endings are CRLF but we accept LF alone with
|
||||||
|
// this method. It could be a feature (be liberal in what you
|
||||||
|
// accept) but we must be sure that we don't break things.
|
||||||
|
// - It should be done during parsing, we are iterating twice on
|
||||||
|
// the same data, which is wasteful.
|
||||||
|
number_of_lines: u32::try_from(nol)?,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
extension: None,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
_ => {
|
MessageAttachment::Raw(raw_msg) => {
|
||||||
//println!("has no content type {:?}", bp);
|
let (_, basic) = headers_to_basic_fields(bp, raw_msg.len())?;
|
||||||
&inner.raw_message[..(inner.offset_last_part - inner.offset_header)]
|
|
||||||
|
let ct = bp
|
||||||
|
.get_content_type()
|
||||||
|
.ok_or(anyhow!("Content-Type is missing but required here."))?;
|
||||||
|
|
||||||
|
let type_ =
|
||||||
|
IString::try_from(ct.c_type.as_ref().to_string()).map_err(|_| {
|
||||||
|
anyhow!("Unable to build IString from given Content-Type type given")
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let subtype = IString::try_from(
|
||||||
|
ct.c_subtype
|
||||||
|
.as_ref()
|
||||||
|
.ok_or(anyhow!("Content-Type invalid, missing subtype"))?
|
||||||
|
.to_string(),
|
||||||
|
)
|
||||||
|
.map_err(|_| {
|
||||||
|
anyhow!(
|
||||||
|
"Unable to build IString from given Content-Type subtype given"
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok(BodyStructure::Single {
|
||||||
|
body: FetchBody {
|
||||||
|
basic,
|
||||||
|
specific: SpecificFields::Basic { type_, subtype },
|
||||||
|
},
|
||||||
|
extension: None,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
};
|
}
|
||||||
basic.size = u32::try_from(raw_msg.len())?;
|
|
||||||
|
|
||||||
// We do not count the number of lines but the number of line
|
|
||||||
// feeds to have the same behavior as Dovecot and Cyrus.
|
|
||||||
// 2 lines = 1 line feed.
|
|
||||||
let nol = raw_msg.iter().filter(|&c| c == &b'\n').count();
|
|
||||||
|
|
||||||
Ok(BodyStructure::Single {
|
|
||||||
body: FetchBody {
|
|
||||||
basic,
|
|
||||||
specific: SpecificFields::Message {
|
|
||||||
envelope: message_envelope(inner),
|
|
||||||
body_structure: Box::new(build_imap_email_struct(
|
|
||||||
inner,
|
|
||||||
&inner.structure,
|
|
||||||
)?),
|
|
||||||
|
|
||||||
// @FIXME This solution is bad for 2 reasons:
|
|
||||||
// - RFC2045 says line endings are CRLF but we accept LF alone with
|
|
||||||
// this method. It could be a feature (be liberal in what you
|
|
||||||
// accept) but we must be sure that we don't break things.
|
|
||||||
// - It should be done during parsing, we are iterating twice on
|
|
||||||
// the same data which results in some wastes.
|
|
||||||
number_of_lines: u32::try_from(nol)?,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
extension: None,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -934,7 +951,10 @@ fn attrs_to_params<'a>(bp: &impl MimeHeaders<'a>) -> (SpecialAttrs, Vec<(IString
|
||||||
|
|
||||||
/// Takes mail-parser headers and builds imap-codec BasicFields
|
/// Takes mail-parser headers and builds imap-codec BasicFields
|
||||||
/// Returns some special information too
|
/// Returns some special information too
|
||||||
fn headers_to_basic_fields<'a, T>(bp: &'a Part<T>) -> Result<(SpecialAttrs<'a>, BasicFields)> {
|
fn headers_to_basic_fields<'a, T>(
|
||||||
|
bp: &'a Part<T>,
|
||||||
|
size: usize,
|
||||||
|
) -> Result<(SpecialAttrs<'a>, BasicFields)> {
|
||||||
let (attrs, parameter_list) = attrs_to_params(bp);
|
let (attrs, parameter_list) = attrs_to_params(bp);
|
||||||
|
|
||||||
let bf = BasicFields {
|
let bf = BasicFields {
|
||||||
|
@ -963,7 +983,7 @@ fn headers_to_basic_fields<'a, T>(bp: &'a Part<T>) -> Result<(SpecialAttrs<'a>,
|
||||||
.flatten()
|
.flatten()
|
||||||
.unwrap_or(unchecked_istring("7bit")),
|
.unwrap_or(unchecked_istring("7bit")),
|
||||||
|
|
||||||
size: u32::try_from(bp.body_raw.len())?,
|
size: u32::try_from(size)?,
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok((attrs, bf))
|
Ok((attrs, bf))
|
||||||
|
|
|
@ -9,9 +9,6 @@ mod mail;
|
||||||
mod server;
|
mod server;
|
||||||
mod time;
|
mod time;
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod mail_parser_tests;
|
|
||||||
|
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
|
||||||
use anyhow::{bail, Result};
|
use anyhow::{bail, Result};
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
use mail_parser_superboum::Message; // FAIL
|
use mail_parser_superboum::Message; // FAIL
|
||||||
|
|
||||||
//use mail_parser_048::Message; // PASS
|
//use mail_parser_048::Message; // PASS
|
||||||
//use mail_parser_05::Message; // PASS
|
//use mail_parser_05::Message; // PASS
|
||||||
//use mail_parser_main::Message; // PASS
|
//use mail_parser_main::Message; // PASS
|
||||||
//use mail_parser_db61a03::Message; // PASS
|
//use mail_parser_db61a03::Message; // PASS
|
||||||
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test1() {
|
fn test1() {
|
||||||
let input = br#"Content-Type: multipart/mixed; boundary="1234567890123456789012345678901234567890123456789012345678901234567890123456789012"
|
let input = br#"Content-Type: multipart/mixed; boundary="1234567890123456789012345678901234567890123456789012345678901234567890123456789012"
|
||||||
|
@ -37,7 +37,6 @@ Content-Type: text/plain
|
||||||
dbg!(message);
|
dbg!(message);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test2() {
|
fn test2() {
|
||||||
let input = br#"Content-Type: message/rfc822
|
let input = br#"Content-Type: message/rfc822
|
||||||
|
@ -54,7 +53,7 @@ Content-Type: text/plain
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test3() {
|
fn test3() {
|
||||||
let input = br#"Content-Type: multipart/mixed; boundary=":foo"
|
let input = br#"Content-Type: multipart/mixed; boundary=":foo"
|
||||||
|
|
||||||
--:foo
|
--:foo
|
||||||
--:foo
|
--:foo
|
||||||
|
|
Loading…
Reference in a new issue