aerogramme/src/imap/mailbox_view.rs

1180 lines
44 KiB
Rust
Raw Normal View History

2022-07-12 13:31:29 +00:00
use std::borrow::Cow;
2022-06-29 15:58:31 +00:00
use std::num::NonZeroU32;
use std::sync::Arc;
2022-06-29 17:24:21 +00:00
use anyhow::{anyhow, bail, Error, Result};
2022-06-29 14:05:34 +00:00
use boitalettres::proto::res::body::Data as Body;
use chrono::{Offset, TimeZone, Utc};
2022-06-29 17:24:21 +00:00
use futures::stream::{FuturesOrdered, StreamExt};
2022-06-29 18:00:38 +00:00
use imap_codec::types::address::Address;
use imap_codec::types::body::{BasicFields, Body as FetchBody, BodyStructure, SpecificFields};
use imap_codec::types::core::{AString, Atom, IString, NString};
use imap_codec::types::datetime::MyDateTime;
2022-06-29 18:00:38 +00:00
use imap_codec::types::envelope::Envelope;
2022-07-13 15:05:07 +00:00
use imap_codec::types::fetch_attributes::{
2022-07-15 15:55:04 +00:00
FetchAttribute, MacroOrFetchAttributes, Section as FetchSection,
2022-07-13 15:05:07 +00:00
};
2022-07-12 15:32:57 +00:00
use imap_codec::types::flag::{Flag, StoreResponse, StoreType};
2022-06-29 15:58:31 +00:00
use imap_codec::types::response::{Code, Data, MessageAttribute, Status};
2022-06-29 17:24:21 +00:00
use imap_codec::types::sequence::{self, SequenceSet};
use mail_parser::*;
use crate::mail::mailbox::Mailbox;
2022-07-12 13:59:13 +00:00
use crate::mail::uidindex::{ImapUid, ImapUidvalidity, UidIndex};
2022-07-12 15:32:57 +00:00
use crate::mail::unique_ident::UniqueIdent;
/// System flags that are always advertised for a mailbox: `flags_status`
/// appends each of these to the FLAGS / PERMANENTFLAGS lists when it is not
/// already present in the mailbox's flag index.
const DEFAULT_FLAGS: [Flag; 5] = [
Flag::Seen,
Flag::Answered,
Flag::Flagged,
Flag::Deleted,
Flag::Draft,
];
/// A MailboxView is responsible for giving the client the information
/// it needs about a mailbox, such as an initial summary of the mailbox's
/// content and continuous updates indicating when the content
/// of the mailbox has been changed.
/// To do this, it keeps a variable `known_state` that corresponds to
/// what the client knows, and produces IMAP messages to be sent to the
/// client that go along updates to `known_state`.
pub struct MailboxView {
2022-07-12 14:35:11 +00:00
pub(crate) mailbox: Arc<Mailbox>,
known_state: UidIndex,
}
impl MailboxView {
/// Creates a new IMAP view into a mailbox.
///
/// Generates the necessary IMAP messages so that the client
/// has a satisfactory summary of the current mailbox's state.
/// These are the messages that are sent in response to a SELECT command:
/// EXISTS, RECENT, FLAGS / PERMANENTFLAGS, UIDVALIDITY and UIDNEXT, in
/// that order.
///
/// Returns the view together with the responses, or the first error raised
/// while building one of the responses.
pub async fn new(mailbox: Arc<Mailbox>) -> Result<(Self, Vec<Body>)> {
    let known_state = mailbox.current_uid_index().await;
    let new_view = Self {
        mailbox,
        known_state,
    };

    let mut data = Vec::<Body>::new();
    data.push(new_view.exists_status()?);
    data.push(new_view.recent_status()?);
    data.extend(new_view.flags_status()?);
    data.push(new_view.uidvalidity_status()?);
    data.push(new_view.uidnext_status()?);

    Ok((new_view, data))
}
/// Produces a set of IMAP responses describing the change between
/// what the client knows and what is actually in the mailbox.
/// This does NOT trigger a sync, it bases itself on what is currently
/// loaded in RAM by Bayou.
pub async fn update(&mut self) -> Result<Vec<Body>> {
2022-06-29 15:58:31 +00:00
let new_view = MailboxView {
mailbox: self.mailbox.clone(),
known_state: self.mailbox.current_uid_index().await,
};
let mut data = Vec::<Body>::new();
2022-07-13 09:39:13 +00:00
// Calculate diff between two mailbox states
// See example in IMAP RFC in section on NOOP command:
// we want to produce something like this:
// C: a047 NOOP
// S: * 22 EXPUNGE
// S: * 23 EXISTS
// S: * 14 FETCH (UID 1305 FLAGS (\Seen \Deleted))
// S: a047 OK Noop completed
// In other words:
// - notify client of expunged mails
// - if new mails arrived, notify client of number of existing mails
// - if flags changed for existing mails, tell client
// (for this last step: if uidvalidity changed, do nothing,
// just notify of new uidvalidity and they will resync)
// - notify client of expunged mails
let mut n_expunge = 0;
for (i, (_uid, uuid)) in self.known_state.idx_by_uid.iter().enumerate() {
if !new_view.known_state.table.contains_key(uuid) {
data.push(Body::Data(Data::Expunge(
NonZeroU32::try_from((i + 1 - n_expunge) as u32).unwrap(),
)));
n_expunge += 1;
}
}
// - if new mails arrived, notify client of number of existing mails
if new_view.known_state.table.len() != self.known_state.table.len() - n_expunge
|| new_view.known_state.uidvalidity != self.known_state.uidvalidity
{
data.push(new_view.exists_status()?);
}
2022-06-29 15:58:31 +00:00
if new_view.known_state.uidvalidity != self.known_state.uidvalidity {
// TODO: do we want to push less/more info than this?
2022-07-12 13:59:13 +00:00
data.push(new_view.uidvalidity_status()?);
data.push(new_view.uidnext_status()?);
2022-06-29 15:58:31 +00:00
} else {
// - if flags changed for existing mails, tell client
2022-06-29 17:27:32 +00:00
for (i, (_uid, uuid)) in new_view.known_state.idx_by_uid.iter().enumerate() {
2022-06-29 15:58:31 +00:00
let old_mail = self.known_state.table.get(uuid);
let new_mail = new_view.known_state.table.get(uuid);
if old_mail.is_some() && old_mail != new_mail {
if let Some((uid, flags)) = new_mail {
data.push(Body::Data(Data::Fetch {
seq_or_uid: NonZeroU32::try_from((i + 1) as u32).unwrap(),
attributes: vec![
MessageAttribute::Uid((*uid).try_into().unwrap()),
MessageAttribute::Flags(
flags.iter().filter_map(|f| string_to_flag(f)).collect(),
),
],
}));
}
}
}
}
*self = new_view;
Ok(data)
}
2022-07-12 15:32:57 +00:00
pub async fn store(
&mut self,
sequence_set: &SequenceSet,
kind: &StoreType,
_response: &StoreResponse,
flags: &[Flag],
is_uid_store: &bool,
2022-07-12 15:32:57 +00:00
) -> Result<Vec<Body>> {
self.mailbox.opportunistic_sync().await?;
2022-07-12 15:32:57 +00:00
let flags = flags.iter().map(|x| x.to_string()).collect::<Vec<_>>();
let mails = self.get_mail_ids(sequence_set, *is_uid_store)?;
2022-07-13 09:19:08 +00:00
for (_i, _uid, uuid) in mails.iter() {
2022-07-12 15:32:57 +00:00
match kind {
StoreType::Add => {
self.mailbox.add_flags(*uuid, &flags[..]).await?;
}
StoreType::Remove => {
self.mailbox.del_flags(*uuid, &flags[..]).await?;
}
StoreType::Replace => {
2022-07-13 09:00:35 +00:00
self.mailbox.set_flags(*uuid, &flags[..]).await?;
2022-07-12 15:32:57 +00:00
}
}
}
2022-07-21 10:44:58 +00:00
// @TODO: handle _response
2022-07-12 15:32:57 +00:00
self.update().await
}
2022-07-13 09:00:35 +00:00
pub async fn expunge(&mut self) -> Result<Vec<Body>> {
self.mailbox.opportunistic_sync().await?;
2022-07-13 09:19:08 +00:00
let deleted_flag = Flag::Deleted.to_string();
let state = self.mailbox.current_uid_index().await;
let msgs = state
2022-07-13 09:19:08 +00:00
.table
.iter()
.filter(|(_uuid, (_uid, flags))| flags.iter().any(|x| *x == deleted_flag))
.map(|(uuid, _)| *uuid);
for msg in msgs {
self.mailbox.delete(msg).await?;
}
self.update().await
2022-07-13 09:00:35 +00:00
}
2022-07-21 10:44:58 +00:00
/// Copies the messages selected by `sequence_set` into the mailbox `to`.
///
/// `sequence_set` is interpreted as UIDs when `is_uid_copy` is true and as
/// sequence numbers otherwise. Returns the UIDVALIDITY of the destination
/// together with one `(source uid, destination uid)` pair per copied
/// message, in selection order.
pub async fn copy(
    &self,
    sequence_set: &SequenceSet,
    to: Arc<Mailbox>,
    is_uid_copy: &bool,
) -> Result<(ImapUidvalidity, Vec<(ImapUid, ImapUid)>)> {
    let selected = self.get_mail_ids(sequence_set, *is_uid_copy)?;

    // First perform all the copies, remembering the identifier each copy
    // received in the destination mailbox.
    let mut copied = vec![];
    for (_seq, _uid, uuid) in &selected {
        let new_uuid = to.copy_from(&self.mailbox, *uuid).await?;
        copied.push(new_uuid);
    }

    // Then read the destination index once to learn the assigned UIDs.
    let dest_state = to.current_uid_index().await;
    let mut uid_pairs = vec![];
    for ((_seq, src_uid, _uuid), new_uuid) in selected.iter().zip(&copied) {
        let dest_uid = match dest_state.table.get(new_uuid) {
            Some(entry) => entry.0,
            None => return Err(anyhow!("copied mail not in destination mailbox")),
        };
        uid_pairs.push((*src_uid, dest_uid));
    }

    Ok((dest_state.uidvalidity, uid_pairs))
}
2022-06-29 17:24:21 +00:00
/// Looks up state changes in the mailbox and produces a set of IMAP
/// responses describing the new state.
pub async fn fetch(
&self,
sequence_set: &SequenceSet,
attributes: &MacroOrFetchAttributes,
is_uid_fetch: &bool,
2022-06-29 17:24:21 +00:00
) -> Result<Vec<Body>> {
let mails = self.get_mail_ids(sequence_set, *is_uid_fetch)?;
2022-06-29 17:24:21 +00:00
let mails_uuid = mails
.iter()
2022-07-12 15:32:57 +00:00
.map(|(_i, _uid, uuid)| *uuid)
2022-06-29 17:24:21 +00:00
.collect::<Vec<_>>();
let mails_meta = self.mailbox.fetch_meta(&mails_uuid).await?;
let mut fetch_attrs = match attributes {
2022-06-29 17:24:21 +00:00
MacroOrFetchAttributes::Macro(m) => m.expand(),
MacroOrFetchAttributes::FetchAttributes(a) => a.clone(),
};
if *is_uid_fetch && !fetch_attrs.contains(&FetchAttribute::Uid) {
fetch_attrs.push(FetchAttribute::Uid);
}
2022-06-29 17:24:21 +00:00
let need_body = fetch_attrs.iter().any(|x| {
matches!(
x,
FetchAttribute::Body
| FetchAttribute::BodyExt { .. }
| FetchAttribute::Rfc822
| FetchAttribute::Rfc822Text
| FetchAttribute::BodyStructure
)
});
let mails = if need_body {
let mut iter = mails
.into_iter()
.zip(mails_meta.into_iter())
2022-07-12 15:32:57 +00:00
.map(|((i, uid, uuid), meta)| async move {
2022-06-29 17:24:21 +00:00
let body = self.mailbox.fetch_full(uuid, &meta.message_key).await?;
Ok::<_, anyhow::Error>((i, uid, uuid, meta, Some(body)))
})
.collect::<FuturesOrdered<_>>();
let mut mails = vec![];
while let Some(m) = iter.next().await {
mails.push(m?);
}
mails
} else {
mails
.into_iter()
.zip(mails_meta.into_iter())
2022-07-12 15:32:57 +00:00
.map(|((i, uid, uuid), meta)| (i, uid, uuid, meta, None))
2022-06-29 17:24:21 +00:00
.collect::<Vec<_>>()
};
let mut ret = vec![];
2022-06-29 18:00:38 +00:00
for (i, uid, uuid, meta, body) in mails {
2022-07-13 09:32:47 +00:00
let mut attributes = vec![];
2022-06-29 17:24:21 +00:00
2022-06-29 17:27:32 +00:00
let (_uid2, flags) = self
2022-06-29 17:24:21 +00:00
.known_state
.table
.get(&uuid)
.ok_or_else(|| anyhow!("Mail not in uidindex table: {}", uuid))?;
2022-06-29 18:00:38 +00:00
let parsed = match &body {
Some(m) => {
mail_parser::Message::parse(m).ok_or_else(|| anyhow!("Invalid mail body"))?
}
None => mail_parser::Message::parse(&meta.headers)
.ok_or_else(|| anyhow!("Invalid mail headers"))?,
};
2022-06-29 17:24:21 +00:00
for attr in fetch_attrs.iter() {
match attr {
2022-07-13 09:32:47 +00:00
FetchAttribute::Uid => attributes.push(MessageAttribute::Uid(uid)),
2022-06-29 17:24:21 +00:00
FetchAttribute::Flags => {
attributes.push(MessageAttribute::Flags(
flags.iter().filter_map(|f| string_to_flag(f)).collect(),
));
}
FetchAttribute::Rfc822Size => {
attributes.push(MessageAttribute::Rfc822Size(meta.rfc822_size as u32))
}
2022-07-15 14:15:48 +00:00
FetchAttribute::Rfc822Header => {
attributes.push(MessageAttribute::Rfc822Header(NString(
meta.headers.to_vec().try_into().ok().map(IString::Literal),
)))
}
2022-06-30 11:36:21 +00:00
FetchAttribute::Rfc822Text => {
2022-07-20 13:14:34 +00:00
let rp = parsed.get_root_part();
2022-06-30 11:36:21 +00:00
let r = parsed
2022-07-20 13:14:34 +00:00
.raw_message
.get(rp.offset_body..rp.offset_end)
.ok_or(Error::msg(
"Unable to extract email body, cursors out of bound. This is a bug.",
))?;
2022-06-30 11:36:21 +00:00
2022-07-15 14:15:48 +00:00
attributes.push(MessageAttribute::Rfc822Text(NString(
r.try_into().ok().map(IString::Literal),
)));
2022-06-29 18:10:42 +00:00
}
2022-07-15 14:15:48 +00:00
FetchAttribute::Rfc822 => attributes.push(MessageAttribute::Rfc822(NString(
body.as_ref()
.unwrap()
.clone()
.try_into()
.ok()
.map(IString::Literal),
))),
2022-06-29 18:00:38 +00:00
FetchAttribute::Envelope => {
attributes.push(MessageAttribute::Envelope(message_envelope(&parsed)))
}
2022-07-04 16:14:19 +00:00
FetchAttribute::Body => attributes.push(MessageAttribute::Body(
2022-07-20 13:14:34 +00:00
build_imap_email_struct(&parsed, parsed.get_root_part())?,
2022-07-04 16:14:19 +00:00
)),
FetchAttribute::BodyStructure => attributes.push(MessageAttribute::Body(
2022-07-20 13:14:34 +00:00
build_imap_email_struct(&parsed, parsed.get_root_part())?,
2022-07-04 16:14:19 +00:00
)),
2022-06-30 11:36:21 +00:00
FetchAttribute::BodyExt {
2022-07-13 15:05:07 +00:00
section,
partial,
peek,
2022-06-30 11:36:21 +00:00
} => {
2022-07-13 15:05:07 +00:00
// @TODO Add missing section specifiers
2022-07-15 14:15:48 +00:00
match get_message_section(&parsed, section) {
Ok(text) => {
let seen_flag = Flag::Seen.to_string();
if !peek && !flags.iter().any(|x| *x == seen_flag) {
// Add \Seen flag
self.mailbox.add_flags(uuid, &[seen_flag]).await?;
}
let (text, origin) = match partial {
Some((begin, len)) => {
if *begin as usize > text.len() {
(&[][..], Some(*begin))
} else if (*begin + len.get()) as usize >= text.len() {
(&text[*begin as usize..], Some(*begin))
} else {
(
&text[*begin as usize
..(*begin + len.get()) as usize],
Some(*begin),
)
}
}
None => (&text[..], None),
};
let data =
NString(text.to_vec().try_into().ok().map(IString::Literal));
attributes.push(MessageAttribute::BodyExt {
section: section.clone(),
origin,
data,
})
2022-07-13 15:05:07 +00:00
}
2022-07-15 14:15:48 +00:00
Err(e) => {
tracing::error!(
"Could not get section {:?} of message {}: {}",
section,
uuid,
e
);
2022-07-13 15:05:07 +00:00
}
}
2022-06-30 11:36:21 +00:00
}
FetchAttribute::InternalDate => {
attributes.push(MessageAttribute::InternalDate(MyDateTime(
Utc.fix()
.timestamp(i64::try_from(meta.internaldate / 1000)?, 0),
)));
2022-06-30 11:36:21 +00:00
}
2022-06-29 17:24:21 +00:00
}
}
ret.push(Body::Data(Data::Fetch {
seq_or_uid: i,
attributes,
}));
}
Ok(ret)
}
// ----
2022-07-12 15:32:57 +00:00
/// Resolves a `SequenceSet` against `known_state`.
///
/// When `by_uid` is true the set is interpreted as UIDs: UIDs that do not
/// exist are silently skipped and an empty mailbox yields an empty result.
/// Otherwise the set is interpreted as message sequence numbers: an empty
/// mailbox or a number past the last message is an error.
///
/// Returns `(sequence number, UID, unique identifier)` for each selected
/// mail.
fn get_mail_ids(
    &self,
    sequence_set: &SequenceSet,
    by_uid: bool,
) -> Result<Vec<(NonZeroU32, ImapUid, UniqueIdent)>> {
    let mail_vec = self
        .known_state
        .idx_by_uid
        .iter()
        .map(|(uid, uuid)| (*uid, *uuid))
        .collect::<Vec<_>>();

    let mut mails = vec![];

    if by_uid {
        // `*` in a UID set stands for the UID of the last mail.
        let largest = match mail_vec.last() {
            Some(last) => last.0,
            None => return Ok(vec![]),
        };
        let iter_strat = sequence::Strategy::Naive { largest };

        // NOTE(review): the cursor `i` only moves forward, so this assumes
        // the set yields UIDs in ascending order — confirm against
        // `SequenceSet::iter`.
        let mut i = 0;
        for uid in sequence_set.iter(iter_strat) {
            while mail_vec.get(i).map(|mail| mail.0 < uid).unwrap_or(false) {
                i += 1;
            }
            match mail_vec.get(i) {
                Some(mail) if mail.0 == uid => {
                    let seq = NonZeroU32::try_from(u32::try_from(i + 1)?)?;
                    mails.push((seq, mail.0, mail.1));
                }
                // The requested UID falls in a gap: nothing to report.
                Some(_) => {}
                // Past the last mail: no further UID can match.
                None => break,
            }
        }
    } else {
        if mail_vec.is_empty() {
            bail!("No such message (mailbox is empty)");
        }

        // `*` in a sequence set stands for the number of mails.
        let iter_strat = sequence::Strategy::Naive {
            largest: NonZeroU32::try_from(u32::try_from(mail_vec.len())?)?,
        };

        for i in sequence_set.iter(iter_strat) {
            match mail_vec.get(i.get() as usize - 1) {
                Some(mail) => mails.push((i, mail.0, mail.1)),
                None => bail!("No such mail: {}", i),
            }
        }
    }

    Ok(mails)
}
// ----
/// Produce an OK [UIDVALIDITY _] message corresponding to `known_state`
2022-07-12 13:59:13 +00:00
fn uidvalidity_status(&self) -> Result<Body> {
let uid_validity = Status::ok(
None,
2022-07-12 13:59:13 +00:00
Some(Code::UidValidity(self.uidvalidity())),
"UIDs valid",
)
.map_err(Error::msg)?;
Ok(Body::Status(uid_validity))
}
2022-07-12 13:59:13 +00:00
/// Returns the UIDVALIDITY value recorded in `known_state`.
pub(crate) fn uidvalidity(&self) -> ImapUidvalidity {
self.known_state.uidvalidity
}
/// Produce an OK [UIDNEXT _] message corresponding to `known_state`
2022-07-12 13:59:13 +00:00
fn uidnext_status(&self) -> Result<Body> {
let next_uid = Status::ok(
None,
2022-07-12 13:59:13 +00:00
Some(Code::UidNext(self.uidnext())),
"Predict next UID",
)
.map_err(Error::msg)?;
Ok(Body::Status(next_uid))
}
2022-07-12 13:59:13 +00:00
/// Returns the UIDNEXT value recorded in `known_state`.
pub(crate) fn uidnext(&self) -> ImapUid {
self.known_state.uidnext
}
/// Produce an EXISTS message corresponding to the number of mails
/// in `known_state`
2022-07-12 13:59:13 +00:00
fn exists_status(&self) -> Result<Body> {
Ok(Body::Data(Data::Exists(self.exists()?)))
}
/// Number of mails in `known_state`, as a 32-bit count.
///
/// Fails if the count does not fit in a `u32`.
pub(crate) fn exists(&self) -> Result<u32> {
    let mail_count = self.known_state.idx_by_uid.len();
    let mail_count = u32::try_from(mail_count)?;
    Ok(mail_count)
}
/// Produce a RECENT message corresponding to the number of
/// recent mails in `known_state`
2022-07-12 13:59:13 +00:00
fn recent_status(&self) -> Result<Body> {
Ok(Body::Data(Data::Recent(self.recent()?)))
}
/// Number of mails indexed under the `\Recent` flag in `known_state`
/// (0 when the flag is not in the index).
///
/// Fails if the count does not fit in a `u32`.
pub(crate) fn recent(&self) -> Result<u32> {
    let recent = self
        .known_state
        .idx_by_flag
        .get(&"\\Recent".to_string())
        .map(|os| os.len())
        .unwrap_or(0);
    Ok(u32::try_from(recent)?)
}
/// Produce a FLAGS and a PERMANENTFLAGS message that indicates
/// the flags that are in `known_state` + default flags
2022-07-12 13:59:13 +00:00
fn flags_status(&self) -> Result<Vec<Body>> {
let mut flags: Vec<Flag> = self
.known_state
.idx_by_flag
.flags()
2022-06-29 15:58:31 +00:00
.map(|f| string_to_flag(f))
.flatten()
.collect();
for f in DEFAULT_FLAGS.iter() {
if !flags.contains(f) {
flags.push(f.clone());
}
}
let mut ret = vec![Body::Data(Data::Flags(flags.clone()))];
flags.push(Flag::Permanent);
let permanent_flags =
Status::ok(None, Some(Code::PermanentFlags(flags)), "Flags permitted")
.map_err(Error::msg)?;
ret.push(Body::Status(permanent_flags));
Ok(ret)
}
/// Number of mails in `known_state` that are not indexed under the
/// `\Seen` flag.
pub(crate) fn unseen_count(&self) -> usize {
    let seen_key = Flag::Seen.to_string();
    let seen = match self.known_state.idx_by_flag.get(&seen_key) {
        Some(mails) => mails.len(),
        None => 0,
    };
    self.known_state.table.len() - seen
}
}
2022-06-29 15:58:31 +00:00
fn string_to_flag(f: &str) -> Option<Flag> {
match f.chars().next() {
2022-07-12 15:32:57 +00:00
Some('\\') => match f {
"\\Seen" => Some(Flag::Seen),
"\\Answered" => Some(Flag::Answered),
"\\Flagged" => Some(Flag::Flagged),
"\\Deleted" => Some(Flag::Deleted),
"\\Draft" => Some(Flag::Draft),
"\\Recent" => Some(Flag::Recent),
_ => match Atom::try_from(f.strip_prefix('\\').unwrap().clone()) {
Err(_) => {
tracing::error!(flag=%f, "Unable to encode flag as IMAP atom");
None
}
Ok(a) => Some(Flag::Extension(a)),
},
},
2022-06-29 15:58:31 +00:00
Some(_) => match Atom::try_from(f.clone()) {
Err(_) => {
tracing::error!(flag=%f, "Unable to encode flag as IMAP atom");
None
}
Ok(a) => Some(Flag::Keyword(a)),
},
None => None,
}
}
2022-06-29 18:00:38 +00:00
/// Envelope rules are defined in RFC 3501, section 7.4.2
/// https://datatracker.ietf.org/doc/html/rfc3501#section-7.4.2
///
/// Some important notes:
///
/// If the Sender or Reply-To lines are absent in the [RFC-2822]
/// header, or are present but empty, the server sets the
/// corresponding member of the envelope to be the same value as
/// the from member (the client is not expected to know to do
/// this). Note: [RFC-2822] requires that all messages have a valid
/// From header. Therefore, the from, sender, and reply-to
/// members in the envelope can not be NIL.
///
/// If the Date, Subject, In-Reply-To, and Message-ID header lines
/// are absent in the [RFC-2822] header, the corresponding member
/// of the envelope is NIL; if these header lines are present but
/// empty the corresponding member of the envelope is the empty
/// string.
//@FIXME return an error if the envelope is invalid instead of panicking
//@FIXME some fields must be defaulted if there are not set.
2022-06-29 18:00:38 +00:00
fn message_envelope(msg: &mail_parser::Message<'_>) -> Envelope {
let from = convert_addresses(msg.get_from()).unwrap_or(vec![]);
2022-06-29 18:00:38 +00:00
Envelope {
date: NString(
msg.get_date()
.map(|d| IString::try_from(d.to_iso8601()).unwrap()),
),
subject: NString(
msg.get_subject()
.map(|d| IString::try_from(d.to_string()).unwrap()),
),
from: from.clone(),
sender: convert_addresses(msg.get_sender()).unwrap_or(from.clone()),
reply_to: convert_addresses(msg.get_reply_to()).unwrap_or(from.clone()),
to: convert_addresses(msg.get_to()).unwrap_or(vec![]),
cc: convert_addresses(msg.get_cc()).unwrap_or(vec![]),
bcc: convert_addresses(msg.get_bcc()).unwrap_or(vec![]),
in_reply_to: NString(None), // @TODO
2022-06-29 18:00:38 +00:00
message_id: NString(
msg.get_message_id()
.map(|d| IString::try_from(d.to_string()).unwrap()),
),
}
}
fn convert_addresses(a: &mail_parser::HeaderValue<'_>) -> Option<Vec<Address>> {
2022-06-29 18:00:38 +00:00
match a {
mail_parser::HeaderValue::Address(a) => Some(vec![convert_address(a)]),
mail_parser::HeaderValue::AddressList(l) => {
Some(l.iter().map(|a| convert_address(a)).collect())
2022-06-29 18:00:38 +00:00
}
mail_parser::HeaderValue::Empty => None,
mail_parser::HeaderValue::Collection(c) => Some(
c.iter()
.map(|l| convert_addresses(l).unwrap_or(vec![]))
.flatten()
.collect(),
),
_ => {
tracing::warn!("Invalid address header");
None
2022-06-29 18:00:38 +00:00
}
}
}
//@FIXME Remove unwrap
2022-06-29 18:00:38 +00:00
fn convert_address(a: &mail_parser::Addr<'_>) -> Address {
let (user, host) = match &a.address {
None => (None, None),
Some(x) => match x.split_once('@') {
Some((u, h)) => (Some(u.to_string()), Some(h.to_string())),
None => (Some(x.to_string()), None),
},
};
Address::new(
NString(
a.name
.as_ref()
.map(|x| IString::try_from(x.to_string()).unwrap()),
),
// SMTP at-domain-list (source route) seems obsolete since at least 1991
// https://www.mhonarc.org/archive/html/ietf-822/1991-06/msg00060.html
2022-06-29 18:00:38 +00:00
NString(None),
NString(user.map(|x| IString::try_from(x).unwrap())),
NString(host.map(|x| IString::try_from(x).unwrap())),
)
}
2022-07-04 16:14:19 +00:00
/*
--CAPTURE--
b fetch 29878:29879 (BODY)
* 29878 FETCH (BODY (("text" "plain" ("charset" "utf-8") NIL NIL "quoted-printable" 3264 82)("text" "html" ("charset" "utf-8") NIL NIL "quoted-printable" 31834 643) "alternative"))
* 29879 FETCH (BODY ("text" "html" ("charset" "us-ascii") NIL NIL "7bit" 4107 131))
^^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^ ^^^^^^ ^^^^ ^^^
| | | | | | number of lines
| | | | | size
| | | | content transfer encoding
| | | description
| | id
| parameter list
b OK Fetch completed (0.001 + 0.000 secs).
*/
2022-07-20 13:14:34 +00:00
/// Recursively builds the IMAP body structure of `part`, a part of `msg`.
///
/// * Multipart parts become `BodyStructure::Multi` with one child per
///   referenced sub-part.
/// * Text and HTML parts become text bodies with a line count.
/// * Binary parts, and attached messages that were left unparsed, become
///   basic bodies described by their Content-Type.
/// * Parsed attached messages become message bodies embedding their own
///   envelope and body structure.
///
/// Returns an error when a required Content-Type (or subtype) is missing,
/// when a value cannot be encoded as an IMAP string, when a size or line
/// count does not fit in 32 bits, or when a multipart references a part
/// that does not exist.
fn build_imap_email_struct<'a>(msg: &Message<'a>, part: &MessagePart<'a>) -> Result<BodyStructure> {
    match &part.body {
        PartType::Multipart(parts) => {
            let subtype = IString::try_from(
                part.headers_rfc
                    .get(&RfcHeader::ContentType)
                    .ok_or_else(|| anyhow!("Content-Type is missing but required here."))?
                    .get_content_type()
                    .c_subtype
                    .as_ref()
                    .ok_or_else(|| anyhow!("Content-Type invalid, missing subtype"))?
                    .to_string(),
            )
            .map_err(|_| {
                anyhow!("Unable to build IString from given Content-Type subtype given")
            })?;

            Ok(BodyStructure::Multi {
                bodies: parts
                    .iter()
                    .map(|index| {
                        let child = msg.parts.get(*index).ok_or_else(|| {
                            anyhow!("Multipart references a missing part: {}", index)
                        })?;
                        build_imap_email_struct(msg, child)
                    })
                    .fold(Ok(vec![]), try_collect_shime)?,
                subtype,
                extension_data: None,
                /*Some(MultipartExtensionData {
                    parameter_list: vec![],
                    disposition: None,
                    language: None,
                    location: None,
                    extension: vec![],
                })*/
            })
        }
        PartType::Text(bp) | PartType::Html(bp) => {
            let (attrs, mut basic) = headers_to_basic_fields(part, bp.len())?;

            // If the charset is not defined, set it to "us-ascii"
            if attrs.charset.is_none() {
                basic
                    .parameter_list
                    .push((unchecked_istring("charset"), unchecked_istring("us-ascii")));
            }

            // If the subtype is not defined, set it to "plain". MIME (RFC2045) says that subtype
            // MUST be defined and hence has no default. But mail-parser does not make any
            // difference between MIME and raw emails, hence raw emails have no subtypes.
            let subtype = part
                .get_content_type()
                .and_then(|h| h.c_subtype.as_ref())
                .and_then(|st| IString::try_from(st.to_string()).ok())
                .unwrap_or_else(|| unchecked_istring("plain"));

            // Counts line feeds in the raw (still encoded) body of the part.
            let number_of_lines = msg
                .raw_message
                .get(part.offset_body..part.offset_end)
                .map(|text| text.iter().filter(|x| **x == b'\n').count())
                .unwrap_or(0)
                .try_into()?;

            Ok(BodyStructure::Single {
                body: FetchBody {
                    basic,
                    specific: SpecificFields::Text {
                        subtype,
                        number_of_lines,
                    },
                },
                extension: None,
            })
        }
        PartType::Binary(bp) | PartType::InlineBinary(bp) => {
            basic_part_body_structure(part, bp.len())
        }
        PartType::Message(bp) => {
            // @NOTE in some cases mail-parser does not parse the MessageAttachment but
            // provide it as raw body. By looking quickly at the code, it seems that the
            // attachment is not parsed when mail-parser encounters some encoding problems.
            match &bp {
                MessageAttachment::Parsed(inner) => {
                    // @FIXME+BUG mail-parser does not handle ways when a MIME message contains
                    // a raw email and wrongly take its delimiter. The size and number of
                    // lines returned in that case are wrong. A patch to mail-parser is
                    // needed to fix this.
                    let (_, basic) = headers_to_basic_fields(part, inner.raw_message.len())?;

                    // We do not count the number of lines but the number of line
                    // feeds to have the same behavior as Dovecot and Cyrus.
                    // 2 lines = 1 line feed.
                    let nol = inner.raw_message.iter().filter(|&c| c == &b'\n').count();

                    Ok(BodyStructure::Single {
                        body: FetchBody {
                            basic,
                            specific: SpecificFields::Message {
                                envelope: message_envelope(inner),
                                body_structure: Box::new(build_imap_email_struct(
                                    &inner,
                                    inner.get_root_part(),
                                )?),
                                // @FIXME This solution is bad for 2 reasons:
                                // - RFC2045 says line endings are CRLF but we accept LF alone with
                                // this method. It could be a feature (be liberal in what you
                                // accept) but we must be sure that we don't break things.
                                // - It should be done during parsing, we are iterating twice on
                                // the same data which results in some wastes.
                                number_of_lines: u32::try_from(nol)?,
                            },
                        },
                        extension: None,
                    })
                }
                MessageAttachment::Raw(raw_msg) => basic_part_body_structure(part, raw_msg.len()),
            }
        }
    }
}

/// Builds the body structure of a part that is described only by its
/// Content-Type type/subtype (binary parts and unparsed attached messages).
/// `size` is the value reported as the part's size.
fn basic_part_body_structure(part: &MessagePart<'_>, size: usize) -> Result<BodyStructure> {
    let (_, basic) = headers_to_basic_fields(part, size)?;

    let ct = part
        .get_content_type()
        .ok_or_else(|| anyhow!("Content-Type is missing but required here."))?;

    let type_ = IString::try_from(ct.c_type.as_ref().to_string())
        .map_err(|_| anyhow!("Unable to build IString from given Content-Type type given"))?;

    let subtype = IString::try_from(
        ct.c_subtype
            .as_ref()
            .ok_or_else(|| anyhow!("Content-Type invalid, missing subtype"))?
            .to_string(),
    )
    .map_err(|_| anyhow!("Unable to build IString from given Content-Type subtype given"))?;

    Ok(BodyStructure::Single {
        body: FetchBody {
            basic,
            specific: SpecificFields::Basic { type_, subtype },
        },
        extension: None,
    })
}
2022-07-05 15:48:10 +00:00
/// Fold step that appends `elem` to the accumulated vector.
///
/// An error in the accumulator takes precedence over an error in `elem`;
/// either one makes the result an error.
fn try_collect_shime<T>(acc: Result<Vec<T>>, elem: Result<T>) -> Result<Vec<T>> {
    let mut collected = acc?;
    collected.push(elem?);
    Ok(collected)
}
/// s is set to static to ensure that only compile time values
2022-07-05 15:08:12 +00:00
/// checked by developpers are passed.
fn unchecked_istring(s: &'static str) -> IString {
IString::try_from(s).expect("this value is expected to be a valid imap-codec::IString")
}
2022-07-05 15:08:12 +00:00
/// Content-Type attributes that `attrs_to_params` singles out while building
/// the IMAP parameter list. Both fields default to `None`.
#[derive(Default)]
struct SpecialAttrs<'a> {
// Value of the `charset` attribute, if one was found.
charset: Option<&'a Cow<'a, str>>,
// Value of the `boundary` attribute, if one was found.
boundary: Option<&'a Cow<'a, str>>,
}
/// Takes mail-parser Content-Type attributes, build imap-codec BasicFields.parameter_list and
/// identify some specific attributes (charset and boundary).
fn attrs_to_params<'a>(bp: &impl MimeHeaders<'a>) -> (SpecialAttrs, Vec<(IString, IString)>) {
// Try to extract Content-Type attributes from headers
2022-07-05 15:48:10 +00:00
let attrs = match bp
.get_content_type()
.map(|c| c.attributes.as_ref())
.flatten()
{
2022-07-05 15:08:12 +00:00
Some(v) => v,
2022-07-05 15:48:10 +00:00
_ => return (SpecialAttrs::default(), vec![]),
2022-07-05 15:08:12 +00:00
};
// Transform the Content-Type attributes into IMAP's parameter list
// Also collect some special attributes that might be used elsewhere
attrs.iter().fold(
(SpecialAttrs::default(), vec![]),
|(mut sa, mut param_list), (k, v)| {
let nk = k.to_lowercase();
match (IString::try_from(k.as_ref()), IString::try_from(v.as_ref())) {
(Ok(ik), Ok(iv)) => param_list.push((ik, iv)),
_ => return (sa, param_list),
};
match nk.as_str() {
"charset" => {
sa.charset = Some(v);
}
"boundary" => {
sa.boundary = Some(v);
}
_ => (),
};
(sa, param_list)
},
)
}
/// Takes mail-parser headers and build imap-codec BasicFields
/// Return some special informations too
2022-07-20 13:14:34 +00:00
fn headers_to_basic_fields<'a>(
bp: &'a MessagePart<'a>,
size: usize,
) -> Result<(SpecialAttrs<'a>, BasicFields)> {
2022-07-05 15:08:12 +00:00
let (attrs, parameter_list) = attrs_to_params(bp);
let bf = BasicFields {
parameter_list,
id: NString(
bp.get_content_id()
.map(|ci| IString::try_from(ci.to_string()).ok())
.flatten(),
),
description: NString(
bp.get_content_description()
.map(|cd| IString::try_from(cd.to_string()).ok())
.flatten(),
),
/*
* RFC2045 - section 6.1
* "Content-Transfer-Encoding: 7BIT" is assumed if the
* Content-Transfer-Encoding header field is not present.
*/
content_transfer_encoding: bp
.get_content_transfer_encoding()
.map(|h| IString::try_from(h.to_string()).ok())
.flatten()
.unwrap_or(unchecked_istring("7bit")),
size: u32::try_from(size)?,
2022-07-05 15:08:12 +00:00
};
Ok((attrs, bf))
}
2022-07-15 14:15:48 +00:00
fn get_message_section<'a>(
parsed: &'a Message<'a>,
section: &Option<FetchSection>,
) -> Result<Cow<'a, [u8]>> {
match section {
2022-07-20 13:14:34 +00:00
Some(FetchSection::Text(None)) => {
let rp = parsed.get_root_part();
Ok(parsed
.raw_message
.get(rp.offset_body..rp.offset_end)
.ok_or(Error::msg(
"Unable to extract email body, cursors out of bound. This is a bug.",
))?
.into())
}
2022-07-15 14:15:48 +00:00
Some(FetchSection::Text(Some(part))) => {
2022-07-15 15:55:04 +00:00
map_subpart_msg(parsed, part.0.as_slice(), |part_msg| {
2022-07-20 13:14:34 +00:00
let rp = part_msg.get_root_part();
2022-07-15 14:15:48 +00:00
Ok(part_msg
.raw_message
2022-07-20 13:14:34 +00:00
.get(rp.offset_body..rp.offset_end)
2022-07-15 14:15:48 +00:00
.ok_or(Error::msg(
"Unable to extract email body, cursors out of bound. This is a bug.",
))?
.to_vec()
.into())
})
}
2022-07-15 15:55:04 +00:00
Some(FetchSection::Header(part)) => map_subpart_msg(
2022-07-15 14:15:48 +00:00
parsed,
part.as_ref().map(|p| p.0.as_slice()).unwrap_or(&[]),
|part_msg| {
2022-07-20 13:14:34 +00:00
let rp = part_msg.get_root_part();
2022-07-15 14:15:48 +00:00
Ok(part_msg
.raw_message
2022-07-20 13:14:34 +00:00
.get(..rp.offset_body)
2022-07-15 14:15:48 +00:00
.ok_or(Error::msg(
"Unable to extract email header, cursors out of bound. This is a bug.",
))?
.to_vec()
.into())
},
),
Some(
FetchSection::HeaderFields(part, fields) | FetchSection::HeaderFieldsNot(part, fields),
) => {
let invert = matches!(section, Some(FetchSection::HeaderFieldsNot(_, _)));
let fields = fields
.iter()
.map(|x| match x {
AString::Atom(a) => a.as_bytes(),
AString::String(IString::Literal(l)) => l.as_slice(),
AString::String(IString::Quoted(q)) => q.as_bytes(),
})
.collect::<Vec<_>>();
map_subpart_msg(
parsed,
part.as_ref().map(|p| p.0.as_slice()).unwrap_or(&[]),
|part_msg| {
let mut ret = vec![];
for (hn, hv) in part_msg.get_raw_headers() {
if fields
.as_slice()
.iter()
.any(|x| (*x == hn.as_str().as_bytes()) ^ invert)
{
ret.extend(hn.as_str().as_bytes());
ret.extend(b": ");
ret.extend(hv.as_bytes());
}
}
ret.extend(b"\r\n");
Ok(ret.into())
},
)
}
2022-07-15 15:55:04 +00:00
Some(FetchSection::Part(part)) => map_subpart(parsed, part.0.as_slice(), |_msg, part| {
2022-07-20 13:14:34 +00:00
let bytes = match &part.body {
PartType::Text(p) | PartType::Html(p) => p.as_bytes().to_vec(),
PartType::Binary(p) | PartType::InlineBinary(p) => p.to_vec(),
PartType::Message(MessageAttachment::Raw(r)) => r.to_vec(),
PartType::Message(MessageAttachment::Parsed(p)) => p.raw_message.to_vec(),
PartType::Multipart(_) => bail!("Multipart part has no body"),
2022-07-15 14:15:48 +00:00
};
Ok(bytes.into())
}),
2022-07-15 15:55:04 +00:00
Some(FetchSection::Mime(part)) => map_subpart(parsed, part.0.as_slice(), |msg, part| {
2022-07-15 14:15:48 +00:00
let mut ret = vec![];
2022-07-20 13:14:34 +00:00
for (name, body) in part.headers_raw.iter() {
2022-07-15 14:15:48 +00:00
ret.extend(name.as_str().as_bytes());
ret.extend(b": ");
ret.extend(&msg.raw_message[body.start..body.end]);
}
ret.extend(b"\r\n");
Ok(ret.into())
}),
None => Ok(parsed.raw_message.clone()),
}
}
2022-07-15 15:55:04 +00:00
fn map_subpart_msg<'a, F, R>(msg: &Message<'a>, path: &[NonZeroU32], f: F) -> Result<R>
2022-07-15 14:15:48 +00:00
where
F: FnOnce(&Message<'_>) -> Result<R>,
{
if path.is_empty() {
f(msg)
} else {
let part = msg
.parts
.get(path[0].get() as usize - 1)
.ok_or(anyhow!("No such subpart: {}", path[0]))?;
2022-07-20 13:14:34 +00:00
if let PartType::Message(msg_attch) = &part.body {
let part_msg = msg_attch
.get_message()
2022-07-15 14:15:48 +00:00
.ok_or(anyhow!("Cannot parse subpart: {}", path[0]))?;
2022-07-15 15:55:04 +00:00
map_subpart_msg(&part_msg, &path[1..], f)
2022-07-15 14:15:48 +00:00
} else {
bail!("Subpart is not a message: {}", path[0]);
}
}
}
2022-07-15 15:55:04 +00:00
fn map_subpart<'a, F, R>(msg: &Message<'a>, path: &[NonZeroU32], f: F) -> Result<R>
2022-07-15 14:15:48 +00:00
where
F: FnOnce(&Message<'_>, &MessagePart<'_>) -> Result<R>,
{
if path.is_empty() {
bail!("Unexpected empty path");
} else {
let part = msg
.parts
.get(path[0].get() as usize - 1)
.ok_or(anyhow!("No such subpart: {}", path[0]))?;
if path.len() == 1 {
f(msg, part)
} else {
2022-07-20 13:14:34 +00:00
if let PartType::Message(msg_attch) = &part.body {
let part_msg = msg_attch
.get_message()
2022-07-15 14:15:48 +00:00
.ok_or(anyhow!("Cannot parse subpart: {}", path[0]))?;
2022-07-15 15:55:04 +00:00
map_subpart(&part_msg, &path[1..], f)
2022-07-15 14:15:48 +00:00
} else {
bail!("Subpart is not a message: {}", path[0]);
}
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use imap_codec::codec::Encode;
    use std::fs;

    /// Compares the body structure we generate for a set of sample emails
    /// against reference output stored next to each sample.
    ///
    /// For every prefix `P` in the list, `P.eml` is parsed, its body structure
    /// is built and encoded, and the result is compared with the content of
    /// `P.dovecot.body` stripped of its first and last byte (the enclosing
    /// parentheses).
    ///
    /// Both sides go through lossy UTF-8 conversion and are lowercased before
    /// being compared, so this test might allow invalid results. But at least
    /// it allows us to quickly test a large variety of emails.
    /// Keep in mind that special cases must still be tested manually!
    #[test]
    fn fetch_body() -> Result<()> {
        let prefixes = [
            "tests/emails/dxflrs/0001_simple",
            "tests/emails/dxflrs/0002_mime",
            "tests/emails/dxflrs/0003_mime-in-mime",
            "tests/emails/dxflrs/0004_msg-in-msg",
            // broken: numbers of lines/characters not counted correctly
            //"tests/emails/dxflrs/0005_mail-parser-readme",
            // broken
            //"tests/emails/dxflrs/0006_single-mime",
            //"tests/emails/dxflrs/0007_raw_msg_in_rfc822",
            //"tests/emails/rfc/000", // broken
            //"tests/emails/rfc/001", // broken
            //"tests/emails/rfc/002", // broken: dovecot adds \r when it is missing and counts it
            //                        // as a character. Difference on how lines are counted too.
            //"tests/emails/rfc/003", // broken for the same reason
            //"tests/emails/thirdparty/000",
            //"tests/emails/thirdparty/001",
            //"tests/emails/thirdparty/002",
        ];

        for pref in prefixes.iter() {
            println!("{}", pref);

            let eml_path = format!("{}.eml", pref);
            let exp_path = format!("{}.dovecot.body", pref);
            let txt = fs::read(&eml_path)
                .map_err(|e| anyhow!("Unable to read {}: {}", eml_path, e))?;
            let exp = fs::read(&exp_path)
                .map_err(|e| anyhow!("Unable to read {}: {}", exp_path, e))?;

            // Report which sample failed instead of panicking without context.
            let message = Message::parse(&txt)
                .ok_or_else(|| anyhow!("Unable to parse {}", eml_path))?;

            let mut resp = Vec::new();
            MessageAttribute::Body(build_imap_email_struct(&message, message.get_root_part())?)
                .encode(&mut resp);

            let resp_str = String::from_utf8_lossy(&resp).to_lowercase();

            // Drop the first and last byte of the reference; a reference
            // shorter than two bytes is reported as an error instead of
            // panicking on the slice.
            let exp_no_parenthesis = exp
                .get(1..exp.len().saturating_sub(1))
                .ok_or_else(|| anyhow!("Reference {} is too short", exp_path))?;
            let exp_str = String::from_utf8_lossy(exp_no_parenthesis).to_lowercase();

            println!("aerogramme: {}\ndovecot: {}", resp_str, exp_str);
            //println!("\n\n {} \n\n", String::from_utf8_lossy(&resp));
            assert_eq!(resp_str, exp_str);
        }

        Ok(())
    }
}