Merge pull request 'Implement search' (#61) from feat/search into main

Reviewed-on: #61
This commit is contained in:
Quentin 2024-01-08 10:39:26 +00:00
commit d7788e29a8
15 changed files with 3212 additions and 1774 deletions

3966
Cargo.nix vendored

File diff suppressed because it is too large Load diff

View file

@ -22,6 +22,7 @@ impl Default for ServerCapability {
fn default() -> Self {
Self(HashSet::from([
Capability::Imap4Rev1,
Capability::Enable,
Capability::Move,
Capability::LiteralPlus,
capability_unselect(),

View file

@ -111,15 +111,17 @@ impl<'a> ExaminedContext<'a> {
pub async fn search(
self,
_charset: &Option<Charset<'a>>,
_criteria: &SearchKey<'a>,
_uid: &bool,
charset: &Option<Charset<'a>>,
criteria: &SearchKey<'a>,
uid: &bool,
) -> Result<(Response<'static>, flow::Transition)> {
let found = self.mailbox.search(charset, criteria, *uid).await?;
Ok((
Response::build()
.to_req(self.req)
.message("Not implemented")
.bad()?,
.set_body(found)
.message("SEARCH completed")
.ok()?,
flow::Transition::None,
))
}

View file

@ -1,30 +1,41 @@
use anyhow::{anyhow, Result};
use chrono::naive::NaiveDate;
use imap_codec::imap_types::core::{IString, NString};
use imap_codec::imap_types::envelope::{Address, Envelope};
use eml_codec::imf;
/// Envelope rules are defined in RFC 3501, section 7.4.2
/// https://datatracker.ietf.org/doc/html/rfc3501#section-7.4.2
///
/// Some important notes:
///
/// If the Sender or Reply-To lines are absent in the [RFC-2822]
/// header, or are present but empty, the server sets the
/// corresponding member of the envelope to be the same value as
/// the from member (the client is not expected to know to do
/// this). Note: [RFC-2822] requires that all messages have a valid
/// From header. Therefore, the from, sender, and reply-to
/// members in the envelope can not be NIL.
///
/// If the Date, Subject, In-Reply-To, and Message-ID header lines
/// are absent in the [RFC-2822] header, the corresponding member
/// of the envelope is NIL; if these header lines are present but
/// empty the corresponding member of the envelope is the empty
/// string.
pub struct ImfView<'a>(pub &'a imf::Imf<'a>);
//@FIXME return an error if the envelope is invalid instead of panicking
//@FIXME some fields must be defaulted if there are not set.
pub fn message_envelope(msg: &imf::Imf) -> Envelope<'static> {
impl<'a> ImfView<'a> {
/// Calendar date of the message's `date` field, without time of day.
///
/// # Errors
/// Fails with "date is not set" when the message carries no date.
pub fn naive_date(&self) -> Result<NaiveDate> {
    match self.0.date {
        Some(date) => Ok(date.date_naive()),
        None => Err(anyhow!("date is not set")),
    }
}
/// Envelope rules are defined in RFC 3501, section 7.4.2
/// https://datatracker.ietf.org/doc/html/rfc3501#section-7.4.2
///
/// Some important notes:
///
/// If the Sender or Reply-To lines are absent in the [RFC-2822]
/// header, or are present but empty, the server sets the
/// corresponding member of the envelope to be the same value as
/// the from member (the client is not expected to know to do
/// this). Note: [RFC-2822] requires that all messages have a valid
/// From header. Therefore, the from, sender, and reply-to
/// members in the envelope can not be NIL.
///
/// If the Date, Subject, In-Reply-To, and Message-ID header lines
/// are absent in the [RFC-2822] header, the corresponding member
/// of the envelope is NIL; if these header lines are present but
/// empty the corresponding member of the envelope is the empty
/// string.
//@FIXME return an error if the envelope is invalid instead of panicking
//@FIXME some fields must be defaulted if there are not set.
pub fn message_envelope(&self) -> Envelope<'static> {
let msg = self.0;
let from = msg.from.iter().map(convert_mbx).collect::<Vec<_>>();
Envelope {
@ -64,6 +75,7 @@ pub fn message_envelope(msg: &imf::Imf) -> Envelope<'static> {
.map(|d| IString::try_from(d.to_string()).unwrap()),
),
}
}
}
pub fn convert_addresses(addrlist: &Vec<imf::address::AddressRef>) -> Vec<Address<'static>> {

View file

@ -1,95 +1,181 @@
use std::num::NonZeroU32;
use anyhow::{anyhow, bail, Result};
use imap_codec::imap_types::sequence::{self, SequenceSet};
use anyhow::{anyhow, Context, Result};
use imap_codec::imap_types::sequence::{self, SeqOrUid, Sequence, SequenceSet};
use crate::mail::uidindex::{ImapUid, UidIndex};
use crate::mail::unique_ident::UniqueIdent;
pub struct Index<'a>(pub &'a UidIndex);
pub struct Index<'a> {
pub imap_index: Vec<MailIndex<'a>>,
pub internal: &'a UidIndex,
}
impl<'a> Index<'a> {
pub fn fetch(
self: &Index<'a>,
sequence_set: &SequenceSet,
by_uid: bool,
) -> Result<Vec<MailIndex<'a>>> {
let mail_vec = self
.0
pub fn new(internal: &'a UidIndex) -> Result<Self> {
let imap_index = internal
.idx_by_uid
.iter()
.map(|(uid, uuid)| (*uid, *uuid))
.collect::<Vec<_>>();
let mut mails = vec![];
if by_uid {
if mail_vec.is_empty() {
return Ok(vec![]);
}
let iter_strat = sequence::Strategy::Naive {
largest: mail_vec.last().unwrap().0,
};
let mut i = 0;
for uid in sequence_set.iter(iter_strat) {
while mail_vec.get(i).map(|mail| mail.0 < uid).unwrap_or(false) {
i += 1;
}
if let Some(mail) = mail_vec.get(i) {
if mail.0 == uid {
mails.push(MailIndex {
i: NonZeroU32::try_from(i as u32 + 1).unwrap(),
uid: mail.0,
uuid: mail.1,
flags: self
.0
.enumerate()
.map(|(i_enum, (&uid, &uuid))| {
let flags = internal
.table
.get(&mail.1)
.get(&uuid)
.ok_or(anyhow!("mail is missing from index"))?
.1
.as_ref(),
});
}
} else {
break;
}
}
} else {
if mail_vec.is_empty() {
bail!("No such message (mailbox is empty)");
}
.as_ref();
let i_int: u32 = (i_enum + 1).try_into()?;
let i: NonZeroU32 = i_int.try_into()?;
let iter_strat = sequence::Strategy::Naive {
largest: NonZeroU32::try_from((mail_vec.len()) as u32).unwrap(),
};
for i in sequence_set.iter(iter_strat) {
if let Some(mail) = mail_vec.get(i.get() as usize - 1) {
mails.push(MailIndex {
Ok(MailIndex {
i,
uid: mail.0,
uuid: mail.1,
flags: self
.0
.table
.get(&mail.1)
.ok_or(anyhow!("mail is missing from index"))?
.1
.as_ref(),
});
} else {
bail!("No such mail: {}", i);
uid,
uuid,
flags,
})
})
.collect::<Result<Vec<_>>>()?;
Ok(Self {
imap_index,
internal,
})
}
/// Returns the last entry of `imap_index`, or `None` when the index is empty.
pub fn last(&'a self) -> Option<&'a MailIndex<'a>> {
    self.imap_index.last()
}
/// Fetch mail descriptors based on a sequence of UID
///
/// Returns the index entries whose UID belongs to `sequence_set`, ordered by
/// ascending UID, each mail at most once. UIDs of the set that match no mail
/// are ignored, and an empty mailbox yields an empty vector.
///
/// Complexity analysis:
/// - Sort is O(n * log n) where n is the number of UIDs generated by the sequence
/// - Finding the starting point in the index is O(log m) where m is the size of the mailbox
/// While n <= m, it's not clear if the difference is big or not.
///
/// For now, the algorithm tries to be fast for small values of n,
/// as it is what is expected from clients.
///
/// So we assume for our implementation that n << m.
/// It's not true for full mailbox searches for example...
pub fn fetch_on_uid(&'a self, sequence_set: &SequenceSet) -> Vec<&'a MailIndex<'a>> {
    // `*` in the set resolves to the UID of the last mail of the index.
    let largest = match self.imap_index.last() {
        Some(mail_idx) => mail_idx.uid,
        None => return vec![],
    };
    let iter_strat = sequence::Strategy::Naive { largest };

    // Sorted and deduplicated so that the sliding window below only ever
    // moves forward and a UID listed twice does not yield the mail twice.
    let mut unroll_seq = sequence_set.iter(iter_strat).collect::<Vec<_>>();
    unroll_seq.sort_unstable();
    unroll_seq.dedup();

    let start_seq = match unroll_seq.first() {
        Some(elem) => elem,
        None => return vec![],
    };

    // Quickly jump to the right point in the mailbox vector O(log m) instead
    // of iterating one by one O(m). Works only because both unroll_seq & imap_index are sorted per uid.
    let start_idx = self
        .imap_index
        .partition_point(|mail_idx| &mail_idx.uid < start_seq);
    let mut imap_idx = &self.imap_index[start_idx..];

    let mut acc = vec![];
    for wanted_uid in unroll_seq.iter() {
        // Slide the window forward as long as its first element is lower than our wanted uid.
        let start_idx = match imap_idx.iter().position(|midx| &midx.uid >= wanted_uid) {
            Some(v) => v,
            // Every remaining mail has a lower UID: nothing more can match.
            None => break,
        };
        imap_idx = &imap_idx[start_idx..];

        // If the beginning of our new window is the uid we want, we collect it
        if &imap_idx[0].uid == wanted_uid {
            acc.push(&imap_idx[0]);
        }
    }

    acc
}
/// Fetch mail descriptors based on a set of message sequence numbers.
///
/// The result follows the iteration order of `sequence_set`.
///
/// # Errors
/// Fails when the mailbox is empty, or when the set designates a sequence
/// number that has no corresponding mail in the index.
pub fn fetch_on_id(&'a self, sequence_set: &SequenceSet) -> Result<Vec<&'a MailIndex<'a>>> {
    // `*` must resolve to the highest *sequence number* (the position of the
    // last mail), not to its UID: UIDs can have holes, in which case the last
    // UID exceeds the number of mails and `1:*` would run past the index.
    let iter_strat = sequence::Strategy::Naive {
        largest: self
            .imap_index
            .last()
            .context("The mailbox is empty")?
            .i,
    };

    sequence_set
        .iter(iter_strat)
        .map(|wanted_id| {
            // Sequence numbers are 1-based positions in `imap_index`.
            self.imap_index
                .get((wanted_id.get() as usize) - 1)
                .ok_or_else(|| anyhow!("Mail not found"))
        })
        .collect::<Result<Vec<_>>>()
}
/// Resolve `sequence_set` to index entries.
///
/// When `by_uid` is true the set is interpreted as UIDs (this path cannot
/// fail), otherwise as message sequence numbers.
pub fn fetch(
    self: &'a Index<'a>,
    sequence_set: &SequenceSet,
    by_uid: bool,
) -> Result<Vec<&'a MailIndex<'a>>> {
    if by_uid {
        return Ok(self.fetch_on_uid(sequence_set));
    }
    self.fetch_on_id(sequence_set)
}
}
/// One mail as seen through the IMAP index.
#[derive(Clone, Debug)]
pub struct MailIndex<'a> {
    /// Message sequence number: 1-based position of the mail in the index.
    pub i: NonZeroU32,
    /// IMAP UID of the mail.
    pub uid: ImapUid,
    /// Internal unique identifier of the mail.
    pub uuid: UniqueIdent,
    /// Flags currently attached to the mail, as strings (e.g. `\Seen`).
    pub flags: &'a Vec<String>,
}
impl<'a> MailIndex<'a> {
// The following functions are used to implement the SEARCH command
pub fn is_in_sequence_i(&self, seq: &Sequence) -> bool {
match seq {
Sequence::Single(SeqOrUid::Asterisk) => true,
Sequence::Single(SeqOrUid::Value(target)) => target == &self.i,
Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Value(x))
| Sequence::Range(SeqOrUid::Value(x), SeqOrUid::Asterisk) => x <= &self.i,
Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => {
if x1 < x2 {
x1 <= &self.i && &self.i <= x2
} else {
x1 >= &self.i && &self.i >= x2
}
}
Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Asterisk) => true,
}
}
pub fn is_in_sequence_uid(&self, seq: &Sequence) -> bool {
match seq {
Sequence::Single(SeqOrUid::Asterisk) => true,
Sequence::Single(SeqOrUid::Value(target)) => target == &self.uid,
Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Value(x))
| Sequence::Range(SeqOrUid::Value(x), SeqOrUid::Asterisk) => x <= &self.uid,
Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => {
if x1 < x2 {
x1 <= &self.uid && &self.uid <= x2
} else {
x1 >= &self.uid && &self.uid >= x2
}
}
Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Asterisk) => true,
}
}
pub fn is_flag_set(&self, flag: &str) -> bool {
self.flags
.iter()
.any(|candidate| candidate.as_str() == flag)
}
}

View file

@ -1,7 +1,7 @@
use std::num::NonZeroU32;
use anyhow::{anyhow, bail, Result};
use chrono::{Offset, TimeZone, Utc};
use chrono::{naive::NaiveDate, DateTime as ChronoDateTime, Local, Offset, TimeZone, Utc};
use imap_codec::imap_types::core::NString;
use imap_codec::imap_types::datetime::DateTime;
@ -20,19 +20,22 @@ use crate::mail::query::QueryResult;
use crate::imap::attributes::AttributesProxy;
use crate::imap::flags;
use crate::imap::imf_view::message_envelope;
use crate::imap::imf_view::ImfView;
use crate::imap::index::MailIndex;
use crate::imap::mime_view;
use crate::imap::response::Body;
pub struct MailView<'a> {
pub in_idx: MailIndex<'a>,
pub in_idx: &'a MailIndex<'a>,
pub query_result: &'a QueryResult<'a>,
pub content: FetchedMail<'a>,
}
impl<'a> MailView<'a> {
pub fn new(query_result: &'a QueryResult<'a>, in_idx: MailIndex<'a>) -> Result<MailView<'a>> {
pub fn new(
query_result: &'a QueryResult<'a>,
in_idx: &'a MailIndex<'a>,
) -> Result<MailView<'a>> {
Ok(Self {
in_idx,
query_result,
@ -40,18 +43,88 @@ impl<'a> MailView<'a> {
QueryResult::FullResult { content, .. } => {
let (_, parsed) =
eml_codec::parse_message(&content).or(Err(anyhow!("Invalid mail body")))?;
FetchedMail::new_from_message(parsed)
FetchedMail::full_from_message(parsed)
}
QueryResult::PartialResult { metadata, .. } => {
let (_, parsed) = eml_codec::parse_imf(&metadata.headers)
let (_, parsed) = eml_codec::parse_message(&metadata.headers)
.or(Err(anyhow!("unable to parse email headers")))?;
FetchedMail::Partial(parsed)
FetchedMail::partial_from_message(parsed)
}
QueryResult::IndexResult { .. } => FetchedMail::IndexOnly,
},
})
}
/// Returns an `ImfView` over the mail's IMF section, or `None` when
/// `FetchedMail::as_imf` has nothing to offer for the fetched content.
pub fn imf(&self) -> Option<ImfView> {
    self.content.as_imf().map(ImfView)
}
/// Wraps the fetched content in a `SelectedMime`.
///
/// Returns `None` when `as_anypart` fails; the error itself is discarded.
pub fn selected_mime(&'a self) -> Option<mime_view::SelectedMime<'a>> {
    self.content.as_anypart().ok().map(mime_view::SelectedMime)
}
/// Build the FETCH response item for this mail from the requested attributes.
///
/// Each entry of `ap.attrs` is converted by the matching private accessor.
/// Returns the `Data::Fetch` body (tagged with this mail's sequence number)
/// along with a `SeenFlag`: the one reported by the last `BodyExt` attribute
/// processed, or `SeenFlag::DoNothing` when there is none.
///
/// # Errors
/// Stops at the first accessor that fails and returns its error; also fails
/// when the collected items cannot be converted into the `items` field.
pub fn filter(&self, ap: &AttributesProxy) -> Result<(Body<'static>, SeenFlag)> {
    let mut seen = SeenFlag::DoNothing;
    let res_attrs = ap
        .attrs
        .iter()
        .map(|attr| match attr {
            MessageDataItemName::Uid => Ok(self.uid()),
            MessageDataItemName::Flags => Ok(self.flags()),
            MessageDataItemName::Rfc822Size => self.rfc_822_size(),
            MessageDataItemName::Rfc822Header => self.rfc_822_header(),
            MessageDataItemName::Rfc822Text => self.rfc_822_text(),
            MessageDataItemName::Rfc822 => self.rfc822(),
            MessageDataItemName::Envelope => Ok(self.envelope()),
            MessageDataItemName::Body => self.body(),
            MessageDataItemName::BodyStructure => self.body_structure(),
            MessageDataItemName::BodyExt {
                section,
                partial,
                peek,
            } => {
                let (body, has_seen) = self.body_ext(section, partial, peek)?;
                // Overwrites any value set by a previous BodyExt attribute.
                seen = has_seen;
                Ok(body)
            }
            MessageDataItemName::InternalDate => self.internal_date(),
        })
        .collect::<Result<Vec<_>, _>>()?;
    Ok((
        Body::Data(Data::Fetch {
            seq: self.in_idx.i,
            items: res_attrs.try_into()?,
        }),
        seen,
    ))
}
/// Calendar date (in the server's local timezone) of the mail's stored
/// internal date.
///
/// # Errors
/// Fails when the stored value does not fit in an `i64` or is not a valid
/// timestamp.
///
/// # Panics
/// Panics when the query result carries no metadata.
pub fn stored_naive_date(&self) -> Result<NaiveDate> {
    let internaldate = self
        .query_result
        .metadata()
        .expect("metadata were fetched")
        .internaldate;
    let mail_ts: i64 = internaldate.try_into()?;
    match ChronoDateTime::from_timestamp(mail_ts, 0) {
        Some(utc_date) => Ok(utc_date.with_timezone(&Local).date_naive()),
        None => Err(anyhow!("unable to parse timestamp")),
    }
}
/// Tells whether the header named `hdr` exists and contains `pattern`.
///
/// The comparison ignores ASCII case, as IMAP SEARCH string matching is
/// case-insensitive. An empty `pattern` matches as soon as the header is
/// present, whatever its value. Returns `false` when the mail content is not
/// available or when the header is absent.
pub fn is_header_contains_pattern(&self, hdr: &[u8], pattern: &[u8]) -> bool {
    let mime = match self.selected_mime() {
        None => return false,
        Some(x) => x,
    };
    let val = match mime.header_value(hdr) {
        None => return false,
        Some(x) => x,
    };
    // `slice::windows` panics on a zero-sized window, and a client is free to
    // send an empty search string: handle it before scanning.
    if pattern.is_empty() {
        return true;
    }
    val.windows(pattern.len())
        .any(|win| win.eq_ignore_ascii_case(pattern))
}
// Private function, mainly for filter!
fn uid(&self) -> MessageDataItem<'static> {
MessageDataItem::Uid(self.in_idx.uid.clone())
}
@ -87,28 +160,32 @@ impl<'a> MailView<'a> {
}
fn rfc_822_text(&self) -> Result<MessageDataItem<'static>> {
let txt: NString = self.content.as_full()?.raw_body.to_vec().try_into()?;
let txt: NString = self.content.as_msg()?.raw_body.to_vec().try_into()?;
Ok(MessageDataItem::Rfc822Text(txt))
}
fn rfc822(&self) -> Result<MessageDataItem<'static>> {
let full: NString = self.content.as_full()?.raw_part.to_vec().try_into()?;
let full: NString = self.content.as_msg()?.raw_part.to_vec().try_into()?;
Ok(MessageDataItem::Rfc822(full))
}
fn envelope(&self) -> MessageDataItem<'static> {
MessageDataItem::Envelope(message_envelope(self.content.imf().clone()))
MessageDataItem::Envelope(
self.imf()
.expect("an imf object is derivable from fetchedmail")
.message_envelope(),
)
}
fn body(&self) -> Result<MessageDataItem<'static>> {
Ok(MessageDataItem::Body(mime_view::bodystructure(
self.content.as_full()?.child.as_ref(),
self.content.as_msg()?.child.as_ref(),
)?))
}
fn body_structure(&self) -> Result<MessageDataItem<'static>> {
Ok(MessageDataItem::Body(mime_view::bodystructure(
self.content.as_full()?.child.as_ref(),
self.content.as_msg()?.child.as_ref(),
)?))
}
@ -167,43 +244,6 @@ impl<'a> MailView<'a> {
.ok_or(anyhow!("Unable to parse internal date"))?;
Ok(MessageDataItem::InternalDate(DateTime::unvalidated(dt)))
}
pub fn filter(&self, ap: &AttributesProxy) -> Result<(Body<'static>, SeenFlag)> {
let mut seen = SeenFlag::DoNothing;
let res_attrs = ap
.attrs
.iter()
.map(|attr| match attr {
MessageDataItemName::Uid => Ok(self.uid()),
MessageDataItemName::Flags => Ok(self.flags()),
MessageDataItemName::Rfc822Size => self.rfc_822_size(),
MessageDataItemName::Rfc822Header => self.rfc_822_header(),
MessageDataItemName::Rfc822Text => self.rfc_822_text(),
MessageDataItemName::Rfc822 => self.rfc822(),
MessageDataItemName::Envelope => Ok(self.envelope()),
MessageDataItemName::Body => self.body(),
MessageDataItemName::BodyStructure => self.body_structure(),
MessageDataItemName::BodyExt {
section,
partial,
peek,
} => {
let (body, has_seen) = self.body_ext(section, partial, peek)?;
seen = has_seen;
Ok(body)
}
MessageDataItemName::InternalDate => self.internal_date(),
})
.collect::<Result<Vec<_>, _>>()?;
Ok((
Body::Data(Data::Fetch {
seq: self.in_idx.i,
items: res_attrs.try_into()?,
}),
seen,
))
}
}
pub enum SeenFlag {
@ -215,33 +255,39 @@ pub enum SeenFlag {
pub enum FetchedMail<'a> {
IndexOnly,
Partial(imf::Imf<'a>),
Partial(AnyPart<'a>),
Full(AnyPart<'a>),
}
impl<'a> FetchedMail<'a> {
pub fn new_from_message(msg: Message<'a>) -> Self {
pub fn full_from_message(msg: Message<'a>) -> Self {
Self::Full(AnyPart::Msg(msg))
}
fn as_anypart(&self) -> Result<&AnyPart<'a>> {
pub fn partial_from_message(msg: Message<'a>) -> Self {
Self::Partial(AnyPart::Msg(msg))
}
pub fn as_anypart(&self) -> Result<&AnyPart<'a>> {
match self {
FetchedMail::Full(x) => Ok(&x),
FetchedMail::Partial(x) => Ok(&x),
_ => bail!("The full message must be fetched, not only its headers"),
}
}
fn as_full(&self) -> Result<&Message<'a>> {
pub fn as_msg(&self) -> Result<&Message<'a>> {
match self {
FetchedMail::Full(AnyPart::Msg(x)) => Ok(&x),
FetchedMail::Partial(AnyPart::Msg(x)) => Ok(&x),
_ => bail!("The full message must be fetched, not only its headers AND it must be an AnyPart::Msg."),
}
}
fn imf(&self) -> &imf::Imf<'a> {
pub fn as_imf(&self) -> Option<&imf::Imf<'a>> {
match self {
FetchedMail::Full(AnyPart::Msg(x)) => &x.imf,
FetchedMail::Partial(x) => &x,
_ => panic!("Can't contain AnyPart that is not a message"),
FetchedMail::Full(AnyPart::Msg(x)) => Some(&x.imf),
FetchedMail::Partial(AnyPart::Msg(x)) => Some(&x.imf),
_ => None,
}
}
}

View file

@ -146,7 +146,8 @@ impl MailboxView {
let flags = flags.iter().map(|x| x.to_string()).collect::<Vec<_>>();
let mails = self.index().fetch(sequence_set, *is_uid_store)?;
let idx = self.index()?;
let mails = idx.fetch(sequence_set, *is_uid_store)?;
for mi in mails.iter() {
match kind {
StoreType::Add => {
@ -189,7 +190,8 @@ impl MailboxView {
to: Arc<Mailbox>,
is_uid_copy: &bool,
) -> Result<(ImapUidvalidity, Vec<(ImapUid, ImapUid)>)> {
let mails = self.index().fetch(sequence_set, *is_uid_copy)?;
let idx = self.index()?;
let mails = idx.fetch(sequence_set, *is_uid_copy)?;
let mut new_uuids = vec![];
for mi in mails.iter() {
@ -216,7 +218,8 @@ impl MailboxView {
to: Arc<Mailbox>,
is_uid_copy: &bool,
) -> Result<(ImapUidvalidity, Vec<(ImapUid, ImapUid)>, Vec<Body<'static>>)> {
let mails = self.index().fetch(sequence_set, *is_uid_copy)?;
let idx = self.index()?;
let mails = idx.fetch(sequence_set, *is_uid_copy)?;
for mi in mails.iter() {
to.move_from(&self.0.mailbox, mi.uuid).await?;
@ -254,7 +257,8 @@ impl MailboxView {
true => QueryScope::Full,
_ => QueryScope::Partial,
};
let mail_idx_list = self.index().fetch(sequence_set, *is_uid_fetch)?;
let idx = self.index()?;
let mail_idx_list = idx.fetch(sequence_set, *is_uid_fetch)?;
// [2/6] Fetch the emails
let uuids = mail_idx_list
@ -316,29 +320,38 @@ impl MailboxView {
let (seq_set, seq_type) = crit.to_sequence_set();
// 2. Get the selection
let selection = self.index().fetch(&seq_set, seq_type.is_uid())?;
let idx = self.index()?;
let selection = idx.fetch(&seq_set, seq_type.is_uid())?;
// 3. Filter the selection based on the ID / UID / Flags
let (kept_idx, to_fetch) = crit.filter_on_idx(&selection);
// 4. If needed, filter the selection based on the metadata
let _need_meta = crit.need_meta();
// 4. Fetch additional info about the emails
let query_scope = crit.query_scope();
let uuids = to_fetch.iter().map(|midx| midx.uuid).collect::<Vec<_>>();
let query_result = self.0.query(&uuids, query_scope).fetch().await?;
// 5. If needed, filter the selection based on the body
let _need_body = crit.need_body();
let kept_query = crit.filter_on_query(&to_fetch, &query_result)?;
// 6. Format the result according to the client's taste:
// either return UID or ID.
let final_selection = kept_idx.into_iter().chain(kept_query.into_iter());
let selection_fmt = match uid {
true => selection.into_iter().map(|in_idx| in_idx.uid).collect(),
_ => selection.into_iter().map(|in_idx| in_idx.i).collect(),
true => final_selection.map(|in_idx| in_idx.uid).collect(),
_ => final_selection.map(|in_idx| in_idx.i).collect(),
};
Ok(vec![Body::Data(Data::Search(selection_fmt))])
}
// ----
fn index<'a>(&'a self) -> Index<'a> {
Index(&self.0.snapshot)
/// @FIXME index should be stored for longer than a single request
/// Instead they should be tied to the FrozenMailbox refresh
/// It's not trivial to refactor the code to do that, so we are doing
/// some useless computation for now...
fn index<'a>(&'a self) -> Result<Index<'a>> {
Index::new(&self.0.snapshot)
}
/// Produce an OK [UIDVALIDITY _] message corresponding to `known_state`
@ -513,7 +526,7 @@ mod tests {
content: rfc822.to_vec(),
};
let mv = MailView::new(&qr, mail_in_idx)?;
let mv = MailView::new(&qr, &mail_in_idx)?;
let (res_body, _seen) = mv.filter(&ap)?;
let fattr = match res_body {

View file

@ -12,7 +12,7 @@ use eml_codec::{
header, mime, mime::r#type::Deductible, part::composite, part::discrete, part::AnyPart,
};
use crate::imap::imf_view::message_envelope;
use crate::imap::imf_view::ImfView;
pub enum BodySection<'a> {
Full(Cow<'a, [u8]>),
@ -164,8 +164,23 @@ impl<'a> SubsettedSection<'a> {
/// Used for current MIME inspection
///
/// See NodeMime for recursive logic
struct SelectedMime<'a>(&'a AnyPart<'a>);
pub struct SelectedMime<'a>(pub &'a AnyPart<'a>);
impl<'a> SelectedMime<'a> {
/// Returns the raw value of the first well-formed header whose name equals
/// `to_match_ext`, ignoring ASCII case. Malformed header fields are skipped.
pub fn header_value(&'a self, to_match_ext: &[u8]) -> Option<&'a [u8]> {
    self.eml_mime().kv.iter().find_map(|field| match field {
        // Compare in place instead of allocating a lowercase copy of each name.
        header::Field::Good(header::Kv2(k, v)) if k.eq_ignore_ascii_case(to_match_ext) => {
            Some(*v)
        }
        _ => None,
    })
}
/// The subsetted fetch section basically tells us the
/// extraction logic to apply on our selected MIME.
/// This function acts as a router for these logic.
@ -200,6 +215,13 @@ impl<'a> SelectedMime<'a> {
Ok(ExtractedFull(bytes.to_vec().into()))
}
/// Returns the MIME header block to inspect for the selected part.
///
/// For a message part this is the MIME of its child part; for any other
/// kind of part it is the part's own MIME.
fn eml_mime(&self) -> &eml_codec::mime::NaiveMIME<'_> {
    match &self.0 {
        AnyPart::Msg(msg) => msg.child.mime(),
        other => other.mime(),
    }
}
/// The [...] HEADER.FIELDS, and HEADER.FIELDS.NOT part
/// specifiers refer to the [RFC-2822] header of the message or of
/// an encapsulated [MIME-IMT] MESSAGE/RFC822 message.
@ -231,10 +253,7 @@ impl<'a> SelectedMime<'a> {
.collect::<HashSet<_>>();
// Extract MIME headers
let mime = match &self.0 {
AnyPart::Msg(msg) => msg.child.mime(),
other => other.mime(),
};
let mime = self.eml_mime();
// Filter our MIME headers based on the field index
// 1. Keep only the correctly formatted headers
@ -347,7 +366,7 @@ impl<'a> NodeMsg<'a> {
body: FetchBody {
basic,
specific: SpecificFields::Message {
envelope: Box::new(message_envelope(&self.1.imf)),
envelope: Box::new(ImfView(&self.1.imf).message_envelope()),
body_structure: Box::new(NodeMime(&self.1.child).structure()?),
number_of_lines: nol(self.1.raw_part),
},

View file

@ -1,7 +1,13 @@
use std::num::NonZeroU32;
use anyhow::Result;
use imap_codec::imap_types::core::NonEmptyVec;
use imap_codec::imap_types::search::SearchKey;
use imap_codec::imap_types::sequence::{SeqOrUid, Sequence, SequenceSet};
use std::num::NonZeroU32;
use crate::imap::index::MailIndex;
use crate::imap::mail_view::MailView;
use crate::mail::query::{QueryResult, QueryScope};
pub enum SeqType {
Undefined,
@ -54,6 +60,10 @@ impl<'a> Criteria<'a> {
tracing::debug!(
"using AND in a search request is slow: no intersection is performed"
);
// As we perform no intersection, we don't care if we mix UIDs and IDs.
// We only keep the smallest range, be it ID or UID, depending on
// which one has the fewest items. This is an approximation as UID ranges
// can have holes while ID ones can't.
search_list
.as_ref()
.iter()
@ -72,35 +82,227 @@ impl<'a> Criteria<'a> {
/// Not really clever as we can have cases where we filter out
/// the email before needing to inspect its meta.
/// But for now we are seeking the most basic/stupid algorithm.
pub fn need_meta(&self) -> bool {
pub fn query_scope(&self) -> QueryScope {
use SearchKey::*;
match self.0 {
// Combinators
And(and_list) => and_list
.as_ref()
.iter()
.fold(QueryScope::Index, |prev, sk| {
prev.union(&Criteria(sk).query_scope())
}),
Not(inner) => Criteria(inner).query_scope(),
Or(left, right) => Criteria(left)
.query_scope()
.union(&Criteria(right).query_scope()),
All => QueryScope::Index,
// IMF Headers
Bcc(_) | Cc(_) | From(_) | Header(..) | SentBefore(_) | SentOn(_) | SentSince(_)
| Subject(_) | To(_) => true,
| Subject(_) | To(_) => QueryScope::Partial,
// Internal Date is also stored in MailMeta
Before(_) | On(_) | Since(_) => true,
Before(_) | On(_) | Since(_) => QueryScope::Partial,
// Message size is also stored in MailMeta
Larger(_) | Smaller(_) => true,
And(and_list) => and_list.as_ref().iter().any(|sk| Criteria(sk).need_meta()),
Not(inner) => Criteria(inner).need_meta(),
Or(left, right) => Criteria(left).need_meta() || Criteria(right).need_meta(),
_ => false,
Larger(_) | Smaller(_) => QueryScope::Partial,
// Text and Body require that we fetch the full content!
Text(_) | Body(_) => QueryScope::Full,
_ => QueryScope::Index,
}
}
pub fn need_body(&self) -> bool {
/// First filtering pass, based on the index only.
///
/// Returns two lists, both in the order of `midx_list`: the emails we know
/// for sure we want to keep, then the emails that need further investigation
/// once some remote data has been fetched. Emails that can already be ruled
/// out appear in neither list.
pub fn filter_on_idx<'b>(
    &self,
    midx_list: &[&'b MailIndex<'b>],
) -> (Vec<&'b MailIndex<'b>>, Vec<&'b MailIndex<'b>>) {
    let mut to_keep = Vec::new();
    let mut to_fetch = Vec::new();

    for midx in midx_list {
        match self.is_keep_on_idx(midx) {
            PartialDecision::Keep => to_keep.push(*midx),
            PartialDecision::Postpone => to_fetch.push(*midx),
            PartialDecision::Discard => {}
        }
    }

    (to_keep, to_fetch)
}
/// Second filtering pass, run once additional data has been fetched.
///
/// Pairs each entry of `midx_list` with the entry of `query_result` at the
/// same position, builds a `MailView` from every pair, then keeps the index
/// entries whose view satisfies `is_keep_on_query`.
///
/// NOTE(review): the pairing is purely positional (`zip`), so this presumably
/// relies on `query_result` being in the same order as `midx_list`; surplus
/// entries on either side are silently ignored. Confirm against the caller.
///
/// # Errors
/// Returns the first error raised by `MailView::new`; all views are built
/// before any of them is evaluated.
pub fn filter_on_query<'b>(
    &self,
    midx_list: &[&'b MailIndex<'b>],
    query_result: &'b Vec<QueryResult<'b>>,
) -> Result<Vec<&'b MailIndex<'b>>> {
    Ok(midx_list
        .iter()
        .zip(query_result.iter())
        .map(|(midx, qr)| MailView::new(qr, midx))
        .collect::<Result<Vec<_>, _>>()?
        .into_iter()
        .filter(|mail_view| self.is_keep_on_query(mail_view))
        .map(|mail_view| mail_view.in_idx)
        .collect())
}
// ----
/// Here we are doing a partial filtering: we do not have access
/// to the headers or to the body, so every time we encounter a rule
/// based on them, we need to keep it.
///
/// @TODO Could be optimized on a per-email basis by also returning the QueryScope
/// when more information is needed!
fn is_keep_on_idx(&self, midx: &MailIndex) -> PartialDecision {
use SearchKey::*;
match self.0 {
Text(_) | Body(_) => true,
And(and_list) => and_list.as_ref().iter().any(|sk| Criteria(sk).need_body()),
Not(inner) => Criteria(inner).need_body(),
Or(left, right) => Criteria(left).need_body() || Criteria(right).need_body(),
// Combinator logic
And(expr_list) => expr_list
.as_ref()
.iter()
.fold(PartialDecision::Keep, |acc, cur| {
acc.and(&Criteria(cur).is_keep_on_idx(midx))
}),
Or(left, right) => {
let left_decision = Criteria(left).is_keep_on_idx(midx);
let right_decision = Criteria(right).is_keep_on_idx(midx);
left_decision.or(&right_decision)
}
Not(expr) => Criteria(expr).is_keep_on_idx(midx).not(),
All => PartialDecision::Keep,
// Sequence logic
maybe_seq if is_sk_seq(maybe_seq) => is_keep_seq(maybe_seq, midx).into(),
maybe_flag if is_sk_flag(maybe_flag) => is_keep_flag(maybe_flag, midx).into(),
// All the stuff we can't evaluate yet
Bcc(_) | Cc(_) | From(_) | Header(..) | SentBefore(_) | SentOn(_) | SentSince(_)
| Subject(_) | To(_) | Before(_) | On(_) | Since(_) | Larger(_) | Smaller(_)
| Text(_) | Body(_) => PartialDecision::Postpone,
unknown => {
tracing::error!("Unknown filter {:?}", unknown);
PartialDecision::Discard
}
}
}
/// Full evaluation of the search criteria against a fetched mail.
///
/// Returns `true` when `mail_view` satisfies the criteria. Keys that cannot
/// be evaluated (missing date, content not fetched, unknown key) evaluate to
/// `false`. String keys are matched as ASCII case-insensitive substrings.
///
/// # Panics
/// `Larger` and `Smaller` panic when the query result carries no metadata.
///
/// @TODO we re-evaluate twice the same logic. The correct way would be, on each pass,
/// to simplify the search query, by removing the elements that were already checked.
/// For example if we have AND(OR(seqid(X), body(Y)), body(X)), we can't keep for sure
/// the email, as body(x) might be false. So we need to check it. But as seqid(x) is true,
/// we could simplify the request to just body(x) and truncate the first OR. Today, we are
/// not doing that, and thus we reevaluate everything.
fn is_keep_on_query(&self, mail_view: &MailView) -> bool {
    use SearchKey::*;

    /// ASCII case-insensitive substring test. An empty needle always matches;
    /// it is checked first because `slice::windows` panics on a zero size.
    fn contains_ignore_ascii_case(haystack: &[u8], needle: &[u8]) -> bool {
        needle.is_empty()
            || haystack
                .windows(needle.len())
                .any(|win| win.eq_ignore_ascii_case(needle))
    }

    match self.0 {
        // Combinator logic
        And(expr_list) => expr_list
            .as_ref()
            .iter()
            .all(|cur| Criteria(cur).is_keep_on_query(mail_view)),
        Or(left, right) => {
            Criteria(left).is_keep_on_query(mail_view)
                || Criteria(right).is_keep_on_query(mail_view)
        }
        Not(expr) => !Criteria(expr).is_keep_on_query(mail_view),
        All => true,

        // Reevaluating our previous logic...
        maybe_seq if is_sk_seq(maybe_seq) => is_keep_seq(maybe_seq, &mail_view.in_idx),
        maybe_flag if is_sk_flag(maybe_flag) => is_keep_flag(maybe_flag, &mail_view.in_idx),

        // Filter on mail meta
        Before(search_naive) => match mail_view.stored_naive_date() {
            Ok(msg_naive) => &msg_naive < search_naive.as_ref(),
            _ => false,
        },
        On(search_naive) => match mail_view.stored_naive_date() {
            Ok(msg_naive) => &msg_naive == search_naive.as_ref(),
            _ => false,
        },
        // SINCE means "within or later than the specified date": inclusive.
        Since(search_naive) => match mail_view.stored_naive_date() {
            Ok(msg_naive) => &msg_naive >= search_naive.as_ref(),
            _ => false,
        },

        // Message size is also stored in MailMeta
        Larger(size_ref) => {
            mail_view
                .query_result
                .metadata()
                .expect("metadata were fetched")
                .rfc822_size
                > *size_ref as usize
        }
        Smaller(size_ref) => {
            mail_view
                .query_result
                .metadata()
                .expect("metadata were fetched")
                .rfc822_size
                < *size_ref as usize
        }

        // Filter on well-known headers
        Bcc(txt) => mail_view.is_header_contains_pattern(&b"bcc"[..], txt.as_ref()),
        Cc(txt) => mail_view.is_header_contains_pattern(&b"cc"[..], txt.as_ref()),
        From(txt) => mail_view.is_header_contains_pattern(&b"from"[..], txt.as_ref()),
        Subject(txt) => mail_view.is_header_contains_pattern(&b"subject"[..], txt.as_ref()),
        To(txt) => mail_view.is_header_contains_pattern(&b"to"[..], txt.as_ref()),
        Header(hdr, txt) => mail_view.is_header_contains_pattern(hdr.as_ref(), txt.as_ref()),

        // Filter on Date header
        SentBefore(search_naive) => mail_view
            .imf()
            .and_then(|imf| imf.naive_date().ok())
            .map(|msg_naive| &msg_naive < search_naive.as_ref())
            .unwrap_or(false),
        SentOn(search_naive) => mail_view
            .imf()
            .and_then(|imf| imf.naive_date().ok())
            .map(|msg_naive| &msg_naive == search_naive.as_ref())
            .unwrap_or(false),
        // SENTSINCE is inclusive as well.
        SentSince(search_naive) => mail_view
            .imf()
            .and_then(|imf| imf.naive_date().ok())
            .map(|msg_naive| &msg_naive >= search_naive.as_ref())
            .unwrap_or(false),

        // Filter on the full content of the email
        Text(txt) => mail_view
            .content
            .as_msg()
            .map(|msg| contains_ignore_ascii_case(msg.raw_part, txt.as_ref()))
            .unwrap_or(false),
        Body(txt) => mail_view
            .content
            .as_msg()
            .map(|msg| contains_ignore_ascii_case(msg.raw_body, txt.as_ref()))
            .unwrap_or(false),

        unknown => {
            tracing::error!("Unknown filter {:?}", unknown);
            false
        }
    }
}
}
// ---- Sequence things ----
fn sequence_set_all() -> SequenceSet {
SequenceSet::from(Sequence::Range(
SeqOrUid::Value(NonZeroU32::MIN),
@ -128,3 +330,131 @@ fn approx_sequence_size(seq: &Sequence) -> u64 {
}
}
}
// --- Partial decision things ----
/// Outcome of evaluating a search key with incomplete information.
enum PartialDecision {
    /// The mail matches for sure.
    Keep,
    /// The mail does not match for sure.
    Discard,
    /// More data is required before deciding.
    Postpone,
}

impl From<bool> for PartialDecision {
    /// A definite answer: `true` maps to `Keep`, `false` to `Discard`.
    fn from(x: bool) -> Self {
        if x {
            PartialDecision::Keep
        } else {
            PartialDecision::Discard
        }
    }
}

impl PartialDecision {
    /// Logical negation; an undecided outcome stays undecided.
    fn not(&self) -> Self {
        match self {
            Self::Postpone => Self::Postpone,
            Self::Discard => Self::Keep,
            Self::Keep => Self::Discard,
        }
    }

    /// Logical OR: `Keep` wins over everything, then `Postpone`.
    fn or(&self, other: &Self) -> Self {
        match (self, other) {
            (Self::Discard, Self::Discard) => Self::Discard,
            (Self::Keep, _) | (_, Self::Keep) => Self::Keep,
            _ => Self::Postpone,
        }
    }

    /// Logical AND: `Discard` wins over everything, then `Postpone`.
    fn and(&self, other: &Self) -> Self {
        match (self, other) {
            (Self::Keep, Self::Keep) => Self::Keep,
            (Self::Discard, _) | (_, Self::Discard) => Self::Discard,
            _ => Self::Postpone,
        }
    }

    /// True unless the mail is ruled out: both `Keep` and `Postpone` pass.
    fn is_keep(&self) -> bool {
        match self {
            Self::Discard => false,
            Self::Keep | Self::Postpone => true,
        }
    }
}
// ----- Search Key things ---
/// Tells whether `sk` is one of the flag-based search keys.
fn is_sk_flag(sk: &SearchKey) -> bool {
    use SearchKey::*;
    matches!(
        sk,
        Answered
            | Deleted
            | Draft
            | Flagged
            | Keyword(..)
            | New
            | Old
            | Recent
            | Seen
            | Unanswered
            | Undeleted
            | Undraft
            | Unflagged
            | Unkeyword(..)
            | Unseen
    )
}
/// Evaluates a flag-based search key against the flags of one indexed message.
///
/// Returns `true` when the message satisfies the criterion. The semantics
/// follow the IMAP SEARCH keys: `New` is "recent and not seen", `Old` is
/// "not recent", and each `Un*` key is the negation of its counterpart.
///
/// # Panics
///
/// Panics (`unreachable!`) when `sk` is not a flag key; callers are expected
/// to check with `is_sk_flag` first.
fn is_keep_flag(sk: &SearchKey, midx: &MailIndex) -> bool {
    use SearchKey::*;
    match sk {
        Answered => midx.is_flag_set("\\Answered"),
        Deleted => midx.is_flag_set("\\Deleted"),
        Draft => midx.is_flag_set("\\Draft"),
        Flagged => midx.is_flag_set("\\Flagged"),
        Keyword(kw) => midx.is_flag_set(kw.inner()),
        New => {
            let is_recent = midx.is_flag_set("\\Recent");
            let is_seen = midx.is_flag_set("\\Seen");
            is_recent && !is_seen
        }
        Old => !midx.is_flag_set("\\Recent"),
        Recent => midx.is_flag_set("\\Recent"),
        Seen => midx.is_flag_set("\\Seen"),
        // UNANSWERED must look at \Answered; it previously tested \Recent,
        // which made it behave like OLD.
        Unanswered => !midx.is_flag_set("\\Answered"),
        Undeleted => !midx.is_flag_set("\\Deleted"),
        Undraft => !midx.is_flag_set("\\Draft"),
        Unflagged => !midx.is_flag_set("\\Flagged"),
        Unkeyword(kw) => !midx.is_flag_set(kw.inner()),
        Unseen => !midx.is_flag_set("\\Seen"),

        // Not flag logic
        _ => unreachable!(),
    }
}
/// Returns `true` when the search key selects messages by sequence number
/// set or by UID set, i.e. exactly the keys handled by `is_keep_seq`.
fn is_sk_seq(sk: &SearchKey) -> bool {
    use SearchKey::*;
    matches!(sk, SequenceSet(..) | Uid(..))
}
/// Evaluates a sequence-set or UID-set search key against one indexed
/// message: the message is kept when at least one entry of the set reports
/// it as included.
///
/// # Panics
///
/// Panics (`unreachable!`) when `sk` is neither `SequenceSet` nor `Uid`;
/// callers are expected to check with `is_sk_seq` first.
fn is_keep_seq(sk: &SearchKey, midx: &MailIndex) -> bool {
    use SearchKey::*;

    // Sequence-number based selection.
    if let SequenceSet(set) = sk {
        return set
            .0
            .as_ref()
            .iter()
            .any(|range| midx.is_in_sequence_i(range));
    }

    // UID based selection.
    if let Uid(set) = sk {
        return set
            .0
            .as_ref()
            .iter()
            .any(|range| midx.is_in_sequence_uid(range));
    }

    unreachable!()
}

View file

@ -39,7 +39,11 @@ impl Mailbox {
.await?;
}
// @FIXME reporting through opentelemetry or some logs
// info on the "shape" of the mailbox would be welcomed
/*
dump(&uid_index);
*/
let mbox = RwLock::new(MailboxInternal {
id,

View file

@ -1,5 +1,4 @@
use std::convert::TryFrom;
use std::io::Write;
pub mod incoming;
pub mod mailbox;
@ -22,9 +21,6 @@ impl<'a> TryFrom<&'a [u8]> for IMF<'a> {
type Error = ();
fn try_from(body: &'a [u8]) -> Result<IMF<'a>, ()> {
eprintln!("---- BEGIN PARSED MESSAGE ----");
let _ = std::io::stderr().write_all(body);
eprintln!("---- END PARSED MESSAGE ----");
let parsed = eml_codec::parse_message(body).or(Err(()))?.1;
Ok(Self { raw: body, parsed })
}

View file

@ -19,6 +19,15 @@ pub enum QueryScope {
Partial,
Full,
}
impl QueryScope {
    /// Combines two scopes into the widest of the two, with
    /// `Full` > `Partial` > `Index`.
    pub fn union(&self, other: &QueryScope) -> QueryScope {
        if matches!(self, QueryScope::Full) || matches!(other, QueryScope::Full) {
            QueryScope::Full
        } else if matches!(self, QueryScope::Partial) || matches!(other, QueryScope::Partial) {
            QueryScope::Partial
        } else {
            QueryScope::Index
        }
    }
}
impl<'a, 'b> Query<'a, 'b> {
pub async fn fetch(&self) -> Result<Vec<QueryResult<'a>>> {

View file

@ -11,8 +11,6 @@ use super::unique_ident::UniqueIdent;
/// state that is desynchronized with the real mailbox state.
/// It's up to the user to choose when their snapshot must be updated
/// to give useful information to their clients
///
///
pub struct FrozenMailbox {
pub mailbox: Arc<Mailbox>,
pub snapshot: UidIndex,

View file

@ -71,10 +71,15 @@ impl User {
/// Opens an existing mailbox given its IMAP name.
pub async fn open_mailbox(&self, name: &str) -> Result<Option<Arc<Mailbox>>> {
let (mut list, ct) = self.load_mailbox_list().await?;
//@FIXME this could be emitted as a log trace or an OpenTelemetry trace.
// Be careful not to leak sensitive data
/*
eprintln!("List of mailboxes:");
for ent in list.0.iter() {
eprintln!(" - {:?}", ent);
}
*/
if let Some((uidvalidity, Some(mbid))) = list.get_mailbox(name) {
let mb = self.open_mailbox_by_id(mbid, uidvalidity).await?;

View file

@ -0,0 +1,39 @@
from imaplib import IMAP4_SSL, IMAP4
from os import listdir
from os.path import isfile, join
import sys
import argparse
import mailbox
# Debug tool: log in to an IMAP server and APPEND every message of a local
# mbox file to one mailbox.
parser = argparse.ArgumentParser(
    prog='mbox-to-imap',
    description='Send an mbox to an imap server',
    epilog='Just a debug tool')
parser.add_argument('mbox_path')           # positional argument
parser.add_argument('-H', '--host', default="localhost")
# Parsed as an integer. When omitted, the standard port of the selected
# transport is used: 993 with --tls, 143 otherwise.
parser.add_argument('-p', '--port', type=int, default=None)
# login() cannot work without credentials, so fail at argument parsing
# instead of crashing later with None values.
parser.add_argument('-u', '--user', required=True)
parser.add_argument('-s', '--password', required=True)
parser.add_argument('-m', '--mailbox', default="INBOX")
parser.add_argument('-t', '--tls', action='store_true')
args = parser.parse_args()

mbox = mailbox.mbox(args.mbox_path)

if args.tls:
    imap = IMAP4_SSL
    default_port = 993
else:
    imap = IMAP4
    default_port = 143
port = args.port if args.port is not None else default_port

# Echo the effective settings, but never the password.
shown = dict(vars(args), port=port, password='***')
print(shown)

with imap(host=args.host, port=port) as M:
    print(M.login(args.user, args.password))

    # select() reports a missing mailbox through its status instead of
    # raising, so check it explicitly.
    selected = M.select(args.mailbox)
    print(selected)
    if selected[0] != 'OK':
        sys.exit(f"SELECT {args.mailbox} failed: {selected[1]!r}")

    total = len(mbox)
    for position, key in enumerate(mbox.keys(), start=1):
        content = mbox.get(key).as_bytes()
        # A refused APPEND comes back as a 'NO' status, not as an exception.
        status, detail = M.append(args.mailbox, None, None, content)
        if status != 'OK':
            sys.exit(f"APPEND failed for message {position}/{total}: {detail!r}")
        print(f"{position}/{total}")