Implement search #61
6 changed files with 116 additions and 87 deletions
|
@ -1,93 +1,113 @@
|
||||||
use std::num::NonZeroU32;
|
use std::num::NonZeroU32;
|
||||||
|
|
||||||
use anyhow::{anyhow, bail, Result};
|
use anyhow::{anyhow, Context, Result};
|
||||||
use imap_codec::imap_types::sequence::{self, SeqOrUid, Sequence, SequenceSet};
|
use imap_codec::imap_types::sequence::{self, SeqOrUid, Sequence, SequenceSet};
|
||||||
|
|
||||||
use crate::mail::uidindex::{ImapUid, UidIndex};
|
use crate::mail::uidindex::{ImapUid, UidIndex};
|
||||||
use crate::mail::unique_ident::UniqueIdent;
|
use crate::mail::unique_ident::UniqueIdent;
|
||||||
|
|
||||||
pub struct Index<'a>(pub &'a UidIndex);
|
pub struct Index<'a> {
|
||||||
|
pub imap_index: Vec<MailIndex<'a>>,
|
||||||
|
pub internal: &'a UidIndex,
|
||||||
|
}
|
||||||
impl<'a> Index<'a> {
|
impl<'a> Index<'a> {
|
||||||
pub fn fetch(
|
pub fn new(internal: &'a UidIndex) -> Result<Self> {
|
||||||
self: &Index<'a>,
|
let imap_index = internal
|
||||||
sequence_set: &SequenceSet,
|
|
||||||
by_uid: bool,
|
|
||||||
) -> Result<Vec<MailIndex<'a>>> {
|
|
||||||
let mail_vec = self
|
|
||||||
.0
|
|
||||||
.idx_by_uid
|
.idx_by_uid
|
||||||
.iter()
|
.iter()
|
||||||
.map(|(uid, uuid)| (*uid, *uuid))
|
.enumerate()
|
||||||
.collect::<Vec<_>>();
|
.map(|(i_enum, (&uid, &uuid))| {
|
||||||
|
let flags = internal.table.get(&uuid).ok_or(anyhow!("mail is missing from index"))?.1.as_ref();
|
||||||
|
let i_int: u32 = (i_enum + 1).try_into()?;
|
||||||
|
let i: NonZeroU32 = i_int.try_into()?;
|
||||||
|
|
||||||
let mut mails = vec![];
|
Ok(MailIndex { i, uid, uuid, flags })
|
||||||
|
})
|
||||||
|
.collect::<Result<Vec<_>>>()?;
|
||||||
|
|
||||||
if by_uid {
|
Ok(Self { imap_index, internal })
|
||||||
if mail_vec.is_empty() {
|
}
|
||||||
return Ok(vec![]);
|
|
||||||
}
|
pub fn last(&'a self) -> Option<&'a MailIndex<'a>> {
|
||||||
let iter_strat = sequence::Strategy::Naive {
|
self.imap_index.last()
|
||||||
largest: mail_vec.last().unwrap().0,
|
}
|
||||||
|
|
||||||
|
/// Fetch mail descriptors based on a sequence of UIDs
|
||||||
|
///
|
||||||
|
/// Complexity analysis:
|
||||||
|
/// - Sort is O(n * log n) where n is the number of UIDs generated by the sequence
|
||||||
|
/// - Finding the starting point in the index is O(log m) where m is the size of the mailbox
|
||||||
|
/// While n <= m, it's not clear whether the difference is large or not.
|
||||||
|
///
|
||||||
|
/// For now, the algorithm tries to be fast for small values of n,
|
||||||
|
/// as it is what is expected by clients.
|
||||||
|
///
|
||||||
|
/// So we assume for our implementation that n << m.
|
||||||
|
/// It's not true for full mailbox searches for example...
|
||||||
|
pub fn fetch_on_uid(&'a self, sequence_set: &SequenceSet) -> Vec<&'a MailIndex<'a>> {
|
||||||
|
if self.imap_index.is_empty() {
|
||||||
|
return vec![];
|
||||||
|
}
|
||||||
|
let iter_strat = sequence::Strategy::Naive {
|
||||||
|
largest: self.last().expect("imap index is not empty").uid,
|
||||||
|
};
|
||||||
|
let mut unroll_seq = sequence_set.iter(iter_strat).collect::<Vec<_>>();
|
||||||
|
unroll_seq.sort();
|
||||||
|
|
||||||
|
let start_seq = match unroll_seq.iter().next() {
|
||||||
|
Some(elem) => elem,
|
||||||
|
None => return vec![],
|
||||||
|
};
|
||||||
|
|
||||||
|
// Quickly jump to the right point in the mailbox vector O(log m) instead
|
||||||
|
// of iterating one by one O(m). Works only because both unroll_seq & imap_index are sorted per uid.
|
||||||
|
let mut imap_idx = {
|
||||||
|
let start_idx = self.imap_index.partition_point(|mail_idx| &mail_idx.uid < start_seq);
|
||||||
|
&self.imap_index[start_idx..]
|
||||||
|
};
|
||||||
|
println!("win: {:?}", imap_idx.iter().map(|midx| midx.uid).collect::<Vec<_>>());
|
||||||
|
|
||||||
|
let mut acc = vec![];
|
||||||
|
for wanted_uid in unroll_seq.iter() {
|
||||||
|
// Slide the window forward as long as its first element is lower than our wanted uid.
|
||||||
|
let start_idx = match imap_idx.iter().position(|midx| &midx.uid >= wanted_uid) {
|
||||||
|
Some(v) => v,
|
||||||
|
None => break,
|
||||||
};
|
};
|
||||||
|
imap_idx = &imap_idx[start_idx..];
|
||||||
|
|
||||||
let mut i = 0;
|
// If the beginning of our new window is the uid we want, we collect it
|
||||||
for uid in sequence_set.iter(iter_strat) {
|
if &imap_idx[0].uid == wanted_uid {
|
||||||
while mail_vec.get(i).map(|mail| mail.0 < uid).unwrap_or(false) {
|
acc.push(&imap_idx[0]);
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
if let Some(mail) = mail_vec.get(i) {
|
|
||||||
if mail.0 == uid {
|
|
||||||
mails.push(MailIndex {
|
|
||||||
i: NonZeroU32::try_from(i as u32 + 1).unwrap(),
|
|
||||||
uid: mail.0,
|
|
||||||
uuid: mail.1,
|
|
||||||
flags: self
|
|
||||||
.0
|
|
||||||
.table
|
|
||||||
.get(&mail.1)
|
|
||||||
.ok_or(anyhow!("mail is missing from index"))?
|
|
||||||
.1
|
|
||||||
.as_ref(),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if mail_vec.is_empty() {
|
|
||||||
bail!("No such message (mailbox is empty)");
|
|
||||||
}
|
|
||||||
|
|
||||||
let iter_strat = sequence::Strategy::Naive {
|
|
||||||
largest: NonZeroU32::try_from((mail_vec.len()) as u32).unwrap(),
|
|
||||||
};
|
|
||||||
|
|
||||||
for i in sequence_set.iter(iter_strat) {
|
|
||||||
if let Some(mail) = mail_vec.get(i.get() as usize - 1) {
|
|
||||||
mails.push(MailIndex {
|
|
||||||
i,
|
|
||||||
uid: mail.0,
|
|
||||||
uuid: mail.1,
|
|
||||||
flags: self
|
|
||||||
.0
|
|
||||||
.table
|
|
||||||
.get(&mail.1)
|
|
||||||
.ok_or(anyhow!("mail is missing from index"))?
|
|
||||||
.1
|
|
||||||
.as_ref(),
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
bail!("No such mail: {}", i);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(mails)
|
acc
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn fetch_on_id(&'a self, sequence_set: &SequenceSet) -> Result<Vec<&'a MailIndex<'a>>> {
|
||||||
|
let iter_strat = sequence::Strategy::Naive {
|
||||||
|
largest: self.last().context("The mailbox is empty")?.uid,
|
||||||
|
};
|
||||||
|
sequence_set
|
||||||
|
.iter(iter_strat)
|
||||||
|
.map(|wanted_id| self.imap_index.get((wanted_id.get() as usize) - 1).ok_or(anyhow!("Mail not found")))
|
||||||
|
.collect::<Result<Vec<_>>>()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn fetch(
|
||||||
|
self: &'a Index<'a>,
|
||||||
|
sequence_set: &SequenceSet,
|
||||||
|
by_uid: bool,
|
||||||
|
) -> Result<Vec<&'a MailIndex<'a>>> {
|
||||||
|
match by_uid {
|
||||||
|
true => Ok(self.fetch_on_uid(sequence_set)),
|
||||||
|
_ => self.fetch_on_id(sequence_set),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct MailIndex<'a> {
|
pub struct MailIndex<'a> {
|
||||||
pub i: NonZeroU32,
|
pub i: NonZeroU32,
|
||||||
pub uid: ImapUid,
|
pub uid: ImapUid,
|
||||||
|
|
|
@ -26,13 +26,13 @@ use crate::imap::mime_view;
|
||||||
use crate::imap::response::Body;
|
use crate::imap::response::Body;
|
||||||
|
|
||||||
pub struct MailView<'a> {
|
pub struct MailView<'a> {
|
||||||
pub in_idx: MailIndex<'a>,
|
pub in_idx: &'a MailIndex<'a>,
|
||||||
pub query_result: &'a QueryResult<'a>,
|
pub query_result: &'a QueryResult<'a>,
|
||||||
pub content: FetchedMail<'a>,
|
pub content: FetchedMail<'a>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> MailView<'a> {
|
impl<'a> MailView<'a> {
|
||||||
pub fn new(query_result: &'a QueryResult<'a>, in_idx: MailIndex<'a>) -> Result<MailView<'a>> {
|
pub fn new(query_result: &'a QueryResult<'a>, in_idx: &'a MailIndex<'a>) -> Result<MailView<'a>> {
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
in_idx,
|
in_idx,
|
||||||
query_result,
|
query_result,
|
||||||
|
|
|
@ -146,7 +146,8 @@ impl MailboxView {
|
||||||
|
|
||||||
let flags = flags.iter().map(|x| x.to_string()).collect::<Vec<_>>();
|
let flags = flags.iter().map(|x| x.to_string()).collect::<Vec<_>>();
|
||||||
|
|
||||||
let mails = self.index().fetch(sequence_set, *is_uid_store)?;
|
let idx = self.index()?;
|
||||||
|
let mails = idx.fetch(sequence_set, *is_uid_store)?;
|
||||||
for mi in mails.iter() {
|
for mi in mails.iter() {
|
||||||
match kind {
|
match kind {
|
||||||
StoreType::Add => {
|
StoreType::Add => {
|
||||||
|
@ -189,7 +190,8 @@ impl MailboxView {
|
||||||
to: Arc<Mailbox>,
|
to: Arc<Mailbox>,
|
||||||
is_uid_copy: &bool,
|
is_uid_copy: &bool,
|
||||||
) -> Result<(ImapUidvalidity, Vec<(ImapUid, ImapUid)>)> {
|
) -> Result<(ImapUidvalidity, Vec<(ImapUid, ImapUid)>)> {
|
||||||
let mails = self.index().fetch(sequence_set, *is_uid_copy)?;
|
let idx = self.index()?;
|
||||||
|
let mails = idx.fetch(sequence_set, *is_uid_copy)?;
|
||||||
|
|
||||||
let mut new_uuids = vec![];
|
let mut new_uuids = vec![];
|
||||||
for mi in mails.iter() {
|
for mi in mails.iter() {
|
||||||
|
@ -216,7 +218,8 @@ impl MailboxView {
|
||||||
to: Arc<Mailbox>,
|
to: Arc<Mailbox>,
|
||||||
is_uid_copy: &bool,
|
is_uid_copy: &bool,
|
||||||
) -> Result<(ImapUidvalidity, Vec<(ImapUid, ImapUid)>, Vec<Body<'static>>)> {
|
) -> Result<(ImapUidvalidity, Vec<(ImapUid, ImapUid)>, Vec<Body<'static>>)> {
|
||||||
let mails = self.index().fetch(sequence_set, *is_uid_copy)?;
|
let idx = self.index()?;
|
||||||
|
let mails = idx.fetch(sequence_set, *is_uid_copy)?;
|
||||||
|
|
||||||
for mi in mails.iter() {
|
for mi in mails.iter() {
|
||||||
to.move_from(&self.0.mailbox, mi.uuid).await?;
|
to.move_from(&self.0.mailbox, mi.uuid).await?;
|
||||||
|
@ -254,7 +257,8 @@ impl MailboxView {
|
||||||
true => QueryScope::Full,
|
true => QueryScope::Full,
|
||||||
_ => QueryScope::Partial,
|
_ => QueryScope::Partial,
|
||||||
};
|
};
|
||||||
let mail_idx_list = self.index().fetch(sequence_set, *is_uid_fetch)?;
|
let idx = self.index()?;
|
||||||
|
let mail_idx_list = idx.fetch(sequence_set, *is_uid_fetch)?;
|
||||||
|
|
||||||
// [2/6] Fetch the emails
|
// [2/6] Fetch the emails
|
||||||
let uuids = mail_idx_list
|
let uuids = mail_idx_list
|
||||||
|
@ -316,7 +320,8 @@ impl MailboxView {
|
||||||
let (seq_set, seq_type) = crit.to_sequence_set();
|
let (seq_set, seq_type) = crit.to_sequence_set();
|
||||||
|
|
||||||
// 2. Get the selection
|
// 2. Get the selection
|
||||||
let selection = self.index().fetch(&seq_set, seq_type.is_uid())?;
|
let idx = self.index()?;
|
||||||
|
let selection = idx.fetch(&seq_set, seq_type.is_uid())?;
|
||||||
|
|
||||||
// 3. Filter the selection based on the ID / UID / Flags
|
// 3. Filter the selection based on the ID / UID / Flags
|
||||||
let (kept_idx, to_fetch) = crit.filter_on_idx(&selection);
|
let (kept_idx, to_fetch) = crit.filter_on_idx(&selection);
|
||||||
|
@ -341,8 +346,12 @@ impl MailboxView {
|
||||||
}
|
}
|
||||||
|
|
||||||
// ----
|
// ----
|
||||||
fn index<'a>(&'a self) -> Index<'a> {
|
/// @FIXME index should be stored for longer than a single request
|
||||||
Index(&self.0.snapshot)
|
/// Instead it should be tied to the FrozenMailbox refresh.
|
||||||
|
/// It's not trivial to refactor the code to do that, so we are doing
|
||||||
|
/// some useless computation for now...
|
||||||
|
fn index<'a>(&'a self) -> Result<Index<'a>> {
|
||||||
|
Index::new(&self.0.snapshot)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Produce an OK [UIDVALIDITY _] message corresponding to `known_state`
|
/// Produce an OK [UIDVALIDITY _] message corresponding to `known_state`
|
||||||
|
|
|
@ -117,13 +117,13 @@ impl<'a> Criteria<'a> {
|
||||||
/// fetching some remote data
|
/// fetching some remote data
|
||||||
pub fn filter_on_idx<'b>(
|
pub fn filter_on_idx<'b>(
|
||||||
&self,
|
&self,
|
||||||
midx_list: &[MailIndex<'b>],
|
midx_list: &[&'b MailIndex<'b>],
|
||||||
) -> (Vec<MailIndex<'b>>, Vec<MailIndex<'b>>) {
|
) -> (Vec<&'b MailIndex<'b>>, Vec<&'b MailIndex<'b>>) {
|
||||||
let (p1, p2): (Vec<_>, Vec<_>) = midx_list
|
let (p1, p2): (Vec<_>, Vec<_>) = midx_list
|
||||||
.iter()
|
.iter()
|
||||||
.map(|x| (x, self.is_keep_on_idx(x)))
|
.map(|x| (x, self.is_keep_on_idx(x)))
|
||||||
.filter(|(_midx, decision)| decision.is_keep())
|
.filter(|(_midx, decision)| decision.is_keep())
|
||||||
.map(|(midx, decision)| ((*midx).clone(), decision))
|
.map(|(midx, decision)| (*midx, decision))
|
||||||
.partition(|(_midx, decision)| matches!(decision, PartialDecision::Keep));
|
.partition(|(_midx, decision)| matches!(decision, PartialDecision::Keep));
|
||||||
|
|
||||||
let to_keep = p1.into_iter().map(|(v, _)| v).collect();
|
let to_keep = p1.into_iter().map(|(v, _)| v).collect();
|
||||||
|
@ -133,13 +133,13 @@ impl<'a> Criteria<'a> {
|
||||||
|
|
||||||
pub fn filter_on_query<'b>(
|
pub fn filter_on_query<'b>(
|
||||||
&self,
|
&self,
|
||||||
midx_list: &[MailIndex<'b>],
|
midx_list: &[&'b MailIndex<'b>],
|
||||||
query_result: &'b Vec<QueryResult<'b>>,
|
query_result: &'b Vec<QueryResult<'b>>,
|
||||||
) -> Result<Vec<MailIndex<'b>>> {
|
) -> Result<Vec<&'b MailIndex<'b>>> {
|
||||||
Ok(midx_list
|
Ok(midx_list
|
||||||
.iter()
|
.iter()
|
||||||
.zip(query_result.iter())
|
.zip(query_result.iter())
|
||||||
.map(|(midx, qr)| MailView::new(qr, midx.clone()))
|
.map(|(midx, qr)| MailView::new(qr, midx))
|
||||||
.collect::<Result<Vec<_>, _>>()?
|
.collect::<Result<Vec<_>, _>>()?
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter(|mail_view| self.is_keep_on_query(mail_view))
|
.filter(|mail_view| self.is_keep_on_query(mail_view))
|
||||||
|
|
|
@ -11,8 +11,6 @@ use super::unique_ident::UniqueIdent;
|
||||||
/// state that is desynchronized with the real mailbox state.
|
/// state that is desynchronized with the real mailbox state.
|
||||||
/// It's up to the user to choose when their snapshot must be updated
|
/// It's up to the user to choose when their snapshot must be updated
|
||||||
/// to give useful information to their clients
|
/// to give useful information to their clients
|
||||||
///
|
|
||||||
///
|
|
||||||
pub struct FrozenMailbox {
|
pub struct FrozenMailbox {
|
||||||
pub mailbox: Arc<Mailbox>,
|
pub mailbox: Arc<Mailbox>,
|
||||||
pub snapshot: UidIndex,
|
pub snapshot: UidIndex,
|
||||||
|
|
|
@ -25,6 +25,8 @@ if args.tls:
|
||||||
else:
|
else:
|
||||||
imap = IMAP4
|
imap = IMAP4
|
||||||
|
|
||||||
|
|
||||||
|
print(args)
|
||||||
with imap(host=args.host, port=args.port) as M:
|
with imap(host=args.host, port=args.port) as M:
|
||||||
print(M.login(args.user, args.password))
|
print(M.login(args.user, args.password))
|
||||||
print(M.select(args.mailbox))
|
print(M.select(args.mailbox))
|
||||||
|
|
Loading…
Reference in a new issue