Implement search #61

Merged
quentin merged 13 commits from feat/search into main 2024-01-08 10:39:26 +00:00
7 changed files with 254 additions and 60 deletions
Showing only changes of commit f58904f5bb - Show all commits

View file

@ -111,15 +111,17 @@ impl<'a> ExaminedContext<'a> {
pub async fn search( pub async fn search(
self, self,
_charset: &Option<Charset<'a>>, charset: &Option<Charset<'a>>,
_criteria: &SearchKey<'a>, criteria: &SearchKey<'a>,
_uid: &bool, uid: &bool,
) -> Result<(Response<'static>, flow::Transition)> { ) -> Result<(Response<'static>, flow::Transition)> {
let found = self.mailbox.search(charset, criteria, *uid).await?;
Ok(( Ok((
Response::build() Response::build()
.to_req(self.req) .to_req(self.req)
.message("Not implemented") .set_body(found)
.bad()?, .message("SEARCH completed")
.ok()?,
flow::Transition::None, flow::Transition::None,
)) ))
} }

View file

@ -1,7 +1,7 @@
use std::num::NonZeroU32; use std::num::NonZeroU32;
use anyhow::{anyhow, bail, Result}; use anyhow::{anyhow, bail, Result};
use imap_codec::imap_types::sequence::{self, SequenceSet}; use imap_codec::imap_types::sequence::{self, Sequence, SequenceSet, SeqOrUid};
use crate::mail::uidindex::{ImapUid, UidIndex}; use crate::mail::uidindex::{ImapUid, UidIndex};
use crate::mail::unique_ident::UniqueIdent; use crate::mail::unique_ident::UniqueIdent;
@ -87,9 +87,47 @@ impl<'a> Index<'a> {
} }
} }
#[derive(Clone)]
pub struct MailIndex<'a> { pub struct MailIndex<'a> {
pub i: NonZeroU32, pub i: NonZeroU32,
pub uid: ImapUid, pub uid: ImapUid,
pub uuid: UniqueIdent, pub uuid: UniqueIdent,
pub flags: &'a Vec<String>, pub flags: &'a Vec<String>,
} }
impl<'a> MailIndex<'a> {
    // Predicates used by the SEARCH command to test a single mail.

    /// Tells whether this mail's sequence number (`i`) is covered by `seq`.
    ///
    /// A range is accepted with its bounds in either order.
    ///
    /// NOTE(review): a bare `*` (alone, or as `*:*`) matches every mail here, and
    /// `x:*` / `*:x` match any mail at or above `x`. Presumably `*` should resolve to
    /// the highest number in the mailbox, which is not known at this level; confirm.
    pub fn is_in_sequence_i(&self, seq: &Sequence) -> bool {
        let i = &self.i;
        match seq {
            Sequence::Single(SeqOrUid::Value(target)) => target == i,
            Sequence::Single(SeqOrUid::Asterisk)
            | Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Asterisk) => true,
            Sequence::Range(SeqOrUid::Value(bound), SeqOrUid::Asterisk)
            | Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Value(bound)) => bound <= i,
            Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => {
                // Normalize the bounds so that the test reads low <= i <= high.
                let (low, high) = if x1 < x2 { (x1, x2) } else { (x2, x1) };
                low <= i && i <= high
            }
        }
    }

    /// Tells whether this mail's UID is covered by `seq`.
    ///
    /// Same rules as [`Self::is_in_sequence_i`], applied to `uid` instead of `i`.
    pub fn is_in_sequence_uid(&self, seq: &Sequence) -> bool {
        let uid = &self.uid;
        match seq {
            Sequence::Single(SeqOrUid::Value(target)) => target == uid,
            Sequence::Single(SeqOrUid::Asterisk)
            | Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Asterisk) => true,
            Sequence::Range(SeqOrUid::Value(bound), SeqOrUid::Asterisk)
            | Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Value(bound)) => bound <= uid,
            Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => {
                let (low, high) = if x1 < x2 { (x1, x2) } else { (x2, x1) };
                low <= uid && uid <= high
            }
        }
    }

    /// Tells whether `flag` is one of this mail's flags (exact, case-sensitive comparison).
    pub fn is_flag_set(&self, flag: &str) -> bool {
        self.flags
            .iter()
            .map(String::as_str)
            .any(|candidate| candidate == flag)
    }
}

View file

@ -52,6 +52,44 @@ impl<'a> MailView<'a> {
}) })
} }
/// Builds the FETCH data for this mail from the requested attributes.
///
/// Every entry of `ap.attrs` is converted, in order, to its data item; the first
/// conversion that fails aborts the whole call with that error.
///
/// Returns the `Data::Fetch` body (keyed by this mail's sequence number) together
/// with the `SeenFlag` reported by the last `BodyExt` attribute processed, or
/// `SeenFlag::DoNothing` when no `BodyExt` attribute was requested.
pub fn filter(&self, ap: &AttributesProxy) -> Result<(Body<'static>, SeenFlag)> {
    let mut seen = SeenFlag::DoNothing;
    let mut items = Vec::new();

    for attr in ap.attrs.iter() {
        let item = match attr {
            MessageDataItemName::Uid => self.uid(),
            MessageDataItemName::Flags => self.flags(),
            MessageDataItemName::Rfc822Size => self.rfc_822_size()?,
            MessageDataItemName::Rfc822Header => self.rfc_822_header()?,
            MessageDataItemName::Rfc822Text => self.rfc_822_text()?,
            MessageDataItemName::Rfc822 => self.rfc822()?,
            MessageDataItemName::Envelope => self.envelope(),
            MessageDataItemName::Body => self.body()?,
            MessageDataItemName::BodyStructure => self.body_structure()?,
            MessageDataItemName::BodyExt {
                section,
                partial,
                peek,
            } => {
                // Only this attribute can change what must be done with the seen flag.
                let (body, has_seen) = self.body_ext(section, partial, peek)?;
                seen = has_seen;
                body
            }
            MessageDataItemName::InternalDate => self.internal_date()?,
        };
        items.push(item);
    }

    let fetch = Data::Fetch {
        seq: self.in_idx.i,
        items: items.try_into()?,
    };
    Ok((Body::Data(fetch), seen))
}
// Private function, mainly for filter!
fn uid(&self) -> MessageDataItem<'static> { fn uid(&self) -> MessageDataItem<'static> {
MessageDataItem::Uid(self.in_idx.uid.clone()) MessageDataItem::Uid(self.in_idx.uid.clone())
} }
@ -168,42 +206,6 @@ impl<'a> MailView<'a> {
Ok(MessageDataItem::InternalDate(DateTime::unvalidated(dt))) Ok(MessageDataItem::InternalDate(DateTime::unvalidated(dt)))
} }
pub fn filter(&self, ap: &AttributesProxy) -> Result<(Body<'static>, SeenFlag)> {
let mut seen = SeenFlag::DoNothing;
let res_attrs = ap
.attrs
.iter()
.map(|attr| match attr {
MessageDataItemName::Uid => Ok(self.uid()),
MessageDataItemName::Flags => Ok(self.flags()),
MessageDataItemName::Rfc822Size => self.rfc_822_size(),
MessageDataItemName::Rfc822Header => self.rfc_822_header(),
MessageDataItemName::Rfc822Text => self.rfc_822_text(),
MessageDataItemName::Rfc822 => self.rfc822(),
MessageDataItemName::Envelope => Ok(self.envelope()),
MessageDataItemName::Body => self.body(),
MessageDataItemName::BodyStructure => self.body_structure(),
MessageDataItemName::BodyExt {
section,
partial,
peek,
} => {
let (body, has_seen) = self.body_ext(section, partial, peek)?;
seen = has_seen;
Ok(body)
}
MessageDataItemName::InternalDate => self.internal_date(),
})
.collect::<Result<Vec<_>, _>>()?;
Ok((
Body::Data(Data::Fetch {
seq: self.in_idx.i,
items: res_attrs.try_into()?,
}),
seen,
))
}
} }
pub enum SeenFlag { pub enum SeenFlag {

View file

@ -319,12 +319,18 @@ impl MailboxView {
let selection = self.index().fetch(&seq_set, seq_type.is_uid())?; let selection = self.index().fetch(&seq_set, seq_type.is_uid())?;
// 3. Filter the selection based on the ID / UID / Flags // 3. Filter the selection based on the ID / UID / Flags
let selection = crit.filter_on_idx(&selection);
// 4. If needed, filter the selection based on the metadata // 4. Fetch additional info about the emails
let _need_meta = crit.need_meta(); let query_scope = crit.query_scope();
let uuids = selection
.iter()
.map(|midx| midx.uuid)
.collect::<Vec<_>>();
let query_result = self.0.query(&uuids, query_scope).fetch().await?;
// 5. If needed, filter the selection based on the body // 5. If needed, filter the selection based on the body
let _need_body = crit.need_body(); let selection = crit.filter_on_query(&selection, &query_result);
// 6. Format the result according to the client's taste: // 6. Format the result according to the client's taste:
// either return UID or ID. // either return UID or ID.

View file

@ -3,6 +3,9 @@ use imap_codec::imap_types::search::SearchKey;
use imap_codec::imap_types::sequence::{SeqOrUid, Sequence, SequenceSet}; use imap_codec::imap_types::sequence::{SeqOrUid, Sequence, SequenceSet};
use std::num::NonZeroU32; use std::num::NonZeroU32;
use crate::mail::query::{QueryScope, QueryResult};
use crate::imap::index::MailIndex;
pub enum SeqType { pub enum SeqType {
Undefined, Undefined,
NonUid, NonUid,
@ -54,6 +57,10 @@ impl<'a> Criteria<'a> {
tracing::debug!( tracing::debug!(
"using AND in a search request is slow: no intersection is performed" "using AND in a search request is slow: no intersection is performed"
); );
// As we perform no intersection, we don't care whether we mix UIDs and IDs.
// We only keep the smallest range, be it ID or UID, depending on
// which one has the fewest items. This is an approximation as UID ranges
// can have holes while ID ones can't.
search_list search_list
.as_ref() .as_ref()
.iter() .iter()
@ -72,31 +79,119 @@ impl<'a> Criteria<'a> {
/// Not really clever as we can have cases where we filter out /// Not really clever as we can have cases where we filter out
/// the email before needing to inspect its meta. /// the email before needing to inspect its meta.
/// But for now we are seeking the most basic/stupid algorithm. /// But for now we are seeking the most basic/stupid algorithm.
pub fn need_meta(&self) -> bool { pub fn query_scope(&self) -> QueryScope {
use SearchKey::*; use SearchKey::*;
match self.0 { match self.0 {
// IMF Headers // IMF Headers
Bcc(_) | Cc(_) | From(_) | Header(..) | SentBefore(_) | SentOn(_) | SentSince(_) Bcc(_) | Cc(_) | From(_) | Header(..) | SentBefore(_) | SentOn(_) | SentSince(_)
| Subject(_) | To(_) => true, | Subject(_) | To(_) => QueryScope::Partial,
// Internal Date is also stored in MailMeta // Internal Date is also stored in MailMeta
Before(_) | On(_) | Since(_) => true, Before(_) | On(_) | Since(_) => QueryScope::Partial,
// Message size is also stored in MailMeta // Message size is also stored in MailMeta
Larger(_) | Smaller(_) => true, Larger(_) | Smaller(_) => QueryScope::Partial,
And(and_list) => and_list.as_ref().iter().any(|sk| Criteria(sk).need_meta()), // Text and Body require that we fetch the full content!
Not(inner) => Criteria(inner).need_meta(), Text(_) | Body(_) => QueryScope::Full,
Or(left, right) => Criteria(left).need_meta() || Criteria(right).need_meta(), And(and_list) => and_list.as_ref().iter().fold(QueryScope::Index, |prev, sk| {
_ => false, prev.union(&Criteria(sk).query_scope())
}),
Not(inner) => Criteria(inner).query_scope(),
Or(left, right) => Criteria(left).query_scope().union(&Criteria(right).query_scope()),
_ => QueryScope::Index,
} }
} }
pub fn need_body(&self) -> bool { pub fn filter_on_idx<'b>(&self, midx_list: &[MailIndex<'b>]) -> Vec<MailIndex<'b>> {
midx_list
.iter()
.filter(|x| self.is_keep_on_idx(x).is_keep())
.map(|x| (*x).clone())
.collect::<Vec<_>>()
}
/// Second filtering pass over `midx_list`, presumably meant to apply the criteria
/// that need the fetched `query_result` (headers, dates, sizes, bodies) — TODO confirm
/// once implemented.
///
/// # Panics
///
/// Always: the body is still a bare `unimplemented!()`, so any call aborts the
/// current task with a panic.
pub fn filter_on_query(&self, midx_list: &[MailIndex], query_result: &Vec<QueryResult<'_>>) -> Vec<MailIndex> {
unimplemented!();
}
// ----
/// Here we are doing a partial filtering: we do not have access
/// to the headers or to the body, so every time we encounter a rule
/// based on them, we need to keep it.
///
/// @TODO Could be optimized on a per-email basis by also returning the QueryScope
/// when more information is needed!
fn is_keep_on_idx(&self, midx: &MailIndex) -> PartialDecision {
use SearchKey::*; use SearchKey::*;
match self.0 { match self.0 {
Text(_) | Body(_) => true, // Combinator logic
And(and_list) => and_list.as_ref().iter().any(|sk| Criteria(sk).need_body()), And(expr_list) => expr_list
Not(inner) => Criteria(inner).need_body(), .as_ref()
Or(left, right) => Criteria(left).need_body() || Criteria(right).need_body(), .iter()
_ => false, .fold(PartialDecision::Keep, |acc, cur| acc.and(&Criteria(cur).is_keep_on_idx(midx))),
Or(left, right) => {
let left_decision = Criteria(left).is_keep_on_idx(midx);
let right_decision = Criteria(right).is_keep_on_idx(midx);
left_decision.or(&right_decision)
}
Not(expr) => Criteria(expr).is_keep_on_idx(midx).not(),
All => PartialDecision::Keep,
// Sequence logic
SequenceSet(seq_set) => seq_set.0.as_ref().iter().fold(PartialDecision::Discard, |acc, seq| {
let local_decision: PartialDecision = midx.is_in_sequence_i(seq).into();
acc.or(&local_decision)
}),
Uid(seq_set) => seq_set.0.as_ref().iter().fold(PartialDecision::Discard, |acc, seq| {
let local_decision: PartialDecision = midx.is_in_sequence_uid(seq).into();
acc.or(&local_decision)
}),
// Flag logic
Answered => midx.is_flag_set("\\Answered").into(),
Deleted => midx.is_flag_set("\\Deleted").into(),
Draft => midx.is_flag_set("\\Draft").into(),
Flagged => midx.is_flag_set("\\Flagged").into(),
Keyword(kw) => midx.is_flag_set(kw.inner()).into(),
New => {
let is_recent: PartialDecision = midx.is_flag_set("\\Recent").into();
let is_seen: PartialDecision = midx.is_flag_set("\\Seen").into();
is_recent.and(&is_seen.not())
},
Old => {
let is_recent: PartialDecision = midx.is_flag_set("\\Recent").into();
is_recent.not()
},
Recent => midx.is_flag_set("\\Recent").into(),
Seen => midx.is_flag_set("\\Seen").into(),
Unanswered => {
let is_answered: PartialDecision = midx.is_flag_set("\\Recent").into();
is_answered.not()
},
Undeleted => {
let is_deleted: PartialDecision = midx.is_flag_set("\\Deleted").into();
is_deleted.not()
},
Undraft => {
let is_draft: PartialDecision = midx.is_flag_set("\\Draft").into();
is_draft.not()
},
Unflagged => {
let is_flagged: PartialDecision = midx.is_flag_set("\\Flagged").into();
is_flagged.not()
},
Unkeyword(kw) => {
let is_keyword_set: PartialDecision = midx.is_flag_set(kw.inner()).into();
is_keyword_set.not()
},
Unseen => {
let is_seen: PartialDecision = midx.is_flag_set("\\Seen").into();
is_seen.not()
},
// All the stuff we can't evaluate yet
Bcc(_) | Cc(_) | From(_) | Header(..) | SentBefore(_) | SentOn(_) | SentSince(_)
| Subject(_) | To(_) | Before(_) | On(_) | Since(_) | Larger(_) | Smaller(_)
| Text(_) | Body(_) => PartialDecision::Postpone,
} }
} }
} }
@ -128,3 +223,46 @@ fn approx_sequence_size(seq: &Sequence) -> u64 {
} }
} }
} }
/// Outcome of evaluating a search criterion with incomplete information.
///
/// This is a three-valued logic: a criterion is either known to match (`Keep`),
/// known not to match (`Discard`), or cannot be decided yet (`Postpone`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PartialDecision {
    /// The criterion is known to match.
    Keep,
    /// The criterion is known not to match.
    Discard,
    /// The criterion cannot be decided with the information at hand.
    Postpone,
}

impl From<bool> for PartialDecision {
    /// A fully evaluated test: `true` is `Keep`, `false` is `Discard`.
    fn from(x: bool) -> Self {
        if x {
            PartialDecision::Keep
        } else {
            PartialDecision::Discard
        }
    }
}

impl PartialDecision {
    /// Logical negation; an undecided criterion stays undecided.
    fn not(&self) -> Self {
        match self {
            Self::Keep => Self::Discard,
            Self::Discard => Self::Keep,
            Self::Postpone => Self::Postpone,
        }
    }

    /// Logical disjunction.
    ///
    /// One matching operand is enough to decide `Keep`, whatever the other one is;
    /// the result is only `Postpone` when no operand matches and at least one is
    /// still undecided.
    fn or(&self, other: &Self) -> Self {
        match (self, other) {
            (Self::Keep, _) | (_, Self::Keep) => Self::Keep,
            (Self::Postpone, _) | (_, Self::Postpone) => Self::Postpone,
            (Self::Discard, Self::Discard) => Self::Discard,
        }
    }

    /// Logical conjunction.
    ///
    /// One failing operand is enough to decide `Discard`, whatever the other one is;
    /// the result is only `Postpone` when no operand fails and at least one is
    /// still undecided.
    fn and(&self, other: &Self) -> Self {
        match (self, other) {
            (Self::Discard, _) | (_, Self::Discard) => Self::Discard,
            (Self::Postpone, _) | (_, Self::Postpone) => Self::Postpone,
            (Self::Keep, Self::Keep) => Self::Keep,
        }
    }

    /// True unless the decision is `Discard`: an undecided mail must stay in the
    /// selection so that it can be examined later.
    fn is_keep(&self) -> bool {
        !matches!(self, Self::Discard)
    }
}

View file

@ -1,5 +1,4 @@
use std::convert::TryFrom; use std::convert::TryFrom;
use std::io::Write;
pub mod incoming; pub mod incoming;
pub mod mailbox; pub mod mailbox;

View file

@ -19,6 +19,15 @@ pub enum QueryScope {
Partial, Partial,
Full, Full,
} }
impl QueryScope {
    /// Returns the widest of the two scopes, so that one query satisfies both needs.
    ///
    /// `Full` wins over everything, then `Partial`; the result is `Index` only when
    /// both operands are `Index`.
    pub fn union(&self, other: &QueryScope) -> QueryScope {
        let either_is_full =
            matches!(self, QueryScope::Full) || matches!(other, QueryScope::Full);
        let either_is_partial =
            matches!(self, QueryScope::Partial) || matches!(other, QueryScope::Partial);

        if either_is_full {
            QueryScope::Full
        } else if either_is_partial {
            QueryScope::Partial
        } else {
            QueryScope::Index
        }
    }
}
impl<'a, 'b> Query<'a, 'b> { impl<'a, 'b> Query<'a, 'b> {
pub async fn fetch(&self) -> Result<Vec<QueryResult<'a>>> { pub async fn fetch(&self) -> Result<Vec<QueryResult<'a>>> {