Implement search #61

Merged
quentin merged 13 commits from feat/search into main 2024-01-08 10:39:26 +00:00
5 changed files with 175 additions and 105 deletions
Showing only changes of commit 1d84b0ffd0 - Show all commits

View file

@ -1,7 +1,7 @@
use std::num::NonZeroU32;
use anyhow::{anyhow, bail, Result};
use imap_codec::imap_types::sequence::{self, Sequence, SequenceSet, SeqOrUid};
use imap_codec::imap_types::sequence::{self, SeqOrUid, Sequence, SequenceSet};
use crate::mail::uidindex::{ImapUid, UidIndex};
use crate::mail::unique_ident::UniqueIdent;
@ -103,11 +103,13 @@ impl<'a> MailIndex<'a> {
Sequence::Single(SeqOrUid::Value(target)) => target == &self.i,
Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Value(x))
| Sequence::Range(SeqOrUid::Value(x), SeqOrUid::Asterisk) => x <= &self.i,
Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => if x1 < x2 {
Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => {
if x1 < x2 {
x1 <= &self.i && &self.i <= x2
} else {
x1 >= &self.i && &self.i >= x2
},
}
}
Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Asterisk) => true,
}
}
@ -117,17 +119,21 @@ impl<'a> MailIndex<'a> {
Sequence::Single(SeqOrUid::Asterisk) => true,
Sequence::Single(SeqOrUid::Value(target)) => target == &self.uid,
Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Value(x))
| Sequence::Range(SeqOrUid::Value(x),SeqOrUid::Asterisk) => x <= &self.uid,
Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => if x1 < x2 {
| Sequence::Range(SeqOrUid::Value(x), SeqOrUid::Asterisk) => x <= &self.uid,
Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => {
if x1 < x2 {
x1 <= &self.uid && &self.uid <= x2
} else {
x1 >= &self.uid && &self.uid >= x2
},
}
}
Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Asterisk) => true,
}
}
pub fn is_flag_set(&self, flag: &str) -> bool {
self.flags.iter().any(|candidate| candidate.as_str() == flag)
self.flags
.iter()
.any(|candidate| candidate.as_str() == flag)
}
}

View file

@ -1,7 +1,7 @@
use std::num::NonZeroU32;
use anyhow::{anyhow, bail, Result};
use chrono::{Offset, TimeZone, Utc, DateTime as ChronoDateTime, Local, naive::NaiveDate};
use chrono::{naive::NaiveDate, DateTime as ChronoDateTime, Local, Offset, TimeZone, Utc};
use imap_codec::imap_types::core::NString;
use imap_codec::imap_types::datetime::DateTime;
@ -167,7 +167,11 @@ impl<'a> MailView<'a> {
}
fn envelope(&self) -> MessageDataItem<'static> {
MessageDataItem::Envelope(self.imf().expect("an imf object is derivable from fetchedmail").message_envelope())
MessageDataItem::Envelope(
self.imf()
.expect("an imf object is derivable from fetchedmail")
.message_envelope(),
)
}
fn body(&self) -> Result<MessageDataItem<'static>> {
@ -237,7 +241,6 @@ impl<'a> MailView<'a> {
.ok_or(anyhow!("Unable to parse internal date"))?;
Ok(MessageDataItem::InternalDate(DateTime::unvalidated(dt)))
}
}
pub enum SeenFlag {

View file

@ -323,10 +323,7 @@ impl MailboxView {
// 4. Fetch additional info about the emails
let query_scope = crit.query_scope();
let uuids = to_fetch
.iter()
.map(|midx| midx.uuid)
.collect::<Vec<_>>();
let uuids = to_fetch.iter().map(|midx| midx.uuid).collect::<Vec<_>>();
let query_result = self.0.query(&uuids, query_scope).fetch().await?;
// 5. If needed, filter the selection based on the body

View file

@ -5,9 +5,9 @@ use imap_codec::imap_types::core::NonEmptyVec;
use imap_codec::imap_types::search::SearchKey;
use imap_codec::imap_types::sequence::{SeqOrUid, Sequence, SequenceSet};
use crate::mail::query::{QueryScope, QueryResult};
use crate::imap::index::MailIndex;
use crate::imap::mail_view::MailView;
use crate::mail::query::{QueryResult, QueryScope};
pub enum SeqType {
Undefined,
@ -20,7 +20,6 @@ impl SeqType {
}
}
pub struct Criteria<'a>(pub &'a SearchKey<'a>);
impl<'a> Criteria<'a> {
/// Returns a set of email identifiers that is greater or equal
@ -87,11 +86,16 @@ impl<'a> Criteria<'a> {
use SearchKey::*;
match self.0 {
// Combinators
And(and_list) => and_list.as_ref().iter().fold(QueryScope::Index, |prev, sk| {
And(and_list) => and_list
.as_ref()
.iter()
.fold(QueryScope::Index, |prev, sk| {
prev.union(&Criteria(sk).query_scope())
}),
Not(inner) => Criteria(inner).query_scope(),
Or(left, right) => Criteria(left).query_scope().union(&Criteria(right).query_scope()),
Or(left, right) => Criteria(left)
.query_scope()
.union(&Criteria(right).query_scope()),
All => QueryScope::Index,
// IMF Headers
@ -111,7 +115,10 @@ impl<'a> Criteria<'a> {
/// Returns emails that we now for sure we want to keep
/// but also a second list of emails we need to investigate further by
/// fetching some remote data
pub fn filter_on_idx<'b>(&self, midx_list: &[MailIndex<'b>]) -> (Vec<MailIndex<'b>>, Vec<MailIndex<'b>>) {
pub fn filter_on_idx<'b>(
&self,
midx_list: &[MailIndex<'b>],
) -> (Vec<MailIndex<'b>>, Vec<MailIndex<'b>>) {
let (p1, p2): (Vec<_>, Vec<_>) = midx_list
.iter()
.map(|x| (x, self.is_keep_on_idx(x)))
@ -124,7 +131,11 @@ impl<'a> Criteria<'a> {
(to_keep, to_fetch)
}
pub fn filter_on_query<'b>(&self, midx_list: &[MailIndex<'b>], query_result: &'b Vec<QueryResult<'b>>) -> Result<Vec<MailIndex<'b>>> {
pub fn filter_on_query<'b>(
&self,
midx_list: &[MailIndex<'b>],
query_result: &'b Vec<QueryResult<'b>>,
) -> Result<Vec<MailIndex<'b>>> {
Ok(midx_list
.iter()
.zip(query_result.iter())
@ -151,7 +162,9 @@ impl<'a> Criteria<'a> {
And(expr_list) => expr_list
.as_ref()
.iter()
.fold(PartialDecision::Keep, |acc, cur| acc.and(&Criteria(cur).is_keep_on_idx(midx))),
.fold(PartialDecision::Keep, |acc, cur| {
acc.and(&Criteria(cur).is_keep_on_idx(midx))
}),
Or(left, right) => {
let left_decision = Criteria(left).is_keep_on_idx(midx);
let right_decision = Criteria(right).is_keep_on_idx(midx);
@ -172,10 +185,9 @@ impl<'a> Criteria<'a> {
unknown => {
tracing::error!("Unknown filter {:?}", unknown);
PartialDecision::Discard
},
}
}
}
/// @TODO we re-eveluate twice the same logic. The correct way would be, on each pass,
/// to simplify the searck query, by removing the elements that were already checked.
@ -192,7 +204,8 @@ impl<'a> Criteria<'a> {
.iter()
.all(|cur| Criteria(cur).is_keep_on_query(mail_view)),
Or(left, right) => {
Criteria(left).is_keep_on_query(mail_view) || Criteria(right).is_keep_on_query(mail_view)
Criteria(left).is_keep_on_query(mail_view)
|| Criteria(right).is_keep_on_query(mail_view)
}
Not(expr) => !Criteria(expr).is_keep_on_query(mail_view),
All => true,
@ -216,31 +229,75 @@ impl<'a> Criteria<'a> {
},
// Message size is also stored in MailMeta
Larger(size_ref) => mail_view.query_result.metadata().expect("metadata were fetched").rfc822_size > *size_ref as usize,
Smaller(size_ref) => mail_view.query_result.metadata().expect("metadata were fetched").rfc822_size < *size_ref as usize,
Larger(size_ref) => {
mail_view
.query_result
.metadata()
.expect("metadata were fetched")
.rfc822_size
> *size_ref as usize
}
Smaller(size_ref) => {
mail_view
.query_result
.metadata()
.expect("metadata were fetched")
.rfc822_size
< *size_ref as usize
}
// Filter on well-known headers
Bcc(txt) => mail_view.is_header_contains_pattern(&b"bcc"[..], txt.as_ref()),
Cc(txt) => mail_view.is_header_contains_pattern(&b"cc"[..], txt.as_ref()),
From(txt) => mail_view.is_header_contains_pattern(&b"from"[..], txt.as_ref()),
Subject(txt)=> mail_view.is_header_contains_pattern(&b"subject"[..], txt.as_ref()),
Subject(txt) => mail_view.is_header_contains_pattern(&b"subject"[..], txt.as_ref()),
To(txt) => mail_view.is_header_contains_pattern(&b"to"[..], txt.as_ref()),
Header(hdr, txt) => mail_view.is_header_contains_pattern(hdr.as_ref(), txt.as_ref()),
// Filter on Date header
SentBefore(search_naive) => mail_view.imf().map(|imf| imf.naive_date().ok()).flatten().map(|msg_naive| &msg_naive < search_naive.as_ref()).unwrap_or(false),
SentOn(search_naive) => mail_view.imf().map(|imf| imf.naive_date().ok()).flatten().map(|msg_naive| &msg_naive == search_naive.as_ref()).unwrap_or(false),
SentSince(search_naive) => mail_view.imf().map(|imf| imf.naive_date().ok()).flatten().map(|msg_naive| &msg_naive > search_naive.as_ref()).unwrap_or(false),
SentBefore(search_naive) => mail_view
.imf()
.map(|imf| imf.naive_date().ok())
.flatten()
.map(|msg_naive| &msg_naive < search_naive.as_ref())
.unwrap_or(false),
SentOn(search_naive) => mail_view
.imf()
.map(|imf| imf.naive_date().ok())
.flatten()
.map(|msg_naive| &msg_naive == search_naive.as_ref())
.unwrap_or(false),
SentSince(search_naive) => mail_view
.imf()
.map(|imf| imf.naive_date().ok())
.flatten()
.map(|msg_naive| &msg_naive > search_naive.as_ref())
.unwrap_or(false),
// Filter on the full content of the email
Text(txt) => mail_view.content.as_msg().map(|msg| msg.raw_part.windows(txt.as_ref().len()).any(|win| win == txt.as_ref())).unwrap_or(false),
Body(txt) => mail_view.content.as_msg().map(|msg| msg.raw_body.windows(txt.as_ref().len()).any(|win| win == txt.as_ref())).unwrap_or(false),
Text(txt) => mail_view
.content
.as_msg()
.map(|msg| {
msg.raw_part
.windows(txt.as_ref().len())
.any(|win| win == txt.as_ref())
})
.unwrap_or(false),
Body(txt) => mail_view
.content
.as_msg()
.map(|msg| {
msg.raw_body
.windows(txt.as_ref().len())
.any(|win| win == txt.as_ref())
})
.unwrap_or(false),
unknown => {
tracing::error!("Unknown filter {:?}", unknown);
false
},
}
}
}
}
@ -323,9 +380,8 @@ impl PartialDecision {
fn is_sk_flag(sk: &SearchKey) -> bool {
use SearchKey::*;
match sk {
Answered | Deleted | Draft | Flagged | Keyword(..) | New | Old
| Recent | Seen | Unanswered | Undeleted | Undraft
| Unflagged | Unkeyword(..) | Unseen => true,
Answered | Deleted | Draft | Flagged | Keyword(..) | New | Old | Recent | Seen
| Unanswered | Undeleted | Undraft | Unflagged | Unkeyword(..) | Unseen => true,
_ => false,
}
}
@ -342,37 +398,37 @@ fn is_keep_flag(sk: &SearchKey, midx: &MailIndex) -> bool {
let is_recent = midx.is_flag_set("\\Recent");
let is_seen = midx.is_flag_set("\\Seen");
is_recent && !is_seen
},
}
Old => {
let is_recent = midx.is_flag_set("\\Recent");
!is_recent
},
}
Recent => midx.is_flag_set("\\Recent"),
Seen => midx.is_flag_set("\\Seen"),
Unanswered => {
let is_answered = midx.is_flag_set("\\Recent");
!is_answered
},
}
Undeleted => {
let is_deleted = midx.is_flag_set("\\Deleted");
!is_deleted
},
}
Undraft => {
let is_draft = midx.is_flag_set("\\Draft");
!is_draft
},
}
Unflagged => {
let is_flagged = midx.is_flag_set("\\Flagged");
!is_flagged
},
}
Unkeyword(kw) => {
let is_keyword_set = midx.is_flag_set(kw.inner());
!is_keyword_set
},
}
Unseen => {
let is_seen = midx.is_flag_set("\\Seen");
!is_seen
},
}
// Not flag logic
_ => unreachable!(),
@ -389,8 +445,16 @@ fn is_sk_seq(sk: &SearchKey) -> bool {
fn is_keep_seq(sk: &SearchKey, midx: &MailIndex) -> bool {
use SearchKey::*;
match sk {
SequenceSet(seq_set) => seq_set.0.as_ref().iter().any(|seq| midx.is_in_sequence_i(seq)),
Uid(seq_set) => seq_set.0.as_ref().iter().any(|seq| midx.is_in_sequence_uid(seq)),
SequenceSet(seq_set) => seq_set
.0
.as_ref()
.iter()
.any(|seq| midx.is_in_sequence_i(seq)),
Uid(seq_set) => seq_set
.0
.as_ref()
.iter()
.any(|seq| midx.is_in_sequence_uid(seq)),
_ => unreachable!(),
}
}