Implement search #61

Merged
quentin merged 13 commits from feat/search into main 2024-01-08 10:39:26 +00:00
5 changed files with 175 additions and 105 deletions
Showing only changes of commit 1d84b0ffd0 - Show all commits

View file

@ -1,7 +1,7 @@
use std::num::NonZeroU32; use std::num::NonZeroU32;
use anyhow::{anyhow, bail, Result}; use anyhow::{anyhow, bail, Result};
use imap_codec::imap_types::sequence::{self, Sequence, SequenceSet, SeqOrUid}; use imap_codec::imap_types::sequence::{self, SeqOrUid, Sequence, SequenceSet};
use crate::mail::uidindex::{ImapUid, UidIndex}; use crate::mail::uidindex::{ImapUid, UidIndex};
use crate::mail::unique_ident::UniqueIdent; use crate::mail::unique_ident::UniqueIdent;
@ -103,11 +103,13 @@ impl<'a> MailIndex<'a> {
Sequence::Single(SeqOrUid::Value(target)) => target == &self.i, Sequence::Single(SeqOrUid::Value(target)) => target == &self.i,
Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Value(x)) Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Value(x))
| Sequence::Range(SeqOrUid::Value(x), SeqOrUid::Asterisk) => x <= &self.i, | Sequence::Range(SeqOrUid::Value(x), SeqOrUid::Asterisk) => x <= &self.i,
Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => if x1 < x2 { Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => {
if x1 < x2 {
x1 <= &self.i && &self.i <= x2 x1 <= &self.i && &self.i <= x2
} else { } else {
x1 >= &self.i && &self.i >= x2 x1 >= &self.i && &self.i >= x2
}, }
}
Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Asterisk) => true, Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Asterisk) => true,
} }
} }
@ -118,16 +120,20 @@ impl<'a> MailIndex<'a> {
Sequence::Single(SeqOrUid::Value(target)) => target == &self.uid, Sequence::Single(SeqOrUid::Value(target)) => target == &self.uid,
Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Value(x)) Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Value(x))
| Sequence::Range(SeqOrUid::Value(x), SeqOrUid::Asterisk) => x <= &self.uid, | Sequence::Range(SeqOrUid::Value(x), SeqOrUid::Asterisk) => x <= &self.uid,
Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => if x1 < x2 { Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => {
if x1 < x2 {
x1 <= &self.uid && &self.uid <= x2 x1 <= &self.uid && &self.uid <= x2
} else { } else {
x1 >= &self.uid && &self.uid >= x2 x1 >= &self.uid && &self.uid >= x2
}, }
}
Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Asterisk) => true, Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Asterisk) => true,
} }
} }
pub fn is_flag_set(&self, flag: &str) -> bool { pub fn is_flag_set(&self, flag: &str) -> bool {
self.flags.iter().any(|candidate| candidate.as_str() == flag) self.flags
.iter()
.any(|candidate| candidate.as_str() == flag)
} }
} }

View file

@ -1,7 +1,7 @@
use std::num::NonZeroU32; use std::num::NonZeroU32;
use anyhow::{anyhow, bail, Result}; use anyhow::{anyhow, bail, Result};
use chrono::{Offset, TimeZone, Utc, DateTime as ChronoDateTime, Local, naive::NaiveDate}; use chrono::{naive::NaiveDate, DateTime as ChronoDateTime, Local, Offset, TimeZone, Utc};
use imap_codec::imap_types::core::NString; use imap_codec::imap_types::core::NString;
use imap_codec::imap_types::datetime::DateTime; use imap_codec::imap_types::datetime::DateTime;
@ -167,7 +167,11 @@ impl<'a> MailView<'a> {
} }
fn envelope(&self) -> MessageDataItem<'static> { fn envelope(&self) -> MessageDataItem<'static> {
MessageDataItem::Envelope(self.imf().expect("an imf object is derivable from fetchedmail").message_envelope()) MessageDataItem::Envelope(
self.imf()
.expect("an imf object is derivable from fetchedmail")
.message_envelope(),
)
} }
fn body(&self) -> Result<MessageDataItem<'static>> { fn body(&self) -> Result<MessageDataItem<'static>> {
@ -237,7 +241,6 @@ impl<'a> MailView<'a> {
.ok_or(anyhow!("Unable to parse internal date"))?; .ok_or(anyhow!("Unable to parse internal date"))?;
Ok(MessageDataItem::InternalDate(DateTime::unvalidated(dt))) Ok(MessageDataItem::InternalDate(DateTime::unvalidated(dt)))
} }
} }
pub enum SeenFlag { pub enum SeenFlag {

View file

@ -323,10 +323,7 @@ impl MailboxView {
// 4. Fetch additional info about the emails // 4. Fetch additional info about the emails
let query_scope = crit.query_scope(); let query_scope = crit.query_scope();
let uuids = to_fetch let uuids = to_fetch.iter().map(|midx| midx.uuid).collect::<Vec<_>>();
.iter()
.map(|midx| midx.uuid)
.collect::<Vec<_>>();
let query_result = self.0.query(&uuids, query_scope).fetch().await?; let query_result = self.0.query(&uuids, query_scope).fetch().await?;
// 5. If needed, filter the selection based on the body // 5. If needed, filter the selection based on the body

View file

@ -5,9 +5,9 @@ use imap_codec::imap_types::core::NonEmptyVec;
use imap_codec::imap_types::search::SearchKey; use imap_codec::imap_types::search::SearchKey;
use imap_codec::imap_types::sequence::{SeqOrUid, Sequence, SequenceSet}; use imap_codec::imap_types::sequence::{SeqOrUid, Sequence, SequenceSet};
use crate::mail::query::{QueryScope, QueryResult};
use crate::imap::index::MailIndex; use crate::imap::index::MailIndex;
use crate::imap::mail_view::MailView; use crate::imap::mail_view::MailView;
use crate::mail::query::{QueryResult, QueryScope};
pub enum SeqType { pub enum SeqType {
Undefined, Undefined,
@ -20,7 +20,6 @@ impl SeqType {
} }
} }
pub struct Criteria<'a>(pub &'a SearchKey<'a>); pub struct Criteria<'a>(pub &'a SearchKey<'a>);
impl<'a> Criteria<'a> { impl<'a> Criteria<'a> {
/// Returns a set of email identifiers that is greater or equal /// Returns a set of email identifiers that is greater or equal
@ -87,11 +86,16 @@ impl<'a> Criteria<'a> {
use SearchKey::*; use SearchKey::*;
match self.0 { match self.0 {
// Combinators // Combinators
And(and_list) => and_list.as_ref().iter().fold(QueryScope::Index, |prev, sk| { And(and_list) => and_list
.as_ref()
.iter()
.fold(QueryScope::Index, |prev, sk| {
prev.union(&Criteria(sk).query_scope()) prev.union(&Criteria(sk).query_scope())
}), }),
Not(inner) => Criteria(inner).query_scope(), Not(inner) => Criteria(inner).query_scope(),
Or(left, right) => Criteria(left).query_scope().union(&Criteria(right).query_scope()), Or(left, right) => Criteria(left)
.query_scope()
.union(&Criteria(right).query_scope()),
All => QueryScope::Index, All => QueryScope::Index,
// IMF Headers // IMF Headers
@ -111,7 +115,10 @@ impl<'a> Criteria<'a> {
/// Returns emails that we know for sure we want to keep /// Returns emails that we know for sure we want to keep
/// but also a second list of emails we need to investigate further by /// but also a second list of emails we need to investigate further by
/// fetching some remote data /// fetching some remote data
pub fn filter_on_idx<'b>(&self, midx_list: &[MailIndex<'b>]) -> (Vec<MailIndex<'b>>, Vec<MailIndex<'b>>) { pub fn filter_on_idx<'b>(
&self,
midx_list: &[MailIndex<'b>],
) -> (Vec<MailIndex<'b>>, Vec<MailIndex<'b>>) {
let (p1, p2): (Vec<_>, Vec<_>) = midx_list let (p1, p2): (Vec<_>, Vec<_>) = midx_list
.iter() .iter()
.map(|x| (x, self.is_keep_on_idx(x))) .map(|x| (x, self.is_keep_on_idx(x)))
@ -124,7 +131,11 @@ impl<'a> Criteria<'a> {
(to_keep, to_fetch) (to_keep, to_fetch)
} }
pub fn filter_on_query<'b>(&self, midx_list: &[MailIndex<'b>], query_result: &'b Vec<QueryResult<'b>>) -> Result<Vec<MailIndex<'b>>> { pub fn filter_on_query<'b>(
&self,
midx_list: &[MailIndex<'b>],
query_result: &'b Vec<QueryResult<'b>>,
) -> Result<Vec<MailIndex<'b>>> {
Ok(midx_list Ok(midx_list
.iter() .iter()
.zip(query_result.iter()) .zip(query_result.iter())
@ -151,7 +162,9 @@ impl<'a> Criteria<'a> {
And(expr_list) => expr_list And(expr_list) => expr_list
.as_ref() .as_ref()
.iter() .iter()
.fold(PartialDecision::Keep, |acc, cur| acc.and(&Criteria(cur).is_keep_on_idx(midx))), .fold(PartialDecision::Keep, |acc, cur| {
acc.and(&Criteria(cur).is_keep_on_idx(midx))
}),
Or(left, right) => { Or(left, right) => {
let left_decision = Criteria(left).is_keep_on_idx(midx); let left_decision = Criteria(left).is_keep_on_idx(midx);
let right_decision = Criteria(right).is_keep_on_idx(midx); let right_decision = Criteria(right).is_keep_on_idx(midx);
@ -172,10 +185,9 @@ impl<'a> Criteria<'a> {
unknown => { unknown => {
tracing::error!("Unknown filter {:?}", unknown); tracing::error!("Unknown filter {:?}", unknown);
PartialDecision::Discard PartialDecision::Discard
},
} }
} }
}
/// @TODO we re-evaluate twice the same logic. The correct way would be, on each pass, /// @TODO we re-evaluate twice the same logic. The correct way would be, on each pass,
/// to simplify the search query, by removing the elements that were already checked. /// to simplify the search query, by removing the elements that were already checked.
@ -192,7 +204,8 @@ impl<'a> Criteria<'a> {
.iter() .iter()
.all(|cur| Criteria(cur).is_keep_on_query(mail_view)), .all(|cur| Criteria(cur).is_keep_on_query(mail_view)),
Or(left, right) => { Or(left, right) => {
Criteria(left).is_keep_on_query(mail_view) || Criteria(right).is_keep_on_query(mail_view) Criteria(left).is_keep_on_query(mail_view)
|| Criteria(right).is_keep_on_query(mail_view)
} }
Not(expr) => !Criteria(expr).is_keep_on_query(mail_view), Not(expr) => !Criteria(expr).is_keep_on_query(mail_view),
All => true, All => true,
@ -216,8 +229,22 @@ impl<'a> Criteria<'a> {
}, },
// Message size is also stored in MailMeta // Message size is also stored in MailMeta
Larger(size_ref) => mail_view.query_result.metadata().expect("metadata were fetched").rfc822_size > *size_ref as usize, Larger(size_ref) => {
Smaller(size_ref) => mail_view.query_result.metadata().expect("metadata were fetched").rfc822_size < *size_ref as usize, mail_view
.query_result
.metadata()
.expect("metadata were fetched")
.rfc822_size
> *size_ref as usize
}
Smaller(size_ref) => {
mail_view
.query_result
.metadata()
.expect("metadata were fetched")
.rfc822_size
< *size_ref as usize
}
// Filter on well-known headers // Filter on well-known headers
Bcc(txt) => mail_view.is_header_contains_pattern(&b"bcc"[..], txt.as_ref()), Bcc(txt) => mail_view.is_header_contains_pattern(&b"bcc"[..], txt.as_ref()),
@ -228,19 +255,49 @@ impl<'a> Criteria<'a> {
Header(hdr, txt) => mail_view.is_header_contains_pattern(hdr.as_ref(), txt.as_ref()), Header(hdr, txt) => mail_view.is_header_contains_pattern(hdr.as_ref(), txt.as_ref()),
// Filter on Date header // Filter on Date header
SentBefore(search_naive) => mail_view.imf().map(|imf| imf.naive_date().ok()).flatten().map(|msg_naive| &msg_naive < search_naive.as_ref()).unwrap_or(false), SentBefore(search_naive) => mail_view
SentOn(search_naive) => mail_view.imf().map(|imf| imf.naive_date().ok()).flatten().map(|msg_naive| &msg_naive == search_naive.as_ref()).unwrap_or(false), .imf()
SentSince(search_naive) => mail_view.imf().map(|imf| imf.naive_date().ok()).flatten().map(|msg_naive| &msg_naive > search_naive.as_ref()).unwrap_or(false), .map(|imf| imf.naive_date().ok())
.flatten()
.map(|msg_naive| &msg_naive < search_naive.as_ref())
.unwrap_or(false),
SentOn(search_naive) => mail_view
.imf()
.map(|imf| imf.naive_date().ok())
.flatten()
.map(|msg_naive| &msg_naive == search_naive.as_ref())
.unwrap_or(false),
SentSince(search_naive) => mail_view
.imf()
.map(|imf| imf.naive_date().ok())
.flatten()
.map(|msg_naive| &msg_naive > search_naive.as_ref())
.unwrap_or(false),
// Filter on the full content of the email // Filter on the full content of the email
Text(txt) => mail_view.content.as_msg().map(|msg| msg.raw_part.windows(txt.as_ref().len()).any(|win| win == txt.as_ref())).unwrap_or(false), Text(txt) => mail_view
Body(txt) => mail_view.content.as_msg().map(|msg| msg.raw_body.windows(txt.as_ref().len()).any(|win| win == txt.as_ref())).unwrap_or(false), .content
.as_msg()
.map(|msg| {
msg.raw_part
.windows(txt.as_ref().len())
.any(|win| win == txt.as_ref())
})
.unwrap_or(false),
Body(txt) => mail_view
.content
.as_msg()
.map(|msg| {
msg.raw_body
.windows(txt.as_ref().len())
.any(|win| win == txt.as_ref())
})
.unwrap_or(false),
unknown => { unknown => {
tracing::error!("Unknown filter {:?}", unknown); tracing::error!("Unknown filter {:?}", unknown);
false false
}, }
} }
} }
} }
@ -323,9 +380,8 @@ impl PartialDecision {
fn is_sk_flag(sk: &SearchKey) -> bool { fn is_sk_flag(sk: &SearchKey) -> bool {
use SearchKey::*; use SearchKey::*;
match sk { match sk {
Answered | Deleted | Draft | Flagged | Keyword(..) | New | Old Answered | Deleted | Draft | Flagged | Keyword(..) | New | Old | Recent | Seen
| Recent | Seen | Unanswered | Undeleted | Undraft | Unanswered | Undeleted | Undraft | Unflagged | Unkeyword(..) | Unseen => true,
| Unflagged | Unkeyword(..) | Unseen => true,
_ => false, _ => false,
} }
} }
@ -342,37 +398,37 @@ fn is_keep_flag(sk: &SearchKey, midx: &MailIndex) -> bool {
let is_recent = midx.is_flag_set("\\Recent"); let is_recent = midx.is_flag_set("\\Recent");
let is_seen = midx.is_flag_set("\\Seen"); let is_seen = midx.is_flag_set("\\Seen");
is_recent && !is_seen is_recent && !is_seen
}, }
Old => { Old => {
let is_recent = midx.is_flag_set("\\Recent"); let is_recent = midx.is_flag_set("\\Recent");
!is_recent !is_recent
}, }
Recent => midx.is_flag_set("\\Recent"), Recent => midx.is_flag_set("\\Recent"),
Seen => midx.is_flag_set("\\Seen"), Seen => midx.is_flag_set("\\Seen"),
Unanswered => { Unanswered => {
let is_answered = midx.is_flag_set("\\Recent"); let is_answered = midx.is_flag_set("\\Recent");
!is_answered !is_answered
}, }
Undeleted => { Undeleted => {
let is_deleted = midx.is_flag_set("\\Deleted"); let is_deleted = midx.is_flag_set("\\Deleted");
!is_deleted !is_deleted
}, }
Undraft => { Undraft => {
let is_draft = midx.is_flag_set("\\Draft"); let is_draft = midx.is_flag_set("\\Draft");
!is_draft !is_draft
}, }
Unflagged => { Unflagged => {
let is_flagged = midx.is_flag_set("\\Flagged"); let is_flagged = midx.is_flag_set("\\Flagged");
!is_flagged !is_flagged
}, }
Unkeyword(kw) => { Unkeyword(kw) => {
let is_keyword_set = midx.is_flag_set(kw.inner()); let is_keyword_set = midx.is_flag_set(kw.inner());
!is_keyword_set !is_keyword_set
}, }
Unseen => { Unseen => {
let is_seen = midx.is_flag_set("\\Seen"); let is_seen = midx.is_flag_set("\\Seen");
!is_seen !is_seen
}, }
// Not flag logic // Not flag logic
_ => unreachable!(), _ => unreachable!(),
@ -389,8 +445,16 @@ fn is_sk_seq(sk: &SearchKey) -> bool {
fn is_keep_seq(sk: &SearchKey, midx: &MailIndex) -> bool { fn is_keep_seq(sk: &SearchKey, midx: &MailIndex) -> bool {
use SearchKey::*; use SearchKey::*;
match sk { match sk {
SequenceSet(seq_set) => seq_set.0.as_ref().iter().any(|seq| midx.is_in_sequence_i(seq)), SequenceSet(seq_set) => seq_set
Uid(seq_set) => seq_set.0.as_ref().iter().any(|seq| midx.is_in_sequence_uid(seq)), .0
.as_ref()
.iter()
.any(|seq| midx.is_in_sequence_i(seq)),
Uid(seq_set) => seq_set
.0
.as_ref()
.iter()
.any(|seq| midx.is_in_sequence_uid(seq)),
_ => unreachable!(), _ => unreachable!(),
} }
} }