Implement search #61

Merged
quentin merged 13 commits from feat/search into main 2024-01-08 10:39:26 +00:00
5 changed files with 163 additions and 82 deletions
Showing only changes of commit ea1772df42 - Show all commits

View file

@ -3,28 +3,32 @@ use imap_codec::imap_types::envelope::{Address, Envelope};
use eml_codec::imf; use eml_codec::imf;
/// Envelope rules are defined in RFC 3501, section 7.4.2 pub struct ImfView<'a>(pub &'a imf::Imf<'a>);
/// https://datatracker.ietf.org/doc/html/rfc3501#section-7.4.2
///
/// Some important notes:
///
/// If the Sender or Reply-To lines are absent in the [RFC-2822]
/// header, or are present but empty, the server sets the
/// corresponding member of the envelope to be the same value as
/// the from member (the client is not expected to know to do
/// this). Note: [RFC-2822] requires that all messages have a valid
/// From header. Therefore, the from, sender, and reply-to
/// members in the envelope can not be NIL.
///
/// If the Date, Subject, In-Reply-To, and Message-ID header lines
/// are absent in the [RFC-2822] header, the corresponding member
/// of the envelope is NIL; if these header lines are present but
/// empty the corresponding member of the envelope is the empty
/// string.
//@FIXME return an error if the envelope is invalid instead of panicking impl<'a> ImfView<'a> {
//@FIXME some fields must be defaulted if they are not set. /// Envelope rules are defined in RFC 3501, section 7.4.2
pub fn message_envelope(msg: &imf::Imf) -> Envelope<'static> { /// https://datatracker.ietf.org/doc/html/rfc3501#section-7.4.2
///
/// Some important notes:
///
/// If the Sender or Reply-To lines are absent in the [RFC-2822]
/// header, or are present but empty, the server sets the
/// corresponding member of the envelope to be the same value as
/// the from member (the client is not expected to know to do
/// this). Note: [RFC-2822] requires that all messages have a valid
/// From header. Therefore, the from, sender, and reply-to
/// members in the envelope can not be NIL.
///
/// If the Date, Subject, In-Reply-To, and Message-ID header lines
/// are absent in the [RFC-2822] header, the corresponding member
/// of the envelope is NIL; if these header lines are present but
/// empty the corresponding member of the envelope is the empty
/// string.
//@FIXME return an error if the envelope is invalid instead of panicking
//@FIXME some fields must be defaulted if they are not set.
pub fn message_envelope(&self) -> Envelope<'static> {
let msg = self.0;
let from = msg.from.iter().map(convert_mbx).collect::<Vec<_>>(); let from = msg.from.iter().map(convert_mbx).collect::<Vec<_>>();
Envelope { Envelope {
@ -64,6 +68,7 @@ pub fn message_envelope(msg: &imf::Imf) -> Envelope<'static> {
.map(|d| IString::try_from(d.to_string()).unwrap()), .map(|d| IString::try_from(d.to_string()).unwrap()),
), ),
} }
}
} }
pub fn convert_addresses(addrlist: &Vec<imf::address::AddressRef>) -> Vec<Address<'static>> { pub fn convert_addresses(addrlist: &Vec<imf::address::AddressRef>) -> Vec<Address<'static>> {

View file

@ -1,7 +1,7 @@
use std::num::NonZeroU32; use std::num::NonZeroU32;
use anyhow::{anyhow, bail, Result}; use anyhow::{anyhow, bail, Result};
use chrono::{Offset, TimeZone, Utc}; use chrono::{Offset, TimeZone, Utc, DateTime as ChronoDateTime, Local, naive::NaiveDate};
use imap_codec::imap_types::core::NString; use imap_codec::imap_types::core::NString;
use imap_codec::imap_types::datetime::DateTime; use imap_codec::imap_types::datetime::DateTime;
@ -20,7 +20,7 @@ use crate::mail::query::QueryResult;
use crate::imap::attributes::AttributesProxy; use crate::imap::attributes::AttributesProxy;
use crate::imap::flags; use crate::imap::flags;
use crate::imap::imf_view::message_envelope; use crate::imap::imf_view::ImfView;
use crate::imap::index::MailIndex; use crate::imap::index::MailIndex;
use crate::imap::mime_view; use crate::imap::mime_view;
use crate::imap::response::Body; use crate::imap::response::Body;
@ -52,6 +52,10 @@ impl<'a> MailView<'a> {
}) })
} }
pub fn imf(&self) -> Option<ImfView> {
self.content.imf().map(ImfView)
}
pub fn filter(&self, ap: &AttributesProxy) -> Result<(Body<'static>, SeenFlag)> { pub fn filter(&self, ap: &AttributesProxy) -> Result<(Body<'static>, SeenFlag)> {
let mut seen = SeenFlag::DoNothing; let mut seen = SeenFlag::DoNothing;
let res_attrs = ap let res_attrs = ap
@ -89,6 +93,16 @@ impl<'a> MailView<'a> {
)) ))
} }
/// Returns the calendar date of the message's stored `internaldate`, expressed in
/// the local timezone of the machine running this code.
///
/// # Errors
/// Fails when `internaldate` does not fit in an `i64`, or when chrono rejects the
/// value as a timestamp.
///
/// # Panics
/// Panics if the query result carries no metadata.
pub fn stored_naive_date(&self) -> Result<NaiveDate> {
    let mail_meta = self.query_result.metadata().expect("metadata were fetched");
    let mail_ts: i64 = mail_meta.internaldate.try_into()?;
    // NOTE(review): `from_timestamp` takes seconds since the epoch — confirm that
    // `internaldate` is stored in seconds and not milliseconds.
    // `ok_or_else` builds the error lazily, only when the timestamp is rejected.
    let msg_date: ChronoDateTime<Local> = ChronoDateTime::from_timestamp(mail_ts, 0)
        .ok_or_else(|| anyhow!("unable to parse timestamp"))?
        .with_timezone(&Local);
    Ok(msg_date.date_naive())
}
// Private function, mainly for filter! // Private function, mainly for filter!
fn uid(&self) -> MessageDataItem<'static> { fn uid(&self) -> MessageDataItem<'static> {
MessageDataItem::Uid(self.in_idx.uid.clone()) MessageDataItem::Uid(self.in_idx.uid.clone())
@ -135,7 +149,7 @@ impl<'a> MailView<'a> {
} }
fn envelope(&self) -> MessageDataItem<'static> { fn envelope(&self) -> MessageDataItem<'static> {
MessageDataItem::Envelope(message_envelope(self.content.imf().clone())) MessageDataItem::Envelope(self.imf().expect("an imf object is derivable from fetchedmail").message_envelope())
} }
fn body(&self) -> Result<MessageDataItem<'static>> { fn body(&self) -> Result<MessageDataItem<'static>> {
@ -239,11 +253,11 @@ impl<'a> FetchedMail<'a> {
} }
} }
fn imf(&self) -> &imf::Imf<'a> { fn imf(&self) -> Option<&imf::Imf<'a>> {
match self { match self {
FetchedMail::Full(AnyPart::Msg(x)) => &x.imf, FetchedMail::Full(AnyPart::Msg(x)) => Some(&x.imf),
FetchedMail::Partial(x) => &x, FetchedMail::Partial(x) => Some(&x),
_ => panic!("Can't contain AnyPart that is not a message"), _ => None,
} }
} }
} }

View file

@ -330,7 +330,7 @@ impl MailboxView {
let query_result = self.0.query(&uuids, query_scope).fetch().await?; let query_result = self.0.query(&uuids, query_scope).fetch().await?;
// 5. If needed, filter the selection based on the body // 5. If needed, filter the selection based on the body
let kept_query = crit.filter_on_query(&to_fetch, &query_result); let kept_query = crit.filter_on_query(&to_fetch, &query_result)?;
// 6. Format the result according to the client's taste: // 6. Format the result according to the client's taste:
// either return UID or ID. // either return UID or ID.

View file

@ -12,7 +12,7 @@ use eml_codec::{
header, mime, mime::r#type::Deductible, part::composite, part::discrete, part::AnyPart, header, mime, mime::r#type::Deductible, part::composite, part::discrete, part::AnyPart,
}; };
use crate::imap::imf_view::message_envelope; use crate::imap::imf_view::ImfView;
pub enum BodySection<'a> { pub enum BodySection<'a> {
Full(Cow<'a, [u8]>), Full(Cow<'a, [u8]>),
@ -347,7 +347,7 @@ impl<'a> NodeMsg<'a> {
body: FetchBody { body: FetchBody {
basic, basic,
specific: SpecificFields::Message { specific: SpecificFields::Message {
envelope: Box::new(message_envelope(&self.1.imf)), envelope: Box::new(ImfView(&self.1.imf).message_envelope()),
body_structure: Box::new(NodeMime(&self.1.child).structure()?), body_structure: Box::new(NodeMime(&self.1.child).structure()?),
number_of_lines: nol(self.1.raw_part), number_of_lines: nol(self.1.raw_part),
}, },

View file

@ -1,10 +1,13 @@
use std::num::NonZeroU32;
use anyhow::Result;
use imap_codec::imap_types::core::NonEmptyVec; use imap_codec::imap_types::core::NonEmptyVec;
use imap_codec::imap_types::search::SearchKey; use imap_codec::imap_types::search::SearchKey;
use imap_codec::imap_types::sequence::{SeqOrUid, Sequence, SequenceSet}; use imap_codec::imap_types::sequence::{SeqOrUid, Sequence, SequenceSet};
use std::num::NonZeroU32;
use crate::mail::query::{QueryScope, QueryResult}; use crate::mail::query::{QueryScope, QueryResult};
use crate::imap::index::MailIndex; use crate::imap::index::MailIndex;
use crate::imap::mail_view::MailView;
pub enum SeqType { pub enum SeqType {
Undefined, Undefined,
@ -121,13 +124,16 @@ impl<'a> Criteria<'a> {
(to_keep, to_fetch) (to_keep, to_fetch)
} }
pub fn filter_on_query<'b>(&self, midx_list: &[MailIndex<'b>], query_result: &Vec<QueryResult<'_>>) -> Vec<MailIndex<'b>> { pub fn filter_on_query<'b>(&self, midx_list: &[MailIndex<'b>], query_result: &'b Vec<QueryResult<'b>>) -> Result<Vec<MailIndex<'b>>> {
midx_list Ok(midx_list
.iter() .iter()
.zip(query_result.iter()) .zip(query_result.iter())
.filter(|(midx, qr)| self.is_keep_on_query(midx, qr)) .map(|(midx, qr)| MailView::new(qr, midx.clone()))
.map(|(midx, _qr)| midx.clone()) .collect::<Result<Vec<_>, _>>()?
.collect() .into_iter()
.filter(|mail_view| self.is_keep_on_query(mail_view))
.map(|mail_view| mail_view.in_idx)
.collect())
} }
// ---- // ----
@ -163,24 +169,80 @@ impl<'a> Criteria<'a> {
| Subject(_) | To(_) | Before(_) | On(_) | Since(_) | Larger(_) | Smaller(_) | Subject(_) | To(_) | Before(_) | On(_) | Since(_) | Larger(_) | Smaller(_)
| Text(_) | Body(_) => PartialDecision::Postpone, | Text(_) | Body(_) => PartialDecision::Postpone,
_ => unreachable!(), unknown => {
tracing::error!("Unknown filter {:?}", unknown);
PartialDecision::Discard
},
} }
} }
fn is_keep_on_query(&self, midx: &MailIndex, qr: &QueryResult) -> bool {
/// @TODO we re-eveluate twice the same logic. The correct way would be, on each pass,
/// to simplify the searck query, by removing the elements that were already checked.
/// For example if we have AND(OR(seqid(X), body(Y)), body(X)), we can't keep for sure
/// the email, as body(x) might be false. So we need to check it. But as seqid(x) is true,
/// we could simplify the request to just body(x) and truncate the first OR. Today, we are
/// not doing that, and thus we reevaluate everything.
fn is_keep_on_query(&self, mail_view: &MailView) -> bool {
use SearchKey::*; use SearchKey::*;
match self.0 { match self.0 {
// Combinator logic // Combinator logic
And(expr_list) => expr_list And(expr_list) => expr_list
.as_ref() .as_ref()
.iter() .iter()
.any(|cur| Criteria(cur).is_keep_on_query(midx, qr)), .any(|cur| Criteria(cur).is_keep_on_query(mail_view)),
Or(left, right) => { Or(left, right) => {
Criteria(left).is_keep_on_query(midx, qr) || Criteria(right).is_keep_on_query(midx, qr) Criteria(left).is_keep_on_query(mail_view) || Criteria(right).is_keep_on_query(mail_view)
} }
Not(expr) => !Criteria(expr).is_keep_on_query(midx, qr), Not(expr) => !Criteria(expr).is_keep_on_query(mail_view),
All => true, All => true,
_ => unimplemented!(),
// Reevaluating our previous logic...
maybe_seq if is_sk_seq(maybe_seq) => is_keep_seq(maybe_seq, &mail_view.in_idx),
maybe_flag if is_sk_flag(maybe_flag) => is_keep_flag(maybe_flag, &mail_view.in_idx),
// Filter on mail meta
Before(search_naive) => match mail_view.stored_naive_date() {
Ok(msg_naive) => &msg_naive < search_naive.as_ref(),
_ => false,
},
On(search_naive) => match mail_view.stored_naive_date() {
Ok(msg_naive) => &msg_naive == search_naive.as_ref(),
_ => false,
},
Since(search_naive) => match mail_view.stored_naive_date() {
Ok(msg_naive) => &msg_naive > search_naive.as_ref(),
_ => false,
},
// Message size is also stored in MailMeta
Larger(size_ref) => mail_view.query_result.metadata().expect("metadata were fetched").rfc822_size > *size_ref as usize,
Smaller(size_ref) => mail_view.query_result.metadata().expect("metadata were fetched").rfc822_size < *size_ref as usize,
// Filter on well-known headers
Bcc(_) => unimplemented!(),
Cc(_) => unimplemented!(),
From(_) => unimplemented!(),
Subject(_)=> unimplemented!(),
To(_) => unimplemented!(),
// Filter on arbitrary header
Header(..) => unimplemented!(),
// Filter on Date header
SentBefore(_) => unimplemented!(),
SentOn(_) => unimplemented!(),
SentSince(_) => unimplemented!(),
// Filter on the full content of the email
Text(_) => unimplemented!(),
Body(_) => unimplemented!(),
unknown => {
tracing::error!("Unknown filter {:?}", unknown);
false
},
} }
} }
} }
@ -240,16 +302,16 @@ impl PartialDecision {
fn or(&self, other: &Self) -> Self { fn or(&self, other: &Self) -> Self {
match (self, other) { match (self, other) {
(Self::Postpone, _) | (_, Self::Postpone) => Self::Postpone,
(Self::Keep, _) | (_, Self::Keep) => Self::Keep, (Self::Keep, _) | (_, Self::Keep) => Self::Keep,
(Self::Postpone, _) | (_, Self::Postpone) => Self::Postpone,
(Self::Discard, Self::Discard) => Self::Discard, (Self::Discard, Self::Discard) => Self::Discard,
} }
} }
fn and(&self, other: &Self) -> Self { fn and(&self, other: &Self) -> Self {
match (self, other) { match (self, other) {
(Self::Postpone, _) | (_, Self::Postpone) => Self::Postpone,
(Self::Discard, _) | (_, Self::Discard) => Self::Discard, (Self::Discard, _) | (_, Self::Discard) => Self::Discard,
(Self::Postpone, _) | (_, Self::Postpone) => Self::Postpone,
(Self::Keep, Self::Keep) => Self::Keep, (Self::Keep, Self::Keep) => Self::Keep,
} }
} }