implement date parsing

This commit is contained in:
Quentin 2023-06-12 11:54:15 +02:00
parent 4ea0c97cfe
commit 7d3b3ff053
Signed by: quentin
GPG key ID: E9602264D639FF68
5 changed files with 366 additions and 10 deletions

314
Cargo.lock generated
View file

@ -2,13 +2,118 @@
# It is not intended for manual editing. # It is not intended for manual editing.
version = 3 version = 3
[[package]]
name = "android-tzdata"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "bumpalo"
version = "3.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
[[package]]
name = "cc"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec837a71355b28f6556dbd569b37b3f363091c0bd4b2e735674521b4c5fd9bc5"
dependencies = [
"android-tzdata",
"iana-time-zone",
"js-sys",
"num-traits",
"time",
"wasm-bindgen",
"winapi",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa"
[[package]]
name = "iana-time-zone"
version = "0.1.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"wasm-bindgen",
"windows",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
dependencies = [
"cc",
]
[[package]] [[package]]
name = "imf-codec" name = "imf-codec"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"chrono",
"nom", "nom",
] ]
[[package]]
name = "js-sys"
version = "0.3.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f37a4a5928311ac501dee68b3c7613a1037d0edb30c8e5427bd832d55d1b790"
dependencies = [
"wasm-bindgen",
]
[[package]]
name = "libc"
version = "0.2.146"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f92be4933c13fd498862a9e02a3055f8a8d9c039ce33db97306fd5a6caa7f29b"
[[package]]
name = "log"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4"
[[package]] [[package]]
name = "memchr" name = "memchr"
version = "2.5.0" version = "2.5.0"
@ -30,3 +135,212 @@ dependencies = [
"memchr", "memchr",
"minimal-lexical", "minimal-lexical",
] ]
[[package]]
name = "num-traits"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "proc-macro2"
version = "1.0.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dec2b086b7a862cf4de201096214fa870344cf922b2b30c167badb3af3195406"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488"
dependencies = [
"proc-macro2",
]
[[package]]
name = "syn"
version = "2.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "time"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a"
dependencies = [
"libc",
"wasi",
"winapi",
]
[[package]]
name = "unicode-ident"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0"
[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
[[package]]
name = "wasm-bindgen"
version = "0.2.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bba0e8cb82ba49ff4e229459ff22a191bbe9a1cb3a341610c9c33efc27ddf73"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b04bc93f9d6bdee709f6bd2118f57dd6679cf1176a1af464fca3ab0d66d8fb"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14d6b024f1a526bb0234f52840389927257beb670610081360e5a03c5df9c258"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e128beba882dd1eb6200e1dc92ae6c5dbaa4311aa7bb211ca035779e5efc39f8"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed9d5b4305409d1fc9482fee2d7f9bcbf24b3972bf59817ef757e23982242a93"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3"
[[package]]
name = "windows_i686_gnu"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241"
[[package]]
name = "windows_i686_msvc"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"

View file

@ -17,3 +17,4 @@ path = "src/parse.rs"
[dependencies] [dependencies]
nom = "7" nom = "7"
chrono = "0.4"

View file

@ -1,3 +1,4 @@
use chrono::DateTime;
use nom::{ use nom::{
IResult, IResult,
bytes::complete::take_while1, bytes::complete::take_while1,
@ -11,7 +12,7 @@ use nom::{
}; };
use crate::abnf::{fws, vchar_seq, perm_crlf}; use crate::abnf::{fws, vchar_seq, perm_crlf};
use crate::model::HeaderSection; use crate::model::{HeaderSection, HeaderDate};
/// HEADERS /// HEADERS
@ -24,6 +25,11 @@ pub fn header_section(input: &str) -> IResult<&str, HeaderSection> {
HeaderSection::default, HeaderSection::default,
|mut section, head| { |mut section, head| {
match head { match head {
HeaderField::Date(d) => {
//@FIXME only one date is allowed, what are we doing if multiple dates are
//encountered? Currently, we override...
section.date = d;
}
HeaderField::Subject(title) => { HeaderField::Subject(title) => {
section.subject = Some(title); section.subject = Some(title);
} }
@ -42,7 +48,7 @@ pub fn header_section(input: &str) -> IResult<&str, HeaderSection> {
enum HeaderField<'a> { enum HeaderField<'a> {
// 3.6.1. The Origination Date Field // 3.6.1. The Origination Date Field
Date, Date(HeaderDate),
// 3.6.2. Originator Fields // 3.6.2. Originator Fields
From, From,
@ -97,21 +103,31 @@ fn header_field(input: &str) -> IResult<&str, HeaderField> {
let (input, _) = tuple((tag(":"), space0))(input)?; let (input, _) = tuple((tag(":"), space0))(input)?;
// Extract field body // Extract field body
match field_name { let (input, hfield) = match field_name {
"Date" => unimplemented!(), "Date" => {
// @FIXME want to extract datetime our way in the future
// to better handle obsolete/bad cases instead of crashing.
let (input, raw_date) = unstructured(input)?;
let date = match DateTime::parse_from_rfc2822(&raw_date) {
Ok(chronodt) => HeaderDate::Parsed(chronodt),
Err(e) => HeaderDate::Unknown(raw_date, e),
};
(input, HeaderField::Date(date))
},
//"From" => unimplemented!(), //"From" => unimplemented!(),
"Sender" => unimplemented!(), "Sender" => unimplemented!(),
"Subject" => { "Subject" => {
let (input, body) = unstructured(input)?; let (input, body) = unstructured(input)?;
let (input, _) = crlf(input)?; (input, HeaderField::Subject(body))
Ok((input, HeaderField::Subject(body)))
}, },
_ => { _ => {
let (input, body) = unstructured(input)?; let (input, body) = unstructured(input)?;
(input, HeaderField::Optional(field_name, body))
}
};
let (input, _) = crlf(input)?; let (input, _) = crlf(input)?;
Ok((input, HeaderField::Optional(field_name, body))) return Ok((input, hfield));
}
}
} }
/// Unstructured header field body /// Unstructured header field body

View file

@ -1,8 +1,33 @@
use std::collections::HashMap; use std::collections::HashMap;
use chrono::{DateTime,FixedOffset,ParseError};
#[derive(Debug, Default)]
pub enum HeaderDate {
Parsed(DateTime<FixedOffset>),
Unknown(String, ParseError),
#[default]
None,
}
#[derive(Debug, Default)] #[derive(Debug, Default)]
pub struct HeaderSection<'a> { pub struct HeaderSection<'a> {
pub subject: Option<String>, pub subject: Option<String>,
pub from: Vec<String>, pub from: Vec<String>,
pub date: HeaderDate,
pub optional: HashMap<&'a str, String>, pub optional: HashMap<&'a str, String>,
} }
enum InvalidEmailErr {
NoUsableDate,
}
impl<'a> HeaderSection<'a> {
fn is_valid(&self) -> Result<(), InvalidEmailErr> {
match self.date {
HeaderDate::Parsed(_) => (),
_ => return Err(InvalidEmailErr::NoUsableDate),
};
Ok(())
}
}

View file

@ -1,6 +1,6 @@
use imf_codec::headers; use imf_codec::headers;
fn main() { fn main() {
let header = "Subject: Hello\r\n World\r\nFrom: <quentin@deuxfleurs.fr>\r\n\r\nHello world"; let header = "Date: Fri, 21 Nov 1997 09:55:06 -0600\r\nSubject: Hello\r\n World\r\nFrom: <quentin@deuxfleurs.fr>\r\n\r\nHello world";
println!("{:?}", headers::header_section(header)); println!("{:?}", headers::header_section(header));
} }