second impl of datetime, with parser

This commit is contained in:
Quentin 2023-06-18 17:27:01 +02:00
parent a6dd1e1718
commit dd6f127fa2
Signed by: quentin
GPG key ID: E9602264D639FF68
3 changed files with 154 additions and 52 deletions

View file

@ -1,29 +1,49 @@
use chrono::{DateTime, FixedOffset, NaiveDate, NaiveTime, TimeZone}; use chrono::{DateTime, FixedOffset, NaiveDate, NaiveTime, TimeZone};
use nom::{ use nom::{
IResult, IResult,
bytes::complete::take_while_m_n, AsChar,
branch::alt,
bytes::complete::{tag, tag_no_case, take_while, take_while1, take_while_m_n, is_a},
character,
character::is_digit, character::is_digit,
character::complete::{one_of, alphanumeric1},
combinator::{map, opt, value},
sequence::{preceded, terminated, tuple, delimited },
}; };
use crate::misc_token; use crate::misc_token;
use crate::whitespace::{fws, cfws};
/// date-time = [ day-of-week "," ] date time [CFWS] const MIN: i32 = 60;
/// time = time-of-day zone const HOUR: i32 = 60 * MIN;
/// @FIXME: if parsing fails, Option::None is silently returned...
/// Read datetime
///
/// ```abnf
/// date-time = [ day-of-week "," ] date time [CFWS]
/// time = time-of-day zone
/// ```
///
/// ## @FIXME - known bugs
///
/// - if chrono fails, Option::None is silently returned instead of failing the parser
/// - `-0000` denotes a date-time without timezone information (a NaiveDateTime),
/// but this library interprets it as `+0000`, i.e. UTC.
/// - Obsolete military zones should be considered as NaiveTime
/// due to an error in RFC0822 but are interpreted as their respective
/// timezone according to the RFC5322 definition
pub fn section(input: &str) -> IResult<&str, Option<DateTime<FixedOffset>>> { pub fn section(input: &str) -> IResult<&str, Option<DateTime<FixedOffset>>> {
let (input, (_, date, time, tz, _)) = tuple(( map(tuple((
opt(terminated(day_of_week, tag(","))), opt(terminated(day_of_week, tag(","))),
date, time_of_day, zone date, time_of_day, zone,
opt(cfws)))(input)?; opt(cfws)
)), |res| {
match res {
//@TODO: rebuild DateTime from NaiveDate, NaiveTime and TimeZone (_, Some(date), Some(time), Some(tz), _) => {
date.and_time(time).and_local_timezone(tz).earliest()
},
_ => None,
// @FIXME want to extract datetime our way in the future }
// to better handle obsolete/bad cases instead of returning raw text. })(input)
//let (input, raw_date) = misc_token::unstructured(input)?;
//Ok((input, DateTime::parse_from_rfc2822(&raw_date).unwrap()))
} }
/// day-of-week = ([FWS] day-name) / obs-day-of-week /// day-of-week = ([FWS] day-name) / obs-day-of-week
@ -36,7 +56,7 @@ fn day_of_week_strict(input: &str) -> IResult<&str, &str> {
} }
fn obs_day_of_week(input: &str) -> IResult<&str, &str> { fn obs_day_of_week(input: &str) -> IResult<&str, &str> {
delimited(obs(cfws), day_name, obs(cfws))(input) delimited(opt(cfws), day_name, opt(cfws))(input)
} }
/// day-name = "Mon" / "Tue" / "Wed" / "Thu" / /// day-name = "Mon" / "Tue" / "Wed" / "Thu" /
@ -68,22 +88,18 @@ fn day(input: &str) -> IResult<&str, u32> {
alt((day_strict, obs_day))(input) alt((day_strict, obs_day))(input)
} }
fn day_strict(input: &str) -> IResult<&str, u32) { fn day_strict(input: &str) -> IResult<&str, u32> {
delimited(opt(fws), day_digit, fws)(input) delimited(opt(fws), character::complete::u32, fws)(input)
} }
fn obs_day(input: &str) -> IResult<&str, u32) { fn obs_day(input: &str) -> IResult<&str, u32> {
delimited(opt(cfws), day_digit, opt(cfws))(input) delimited(opt(cfws), character::complete::u32, opt(cfws))(input)
}
fn day_digit(input: &str) -> IRresult<&str, u32) {
map(take_while_m_n(1, 2, is_digit), |d| d.parse::<u32>().unwrap())(input)
} }
/// month = "Jan" / "Feb" / "Mar" / "Apr" / /// month = "Jan" / "Feb" / "Mar" / "Apr" /
/// "May" / "Jun" / "Jul" / "Aug" / /// "May" / "Jun" / "Jul" / "Aug" /
/// "Sep" / "Oct" / "Nov" / "Dec" /// "Sep" / "Oct" / "Nov" / "Dec"
fn month(input: &str) -> IResult<&str, u32) { fn month(input: &str) -> IResult<&str, u32> {
alt(( alt((
value(1, tag_no_case("Jan")), value(1, tag_no_case("Jan")),
value(2, tag_no_case("Feb")), value(2, tag_no_case("Feb")),
@ -106,39 +122,125 @@ fn year(input: &str) -> IResult<&str, i32> {
alt((strict_year, obs_year))(input) alt((strict_year, obs_year))(input)
} }
fn strict_year(input &str) -> IResult<&str, i32> { fn strict_year(input: &str) -> IResult<&str, i32> {
delimited(fws, strict_year_digit, fws)(input) delimited(fws, character::complete::i32, fws)(input)
} }
fn obs_year(input: &str) -> IResult<&str, i32> { fn obs_year(input: &str) -> IResult<&str, i32> {
delimited(opt(cfws), obs_year_digit, opt(cfws))(input) map(delimited(opt(cfws), character::complete::i32, opt(cfws)),
} |d: i32| if d >= 0 && d <= 49 {
2000 + d
fn strict_year_digit(input: &str) -> IResult<&str, i32> { } else if d >= 50 && d <= 999 {
// Max value for i32 is 2,147,483,647 ; in other words 10 digits. 1900 + d
// 9 digits should always be parsable into an i32 and enough for a year. } else {
// @FIXME a better implementation is desirable d
map(take_while_m_n(4, 9, is_digit), |d| d.parse::<i32>().unwrap())(input) })(input)
}
fn obs_year_digit(input: &str) -> IResult<&str, i32> {
// @FIXME same as strict_year_digit
map(take_while_m_n(2, 9, is_digit), |d| d.parse::<i32>().unwrap())(input)
} }
/// time-of-day = hour ":" minute [ ":" second ] /// time-of-day = hour ":" minute [ ":" second ]
/// ///
fn time(input: &str) -> IResult<&str, (NaiveTime, TimeZone)> { fn time_of_day(input: &str) -> IResult<&str, Option<NaiveTime>> {
map( map(
tuple((time_digit, tag(":"), time_digit, opt(preceded(tag(":"), time_digit)))), tuple((character::complete::u32, tag(":"), character::complete::u32, opt(preceded(tag(":"), character::complete::u32)))),
|(hour, _, minute, maybe_sec)| |(hour, _, minute, maybe_sec)| NaiveTime::from_hms_opt(hour, minute, maybe_sec.unwrap_or(0)),
)(input)
} }
fn time_digit(input: &str) -> IResult<&str, u32> { /// Obsolete zones
alt((strict_time_digit, obs_time_digit))(input) ///
/// ```abnf
/// zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone
/// obs-zone = "UT" / "GMT" / ; Universal Time
/// ; North American UT
/// ; offsets
/// "EST" / "EDT" / ; Eastern: - 5/ - 4
/// "CST" / "CDT" / ; Central: - 6/ - 5
/// "MST" / "MDT" / ; Mountain: - 7/ - 6
/// "PST" / "PDT" / ; Pacific: - 8/ - 7
/// ;
/// %d65-73 / ; Military zones - "A"
/// %d75-90 / ; through "I" and "K"
/// %d97-105 / ; through "Z", both
/// %d107-122 / ; upper and lower case
/// ;
/// 1*(ALPHA / DIGIT) ; Unknown legacy timezones
/// ```
///
///
fn zone(input: &str) -> IResult<&str, Option<FixedOffset>> {
alt((strict_zone, obs_zone))(input)
} }
fn strict_time_digit(input: &str) -> IResult<&str, u32> { fn strict_zone(input: &str) -> IResult<&str, Option<FixedOffset>> {
take_while_m_n(4, 4, is_digit)(input) map(
tuple((opt(fws), is_a("+-"), take_while_m_n(2,2,|c| c >= '\x30' && c <= '\x39'), take_while_m_n(2,2,|c| c >= '\x30' && c <= '\x39'))),
|(_, op, dig_zone_hour, dig_zone_min)| {
let zone_hour = dig_zone_hour.parse::<i32>().unwrap() * HOUR;
let zone_min = dig_zone_min.parse::<i32>().unwrap() * MIN;
match op {
"+" => FixedOffset::east_opt(zone_hour + zone_min),
"-" => FixedOffset::west_opt(zone_hour + zone_min),
_ => unreachable!(), }
}
)(input)
} }
fn obs_zone(input: &str) -> IResult<&str, Option<FixedOffset>> {
// The writing of this function is volontarily verbose
// to keep it straightforward to understand.
// @FIXME: Could return a TimeZone and not an Option<TimeZone>
// as it could be determined at compile time if values are correct
// and panic at this time if not. But not sure how to do it without unwrap.
alt((
// Legacy UTC/GMT
value(FixedOffset::west_opt(0 * HOUR), alt((tag("UT"), tag("GMT")))),
// USA Timezones
value(FixedOffset::west_opt(4 * HOUR), tag("EDT")),
value(FixedOffset::west_opt(5 * HOUR), alt((tag("EST"), tag("CDT")))),
value(FixedOffset::west_opt(6 * HOUR), alt((tag("CST"), tag("MDT")))),
value(FixedOffset::west_opt(7 * HOUR), alt((tag("MST"), tag("PDT")))),
value(FixedOffset::west_opt(8 * HOUR), tag("PST")),
// Military Timezone UTC
value(FixedOffset::west_opt(0 * HOUR), tag("Z")),
// Military Timezones East
map(one_of("ABCDEFGHIKLMabcdefghiklm"), |c| match c {
'A' | 'a' => FixedOffset::east_opt(1 * HOUR),
'B' | 'b' => FixedOffset::east_opt(2 * HOUR),
'C' | 'c' => FixedOffset::east_opt(3 * HOUR),
'D' | 'd' => FixedOffset::east_opt(4 * HOUR),
'E' | 'e' => FixedOffset::east_opt(5 * HOUR),
'F' | 'f' => FixedOffset::east_opt(6 * HOUR),
'G' | 'g' => FixedOffset::east_opt(7 * HOUR),
'H' | 'h' => FixedOffset::east_opt(8 * HOUR),
'I' | 'i' => FixedOffset::east_opt(9 * HOUR),
'K' | 'k' => FixedOffset::east_opt(10 * HOUR),
'L' | 'l' => FixedOffset::east_opt(11 * HOUR),
'M' | 'm' => FixedOffset::east_opt(12 * HOUR),
_ => unreachable!(),
}),
// Military Timezones West
map(one_of("nopqrstuvwxyNOPQRSTUVWXY"), |c| match c {
'N' | 'n' => FixedOffset::west_opt(1 * HOUR),
'O' | 'o' => FixedOffset::west_opt(2 * HOUR),
'P' | 'p' => FixedOffset::west_opt(3 * HOUR),
'Q' | 'q' => FixedOffset::west_opt(4 * HOUR),
'R' | 'r' => FixedOffset::west_opt(5 * HOUR),
'S' | 's' => FixedOffset::west_opt(6 * HOUR),
'T' | 't' => FixedOffset::west_opt(7 * HOUR),
'U' | 'u' => FixedOffset::west_opt(8 * HOUR),
'V' | 'v' => FixedOffset::west_opt(9 * HOUR),
'W' | 'w' => FixedOffset::west_opt(10 * HOUR),
'X' | 'x' => FixedOffset::west_opt(11 * HOUR),
'Y' | 'y' => FixedOffset::west_opt(12 * HOUR),
_ => unreachable!(),
}),
// Unknown timezone
value(FixedOffset::west_opt(0 * HOUR), alphanumeric1),
))(input)
}

View file

@ -36,7 +36,7 @@ pub fn section(input: &str) -> IResult<&str, HeaderSection> {
// 3.6.1. The Origination Date Field // 3.6.1. The Origination Date Field
// | orig-date | 1 | 1 | | // | orig-date | 1 | 1 | |
Field::Date(FieldBody::Correct(d)) => { Field::Date(FieldBody::Correct(d)) => {
section.date = Some(d); section.date = d;
} }
// 3.6.2. Originator Fields // 3.6.2. Originator Fields

View file

@ -64,7 +64,7 @@ pub enum FieldBody<'a, T> {
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum Field<'a> { pub enum Field<'a> {
// 3.6.1. The Origination Date Field // 3.6.1. The Origination Date Field
Date(FieldBody<'a, DateTime<FixedOffset>>), Date(FieldBody<'a, Option<DateTime<FixedOffset>>>),
// 3.6.2. Originator Fields // 3.6.2. Originator Fields
From(FieldBody<'a, Vec<MailboxRef>>), From(FieldBody<'a, Vec<MailboxRef>>),