second impl of datetime, with parser
This commit is contained in:
parent
a6dd1e1718
commit
dd6f127fa2
3 changed files with 154 additions and 52 deletions
202
src/datetime.rs
202
src/datetime.rs
|
@ -1,29 +1,49 @@
|
|||
use chrono::{DateTime, FixedOffset, NaiveDate, NaiveTime, TimeZone};
|
||||
use nom::{
|
||||
IResult,
|
||||
bytes::complete::take_while_m_n,
|
||||
AsChar,
|
||||
branch::alt,
|
||||
bytes::complete::{tag, tag_no_case, take_while, take_while1, take_while_m_n, is_a},
|
||||
character,
|
||||
character::is_digit,
|
||||
character::complete::{one_of, alphanumeric1},
|
||||
combinator::{map, opt, value},
|
||||
sequence::{preceded, terminated, tuple, delimited },
|
||||
};
|
||||
use crate::misc_token;
|
||||
use crate::whitespace::{fws, cfws};
|
||||
|
||||
/// date-time = [ day-of-week "," ] date time [CFWS]
|
||||
/// time = time-of-day zone
|
||||
/// @FIXME: if parsing fails, Option::None is silently returned...
|
||||
const MIN: i32 = 60;
|
||||
const HOUR: i32 = 60 * MIN;
|
||||
|
||||
/// Read datetime
|
||||
///
|
||||
/// ```abnf
|
||||
/// date-time = [ day-of-week "," ] date time [CFWS]
|
||||
/// time = time-of-day zone
|
||||
/// ```
|
||||
///
|
||||
/// ## @FIXME - known bugs
|
||||
///
|
||||
/// - if chrono fails, Option::None is silently returned instead of failing the parser
|
||||
/// - `-0000` means NaiveDateTime, a date without a timezone
|
||||
/// while this library interprets it as +0000 aka UTC.
|
||||
/// - Obsolete military zones should be considered as NaiveTime
|
||||
/// due to an error in RFC0822 but are interpreted as their respective
|
||||
/// timezone according to the RFC5322 definition
|
||||
pub fn section(input: &str) -> IResult<&str, Option<DateTime<FixedOffset>>> {
|
||||
let (input, (_, date, time, tz, _)) = tuple((
|
||||
opt(terminated(day_of_week, tag(","))),
|
||||
date, time_of_day, zone
|
||||
opt(cfws)))(input)?;
|
||||
|
||||
|
||||
//@TODO: rebuild DateTime from NaiveDate, NaiveTime and TimeZone
|
||||
|
||||
|
||||
|
||||
// @FIXME want to extract datetime our way in the future
|
||||
// to better handle obsolete/bad cases instead of returning raw text.
|
||||
//let (input, raw_date) = misc_token::unstructured(input)?;
|
||||
//Ok((input, DateTime::parse_from_rfc2822(&raw_date).unwrap()))
|
||||
map(tuple((
|
||||
opt(terminated(day_of_week, tag(","))),
|
||||
date, time_of_day, zone,
|
||||
opt(cfws)
|
||||
)), |res| {
|
||||
match res {
|
||||
(_, Some(date), Some(time), Some(tz), _) => {
|
||||
date.and_time(time).and_local_timezone(tz).earliest()
|
||||
},
|
||||
_ => None,
|
||||
}
|
||||
})(input)
|
||||
}
|
||||
|
||||
/// day-of-week = ([FWS] day-name) / obs-day-of-week
|
||||
|
@ -36,7 +56,7 @@ fn day_of_week_strict(input: &str) -> IResult<&str, &str> {
|
|||
}
|
||||
|
||||
fn obs_day_of_week(input: &str) -> IResult<&str, &str> {
|
||||
delimited(obs(cfws), day_name, obs(cfws))(input)
|
||||
delimited(opt(cfws), day_name, opt(cfws))(input)
|
||||
}
|
||||
|
||||
/// day-name = "Mon" / "Tue" / "Wed" / "Thu" /
|
||||
|
@ -68,22 +88,18 @@ fn day(input: &str) -> IResult<&str, u32> {
|
|||
alt((day_strict, obs_day))(input)
|
||||
}
|
||||
|
||||
fn day_strict(input: &str) -> IResult<&str, u32) {
|
||||
delimited(opt(fws), day_digit, fws)(input)
|
||||
fn day_strict(input: &str) -> IResult<&str, u32> {
|
||||
delimited(opt(fws), character::complete::u32, fws)(input)
|
||||
}
|
||||
|
||||
fn obs_day(input: &str) -> IResult<&str, u32) {
|
||||
delimited(opt(cfws), day_digit, opt(cfws))(input)
|
||||
}
|
||||
|
||||
fn day_digit(input: &str) -> IRresult<&str, u32) {
|
||||
map(take_while_m_n(1, 2, is_digit), |d| d.parse::<u32>().unwrap())(input)
|
||||
fn obs_day(input: &str) -> IResult<&str, u32> {
|
||||
delimited(opt(cfws), character::complete::u32, opt(cfws))(input)
|
||||
}
|
||||
|
||||
/// month = "Jan" / "Feb" / "Mar" / "Apr" /
|
||||
/// "May" / "Jun" / "Jul" / "Aug" /
|
||||
/// "Sep" / "Oct" / "Nov" / "Dec"
|
||||
fn month(input: &str) -> IResult<&str, u32) {
|
||||
fn month(input: &str) -> IResult<&str, u32> {
|
||||
alt((
|
||||
value(1, tag_no_case("Jan")),
|
||||
value(2, tag_no_case("Feb")),
|
||||
|
@ -106,39 +122,125 @@ fn year(input: &str) -> IResult<&str, i32> {
|
|||
alt((strict_year, obs_year))(input)
|
||||
}
|
||||
|
||||
fn strict_year(input &str) -> IResult<&str, i32> {
|
||||
delimited(fws, strict_year_digit, fws)(input)
|
||||
fn strict_year(input: &str) -> IResult<&str, i32> {
|
||||
delimited(fws, character::complete::i32, fws)(input)
|
||||
}
|
||||
|
||||
fn obs_year(input: &str) -> IResult<&str, i32> {
|
||||
delimited(opt(cfws), obs_year_digit, opt(cfws))(input)
|
||||
}
|
||||
|
||||
fn strict_year_digit(input: &str) -> IResult<&str, i32> {
|
||||
// Max value for i32 is 2,147,483,647 ; in other words 10 digits.
|
||||
// 9 digits should always be parsable into an i32 and enough for a year.
|
||||
// @FIXME a better implementation is desirable
|
||||
map(take_while_m_n(4, 9, is_digit), |d| d.parse::<i32>().unwrap())(input)
|
||||
}
|
||||
|
||||
fn obs_year_digit(input: &str) -> IResult<&str, i32> {
|
||||
// @FIXME same as strict_year_digit
|
||||
map(take_while_m_n(2, 9, is_digit), |d| d.parse::<i32>().unwrap())(input)
|
||||
map(delimited(opt(cfws), character::complete::i32, opt(cfws)),
|
||||
|d: i32| if d >= 0 && d <= 49 {
|
||||
2000 + d
|
||||
} else if d >= 50 && d <= 999 {
|
||||
1900 + d
|
||||
} else {
|
||||
d
|
||||
})(input)
|
||||
}
|
||||
|
||||
/// time-of-day = hour ":" minute [ ":" second ]
|
||||
///
|
||||
fn time(input: &str) -> IResult<&str, (NaiveTime, TimeZone)> {
|
||||
fn time_of_day(input: &str) -> IResult<&str, Option<NaiveTime>> {
|
||||
map(
|
||||
tuple((time_digit, tag(":"), time_digit, opt(preceded(tag(":"), time_digit)))),
|
||||
|(hour, _, minute, maybe_sec)|
|
||||
tuple((character::complete::u32, tag(":"), character::complete::u32, opt(preceded(tag(":"), character::complete::u32)))),
|
||||
|(hour, _, minute, maybe_sec)| NaiveTime::from_hms_opt(hour, minute, maybe_sec.unwrap_or(0)),
|
||||
)(input)
|
||||
}
|
||||
|
||||
fn time_digit(input: &str) -> IResult<&str, u32> {
|
||||
alt((strict_time_digit, obs_time_digit))(input)
|
||||
/// Obsolete zones
|
||||
///
|
||||
/// ```abnf
|
||||
/// zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone
|
||||
/// obs-zone = "UT" / "GMT" / ; Universal Time
|
||||
/// ; North American UT
|
||||
/// ; offsets
|
||||
/// "EST" / "EDT" / ; Eastern: - 5/ - 4
|
||||
/// "CST" / "CDT" / ; Central: - 6/ - 5
|
||||
/// "MST" / "MDT" / ; Mountain: - 7/ - 6
|
||||
/// "PST" / "PDT" / ; Pacific: - 8/ - 7
|
||||
/// ;
|
||||
/// %d65-73 / ; Military zones - "A"
|
||||
/// %d75-90 / ; through "I" and "K"
|
||||
/// %d97-105 / ; through "Z", both
|
||||
/// %d107-122 / ; upper and lower case
|
||||
/// ;
|
||||
/// 1*(ALPHA / DIGIT) ; Unknown legacy timezones
|
||||
/// ```
|
||||
///
|
||||
///
|
||||
|
||||
fn zone(input: &str) -> IResult<&str, Option<FixedOffset>> {
|
||||
alt((strict_zone, obs_zone))(input)
|
||||
}
|
||||
|
||||
fn strict_time_digit(input: &str) -> IResult<&str, u32> {
|
||||
take_while_m_n(4, 4, is_digit)(input)
|
||||
fn strict_zone(input: &str) -> IResult<&str, Option<FixedOffset>> {
|
||||
map(
|
||||
tuple((opt(fws), is_a("+-"), take_while_m_n(2,2,|c| c >= '\x30' && c <= '\x39'), take_while_m_n(2,2,|c| c >= '\x30' && c <= '\x39'))),
|
||||
|(_, op, dig_zone_hour, dig_zone_min)| {
|
||||
let zone_hour = dig_zone_hour.parse::<i32>().unwrap() * HOUR;
|
||||
let zone_min = dig_zone_min.parse::<i32>().unwrap() * MIN;
|
||||
match op {
|
||||
"+" => FixedOffset::east_opt(zone_hour + zone_min),
|
||||
"-" => FixedOffset::west_opt(zone_hour + zone_min),
|
||||
_ => unreachable!(), }
|
||||
}
|
||||
)(input)
|
||||
}
|
||||
|
||||
fn obs_zone(input: &str) -> IResult<&str, Option<FixedOffset>> {
|
||||
// The writing of this function is volontarily verbose
|
||||
// to keep it straightforward to understand.
|
||||
// @FIXME: Could return a TimeZone and not an Option<TimeZone>
|
||||
// as it could be determined at compile time if values are correct
|
||||
// and panic at this time if not. But not sure how to do it without unwrap.
|
||||
alt((
|
||||
// Legacy UTC/GMT
|
||||
value(FixedOffset::west_opt(0 * HOUR), alt((tag("UT"), tag("GMT")))),
|
||||
|
||||
// USA Timezones
|
||||
value(FixedOffset::west_opt(4 * HOUR), tag("EDT")),
|
||||
value(FixedOffset::west_opt(5 * HOUR), alt((tag("EST"), tag("CDT")))),
|
||||
value(FixedOffset::west_opt(6 * HOUR), alt((tag("CST"), tag("MDT")))),
|
||||
value(FixedOffset::west_opt(7 * HOUR), alt((tag("MST"), tag("PDT")))),
|
||||
value(FixedOffset::west_opt(8 * HOUR), tag("PST")),
|
||||
|
||||
// Military Timezone UTC
|
||||
value(FixedOffset::west_opt(0 * HOUR), tag("Z")),
|
||||
|
||||
// Military Timezones East
|
||||
map(one_of("ABCDEFGHIKLMabcdefghiklm"), |c| match c {
|
||||
'A' | 'a' => FixedOffset::east_opt(1 * HOUR),
|
||||
'B' | 'b' => FixedOffset::east_opt(2 * HOUR),
|
||||
'C' | 'c' => FixedOffset::east_opt(3 * HOUR),
|
||||
'D' | 'd' => FixedOffset::east_opt(4 * HOUR),
|
||||
'E' | 'e' => FixedOffset::east_opt(5 * HOUR),
|
||||
'F' | 'f' => FixedOffset::east_opt(6 * HOUR),
|
||||
'G' | 'g' => FixedOffset::east_opt(7 * HOUR),
|
||||
'H' | 'h' => FixedOffset::east_opt(8 * HOUR),
|
||||
'I' | 'i' => FixedOffset::east_opt(9 * HOUR),
|
||||
'K' | 'k' => FixedOffset::east_opt(10 * HOUR),
|
||||
'L' | 'l' => FixedOffset::east_opt(11 * HOUR),
|
||||
'M' | 'm' => FixedOffset::east_opt(12 * HOUR),
|
||||
_ => unreachable!(),
|
||||
}),
|
||||
|
||||
// Military Timezones West
|
||||
map(one_of("nopqrstuvwxyNOPQRSTUVWXY"), |c| match c {
|
||||
'N' | 'n' => FixedOffset::west_opt(1 * HOUR),
|
||||
'O' | 'o' => FixedOffset::west_opt(2 * HOUR),
|
||||
'P' | 'p' => FixedOffset::west_opt(3 * HOUR),
|
||||
'Q' | 'q' => FixedOffset::west_opt(4 * HOUR),
|
||||
'R' | 'r' => FixedOffset::west_opt(5 * HOUR),
|
||||
'S' | 's' => FixedOffset::west_opt(6 * HOUR),
|
||||
'T' | 't' => FixedOffset::west_opt(7 * HOUR),
|
||||
'U' | 'u' => FixedOffset::west_opt(8 * HOUR),
|
||||
'V' | 'v' => FixedOffset::west_opt(9 * HOUR),
|
||||
'W' | 'w' => FixedOffset::west_opt(10 * HOUR),
|
||||
'X' | 'x' => FixedOffset::west_opt(11 * HOUR),
|
||||
'Y' | 'y' => FixedOffset::west_opt(12 * HOUR),
|
||||
_ => unreachable!(),
|
||||
}),
|
||||
|
||||
// Unknown timezone
|
||||
value(FixedOffset::west_opt(0 * HOUR), alphanumeric1),
|
||||
))(input)
|
||||
}
|
||||
|
|
|
@ -36,7 +36,7 @@ pub fn section(input: &str) -> IResult<&str, HeaderSection> {
|
|||
// 3.6.1. The Origination Date Field
|
||||
// | orig-date | 1 | 1 | |
|
||||
Field::Date(FieldBody::Correct(d)) => {
|
||||
section.date = Some(d);
|
||||
section.date = d;
|
||||
}
|
||||
|
||||
// 3.6.2. Originator Fields
|
||||
|
|
|
@ -64,7 +64,7 @@ pub enum FieldBody<'a, T> {
|
|||
#[derive(Debug, PartialEq)]
|
||||
pub enum Field<'a> {
|
||||
// 3.6.1. The Origination Date Field
|
||||
Date(FieldBody<'a, DateTime<FixedOffset>>),
|
||||
Date(FieldBody<'a, Option<DateTime<FixedOffset>>>),
|
||||
|
||||
// 3.6.2. Originator Fields
|
||||
From(FieldBody<'a, Vec<MailboxRef>>),
|
||||
|
|
Loading…
Reference in a new issue