second impl of datetime, with parser
This commit is contained in:
parent
a6dd1e1718
commit
dd6f127fa2
3 changed files with 154 additions and 52 deletions
202
src/datetime.rs
202
src/datetime.rs
|
@ -1,29 +1,49 @@
|
||||||
use chrono::{DateTime, FixedOffset, NaiveDate, NaiveTime, TimeZone};
|
use chrono::{DateTime, FixedOffset, NaiveDate, NaiveTime, TimeZone};
|
||||||
use nom::{
|
use nom::{
|
||||||
IResult,
|
IResult,
|
||||||
bytes::complete::take_while_m_n,
|
AsChar,
|
||||||
|
branch::alt,
|
||||||
|
bytes::complete::{tag, tag_no_case, take_while, take_while1, take_while_m_n, is_a},
|
||||||
|
character,
|
||||||
character::is_digit,
|
character::is_digit,
|
||||||
|
character::complete::{one_of, alphanumeric1},
|
||||||
|
combinator::{map, opt, value},
|
||||||
|
sequence::{preceded, terminated, tuple, delimited },
|
||||||
};
|
};
|
||||||
use crate::misc_token;
|
use crate::misc_token;
|
||||||
|
use crate::whitespace::{fws, cfws};
|
||||||
|
|
||||||
/// date-time = [ day-of-week "," ] date time [CFWS]
|
const MIN: i32 = 60;
|
||||||
/// time = time-of-day zone
|
const HOUR: i32 = 60 * MIN;
|
||||||
/// @FIXME: if parsing fails, Option::None is silently returned...
|
|
||||||
|
/// Read datetime
|
||||||
|
///
|
||||||
|
/// ```abnf
|
||||||
|
/// date-time = [ day-of-week "," ] date time [CFWS]
|
||||||
|
/// time = time-of-day zone
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// ## @FIXME - known bugs
|
||||||
|
///
|
||||||
|
/// - if chrono fails, Option::None is silently returned instead of failing the parser
|
||||||
|
/// - `-0000` means NaiveDateTime, a date without a timezone
|
||||||
|
/// while this library interprets it as +0000 aka UTC.
|
||||||
|
/// - Obsolete military zones should be considered as NaiveTime
|
||||||
|
/// due to an error in RFC0822 but are interpreted as their respective
|
||||||
|
/// timezone according to the RFC5322 definition
|
||||||
pub fn section(input: &str) -> IResult<&str, Option<DateTime<FixedOffset>>> {
|
pub fn section(input: &str) -> IResult<&str, Option<DateTime<FixedOffset>>> {
|
||||||
let (input, (_, date, time, tz, _)) = tuple((
|
map(tuple((
|
||||||
opt(terminated(day_of_week, tag(","))),
|
opt(terminated(day_of_week, tag(","))),
|
||||||
date, time_of_day, zone
|
date, time_of_day, zone,
|
||||||
opt(cfws)))(input)?;
|
opt(cfws)
|
||||||
|
)), |res| {
|
||||||
|
match res {
|
||||||
//@TODO: rebuild DateTime from NaiveDate, NaiveTime and TimeZone
|
(_, Some(date), Some(time), Some(tz), _) => {
|
||||||
|
date.and_time(time).and_local_timezone(tz).earliest()
|
||||||
|
},
|
||||||
|
_ => None,
|
||||||
// @FIXME want to extract datetime our way in the future
|
}
|
||||||
// to better handle obsolete/bad cases instead of returning raw text.
|
})(input)
|
||||||
//let (input, raw_date) = misc_token::unstructured(input)?;
|
|
||||||
//Ok((input, DateTime::parse_from_rfc2822(&raw_date).unwrap()))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// day-of-week = ([FWS] day-name) / obs-day-of-week
|
/// day-of-week = ([FWS] day-name) / obs-day-of-week
|
||||||
|
@ -36,7 +56,7 @@ fn day_of_week_strict(input: &str) -> IResult<&str, &str> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn obs_day_of_week(input: &str) -> IResult<&str, &str> {
|
fn obs_day_of_week(input: &str) -> IResult<&str, &str> {
|
||||||
delimited(obs(cfws), day_name, obs(cfws))(input)
|
delimited(opt(cfws), day_name, opt(cfws))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// day-name = "Mon" / "Tue" / "Wed" / "Thu" /
|
/// day-name = "Mon" / "Tue" / "Wed" / "Thu" /
|
||||||
|
@ -68,22 +88,18 @@ fn day(input: &str) -> IResult<&str, u32> {
|
||||||
alt((day_strict, obs_day))(input)
|
alt((day_strict, obs_day))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn day_strict(input: &str) -> IResult<&str, u32) {
|
fn day_strict(input: &str) -> IResult<&str, u32> {
|
||||||
delimited(opt(fws), day_digit, fws)(input)
|
delimited(opt(fws), character::complete::u32, fws)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn obs_day(input: &str) -> IResult<&str, u32) {
|
fn obs_day(input: &str) -> IResult<&str, u32> {
|
||||||
delimited(opt(cfws), day_digit, opt(cfws))(input)
|
delimited(opt(cfws), character::complete::u32, opt(cfws))(input)
|
||||||
}
|
|
||||||
|
|
||||||
fn day_digit(input: &str) -> IRresult<&str, u32) {
|
|
||||||
map(take_while_m_n(1, 2, is_digit), |d| d.parse::<u32>().unwrap())(input)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// month = "Jan" / "Feb" / "Mar" / "Apr" /
|
/// month = "Jan" / "Feb" / "Mar" / "Apr" /
|
||||||
/// "May" / "Jun" / "Jul" / "Aug" /
|
/// "May" / "Jun" / "Jul" / "Aug" /
|
||||||
/// "Sep" / "Oct" / "Nov" / "Dec"
|
/// "Sep" / "Oct" / "Nov" / "Dec"
|
||||||
fn month(input: &str) -> IResult<&str, u32) {
|
fn month(input: &str) -> IResult<&str, u32> {
|
||||||
alt((
|
alt((
|
||||||
value(1, tag_no_case("Jan")),
|
value(1, tag_no_case("Jan")),
|
||||||
value(2, tag_no_case("Feb")),
|
value(2, tag_no_case("Feb")),
|
||||||
|
@ -106,39 +122,125 @@ fn year(input: &str) -> IResult<&str, i32> {
|
||||||
alt((strict_year, obs_year))(input)
|
alt((strict_year, obs_year))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn strict_year(input &str) -> IResult<&str, i32> {
|
fn strict_year(input: &str) -> IResult<&str, i32> {
|
||||||
delimited(fws, strict_year_digit, fws)(input)
|
delimited(fws, character::complete::i32, fws)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn obs_year(input: &str) -> IResult<&str, i32> {
|
fn obs_year(input: &str) -> IResult<&str, i32> {
|
||||||
delimited(opt(cfws), obs_year_digit, opt(cfws))(input)
|
map(delimited(opt(cfws), character::complete::i32, opt(cfws)),
|
||||||
}
|
|d: i32| if d >= 0 && d <= 49 {
|
||||||
|
2000 + d
|
||||||
fn strict_year_digit(input: &str) -> IResult<&str, i32> {
|
} else if d >= 50 && d <= 999 {
|
||||||
// Max value for i32 is 2,147,483,647 ; in other words 10 digits.
|
1900 + d
|
||||||
// 9 digits should always be parsable into an i32 and enough for a year.
|
} else {
|
||||||
// @FIXME a better implementation is desirable
|
d
|
||||||
map(take_while_m_n(4, 9, is_digit), |d| d.parse::<i32>().unwrap())(input)
|
})(input)
|
||||||
}
|
|
||||||
|
|
||||||
fn obs_year_digit(input: &str) -> IResult<&str, i32> {
|
|
||||||
// @FIXME same as strict_year_digit
|
|
||||||
map(take_while_m_n(2, 9, is_digit), |d| d.parse::<i32>().unwrap())(input)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// time-of-day = hour ":" minute [ ":" second ]
|
/// time-of-day = hour ":" minute [ ":" second ]
|
||||||
///
|
///
|
||||||
fn time(input: &str) -> IResult<&str, (NaiveTime, TimeZone)> {
|
fn time_of_day(input: &str) -> IResult<&str, Option<NaiveTime>> {
|
||||||
map(
|
map(
|
||||||
tuple((time_digit, tag(":"), time_digit, opt(preceded(tag(":"), time_digit)))),
|
tuple((character::complete::u32, tag(":"), character::complete::u32, opt(preceded(tag(":"), character::complete::u32)))),
|
||||||
|(hour, _, minute, maybe_sec)|
|
|(hour, _, minute, maybe_sec)| NaiveTime::from_hms_opt(hour, minute, maybe_sec.unwrap_or(0)),
|
||||||
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn time_digit(input: &str) -> IResult<&str, u32> {
|
/// Read a timezone: either a strict numeric offset or an obsolete named zone
|
||||||
alt((strict_time_digit, obs_time_digit))(input)
|
///
|
||||||
|
/// ```abnf
|
||||||
|
/// zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone
|
||||||
|
/// obs-zone = "UT" / "GMT" / ; Universal Time
|
||||||
|
/// ; North American UT
|
||||||
|
/// ; offsets
|
||||||
|
/// "EST" / "EDT" / ; Eastern: - 5/ - 4
|
||||||
|
/// "CST" / "CDT" / ; Central: - 6/ - 5
|
||||||
|
/// "MST" / "MDT" / ; Mountain: - 7/ - 6
|
||||||
|
/// "PST" / "PDT" / ; Pacific: - 8/ - 7
|
||||||
|
/// ;
|
||||||
|
/// %d65-73 / ; Military zones - "A"
|
||||||
|
/// %d75-90 / ; through "I" and "K"
|
||||||
|
/// %d97-105 / ; through "Z", both
|
||||||
|
/// %d107-122 / ; upper and lower case
|
||||||
|
/// ;
|
||||||
|
/// 1*(ALPHA / DIGIT) ; Unknown legacy timezones
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
///
|
||||||
|
|
||||||
|
fn zone(input: &str) -> IResult<&str, Option<FixedOffset>> {
|
||||||
|
alt((strict_zone, obs_zone))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn strict_time_digit(input: &str) -> IResult<&str, u32> {
|
fn strict_zone(input: &str) -> IResult<&str, Option<FixedOffset>> {
|
||||||
take_while_m_n(4, 4, is_digit)(input)
|
map(
|
||||||
|
tuple((opt(fws), is_a("+-"), take_while_m_n(2,2,|c| c >= '\x30' && c <= '\x39'), take_while_m_n(2,2,|c| c >= '\x30' && c <= '\x39'))),
|
||||||
|
|(_, op, dig_zone_hour, dig_zone_min)| {
|
||||||
|
let zone_hour = dig_zone_hour.parse::<i32>().unwrap() * HOUR;
|
||||||
|
let zone_min = dig_zone_min.parse::<i32>().unwrap() * MIN;
|
||||||
|
match op {
|
||||||
|
"+" => FixedOffset::east_opt(zone_hour + zone_min),
|
||||||
|
"-" => FixedOffset::west_opt(zone_hour + zone_min),
|
||||||
|
_ => unreachable!(), }
|
||||||
|
}
|
||||||
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn obs_zone(input: &str) -> IResult<&str, Option<FixedOffset>> {
|
||||||
|
// The writing of this function is volontarily verbose
|
||||||
|
// to keep it straightforward to understand.
|
||||||
|
// @FIXME: Could return a TimeZone and not an Option<TimeZone>
|
||||||
|
// as it could be determined at compile time if values are correct
|
||||||
|
// and panic at this time if not. But not sure how to do it without unwrap.
|
||||||
|
alt((
|
||||||
|
// Legacy UTC/GMT
|
||||||
|
value(FixedOffset::west_opt(0 * HOUR), alt((tag("UT"), tag("GMT")))),
|
||||||
|
|
||||||
|
// USA Timezones
|
||||||
|
value(FixedOffset::west_opt(4 * HOUR), tag("EDT")),
|
||||||
|
value(FixedOffset::west_opt(5 * HOUR), alt((tag("EST"), tag("CDT")))),
|
||||||
|
value(FixedOffset::west_opt(6 * HOUR), alt((tag("CST"), tag("MDT")))),
|
||||||
|
value(FixedOffset::west_opt(7 * HOUR), alt((tag("MST"), tag("PDT")))),
|
||||||
|
value(FixedOffset::west_opt(8 * HOUR), tag("PST")),
|
||||||
|
|
||||||
|
// Military Timezone UTC
|
||||||
|
value(FixedOffset::west_opt(0 * HOUR), tag("Z")),
|
||||||
|
|
||||||
|
// Military Timezones East
|
||||||
|
map(one_of("ABCDEFGHIKLMabcdefghiklm"), |c| match c {
|
||||||
|
'A' | 'a' => FixedOffset::east_opt(1 * HOUR),
|
||||||
|
'B' | 'b' => FixedOffset::east_opt(2 * HOUR),
|
||||||
|
'C' | 'c' => FixedOffset::east_opt(3 * HOUR),
|
||||||
|
'D' | 'd' => FixedOffset::east_opt(4 * HOUR),
|
||||||
|
'E' | 'e' => FixedOffset::east_opt(5 * HOUR),
|
||||||
|
'F' | 'f' => FixedOffset::east_opt(6 * HOUR),
|
||||||
|
'G' | 'g' => FixedOffset::east_opt(7 * HOUR),
|
||||||
|
'H' | 'h' => FixedOffset::east_opt(8 * HOUR),
|
||||||
|
'I' | 'i' => FixedOffset::east_opt(9 * HOUR),
|
||||||
|
'K' | 'k' => FixedOffset::east_opt(10 * HOUR),
|
||||||
|
'L' | 'l' => FixedOffset::east_opt(11 * HOUR),
|
||||||
|
'M' | 'm' => FixedOffset::east_opt(12 * HOUR),
|
||||||
|
_ => unreachable!(),
|
||||||
|
}),
|
||||||
|
|
||||||
|
// Military Timezones West
|
||||||
|
map(one_of("nopqrstuvwxyNOPQRSTUVWXY"), |c| match c {
|
||||||
|
'N' | 'n' => FixedOffset::west_opt(1 * HOUR),
|
||||||
|
'O' | 'o' => FixedOffset::west_opt(2 * HOUR),
|
||||||
|
'P' | 'p' => FixedOffset::west_opt(3 * HOUR),
|
||||||
|
'Q' | 'q' => FixedOffset::west_opt(4 * HOUR),
|
||||||
|
'R' | 'r' => FixedOffset::west_opt(5 * HOUR),
|
||||||
|
'S' | 's' => FixedOffset::west_opt(6 * HOUR),
|
||||||
|
'T' | 't' => FixedOffset::west_opt(7 * HOUR),
|
||||||
|
'U' | 'u' => FixedOffset::west_opt(8 * HOUR),
|
||||||
|
'V' | 'v' => FixedOffset::west_opt(9 * HOUR),
|
||||||
|
'W' | 'w' => FixedOffset::west_opt(10 * HOUR),
|
||||||
|
'X' | 'x' => FixedOffset::west_opt(11 * HOUR),
|
||||||
|
'Y' | 'y' => FixedOffset::west_opt(12 * HOUR),
|
||||||
|
_ => unreachable!(),
|
||||||
|
}),
|
||||||
|
|
||||||
|
// Unknown timezone
|
||||||
|
value(FixedOffset::west_opt(0 * HOUR), alphanumeric1),
|
||||||
|
))(input)
|
||||||
|
}
|
||||||
|
|
|
@ -36,7 +36,7 @@ pub fn section(input: &str) -> IResult<&str, HeaderSection> {
|
||||||
// 3.6.1. The Origination Date Field
|
// 3.6.1. The Origination Date Field
|
||||||
// | orig-date | 1 | 1 | |
|
// | orig-date | 1 | 1 | |
|
||||||
Field::Date(FieldBody::Correct(d)) => {
|
Field::Date(FieldBody::Correct(d)) => {
|
||||||
section.date = Some(d);
|
section.date = d;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3.6.2. Originator Fields
|
// 3.6.2. Originator Fields
|
||||||
|
|
|
@ -64,7 +64,7 @@ pub enum FieldBody<'a, T> {
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub enum Field<'a> {
|
pub enum Field<'a> {
|
||||||
// 3.6.1. The Origination Date Field
|
// 3.6.1. The Origination Date Field
|
||||||
Date(FieldBody<'a, DateTime<FixedOffset>>),
|
Date(FieldBody<'a, Option<DateTime<FixedOffset>>>),
|
||||||
|
|
||||||
// 3.6.2. Originator Fields
|
// 3.6.2. Originator Fields
|
||||||
From(FieldBody<'a, Vec<MailboxRef>>),
|
From(FieldBody<'a, Vec<MailboxRef>>),
|
||||||
|
|
Loading…
Reference in a new issue