diff --git a/src/datetime.rs b/src/datetime.rs index fd31208..203b452 100644 --- a/src/datetime.rs +++ b/src/datetime.rs @@ -1,29 +1,49 @@ use chrono::{DateTime, FixedOffset, NaiveDate, NaiveTime, TimeZone}; use nom::{ IResult, - bytes::complete::take_while_m_n, + AsChar, + branch::alt, + bytes::complete::{tag, tag_no_case, take_while, take_while1, take_while_m_n, is_a}, + character, character::is_digit, + character::complete::{one_of, alphanumeric1}, + combinator::{map, opt, value}, + sequence::{preceded, terminated, tuple, delimited }, }; use crate::misc_token; +use crate::whitespace::{fws, cfws}; -/// date-time = [ day-of-week "," ] date time [CFWS] -/// time = time-of-day zone -/// @FIXME: if parsing fails, Option::None is silently returned... +const MIN: i32 = 60; +const HOUR: i32 = 60 * MIN; + +/// Read datetime +/// +/// ```abnf +/// date-time = [ day-of-week "," ] date time [CFWS] +/// time = time-of-day zone +/// ``` +/// +/// ## @FIXME - known bugs +/// +/// - if chrono fails, Option::None is silently returned instead of failing the parser +/// - `-0000` means NaiveDateTime, a date without a timezone +/// while this library interprets it as +0000 aka UTC. +/// - Obsolete military zones should be considered as NaiveTime +/// due to an error in RFC0822 but are interpreted as their respective +/// timezone according to the RFC5322 definition pub fn section(input: &str) -> IResult<&str, Option>> { - let (input, (_, date, time, tz, _)) = tuple(( - opt(terminated(day_of_week, tag(","))), - date, time_of_day, zone - opt(cfws)))(input)?; - - - //@TODO: rebuild DateTime from NaiveDate, NaiveTime and TimeZone - - - - // @FIXME want to extract datetime our way in the future - // to better handle obsolete/bad cases instead of returning raw text. - //let (input, raw_date) = misc_token::unstructured(input)?; - //Ok((input, DateTime::parse_from_rfc2822(&raw_date).unwrap())) + map(tuple(( + opt(terminated(day_of_week, tag(","))), + date, time_of_day, zone, + opt(cfws) + )), |res| { + match res { + (_, Some(date), Some(time), Some(tz), _) => { + date.and_time(time).and_local_timezone(tz).earliest() + }, + _ => None, + } + })(input) } /// day-of-week = ([FWS] day-name) / obs-day-of-week @@ -36,7 +56,7 @@ fn day_of_week_strict(input: &str) -> IResult<&str, &str> { } fn obs_day_of_week(input: &str) -> IResult<&str, &str> { - delimited(obs(cfws), day_name, obs(cfws))(input) + delimited(opt(cfws), day_name, opt(cfws))(input) } /// day-name = "Mon" / "Tue" / "Wed" / "Thu" / @@ -68,22 +88,18 @@ fn day(input: &str) -> IResult<&str, u32> { alt((day_strict, obs_day))(input) } -fn day_strict(input: &str) -> IResult<&str, u32) { - delimited(opt(fws), day_digit, fws)(input) +fn day_strict(input: &str) -> IResult<&str, u32> { + delimited(opt(fws), character::complete::u32, fws)(input) } -fn obs_day(input: &str) -> IResult<&str, u32) { - delimited(opt(cfws), day_digit, opt(cfws))(input) -} - -fn day_digit(input: &str) -> IRresult<&str, u32) { - map(take_while_m_n(1, 2, is_digit), |d| d.parse::().unwrap())(input) +fn obs_day(input: &str) -> IResult<&str, u32> { + delimited(opt(cfws), character::complete::u32, opt(cfws))(input) } /// month = "Jan" / "Feb" / "Mar" / "Apr" / /// "May" / "Jun" / "Jul" / "Aug" / /// "Sep" / "Oct" / "Nov" / "Dec" -fn month(input: &str) -> IResult<&str, u32) { +fn month(input: &str) -> IResult<&str, u32> { alt(( value(1, tag_no_case("Jan")), value(2, tag_no_case("Feb")), @@ -106,39 +122,125 @@ fn year(input: &str) -> IResult<&str, i32> { alt((strict_year, obs_year))(input) } -fn strict_year(input &str) -> IResult<&str, i32> { - delimited(fws, strict_year_digit, fws)(input) +fn strict_year(input: &str) -> IResult<&str, i32> { + delimited(fws, character::complete::i32, fws)(input) } fn obs_year(input: &str) -> IResult<&str, i32> { - delimited(opt(cfws), obs_year_digit, opt(cfws))(input) -} - -fn strict_year_digit(input: &str) -> IResult<&str, i32> { - // Max value for i32 is 2,147,483,647 ; in other words 10 digits. - // 9 digits should always be parsable into an i32 and enough for a year. - // @FIXME a better implementation is desirable - map(take_while_m_n(4, 9, is_digit), |d| d.parse::().unwrap())(input) -} - -fn obs_year_digit(input: &str) -> IResult<&str, i32> { - // @FIXME same as strict_year_digit - map(take_while_m_n(2, 9, is_digit), |d| d.parse::().unwrap())(input) + map(delimited(opt(cfws), character::complete::i32, opt(cfws)), + |d: i32| if d >= 0 && d <= 49 { + 2000 + d + } else if d >= 50 && d <= 999 { + 1900 + d + } else { + d + })(input) } /// time-of-day = hour ":" minute [ ":" second ] /// -fn time(input: &str) -> IResult<&str, (NaiveTime, TimeZone)> { +fn time_of_day(input: &str) -> IResult<&str, Option> { map( - tuple((time_digit, tag(":"), time_digit, opt(preceded(tag(":"), time_digit)))), - |(hour, _, minute, maybe_sec)| + tuple((character::complete::u32, tag(":"), character::complete::u32, opt(preceded(tag(":"), character::complete::u32)))), + |(hour, _, minute, maybe_sec)| NaiveTime::from_hms_opt(hour, minute, maybe_sec.unwrap_or(0)), + )(input) } -fn time_digit(input: &str) -> IResult<&str, u32> { - alt((strict_time_digit, obs_time_digit))(input) +/// Obsolete zones +/// +/// ```abnf +/// zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone +/// obs-zone = "UT" / "GMT" / ; Universal Time +/// ; North American UT +/// ; offsets +/// "EST" / "EDT" / ; Eastern: - 5/ - 4 +/// "CST" / "CDT" / ; Central: - 6/ - 5 +/// "MST" / "MDT" / ; Mountain: - 7/ - 6 +/// "PST" / "PDT" / ; Pacific: - 8/ - 7 +/// ; +/// %d65-73 / ; Military zones - "A" +/// %d75-90 / ; through "I" and "K" +/// %d97-105 / ; through "Z", both +/// %d107-122 / ; upper and lower case +/// ; +/// 1*(ALPHA / DIGIT) ; Unknown legacy timezones +/// ``` +/// +/// + +fn zone(input: &str) -> IResult<&str, Option> { + alt((strict_zone, obs_zone))(input) } -fn strict_time_digit(input: &str) -> IResult<&str, u32> { - take_while_m_n(4, 4, is_digit)(input) +fn strict_zone(input: &str) -> IResult<&str, Option> { + map( + tuple((opt(fws), is_a("+-"), take_while_m_n(2,2,|c| c >= '\x30' && c <= '\x39'), take_while_m_n(2,2,|c| c >= '\x30' && c <= '\x39'))), + |(_, op, dig_zone_hour, dig_zone_min)| { + let zone_hour = dig_zone_hour.parse::().unwrap() * HOUR; + let zone_min = dig_zone_min.parse::().unwrap() * MIN; + match op { + "+" => FixedOffset::east_opt(zone_hour + zone_min), + "-" => FixedOffset::west_opt(zone_hour + zone_min), + _ => unreachable!(), } + } + )(input) } +fn obs_zone(input: &str) -> IResult<&str, Option> { + // The writing of this function is volontarily verbose + // to keep it straightforward to understand. + // @FIXME: Could return a TimeZone and not an Option + // as it could be determined at compile time if values are correct + // and panic at this time if not. But not sure how to do it without unwrap. + alt(( + // Legacy UTC/GMT + value(FixedOffset::west_opt(0 * HOUR), alt((tag("UT"), tag("GMT")))), + + // USA Timezones + value(FixedOffset::west_opt(4 * HOUR), tag("EDT")), + value(FixedOffset::west_opt(5 * HOUR), alt((tag("EST"), tag("CDT")))), + value(FixedOffset::west_opt(6 * HOUR), alt((tag("CST"), tag("MDT")))), + value(FixedOffset::west_opt(7 * HOUR), alt((tag("MST"), tag("PDT")))), + value(FixedOffset::west_opt(8 * HOUR), tag("PST")), + + // Military Timezone UTC + value(FixedOffset::west_opt(0 * HOUR), tag("Z")), + + // Military Timezones East + map(one_of("ABCDEFGHIKLMabcdefghiklm"), |c| match c { + 'A' | 'a' => FixedOffset::east_opt(1 * HOUR), + 'B' | 'b' => FixedOffset::east_opt(2 * HOUR), + 'C' | 'c' => FixedOffset::east_opt(3 * HOUR), + 'D' | 'd' => FixedOffset::east_opt(4 * HOUR), + 'E' | 'e' => FixedOffset::east_opt(5 * HOUR), + 'F' | 'f' => FixedOffset::east_opt(6 * HOUR), + 'G' | 'g' => FixedOffset::east_opt(7 * HOUR), + 'H' | 'h' => FixedOffset::east_opt(8 * HOUR), + 'I' | 'i' => FixedOffset::east_opt(9 * HOUR), + 'K' | 'k' => FixedOffset::east_opt(10 * HOUR), + 'L' | 'l' => FixedOffset::east_opt(11 * HOUR), + 'M' | 'm' => FixedOffset::east_opt(12 * HOUR), + _ => unreachable!(), + }), + + // Military Timezones West + map(one_of("nopqrstuvwxyNOPQRSTUVWXY"), |c| match c { + 'N' | 'n' => FixedOffset::west_opt(1 * HOUR), + 'O' | 'o' => FixedOffset::west_opt(2 * HOUR), + 'P' | 'p' => FixedOffset::west_opt(3 * HOUR), + 'Q' | 'q' => FixedOffset::west_opt(4 * HOUR), + 'R' | 'r' => FixedOffset::west_opt(5 * HOUR), + 'S' | 's' => FixedOffset::west_opt(6 * HOUR), + 'T' | 't' => FixedOffset::west_opt(7 * HOUR), + 'U' | 'u' => FixedOffset::west_opt(8 * HOUR), + 'V' | 'v' => FixedOffset::west_opt(9 * HOUR), + 'W' | 'w' => FixedOffset::west_opt(10 * HOUR), + 'X' | 'x' => FixedOffset::west_opt(11 * HOUR), + 'Y' | 'y' => FixedOffset::west_opt(12 * HOUR), + _ => unreachable!(), + }), + + // Unknown timezone + value(FixedOffset::west_opt(0 * HOUR), alphanumeric1), + ))(input) +} diff --git a/src/header.rs b/src/header.rs index 184cbbd..47ab923 100644 --- a/src/header.rs +++ b/src/header.rs @@ -36,7 +36,7 @@ pub fn section(input: &str) -> IResult<&str, HeaderSection> { // 3.6.1. The Origination Date Field // | orig-date | 1 | 1 | | Field::Date(FieldBody::Correct(d)) => { - section.date = Some(d); + section.date = d; } // 3.6.2. Originator Fields diff --git a/src/model.rs b/src/model.rs index 2371a54..a83e6b8 100644 --- a/src/model.rs +++ b/src/model.rs @@ -64,7 +64,7 @@ pub enum FieldBody<'a, T> { #[derive(Debug, PartialEq)] pub enum Field<'a> { // 3.6.1. The Origination Date Field - Date(FieldBody<'a, DateTime>), + Date(FieldBody<'a, Option>>), // 3.6.2. Originator Fields From(FieldBody<'a, Vec>),