wip atom
This commit is contained in:
parent
6e76fed684
commit
1e6b18de5a
2 changed files with 97 additions and 21 deletions
|
@ -46,6 +46,7 @@ pub fn header_section(input: &str) -> IResult<&str, PermissiveHeaderSection> {
|
||||||
Ok((input, headers))
|
Ok((input, headers))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
enum HeaderField<'a> {
|
enum HeaderField<'a> {
|
||||||
// 3.6.1. The Origination Date Field
|
// 3.6.1. The Origination Date Field
|
||||||
Date(HeaderDate),
|
Date(HeaderDate),
|
||||||
|
@ -86,16 +87,17 @@ enum HeaderField<'a> {
|
||||||
Optional(&'a str, String)
|
Optional(&'a str, String)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extract one header field
|
/// Parse one header field
|
||||||
///
|
///
|
||||||
/// Derived grammar inspired by RFC5322 optional-field:
|
/// RFC5322 optional-field seems to be a generalization of the field terminology.
|
||||||
|
/// We use it to parse all header names:
|
||||||
///
|
///
|
||||||
/// ```abnf
|
/// ```abnf
|
||||||
/// field = field-name ":" unstructured CRLF
|
/// field = field-name ":" unstructured CRLF
|
||||||
/// field-name = 1*ftext
|
/// field-name = 1*ftext
|
||||||
/// ftext = %d33-57 / ; Printable US-ASCII
|
/// ftext = %d33-57 / ; Printable US-ASCII
|
||||||
/// %d59-126 ; characters not including
|
/// %d59-126 ; characters not including
|
||||||
/// ; ":".
|
/// ; ":".
|
||||||
/// ```
|
/// ```
|
||||||
fn header_field(input: &str) -> IResult<&str, HeaderField> {
|
fn header_field(input: &str) -> IResult<&str, HeaderField> {
|
||||||
// Extract field name
|
// Extract field name
|
||||||
|
@ -104,21 +106,8 @@ fn header_field(input: &str) -> IResult<&str, HeaderField> {
|
||||||
|
|
||||||
// Extract field body
|
// Extract field body
|
||||||
let (input, hfield) = match field_name {
|
let (input, hfield) = match field_name {
|
||||||
"Date" => {
|
"Date" => datetime(input)?,
|
||||||
// @FIXME want to extract datetime our way in the future
|
"From" => from(input)?,
|
||||||
// to better handle obsolete/bad cases instead of crashing.
|
|
||||||
let (input, raw_date) = unstructured(input)?;
|
|
||||||
let date = match DateTime::parse_from_rfc2822(&raw_date) {
|
|
||||||
Ok(chronodt) => HeaderDate::Parsed(chronodt),
|
|
||||||
Err(e) => HeaderDate::Unknown(raw_date, e),
|
|
||||||
};
|
|
||||||
(input, HeaderField::Date(date))
|
|
||||||
},
|
|
||||||
"From" => {
|
|
||||||
let (input, mbx) = mailbox(input)?;
|
|
||||||
//many0(
|
|
||||||
unimplemented!()
|
|
||||||
},
|
|
||||||
"Sender" => unimplemented!(),
|
"Sender" => unimplemented!(),
|
||||||
"Subject" => {
|
"Subject" => {
|
||||||
let (input, body) = unstructured(input)?;
|
let (input, body) = unstructured(input)?;
|
||||||
|
@ -130,6 +119,7 @@ fn header_field(input: &str) -> IResult<&str, HeaderField> {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Drop EOL
|
||||||
let (input, _) = crlf(input)?;
|
let (input, _) = crlf(input)?;
|
||||||
return Ok((input, hfield));
|
return Ok((input, hfield));
|
||||||
}
|
}
|
||||||
|
@ -159,6 +149,41 @@ fn unstructured(input: &str) -> IResult<&str, String> {
|
||||||
Ok((input, body))
|
Ok((input, body))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn datetime(input: &str) -> IResult<&str, HeaderField> {
|
||||||
|
// @FIXME want to extract datetime our way in the future
|
||||||
|
// to better handle obsolete/bad cases instead of returning raw text.
|
||||||
|
let (input, raw_date) = unstructured(input)?;
|
||||||
|
let date = match DateTime::parse_from_rfc2822(&raw_date) {
|
||||||
|
Ok(chronodt) => HeaderDate::Parsed(chronodt),
|
||||||
|
Err(e) => HeaderDate::Unknown(raw_date, e),
|
||||||
|
};
|
||||||
|
Ok((input, HeaderField::Date(date)))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse the body of a `From` header field.
///
/// Placeholder: no parsing is performed yet and every call panics through
/// `unimplemented!`.
fn from(input: &str) -> IResult<&str, HeaderField> {
    // Sketch kept for the future implementation:
    //   let (input, mbox_list) = many0(mailbox)(input)?;
    //   Ok((input, HeaderField::From(mbox_list)))
    unimplemented!()
}
|
||||||
|
|
||||||
fn mailbox(input: &str) -> IResult<&str, MailboxRef> {
|
fn mailbox(input: &str) -> IResult<&str, MailboxRef> {
|
||||||
unimplemented!();
|
unimplemented!();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// A folded date followed by a comment must be consumed entirely and
    /// come back as a successfully parsed date.
    #[test]
    fn test_datetime() {
        let datefield = "Thu,\r\n 13\r\n Feb\r\n 1969\r\n 23:32\r\n -0330 (Newfoundland Time)";

        let (remaining, field) = datetime(datefield).unwrap();

        assert_eq!(remaining, "");
        assert!(
            matches!(field, HeaderField::Date(HeaderDate::Parsed(_))),
            "Date has not been parsed"
        );
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@ use nom::{
|
||||||
character::complete::{crlf, satisfy, space0, space1},
|
character::complete::{crlf, satisfy, space0, space1},
|
||||||
combinator::{recognize, opt},
|
combinator::{recognize, opt},
|
||||||
multi::{many0, many1},
|
multi::{many0, many1},
|
||||||
sequence::{preceded, terminated, tuple},
|
sequence::{delimited, pair, preceded, terminated, tuple},
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Lexical tokens
|
/// Lexical tokens
|
||||||
|
@ -68,7 +68,7 @@ fn fold_marker(input: &str) -> IResult<&str, &str> {
|
||||||
/// CFWS = (1*([FWS] comment) [FWS]) / FWS
|
/// CFWS = (1*([FWS] comment) [FWS]) / FWS
|
||||||
/// ```
|
/// ```
|
||||||
pub fn cfws(input: &str) -> IResult<&str, &str> {
|
pub fn cfws(input: &str) -> IResult<&str, &str> {
|
||||||
alt((perm_fws, recognize(comments)))(input)
|
alt((recognize(comments), perm_fws))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn comments(input: &str) -> IResult<&str, ()> {
|
pub fn comments(input: &str) -> IResult<&str, ()> {
|
||||||
|
@ -122,10 +122,35 @@ pub fn vchar_seq(input: &str) -> IResult<&str, &str> {
|
||||||
take_while1(is_vchar)(input)
|
take_while1(is_vchar)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Tell whether `c` is an `atext` character: an ASCII letter, an ASCII digit,
/// or one of the punctuation marks ``!#$%&'*+-/=?^_`{|}~``.
fn is_atext(c: char) -> bool {
    matches!(
        c,
        'a'..='z'
            | 'A'..='Z'
            | '0'..='9'
            | '!'
            | '#'
            | '$'
            | '%'
            | '&'
            | '\''
            | '*'
            | '+'
            | '-'
            | '/'
            | '='
            | '?'
            | '^'
            | '_'
            | '`'
            | '{'
            | '|'
            | '}'
            | '~'
    )
}
|
||||||
|
|
||||||
|
/// atom
|
||||||
|
///
|
||||||
|
/// `[CFWS] 1*atext [CFWS]`
|
||||||
|
fn atom(input: &str) -> IResult<&str, &str> {
|
||||||
|
delimited(opt(cfws), take_while1(is_atext), opt(cfws))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// dot-atom-text
|
||||||
|
///
|
||||||
|
/// `1*atext *("." 1*atext)`
|
||||||
|
fn dot_atom_text(input: &str) -> IResult<&str, &str> {
|
||||||
|
recognize(pair(take_while1(is_atext), many0(pair(tag("."), take_while1(is_atext)))))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// dot-atom
|
||||||
|
///
|
||||||
|
/// `[CFWS] dot-atom-text [CFWS]`
|
||||||
|
fn dot_atom(input: &str) -> IResult<&str, &str> {
|
||||||
|
delimited(opt(cfws), dot_atom_text, opt(cfws))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use nom;
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_vchar_seq() {
|
fn test_vchar_seq() {
|
||||||
|
@ -151,5 +176,31 @@ mod tests {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_cfws() {
|
fn test_cfws() {
|
||||||
assert_eq!(cfws("(A nice \\) chap) <pete(his account)@silly.test(his host)>"), Ok(("<pete(his account)@silly.test(his host)>", "(A nice \\) chap) ")));
|
assert_eq!(cfws("(A nice \\) chap) <pete(his account)@silly.test(his host)>"), Ok(("<pete(his account)@silly.test(his host)>", "(A nice \\) chap) ")));
|
||||||
|
assert_eq!(cfws("(Chris's host.)public.example>,"), Ok(("public.example>,", "(Chris's host.)")));
|
||||||
|
assert_eq!(cfws("(double (comment) is fun) wouch"), Ok(("wouch", "(double (comment) is fun) ")));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
fn test_atext() {
    // Punctuation, digit and letter are all atext.
    assert!("=5Q".chars().all(is_atext));
    // Whitespace and non-ASCII letters are not.
    assert!(!" É".chars().any(is_atext));
}
|
||||||
|
|
||||||
|
#[test]
fn test_atom() {
    // Comments and whitespace around the atom are consumed but not returned.
    let parsed = atom("(skip) imf_codec (hidden) aerogramme");
    assert_eq!(parsed, Ok(("aerogramme", "imf_codec")));
}
|
||||||
|
|
||||||
|
#[test]
fn test_dot_atom_text() {
    // Parsing stops at the first character that is neither atext nor a dot
    // followed by atext; the trailing space stays in the remaining input.
    let parsed = dot_atom_text("quentin.dufour.io abcdef");
    assert_eq!(parsed, Ok((" abcdef", "quentin.dufour.io")));
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_dot_atom() {
|
||||||
|
assert_eq!(dot_atom(" (skip) quentin.dufour.io abcdef"), Ok(("abcdef", "quentin.dufour.io")));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue