word and phrase

This commit is contained in:
Quentin 2023-06-12 21:43:39 +02:00
parent 6b227d9830
commit db2fea9015
Signed by: quentin
GPG key ID: E9602264D639FF68
2 changed files with 54 additions and 27 deletions

View file

@ -11,7 +11,7 @@ use nom::{
sequence::tuple, sequence::tuple,
}; };
use crate::tokens::{fws, vchar_seq, perm_crlf}; use crate::tokens::{fws, vchar_seq, perm_crlf, unstructured};
use crate::model::{PermissiveHeaderSection, HeaderDate, MailboxRef}; use crate::model::{PermissiveHeaderSection, HeaderDate, MailboxRef};
/// HEADERS /// HEADERS
@ -124,31 +124,6 @@ fn header_field(input: &str) -> IResult<&str, HeaderField> {
return Ok((input, hfield)); return Ok((input, hfield));
} }
/// Parses the body of an unstructured header field.
///
/// ```abnf
/// unstructured = (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct
/// ```
///
/// Only the first alternative is implemented here; `obs-unstruct` is not
/// attempted. Each `VCHAR_SEQ` is appended to the result, preceded by a
/// single space whenever a folding whitespace was matched right before it.
/// Trailing whitespace matched by `space0` is consumed and dropped.
fn unstructured(input: &str) -> IResult<&str, String> {
    let (rest, chunks) = many0(tuple((opt(fws), vchar_seq)))(input)?;
    let (rest, _) = space0(rest)?;

    let body = match chunks.as_slice() {
        // Most common case: one unfolded run of characters, copied as is.
        [(None, only)] => only.to_string(),
        // Everything else (including zero chunks) is assembled piece by piece.
        pieces => {
            let mut joined = String::with_capacity(255);
            for (folded, text) in pieces {
                if folded.is_some() {
                    joined.push(' ');
                }
                joined.push_str(text);
            }
            joined
        }
    };

    Ok((rest, body))
}
fn datetime(input: &str) -> IResult<&str, HeaderField> { fn datetime(input: &str) -> IResult<&str, HeaderField> {
// @FIXME want to extract datetime our way in the future // @FIXME want to extract datetime our way in the future
// to better handle obsolete/bad cases instead of returning raw text. // to better handle obsolete/bad cases instead of returning raw text.

View file

@ -1,9 +1,10 @@
use std::borrow::Cow;
use nom::{ use nom::{
IResult, IResult,
branch::alt, branch::alt,
bytes::complete::{tag, take_while1}, bytes::complete::{tag, take_while1},
character::complete::{crlf, satisfy, space0, space1}, character::complete::{crlf, satisfy, space0, space1},
combinator::{recognize, opt}, combinator::{into, recognize, opt},
multi::{many0, many1}, multi::{many0, many1},
sequence::{delimited, pair, preceded, terminated, tuple}, sequence::{delimited, pair, preceded, terminated, tuple},
}; };
@ -79,6 +80,50 @@ pub fn quoted_string(input: &str) -> IResult<&str, String> {
Ok((input, qstring)) Ok((input, qstring))
} }
/// Parses a single word.
///
/// ```abnf
/// word = atom / quoted-string
/// ```
///
/// `quoted_string` is tried first and `atom` second; whichever succeeds has
/// its output converted into a `Cow<str>` so both branches share one type.
pub fn word(input: &str) -> IResult<&str, Cow<str>> {
    let mut quoted_or_atom = alt((into(quoted_string), into(atom)));
    quoted_or_atom(input)
}
/// Parses a phrase, i.e. one or more consecutive words.
///
/// ```abnf
/// phrase = 1*word / obs-phrase
/// ```
///
/// Only the `1*word` alternative is implemented here; `obs-phrase` is not
/// attempted. The parsed words are joined with a single space.
pub fn phrase(input: &str) -> IResult<&str, String> {
    many1(word)(input).map(|(rest, words)| (rest, words.join(" ")))
}
/// Parses the body of an unstructured header field.
///
/// ```abnf
/// unstructured = (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct
/// ```
///
/// Only the first alternative is implemented here; `obs-unstruct` is not
/// attempted. Each `VCHAR_SEQ` is appended to the result, preceded by a
/// single space whenever a folding whitespace was matched right before it.
/// Trailing whitespace matched by `space0` is consumed and dropped.
pub fn unstructured(input: &str) -> IResult<&str, String> {
    let (rest, chunks) = many0(tuple((opt(fws), vchar_seq)))(input)?;
    let (rest, _) = space0(rest)?;

    let body = match chunks.as_slice() {
        // Most common case: one unfolded run of characters, copied as is.
        [(None, only)] => only.to_string(),
        // Everything else (including zero chunks) is assembled piece by piece.
        pieces => {
            let mut joined = String::with_capacity(255);
            for (folded, text) in pieces {
                if folded.is_some() {
                    joined.push(' ');
                }
                joined.push_str(text);
            }
            joined
        }
    };

    Ok((rest, body))
}
// --- whitespaces and comments // --- whitespaces and comments
@ -272,4 +317,11 @@ mod tests {
assert_eq!(quoted_string(" \"hello\\\"world\" "), Ok(("", "hello\"world".to_string()))); assert_eq!(quoted_string(" \"hello\\\"world\" "), Ok(("", "hello\"world".to_string())));
assert_eq!(quoted_string("\"hello\r\n world\""), Ok(("", "hello world".to_string()))); assert_eq!(quoted_string("\"hello\r\n world\""), Ok(("", "hello world".to_string())));
} }
#[test]
fn test_phrase() {
    // (input, expected unparsed remainder, expected phrase)
    let cases = [
        ("hello world", "", "hello world"),
        ("salut \"le\" monde", "", "salut le monde"),
        // Parsing stops at the CRLF that is not followed by whitespace.
        ("fin\r\n du\r\nmonde", "\r\nmonde", "fin du"),
    ];

    for &(raw, remainder, expected) in &cases {
        assert_eq!(phrase(raw), Ok((remainder, expected.to_string())));
    }
}
} }