word and phrase

This commit is contained in:
Quentin 2023-06-12 21:43:39 +02:00
parent 6b227d9830
commit db2fea9015
Signed by: quentin
GPG key ID: E9602264D639FF68
2 changed files with 54 additions and 27 deletions

View file

@ -11,7 +11,7 @@ use nom::{
sequence::tuple,
};
use crate::tokens::{fws, vchar_seq, perm_crlf};
use crate::tokens::{fws, vchar_seq, perm_crlf, unstructured};
use crate::model::{PermissiveHeaderSection, HeaderDate, MailboxRef};
/// HEADERS
@ -124,31 +124,6 @@ fn header_field(input: &str) -> IResult<&str, HeaderField> {
return Ok((input, hfield));
}
/// Parse the body of an unstructured header field.
///
/// ```abnf
/// unstructured = (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct
/// ```
///
/// Consumes any number of `[FWS] VCHAR_SEQ` pairs, followed by whatever
/// trailing whitespace `space0` accepts. In the returned `String`, each
/// chunk that was preceded by folding whitespace is prefixed with exactly
/// one space; chunks without folding whitespace are appended as-is.
fn unstructured(input: &str) -> IResult<&str, String> {
    let (rest, chunks) = many0(tuple((opt(fws), vchar_seq)))(input)?;
    let (rest, _) = space0(rest)?;

    let body = if let [(folded, text)] = chunks.as_slice() {
        // Fast path for the most common case: a single chunk, so the
        // large pre-sized buffer used below is not needed.
        if folded.is_some() {
            let mut single = String::from(" ");
            single.push_str(text);
            single
        } else {
            text.to_string()
        }
    } else {
        // General case (zero or several chunks): accumulate in one buffer.
        let mut joined = String::with_capacity(255);
        for (folded, text) in &chunks {
            if folded.is_some() {
                // Folding whitespace collapses to a single space.
                joined.push(' ');
            }
            joined.push_str(text);
        }
        joined
    };

    Ok((rest, body))
}
fn datetime(input: &str) -> IResult<&str, HeaderField> {
// @FIXME want to extract datetime our way in the future
// to better handle obsolete/bad cases instead of returning raw text.

View file

@ -1,9 +1,10 @@
use std::borrow::Cow;
use nom::{
IResult,
branch::alt,
bytes::complete::{tag, take_while1},
character::complete::{crlf, satisfy, space0, space1},
combinator::{recognize, opt},
combinator::{into, recognize, opt},
multi::{many0, many1},
sequence::{delimited, pair, preceded, terminated, tuple},
};
@ -79,6 +80,50 @@ pub fn quoted_string(input: &str) -> IResult<&str, String> {
Ok((input, qstring))
}
/// Parse a single word.
///
/// ```abnf
/// word = atom / quoted-string
/// ```
///
/// The quoted-string alternative is attempted first, then the atom one.
/// Whichever parser succeeds has its output converted into a `Cow<str>`.
pub fn word(input: &str) -> IResult<&str, Cow<str>> {
    let quoted = into(quoted_string);
    let bare = into(atom);
    alt((quoted, bare))(input)
}
/// Parse a phrase, i.e. one or more consecutive words.
///
/// ```abnf
/// phrase = 1*word / obs-phrase
/// ```
///
/// Only the `1*word` alternative is handled by this function. The parsed
/// words are joined with a single space to build the returned `String`.
pub fn phrase(input: &str) -> IResult<&str, String> {
    many1(word)(input).map(|(rest, words)| (rest, words.join(" ")))
}
/// Parse the body of an unstructured header field.
///
/// ```abnf
/// unstructured = (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct
/// ```
///
/// Consumes any number of `[FWS] VCHAR_SEQ` pairs, followed by whatever
/// trailing whitespace `space0` accepts. In the returned `String`, each
/// chunk that was preceded by folding whitespace is prefixed with exactly
/// one space; chunks without folding whitespace are appended as-is.
pub fn unstructured(input: &str) -> IResult<&str, String> {
    let (rest, chunks) = many0(tuple((opt(fws), vchar_seq)))(input)?;
    let (rest, _) = space0(rest)?;

    let body = if let [(folded, text)] = chunks.as_slice() {
        // Fast path for the most common case: a single chunk, so the
        // large pre-sized buffer used below is not needed.
        if folded.is_some() {
            let mut single = String::from(" ");
            single.push_str(text);
            single
        } else {
            text.to_string()
        }
    } else {
        // General case (zero or several chunks): accumulate in one buffer.
        let mut joined = String::with_capacity(255);
        for (folded, text) in &chunks {
            if folded.is_some() {
                // Folding whitespace collapses to a single space.
                joined.push(' ');
            }
            joined.push_str(text);
        }
        joined
    };

    Ok((rest, body))
}
// --- whitespaces and comments
@ -272,4 +317,11 @@ mod tests {
assert_eq!(quoted_string(" \"hello\\\"world\" "), Ok(("", "hello\"world".to_string())));
assert_eq!(quoted_string("\"hello\r\n world\""), Ok(("", "hello world".to_string())));
}
#[test]
fn test_phrase() {
    // Each case is (input, unparsed remainder, expected phrase).
    let cases = [
        // Two plain atoms separated by whitespace.
        ("hello world", "", "hello world"),
        // A quoted-string in the middle is unquoted in the result.
        ("salut \"le\" monde", "", "salut le monde"),
        // A CRLF not followed by whitespace ends the phrase.
        ("fin\r\n du\r\nmonde", "\r\nmonde", "fin du"),
    ];

    for (input, rest, expected) in cases {
        assert_eq!(phrase(input), Ok((rest, expected.to_string())));
    }
}
}