eml-codec/src/words.rs

88 lines
2.1 KiB
Rust

use nom::{
IResult,
bytes::complete::{tag, take_while1},
combinator::{recognize, opt},
multi::many0,
sequence::{delimited, pair},
};
use crate::whitespace::cfws;
/// Tells whether `c` is a visible character.
///
/// A character qualifies when it lies in the printable ASCII range
/// `%x21-7E`, or when it is not ASCII at all (the UTF-8 extension).
pub fn is_vchar(c: char) -> bool {
    // Anything outside ASCII is accepted wholesale.
    if !c.is_ascii() {
        return true;
    }
    matches!(c, '\x21'..='\x7E')
}
/// Parses a non-empty run of visible characters (UTF-8 extension included).
///
/// ```abnf
/// VCHAR   =  %x21-7E
///            ; visible (printing) characters
/// VCHAR   =/ UTF8-non-ascii
/// SEQ     =  1*VCHAR
/// ```
///
/// On success the result is `(rest, run)`, where `run` is the longest prefix
/// of `input` whose characters all satisfy [`is_vchar`]. The parser fails
/// when `input` does not start with at least one such character.
pub fn vchar_seq(input: &str) -> IResult<&str, &str> {
    let visible_run = take_while1(is_vchar);
    visible_run(input)
}
/// Tells whether `c` may appear inside an atom.
///
/// Accepted characters are ASCII letters and digits, the punctuation listed
/// in `SPECIALS` below, and every non-ASCII character.
fn is_atext(c: char) -> bool {
    /// ASCII punctuation allowed in an atom besides letters and digits.
    const SPECIALS: &str = "!#$%&'*+-/=?^_`{|}~";

    // Anything outside ASCII is accepted wholesale.
    if !c.is_ascii() {
        return true;
    }
    c.is_ascii_alphanumeric() || SPECIALS.contains(c)
}
/// Parses an atom: `[CFWS] 1*atext [CFWS]`
///
/// Whatever `cfws` matches on either side is optional and is discarded;
/// only the `1*atext` run is returned, next to the remaining input.
pub fn atom(input: &str) -> IResult<&str, &str> {
    let (rest, _) = opt(cfws)(input)?;
    let (rest, word) = take_while1(is_atext)(rest)?;
    let (rest, _) = opt(cfws)(rest)?;
    Ok((rest, word))
}
/// Parses a dot-atom-text: `1*atext *("." 1*atext)`
///
/// The whole dotted run (first label plus every `"." label` continuation)
/// is returned as one contiguous slice of `input`.
fn dot_atom_text(input: &str) -> IResult<&str, &str> {
    /// One `"." 1*atext` continuation segment.
    fn dotted_label(i: &str) -> IResult<&str, (&str, &str)> {
        pair(tag("."), take_while1(is_atext))(i)
    }

    recognize(pair(take_while1(is_atext), many0(dotted_label)))(input)
}
/// dot-atom
///
/// `[CFWS] dot-atom-text [CFWS]`
pub fn dot_atom(input: &str) -> IResult<&str, &str> {
delimited(opt(cfws), dot_atom_text, opt(cfws))(input)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A visible-character run stops at the first space, with or without
    /// non-ASCII characters inside it.
    #[test]
    fn test_vchar_seq() {
        let ascii_only = vchar_seq("hello world");
        assert_eq!(ascii_only, Ok((" world", "hello")));

        let with_emoji = vchar_seq("hello👋 world");
        assert_eq!(with_emoji, Ok((" world", "hello👋")));
    }

    /// Spot-checks the atext predicate on punctuation, digit, letter,
    /// whitespace and non-ASCII input.
    #[test]
    fn test_atext() {
        for &accepted in &['=', '5', 'Q'] {
            assert!(is_atext(accepted));
        }
        assert!(!is_atext(' '));
        assert!(is_atext('É')); // support utf8
    }

    /// Parenthesised comments and spaces around the atom are skipped.
    #[test]
    fn test_atom() {
        let parsed = atom("(skip) imf_codec (hidden) aerogramme");
        assert_eq!(parsed, Ok(("aerogramme", "imf_codec")));
    }

    /// The dotted run is returned whole; parsing stops at the space.
    #[test]
    fn test_dot_atom_text() {
        let parsed = dot_atom_text("quentin.dufour.io abcdef");
        assert_eq!(parsed, Ok((" abcdef", "quentin.dufour.io")));
    }

    /// Leading space/comment and the trailing space are consumed around the
    /// dot-atom.
    #[test]
    fn test_dot_atom() {
        let parsed = dot_atom(" (skip) quentin.dufour.io abcdef");
        assert_eq!(parsed, Ok(("abcdef", "quentin.dufour.io")));
    }
}