eml-codec/src/text/words.rs

135 lines
3.2 KiB
Rust
Raw Normal View History

2023-07-18 23:25:10 +02:00
use crate::text::ascii;
2023-07-23 16:37:47 +02:00
use crate::text::whitespace::cfws;
2023-07-18 23:25:10 +02:00
use nom::{
bytes::complete::{tag, take_while1},
character::is_alphanumeric,
combinator::{opt, recognize},
multi::many0,
sequence::{delimited, pair},
IResult,
};
pub fn is_vchar(c: u8) -> bool {
2023-07-24 09:24:38 +02:00
(ascii::EXCLAMATION..=ascii::TILDE).contains(&c)
2023-07-18 23:25:10 +02:00
}
/// MIME Token allowed characters
///
/// forbidden: ()<>@,;:\"/[]?=
2023-07-21 18:31:56 +02:00
fn is_mime_atom_text(c: u8) -> bool {
2023-07-23 16:37:47 +02:00
is_alphanumeric(c)
|| c == ascii::EXCLAMATION
|| c == ascii::NUM
|| c == ascii::DOLLAR
|| c == ascii::PERCENT
|| c == ascii::AMPERSAND
|| c == ascii::SQUOTE
|| c == ascii::ASTERISK
|| c == ascii::PLUS
|| c == ascii::MINUS
|| c == ascii::PERIOD
|| c == ascii::CARRET
|| c == ascii::UNDERSCORE
|| c == ascii::GRAVE
|| c == ascii::LEFT_CURLY
|| c == ascii::PIPE
|| c == ascii::RIGHT_CURLY
|| c == ascii::TILDE
2023-07-18 23:25:10 +02:00
}
/// MIME Token
///
/// `[CFWS] 1*token_text [CFWS]`
2023-07-21 18:31:56 +02:00
pub fn mime_atom(input: &[u8]) -> IResult<&[u8], &[u8]> {
delimited(opt(cfws), take_while1(is_mime_atom_text), opt(cfws))(input)
2023-07-18 23:25:10 +02:00
}
/// Atom allowed characters
///
/// authorized: !#$%&'*+-/=?^_`{|}~
fn is_atext(c: u8) -> bool {
is_alphanumeric(c)
2023-07-23 16:37:47 +02:00
|| c == ascii::EXCLAMATION
|| c == ascii::NUM
|| c == ascii::DOLLAR
|| c == ascii::PERCENT
|| c == ascii::AMPERSAND
|| c == ascii::SQUOTE
|| c == ascii::ASTERISK
|| c == ascii::PLUS
|| c == ascii::MINUS
|| c == ascii::SLASH
|| c == ascii::EQ
|| c == ascii::QUESTION
|| c == ascii::CARRET
|| c == ascii::UNDERSCORE
|| c == ascii::GRAVE
|| c == ascii::LEFT_CURLY
|| c == ascii::PIPE
|| c == ascii::RIGHT_CURLY
|| c == ascii::TILDE
2023-07-18 23:25:10 +02:00
}
/// Atom
///
/// `[CFWS] 1*atext [CFWS]`
pub fn atom(input: &[u8]) -> IResult<&[u8], &[u8]> {
delimited(opt(cfws), take_while1(is_atext), opt(cfws))(input)
}
/// dot-atom-text
///
/// `1*atext *("." 1*atext)`
///
/// Returns the whole matched span (labels and the dots between them) as a
/// single slice of the input.
pub fn dot_atom_text(input: &[u8]) -> IResult<&[u8], &[u8]> {
    // First mandatory label.
    let first_label = take_while1(is_atext);
    // Zero or more `.label` continuations.
    let dotted_labels = many0(pair(tag("."), take_while1(is_atext)));

    // `recognize` discards the parsed pieces and yields the consumed bytes.
    recognize(pair(first_label, dotted_labels))(input)
}
/// dot-atom
///
/// `[CFWS] dot-atom-text [CFWS]`
2023-07-23 18:33:20 +02:00
#[allow(dead_code)]
2023-07-18 23:25:10 +02:00
pub fn dot_atom(input: &[u8]) -> IResult<&[u8], &[u8]> {
delimited(opt(cfws), dot_atom_text, opt(cfws))(input)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `is_atext` accepts symbols, digits and letters but rejects a space.
    #[test]
    fn test_atext() {
        for accepted in [b'=', b'5', b'Q'] {
            assert!(is_atext(accepted));
        }
        assert!(!is_atext(b' '));
        // TODO: support UTF-8, e.g. accept 'É'.
    }

    /// `atom` skips the surrounding comments/whitespace and yields the word.
    #[test]
    fn test_atom() {
        let (remaining, word) = atom(b"(skip) imf_codec (hidden) aerogramme").unwrap();
        assert_eq!(remaining, &b"aerogramme"[..]);
        assert_eq!(word, &b"imf_codec"[..]);
    }

    /// `dot_atom_text` stops at the first byte that is neither atext nor a dot.
    #[test]
    fn test_dot_atom_text() {
        let (remaining, name) = dot_atom_text(b"quentin.dufour.io abcdef").unwrap();
        assert_eq!(remaining, &b" abcdef"[..]);
        assert_eq!(name, &b"quentin.dufour.io"[..]);
    }

    /// `dot_atom` additionally consumes the CFWS around the dotted name.
    #[test]
    fn test_dot_atom() {
        let (remaining, name) = dot_atom(b" (skip) quentin.dufour.io abcdef").unwrap();
        assert_eq!(remaining, &b"abcdef"[..]);
        assert_eq!(name, &b"quentin.dufour.io"[..]);
    }
}