improve encoded word compatibility

This commit is contained in:
Quentin 2023-07-23 18:27:03 +02:00
parent 971bbc6945
commit 755a095743
Signed by: quentin
GPG key ID: E9602264D639FF68
4 changed files with 96 additions and 67 deletions

View file

@ -372,7 +372,7 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
enc: encoding_rs::WINDOWS_1252,
chunks: vec![
QuotedChunk::Safe(&b"Andr"[..]),
QuotedChunk::Encoded(0xE9),
QuotedChunk::Encoded(vec![0xE9]),
],
})),
Word::Atom(&b"Pirard"[..])

View file

@ -35,46 +35,6 @@ impl<'a> From<GroupRef<'a>> for AddressRef<'a> {
}
pub type AddressList<'a> = Vec<AddressRef<'a>>;
/*impl<'a> TryFrom<&'a lazy::Mailbox<'a>> for MailboxRef {
type Error = IMFError<'a>;
fn try_from(mx: &'a lazy::Mailbox<'a>) -> Result<Self, Self::Error> {
mailbox(mx.0)
.map(|(_, m)| m)
.map_err(|e| IMFError::Mailbox(e))
}
}
impl<'a> TryFrom<&'a lazy::MailboxList<'a>> for MailboxList {
type Error = IMFError<'a>;
fn try_from(ml: &'a lazy::MailboxList<'a>) -> Result<Self, Self::Error> {
mailbox_list(ml.0)
.map(|(_, m)| m)
.map_err(|e| IMFError::MailboxList(e))
}
}
impl<'a> TryFrom<&'a lazy::AddressList<'a>> for AddressList {
type Error = IMFError<'a>;
fn try_from(al: &'a lazy::AddressList<'a>) -> Result<Self, Self::Error> {
address_list(al.0)
.map(|(_, a)| a)
.map_err(|e| IMFError::AddressList(e))
}
}
impl<'a> TryFrom<&'a lazy::NullableAddressList<'a>> for AddressList {
type Error = IMFError<'a>;
fn try_from(nal: &'a lazy::NullableAddressList<'a>) -> Result<Self, Self::Error> {
opt(alt((address_list, address_list_cfws)))(nal.0)
.map(|(_, a)| a.unwrap_or(vec![]))
.map_err(|e| IMFError::NullableAddressList(e))
}
}*/
/// Address (section 3.4 of RFC5322)
///
/// ```abnf
@ -215,4 +175,61 @@ mod tests {
))
);
}
use crate::text::quoted::QuotedString;
use crate::text::encoding::{EncodedWord, QuotedWord, QuotedChunk};
// Parses an address list made of two groups and checks the full parsed tree.
// The input mixes a quoted group name, a stray `,` after the first group's
// terminating `;`, a bare addr-spec, and a mailbox whose display name is a
// UTF-8 Q-encoded word.
#[test]
fn test_strange_groups() {
assert_eq!(
address_list(
br#""Colleagues": "James Smythe" <james@vandelay.com>;, Friends:
jane@example.com, =?UTF-8?Q?John_Sm=C3=AEth?= <john@example.com>;"#
),
// empty remainder: the whole input is expected to be consumed
Ok((&b""[..], vec![
// first group: name given as a quoted string, one named mailbox
AddressRef::Many(GroupRef {
name: Phrase(vec![Word::Quoted(QuotedString(vec![&b"Colleagues"[..]]))]),
participants: vec![
MailboxRef {
// the quoted display name is kept as separate pieces, space included
name: Some(Phrase(vec![Word::Quoted(QuotedString(vec![
&b"James"[..], &b" "[..], &b"Smythe"[..]
]))])),
addrspec: AddrSpec {
local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"james"[..]))]),
domain: Domain::Atoms(vec![&b"vandelay"[..], &b"com"[..]]),
}
},
],
}),
// second group: name given as a plain atom, two mailboxes
AddressRef::Many(GroupRef {
name: Phrase(vec![Word::Atom(&b"Friends"[..])]),
participants: vec![
// bare addr-spec, hence no display name
MailboxRef{
name: None,
addrspec: AddrSpec {
local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"jane"[..]))]),
domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]),
}
},
// display name is an encoded word; `=C3=AE` must end up in ONE chunk of two bytes
MailboxRef{
name: Some(Phrase(vec![Word::Encoded(EncodedWord::Quoted(QuotedWord {
enc: encoding_rs::UTF_8,
chunks: vec![
QuotedChunk::Safe(&b"John"[..]),
QuotedChunk::Space,
QuotedChunk::Safe(&b"Sm"[..]),
QuotedChunk::Encoded(vec![0xc3, 0xae]),
QuotedChunk::Safe(&b"th"[..]),
]
}))])),
addrspec: AddrSpec {
local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"john"[..]))]),
domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]),
}
},
]
}),
]))
);
}
}

View file

@ -6,21 +6,23 @@ use nom::{
bytes::complete::{tag, take, take_while, take_while1},
character::complete::one_of,
character::is_alphanumeric,
combinator::map,
multi::many0,
combinator::{map, opt},
multi::{many0, many1},
sequence::{preceded, terminated, tuple},
IResult,
};
use crate::text::ascii;
use crate::text::words;
use crate::text::whitespace::cfws;
/// Parses one encoded word from the start of `input`.
///
/// Tries `encoded_word_quoted` first and falls back to
/// `encoded_word_base64`; the result of whichever alternative matches is
/// returned together with the unconsumed remainder.
pub fn encoded_word(input: &[u8]) -> IResult<&[u8], EncodedWord> {
alt((encoded_word_quoted, encoded_word_base64))(input)
}
pub fn encoded_word_quoted(input: &[u8]) -> IResult<&[u8], EncodedWord> {
let (rest, (_, charset, _, _, _, txt, _)) = tuple((
let (rest, (_, _, charset, _, _, _, txt, _, _)) = tuple((
opt(cfws),
tag("=?"),
words::mime_atom,
tag("?"),
@ -28,6 +30,7 @@ pub fn encoded_word_quoted(input: &[u8]) -> IResult<&[u8], EncodedWord> {
tag("?"),
ptext,
tag("?="),
opt(cfws),
))(input)?;
let renc = Encoding::for_label(charset).unwrap_or(encoding_rs::WINDOWS_1252);
@ -102,8 +105,7 @@ impl<'a> QuotedWord<'a> {
}
QuotedChunk::Space => acc.push(' '),
QuotedChunk::Encoded(v) => {
let w = &[*v];
let (d, _) = self.enc.decode_without_bom_handling(w);
let (d, _) = self.enc.decode_without_bom_handling(v.as_slice());
acc.push_str(d.as_ref());
}
};
@ -115,13 +117,13 @@ impl<'a> QuotedWord<'a> {
/// One piece of the text part of a quoted (`Q`) encoded word.
#[derive(PartialEq, Debug, Clone)]
pub enum QuotedChunk<'a> {
    /// Bytes borrowed from the input that are taken as-is.
    Safe(&'a [u8]),
    /// The bytes denoted by a run of adjacent `=XX` escapes.
    ///
    /// They are stored together (not one variant per byte) so that a
    /// multi-byte character such as UTF-8 `=C3=AE` reaches the charset
    /// decoder as a whole sequence.
    Encoded(Vec<u8>),
    /// An `_` in the input, rendered as a space when decoding.
    Space,
}
//quoted_printable
/// Parses the text part of a quoted encoded word into a list of chunks.
///
/// At each position the alternatives are tried in order: `safe_char2`,
/// `encoded_space` (`_`), then `many_hex_octet` (a run of `=XX` escapes).
/// Hex escapes go through `many_hex_octet` rather than `hex_octet` because
/// the latter yields a bare `u8`, not a `QuotedChunk`, and grouping adjacent
/// escapes keeps multi-byte characters in one chunk.
///
/// Since the chunks are collected with `many0`, input that starts with no
/// recognisable chunk yields an empty list rather than an error.
pub fn ptext(input: &[u8]) -> IResult<&[u8], Vec<QuotedChunk>> {
    many0(alt((safe_char2, encoded_space, many_hex_octet)))(input)
}
fn safe_char2(input: &[u8]) -> IResult<&[u8], QuotedChunk> {
@ -136,27 +138,26 @@ fn is_safe_char2(c: u8) -> bool {
c >= ascii::SP && c != ascii::UNDERSCORE && c != ascii::QUESTION && c != ascii::EQ
}
/*
fn is_safe_char(c: char) -> bool {
(c >= '\x21' && c <= '\x3c') ||
(c >= '\x3e' && c <= '\x7e')
}*/
/// Recognises a single `_` and reports it as [`QuotedChunk::Space`].
fn encoded_space(input: &[u8]) -> IResult<&[u8], QuotedChunk> {
    let (remaining, _underscore) = tag("_")(input)?;
    Ok((remaining, QuotedChunk::Space))
}
fn hex_octet(input: &[u8]) -> IResult<&[u8], QuotedChunk> {
fn hex_octet(input: &[u8]) -> IResult<&[u8], u8> {
use nom::error::*;
let (rest, hbytes) = preceded(tag("="), take(2usize))(input)?;
println!("TOOK: {:?}", hbytes);
let (hstr, _) = encoding_rs::UTF_8.decode_without_bom_handling(hbytes);
let hstr = String::from_utf8_lossy(hbytes);
let parsed = u8::from_str_radix(hstr.as_ref(), 16)
.map_err(|_| nom::Err::Error(Error::new(input, ErrorKind::Verify)))?;
Ok((rest, QuotedChunk::Encoded(parsed)))
println!("PARSED: {}", parsed);
Ok((rest, parsed))
}
/// Collects one or more consecutive `hex_octet` results into a single
/// [`QuotedChunk::Encoded`] chunk; fails when not even one octet is present.
fn many_hex_octet(input: &[u8]) -> IResult<&[u8], QuotedChunk> {
    let (remaining, octets) = many1(hex_octet)(input)?;
    Ok((remaining, QuotedChunk::Encoded(octets)))
}
//base64 (maybe use a crate)
@ -181,16 +182,16 @@ mod tests {
&b""[..],
vec![
QuotedChunk::Safe(&b"Accus"[..]),
QuotedChunk::Encoded(0xe9),
QuotedChunk::Encoded(vec![0xe9]),
QuotedChunk::Space,
QuotedChunk::Safe(&b"de"[..]),
QuotedChunk::Space,
QuotedChunk::Safe(&b"r"[..]),
QuotedChunk::Encoded(0xe9),
QuotedChunk::Encoded(vec![0xe9]),
QuotedChunk::Safe(&b"ception"[..]),
QuotedChunk::Space,
QuotedChunk::Safe(&b"(affich"[..]),
QuotedChunk::Encoded(0xe9),
QuotedChunk::Encoded(vec![0xe9]),
QuotedChunk::Safe(&b")"[..]),
]
))
@ -219,4 +220,15 @@ mod tests {
"If you can read this yo".to_string(),
);
}
// A two-byte UTF-8 sequence written as `=C3=AE` inside a quoted encoded word
// must come out as the single character `î`.
#[test]
fn test_strange_quoted() {
    let (_rest, word) = encoded_word(b"=?UTF-8?Q?John_Sm=C3=AEth?=").unwrap();
    assert_eq!(word.to_string(), "John Smîth".to_string());
}
}

View file

@ -51,7 +51,7 @@ pub fn mime_word(input: &[u8]) -> IResult<&[u8], MIMEWord> {
))(input)
}
#[derive(PartialEq)]
#[derive(Debug, PartialEq)]
pub enum Word<'a> {
Quoted(QuotedString<'a>),
Encoded(encoding::EncodedWord<'a>),
@ -70,11 +70,11 @@ impl<'a> ToString for Word<'a> {
}
}
}
impl<'a> fmt::Debug for Word<'a> {
/*impl<'a> fmt::Debug for Word<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt.debug_tuple("Word").field(&format_args!("\"{}\"", self.to_string())).finish()
}
}
}*/
/// Word
///
@ -89,7 +89,7 @@ pub fn word(input: &[u8]) -> IResult<&[u8], Word> {
))(input)
}
#[derive(PartialEq)]
#[derive(Debug, PartialEq)]
pub struct Phrase<'a>(pub Vec<Word<'a>>);
impl<'a> ToString for Phrase<'a> {
@ -101,11 +101,11 @@ impl<'a> ToString for Phrase<'a> {
.join(" ")
}
}
impl<'a> fmt::Debug for Phrase<'a> {
/*impl<'a> fmt::Debug for Phrase<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt.debug_tuple("Phrase").field(&format_args!("\"{}\"", self.to_string())).finish()
}
}
}*/
/// Phrase
///