From 755a095743a94bd9b6ce41d4c667e11578612b31 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Sun, 23 Jul 2023 18:27:03 +0200 Subject: [PATCH] improve encoded word compatibility --- src/part/part.rs | 2 +- src/rfc5322/address.rs | 97 +++++++++++++++++++++++++----------------- src/text/encoding.rs | 52 +++++++++++++--------- src/text/misc_token.rs | 12 +++--- 4 files changed, 96 insertions(+), 67 deletions(-) diff --git a/src/part/part.rs b/src/part/part.rs index 41a21c2..a896406 100644 --- a/src/part/part.rs +++ b/src/part/part.rs @@ -372,7 +372,7 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO
enc: encoding_rs::WINDOWS_1252, chunks: vec![ QuotedChunk::Safe(&b"Andr"[..]), - QuotedChunk::Encoded(0xE9), + QuotedChunk::Encoded(vec![0xE9]), ], })), Word::Atom(&b"Pirard"[..]) diff --git a/src/rfc5322/address.rs b/src/rfc5322/address.rs index 75b446c..b01a38f 100644 --- a/src/rfc5322/address.rs +++ b/src/rfc5322/address.rs @@ -35,46 +35,6 @@ impl<'a> From> for AddressRef<'a> { } pub type AddressList<'a> = Vec>; -/*impl<'a> TryFrom<&'a lazy::Mailbox<'a>> for MailboxRef { - type Error = IMFError<'a>; - - fn try_from(mx: &'a lazy::Mailbox<'a>) -> Result { - mailbox(mx.0) - .map(|(_, m)| m) - .map_err(|e| IMFError::Mailbox(e)) - } -} - -impl<'a> TryFrom<&'a lazy::MailboxList<'a>> for MailboxList { - type Error = IMFError<'a>; - - fn try_from(ml: &'a lazy::MailboxList<'a>) -> Result { - mailbox_list(ml.0) - .map(|(_, m)| m) - .map_err(|e| IMFError::MailboxList(e)) - } -} - -impl<'a> TryFrom<&'a lazy::AddressList<'a>> for AddressList { - type Error = IMFError<'a>; - - fn try_from(al: &'a lazy::AddressList<'a>) -> Result { - address_list(al.0) - .map(|(_, a)| a) - .map_err(|e| IMFError::AddressList(e)) - } -} - -impl<'a> TryFrom<&'a lazy::NullableAddressList<'a>> for AddressList { - type Error = IMFError<'a>; - - fn try_from(nal: &'a lazy::NullableAddressList<'a>) -> Result { - opt(alt((address_list, address_list_cfws)))(nal.0) - .map(|(_, a)| a.unwrap_or(vec![])) - .map_err(|e| IMFError::NullableAddressList(e)) - } -}*/ - /// Address (section 3.4 of RFC5322) /// /// ```abnf @@ -215,4 +175,61 @@ mod tests { )) ); } + + use crate::text::quoted::QuotedString; + use crate::text::encoding::{EncodedWord, QuotedWord, QuotedChunk}; + + #[test] + fn test_strange_groups() { + assert_eq!( + address_list( + br#""Colleagues": "James Smythe" ;, Friends: + jane@example.com, =?UTF-8?Q?John_Sm=C3=AEth?= ;"# + ), + Ok((&b""[..], vec![ + AddressRef::Many(GroupRef { + name: Phrase(vec![Word::Quoted(QuotedString(vec![&b"Colleagues"[..]]))]), + participants: vec![ + MailboxRef { + name: Some(Phrase(vec![Word::Quoted(QuotedString(vec![ + &b"James"[..], &b" "[..], &b"Smythe"[..] + ]))])), + addrspec: AddrSpec { + local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"james"[..]))]), + domain: Domain::Atoms(vec![&b"vandelay"[..], &b"com"[..]]), + } + }, + ], + }), + AddressRef::Many(GroupRef { + name: Phrase(vec![Word::Atom(&b"Friends"[..])]), + participants: vec![ + MailboxRef{ + name: None, + addrspec: AddrSpec { + local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"jane"[..]))]), + domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]), + } + }, + MailboxRef{ + name: Some(Phrase(vec![Word::Encoded(EncodedWord::Quoted(QuotedWord { + enc: encoding_rs::UTF_8, + chunks: vec![ + QuotedChunk::Safe(&b"John"[..]), + QuotedChunk::Space, + QuotedChunk::Safe(&b"Sm"[..]), + QuotedChunk::Encoded(vec![0xc3, 0xae]), + QuotedChunk::Safe(&b"th"[..]), + ] + }))])), + addrspec: AddrSpec { + local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"john"[..]))]), + domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]), + } + }, + ] + }), + ])) + ); + } } diff --git a/src/text/encoding.rs b/src/text/encoding.rs index 5c307c6..f208f5b 100644 --- a/src/text/encoding.rs +++ b/src/text/encoding.rs @@ -6,21 +6,23 @@ use nom::{ bytes::complete::{tag, take, take_while, take_while1}, character::complete::one_of, character::is_alphanumeric, - combinator::map, - multi::many0, + combinator::{map, opt}, + multi::{many0, many1}, sequence::{preceded, terminated, tuple}, IResult, }; use crate::text::ascii; use crate::text::words; +use crate::text::whitespace::cfws; pub fn encoded_word(input: &[u8]) -> IResult<&[u8], EncodedWord> { alt((encoded_word_quoted, encoded_word_base64))(input) } pub fn encoded_word_quoted(input: &[u8]) -> IResult<&[u8], EncodedWord> { - let (rest, (_, charset, _, _, _, txt, _)) = tuple(( + let (rest, (_, _, charset, _, _, _, txt, _, _)) = tuple(( + opt(cfws), tag("=?"), words::mime_atom, tag("?"), @@ -28,6 +30,7 @@ pub fn encoded_word_quoted(input: &[u8]) -> IResult<&[u8], EncodedWord> { tag("?"), ptext, tag("?="), + opt(cfws), ))(input)?; let renc = Encoding::for_label(charset).unwrap_or(encoding_rs::WINDOWS_1252); @@ -102,8 +105,7 @@ impl<'a> QuotedWord<'a> { } QuotedChunk::Space => acc.push(' '), QuotedChunk::Encoded(v) => { - let w = &[*v]; - let (d, _) = self.enc.decode_without_bom_handling(w); + let (d, _) = self.enc.decode_without_bom_handling(v.as_slice()); acc.push_str(d.as_ref()); } }; @@ -115,13 +117,13 @@ impl<'a> QuotedWord<'a> { #[derive(PartialEq, Debug, Clone)] pub enum QuotedChunk<'a> { Safe(&'a [u8]), - Encoded(u8), + Encoded(Vec), Space, } //quoted_printable pub fn ptext(input: &[u8]) -> IResult<&[u8], Vec> { - many0(alt((safe_char2, encoded_space, hex_octet)))(input) + many0(alt((safe_char2, encoded_space, many_hex_octet)))(input) } fn safe_char2(input: &[u8]) -> IResult<&[u8], QuotedChunk> { @@ -136,27 +138,26 @@ fn is_safe_char2(c: u8) -> bool { c >= ascii::SP && c != ascii::UNDERSCORE && c != ascii::QUESTION && c != ascii::EQ } -/* -fn is_safe_char(c: char) -> bool { - (c >= '\x21' && c <= '\x3c') || - (c >= '\x3e' && c <= '\x7e') -}*/ - fn encoded_space(input: &[u8]) -> IResult<&[u8], QuotedChunk> { map(tag("_"), |_| QuotedChunk::Space)(input) } -fn hex_octet(input: &[u8]) -> IResult<&[u8], QuotedChunk> { +fn hex_octet(input: &[u8]) -> IResult<&[u8], u8> { use nom::error::*; let (rest, hbytes) = preceded(tag("="), take(2usize))(input)?; + println!("TOOK: {:?}", hbytes); - let (hstr, _) = encoding_rs::UTF_8.decode_without_bom_handling(hbytes); - + let hstr = String::from_utf8_lossy(hbytes); let parsed = u8::from_str_radix(hstr.as_ref(), 16) .map_err(|_| nom::Err::Error(Error::new(input, ErrorKind::Verify)))?; - Ok((rest, QuotedChunk::Encoded(parsed))) + println!("PARSED: {}", parsed); + Ok((rest, parsed)) +} + +fn many_hex_octet(input: &[u8]) -> IResult<&[u8], QuotedChunk> { + map(many1(hex_octet), QuotedChunk::Encoded)(input) } //base64 (maybe use a crate) @@ -181,16 +182,16 @@ mod tests { &b""[..], vec![ QuotedChunk::Safe(&b"Accus"[..]), - QuotedChunk::Encoded(0xe9), + QuotedChunk::Encoded(vec![0xe9]), QuotedChunk::Space, QuotedChunk::Safe(&b"de"[..]), QuotedChunk::Space, QuotedChunk::Safe(&b"r"[..]), - QuotedChunk::Encoded(0xe9), + QuotedChunk::Encoded(vec![0xe9]), QuotedChunk::Safe(&b"ception"[..]), QuotedChunk::Space, QuotedChunk::Safe(&b"(affich"[..]), - QuotedChunk::Encoded(0xe9), + QuotedChunk::Encoded(vec![0xe9]), QuotedChunk::Safe(&b")"[..]), ] )) @@ -219,4 +220,15 @@ mod tests { "If you can read this yo".to_string(), ); } + + #[test] + fn test_strange_quoted() { + assert_eq!( + encoded_word(b"=?UTF-8?Q?John_Sm=C3=AEth?=") + .unwrap() + .1 + .to_string(), + "John Smîth".to_string(), + ); + } } diff --git a/src/text/misc_token.rs b/src/text/misc_token.rs index 039276d..7cc3cde 100644 --- a/src/text/misc_token.rs +++ b/src/text/misc_token.rs @@ -51,7 +51,7 @@ pub fn mime_word(input: &[u8]) -> IResult<&[u8], MIMEWord> { ))(input) } -#[derive(PartialEq)] +#[derive(Debug, PartialEq)] pub enum Word<'a> { Quoted(QuotedString<'a>), Encoded(encoding::EncodedWord<'a>), @@ -70,11 +70,11 @@ impl<'a> ToString for Word<'a> { } } } -impl<'a> fmt::Debug for Word<'a> { +/*impl<'a> fmt::Debug for Word<'a> { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fmt.debug_tuple("Word").field(&format_args!("\"{}\"", self.to_string())).finish() } -} +}*/ /// Word /// @@ -89,7 +89,7 @@ pub fn word(input: &[u8]) -> IResult<&[u8], Word> { ))(input) } -#[derive(PartialEq)] +#[derive(Debug, PartialEq)] pub struct Phrase<'a>(pub Vec>); impl<'a> ToString for Phrase<'a> { @@ -101,11 +101,11 @@ impl<'a> ToString for Phrase<'a> { .join(" ") } } -impl<'a> fmt::Debug for Phrase<'a> { +/*impl<'a> fmt::Debug for Phrase<'a> { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fmt.debug_tuple("Phrase").field(&format_args!("\"{}\"", self.to_string())).finish() } -} +}*/ /// Phrase ///