improve encoded word compatibility

This commit is contained in:
Quentin 2023-07-23 18:27:03 +02:00
parent 971bbc6945
commit 755a095743
Signed by: quentin
GPG key ID: E9602264D639FF68
4 changed files with 96 additions and 67 deletions

View file

@ -372,7 +372,7 @@ OoOoOoOoOoOoOoOoOoOoOoOoOoOoOoOoO<br />
enc: encoding_rs::WINDOWS_1252, enc: encoding_rs::WINDOWS_1252,
chunks: vec![ chunks: vec![
QuotedChunk::Safe(&b"Andr"[..]), QuotedChunk::Safe(&b"Andr"[..]),
QuotedChunk::Encoded(0xE9), QuotedChunk::Encoded(vec![0xE9]),
], ],
})), })),
Word::Atom(&b"Pirard"[..]) Word::Atom(&b"Pirard"[..])

View file

@ -35,46 +35,6 @@ impl<'a> From<GroupRef<'a>> for AddressRef<'a> {
} }
pub type AddressList<'a> = Vec<AddressRef<'a>>; pub type AddressList<'a> = Vec<AddressRef<'a>>;
/*impl<'a> TryFrom<&'a lazy::Mailbox<'a>> for MailboxRef {
type Error = IMFError<'a>;
fn try_from(mx: &'a lazy::Mailbox<'a>) -> Result<Self, Self::Error> {
mailbox(mx.0)
.map(|(_, m)| m)
.map_err(|e| IMFError::Mailbox(e))
}
}
impl<'a> TryFrom<&'a lazy::MailboxList<'a>> for MailboxList {
type Error = IMFError<'a>;
fn try_from(ml: &'a lazy::MailboxList<'a>) -> Result<Self, Self::Error> {
mailbox_list(ml.0)
.map(|(_, m)| m)
.map_err(|e| IMFError::MailboxList(e))
}
}
impl<'a> TryFrom<&'a lazy::AddressList<'a>> for AddressList {
type Error = IMFError<'a>;
fn try_from(al: &'a lazy::AddressList<'a>) -> Result<Self, Self::Error> {
address_list(al.0)
.map(|(_, a)| a)
.map_err(|e| IMFError::AddressList(e))
}
}
impl<'a> TryFrom<&'a lazy::NullableAddressList<'a>> for AddressList {
type Error = IMFError<'a>;
fn try_from(nal: &'a lazy::NullableAddressList<'a>) -> Result<Self, Self::Error> {
opt(alt((address_list, address_list_cfws)))(nal.0)
.map(|(_, a)| a.unwrap_or(vec![]))
.map_err(|e| IMFError::NullableAddressList(e))
}
}*/
/// Address (section 3.4 of RFC5322) /// Address (section 3.4 of RFC5322)
/// ///
/// ```abnf /// ```abnf
@ -215,4 +175,61 @@ mod tests {
)) ))
); );
} }
use crate::text::quoted::QuotedString;
use crate::text::encoding::{EncodedWord, QuotedWord, QuotedChunk};
#[test]
fn test_strange_groups() {
// Checks `address_list` on an input made of two groups separated by `,`:
// - "Colleagues" (quoted-string group name) holding one named mailbox,
//   closed by `;`;
// - Friends (atom group name) holding a bare addr-spec followed by a mailbox
//   whose display name is an RFC 2047 Q-encoded word.
// The whole input must be consumed (empty remainder).
assert_eq!(
address_list(
// NOTE: the line break inside this raw string is part of the parsed input.
br#""Colleagues": "James Smythe" <james@vandelay.com>;, Friends:
jane@example.com, =?UTF-8?Q?John_Sm=C3=AEth?= <john@example.com>;"#
),
Ok((&b""[..], vec![
// First group: quoted group name, single participant.
AddressRef::Many(GroupRef {
name: Phrase(vec![Word::Quoted(QuotedString(vec![&b"Colleagues"[..]]))]),
participants: vec![
MailboxRef {
// The quoted display name is expected as three chunks: word, space, word.
name: Some(Phrase(vec![Word::Quoted(QuotedString(vec![
&b"James"[..], &b" "[..], &b"Smythe"[..]
]))])),
addrspec: AddrSpec {
local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"james"[..]))]),
domain: Domain::Atoms(vec![&b"vandelay"[..], &b"com"[..]]),
}
},
],
}),
// Second group: atom group name, two participants.
AddressRef::Many(GroupRef {
name: Phrase(vec![Word::Atom(&b"Friends"[..])]),
participants: vec![
// Bare addr-spec: no display name.
MailboxRef{
name: None,
addrspec: AddrSpec {
local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"jane"[..]))]),
domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]),
}
},
// Display name given as a Q-encoded word: `_` is expected as a Space chunk
// and the adjacent escapes `=C3=AE` as a single two-byte Encoded chunk.
MailboxRef{
name: Some(Phrase(vec![Word::Encoded(EncodedWord::Quoted(QuotedWord {
enc: encoding_rs::UTF_8,
chunks: vec![
QuotedChunk::Safe(&b"John"[..]),
QuotedChunk::Space,
QuotedChunk::Safe(&b"Sm"[..]),
QuotedChunk::Encoded(vec![0xc3, 0xae]),
QuotedChunk::Safe(&b"th"[..]),
]
}))])),
addrspec: AddrSpec {
local_part: LocalPart(vec![LocalPartToken::Word(Word::Atom(&b"john"[..]))]),
domain: Domain::Atoms(vec![&b"example"[..], &b"com"[..]]),
}
},
]
}),
]))
);
}
} }

View file

@ -6,21 +6,23 @@ use nom::{
bytes::complete::{tag, take, take_while, take_while1}, bytes::complete::{tag, take, take_while, take_while1},
character::complete::one_of, character::complete::one_of,
character::is_alphanumeric, character::is_alphanumeric,
combinator::map, combinator::{map, opt},
multi::many0, multi::{many0, many1},
sequence::{preceded, terminated, tuple}, sequence::{preceded, terminated, tuple},
IResult, IResult,
}; };
use crate::text::ascii; use crate::text::ascii;
use crate::text::words; use crate::text::words;
use crate::text::whitespace::cfws;
pub fn encoded_word(input: &[u8]) -> IResult<&[u8], EncodedWord> { pub fn encoded_word(input: &[u8]) -> IResult<&[u8], EncodedWord> {
alt((encoded_word_quoted, encoded_word_base64))(input) alt((encoded_word_quoted, encoded_word_base64))(input)
} }
pub fn encoded_word_quoted(input: &[u8]) -> IResult<&[u8], EncodedWord> { pub fn encoded_word_quoted(input: &[u8]) -> IResult<&[u8], EncodedWord> {
let (rest, (_, charset, _, _, _, txt, _)) = tuple(( let (rest, (_, _, charset, _, _, _, txt, _, _)) = tuple((
opt(cfws),
tag("=?"), tag("=?"),
words::mime_atom, words::mime_atom,
tag("?"), tag("?"),
@ -28,6 +30,7 @@ pub fn encoded_word_quoted(input: &[u8]) -> IResult<&[u8], EncodedWord> {
tag("?"), tag("?"),
ptext, ptext,
tag("?="), tag("?="),
opt(cfws),
))(input)?; ))(input)?;
let renc = Encoding::for_label(charset).unwrap_or(encoding_rs::WINDOWS_1252); let renc = Encoding::for_label(charset).unwrap_or(encoding_rs::WINDOWS_1252);
@ -102,8 +105,7 @@ impl<'a> QuotedWord<'a> {
} }
QuotedChunk::Space => acc.push(' '), QuotedChunk::Space => acc.push(' '),
QuotedChunk::Encoded(v) => { QuotedChunk::Encoded(v) => {
let w = &[*v]; let (d, _) = self.enc.decode_without_bom_handling(v.as_slice());
let (d, _) = self.enc.decode_without_bom_handling(w);
acc.push_str(d.as_ref()); acc.push_str(d.as_ref());
} }
}; };
@ -115,13 +117,13 @@ impl<'a> QuotedWord<'a> {
#[derive(PartialEq, Debug, Clone)] #[derive(PartialEq, Debug, Clone)]
pub enum QuotedChunk<'a> { pub enum QuotedChunk<'a> {
Safe(&'a [u8]), Safe(&'a [u8]),
Encoded(u8), Encoded(Vec<u8>),
Space, Space,
} }
//quoted_printable //quoted_printable
pub fn ptext(input: &[u8]) -> IResult<&[u8], Vec<QuotedChunk>> { pub fn ptext(input: &[u8]) -> IResult<&[u8], Vec<QuotedChunk>> {
many0(alt((safe_char2, encoded_space, hex_octet)))(input) many0(alt((safe_char2, encoded_space, many_hex_octet)))(input)
} }
fn safe_char2(input: &[u8]) -> IResult<&[u8], QuotedChunk> { fn safe_char2(input: &[u8]) -> IResult<&[u8], QuotedChunk> {
@ -136,27 +138,26 @@ fn is_safe_char2(c: u8) -> bool {
c >= ascii::SP && c != ascii::UNDERSCORE && c != ascii::QUESTION && c != ascii::EQ c >= ascii::SP && c != ascii::UNDERSCORE && c != ascii::QUESTION && c != ascii::EQ
} }
/*
fn is_safe_char(c: char) -> bool {
(c >= '\x21' && c <= '\x3c') ||
(c >= '\x3e' && c <= '\x7e')
}*/
fn encoded_space(input: &[u8]) -> IResult<&[u8], QuotedChunk> { fn encoded_space(input: &[u8]) -> IResult<&[u8], QuotedChunk> {
map(tag("_"), |_| QuotedChunk::Space)(input) map(tag("_"), |_| QuotedChunk::Space)(input)
} }
fn hex_octet(input: &[u8]) -> IResult<&[u8], QuotedChunk> { fn hex_octet(input: &[u8]) -> IResult<&[u8], u8> {
use nom::error::*; use nom::error::*;
let (rest, hbytes) = preceded(tag("="), take(2usize))(input)?; let (rest, hbytes) = preceded(tag("="), take(2usize))(input)?;
println!("TOOK: {:?}", hbytes);
let (hstr, _) = encoding_rs::UTF_8.decode_without_bom_handling(hbytes); let hstr = String::from_utf8_lossy(hbytes);
let parsed = u8::from_str_radix(hstr.as_ref(), 16) let parsed = u8::from_str_radix(hstr.as_ref(), 16)
.map_err(|_| nom::Err::Error(Error::new(input, ErrorKind::Verify)))?; .map_err(|_| nom::Err::Error(Error::new(input, ErrorKind::Verify)))?;
Ok((rest, QuotedChunk::Encoded(parsed))) println!("PARSED: {}", parsed);
Ok((rest, parsed))
}
fn many_hex_octet(input: &[u8]) -> IResult<&[u8], QuotedChunk> {
map(many1(hex_octet), QuotedChunk::Encoded)(input)
} }
//base64 (maybe use a crate) //base64 (maybe use a crate)
@ -181,16 +182,16 @@ mod tests {
&b""[..], &b""[..],
vec![ vec![
QuotedChunk::Safe(&b"Accus"[..]), QuotedChunk::Safe(&b"Accus"[..]),
QuotedChunk::Encoded(0xe9), QuotedChunk::Encoded(vec![0xe9]),
QuotedChunk::Space, QuotedChunk::Space,
QuotedChunk::Safe(&b"de"[..]), QuotedChunk::Safe(&b"de"[..]),
QuotedChunk::Space, QuotedChunk::Space,
QuotedChunk::Safe(&b"r"[..]), QuotedChunk::Safe(&b"r"[..]),
QuotedChunk::Encoded(0xe9), QuotedChunk::Encoded(vec![0xe9]),
QuotedChunk::Safe(&b"ception"[..]), QuotedChunk::Safe(&b"ception"[..]),
QuotedChunk::Space, QuotedChunk::Space,
QuotedChunk::Safe(&b"(affich"[..]), QuotedChunk::Safe(&b"(affich"[..]),
QuotedChunk::Encoded(0xe9), QuotedChunk::Encoded(vec![0xe9]),
QuotedChunk::Safe(&b")"[..]), QuotedChunk::Safe(&b")"[..]),
] ]
)) ))
@ -219,4 +220,15 @@ mod tests {
"If you can read this yo".to_string(), "If you can read this yo".to_string(),
); );
} }
#[test]
fn test_strange_quoted() {
    // `=C3=AE` is a two-byte UTF-8 sequence: both escapes must be decoded
    // together to produce a single character in the rendered string.
    let (_, word) = encoded_word(b"=?UTF-8?Q?John_Sm=C3=AEth?=").unwrap();
    let rendered = word.to_string();
    assert_eq!(rendered, "John Smîth".to_string());
}
} }

View file

@ -51,7 +51,7 @@ pub fn mime_word(input: &[u8]) -> IResult<&[u8], MIMEWord> {
))(input) ))(input)
} }
#[derive(PartialEq)] #[derive(Debug, PartialEq)]
pub enum Word<'a> { pub enum Word<'a> {
Quoted(QuotedString<'a>), Quoted(QuotedString<'a>),
Encoded(encoding::EncodedWord<'a>), Encoded(encoding::EncodedWord<'a>),
@ -70,11 +70,11 @@ impl<'a> ToString for Word<'a> {
} }
} }
} }
impl<'a> fmt::Debug for Word<'a> { /*impl<'a> fmt::Debug for Word<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt.debug_tuple("Word").field(&format_args!("\"{}\"", self.to_string())).finish() fmt.debug_tuple("Word").field(&format_args!("\"{}\"", self.to_string())).finish()
} }
} }*/
/// Word /// Word
/// ///
@ -89,7 +89,7 @@ pub fn word(input: &[u8]) -> IResult<&[u8], Word> {
))(input) ))(input)
} }
#[derive(PartialEq)] #[derive(Debug, PartialEq)]
pub struct Phrase<'a>(pub Vec<Word<'a>>); pub struct Phrase<'a>(pub Vec<Word<'a>>);
impl<'a> ToString for Phrase<'a> { impl<'a> ToString for Phrase<'a> {
@ -101,11 +101,11 @@ impl<'a> ToString for Phrase<'a> {
.join(" ") .join(" ")
} }
} }
impl<'a> fmt::Debug for Phrase<'a> { /*impl<'a> fmt::Debug for Phrase<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt.debug_tuple("Phrase").field(&format_args!("\"{}\"", self.to_string())).finish() fmt.debug_tuple("Phrase").field(&format_args!("\"{}\"", self.to_string())).finish()
} }
} }*/
/// Phrase /// Phrase
/// ///