successfully decode q encoding in header

This commit is contained in:
Quentin 2023-07-10 17:52:18 +02:00
parent d9471f443e
commit 76b921c9a3
Signed by: quentin
GPG key ID: E9602264D639FF68

View file

@ -8,44 +8,70 @@ use nom::{
sequence::{preceded, tuple}, sequence::{preceded, tuple},
multi::many0, multi::many0,
}; };
use encoding_rs::Encoding;
use crate::fragments::mime; use crate::fragments::mime;
/// Parses one RFC 2047 encoded word of the form `=?charset?encoding?text?=`.
///
/// The post-change code shown below matches `=?`, a MIME token (the charset
/// label), `?`, one of `Q`, `B`, `q`, `b`, `?`, the `ptext` payload and a final
/// `?=`. The charset label is resolved with `Encoding::for_label`; an unknown
/// label falls back to `encoding_rs::WINDOWS_1252`.
///
/// Returns the remaining input together with the decoded `String`.
///
/// # Panics
///
/// The base64 (`b`/`B`) branch is still `todo!()`, so it panics when reached.
pub fn encoded_word(input: &str) -> IResult<&str, Cow<str>> { pub fn encoded_word(input: &str) -> IResult<&str, String> {
let (rest, (_, charset, _, enc, _, txt, _)) = tuple(( let (rest, (_, charset, _, enc, _, txt, _)) = tuple((
tag("=?"), mime::token, tag("?"), one_of("QBqb"), tag("?"), encoded_text, tag("?=") tag("=?"), mime::token, tag("?"), one_of("QBqb"), tag("?"), ptext, tag("?=")
))(input)?; ))(input)?;
match enc { let renc = Encoding::for_label(charset.as_bytes()).unwrap_or(encoding_rs::WINDOWS_1252);
let parsed = match enc {
// quoted printable // quoted printable
'q'|'Q' => todo!(), 'q'|'Q' => decode_quoted_encoding(renc, txt.iter()),
// base64 // base64
'b'|'B' => todo!(), 'b'|'B' => todo!(),
_ => unreachable!(), _ => unreachable!(),
} };
Ok((rest, parsed))
} }
/// Concatenates a sequence of `QuotedChunk`s into a single `String`.
///
/// `Safe` text is appended unchanged, `Space` becomes a single `" "`, and each
/// `Encoded` byte is decoded with `enc` before being appended.
///
/// NOTE(review): every `Encoded` byte is decoded on its own as a one-byte
/// slice, so consecutive bytes of a multi-byte sequence are never handed to
/// the decoder together — confirm this is acceptable for multi-byte charsets.
fn encoded_text(input: &str) -> IResult<&str, &str> { fn decode_quoted_encoding<'a>(enc: &'static Encoding, q: impl Iterator<Item = &'a QuotedChunk<'a>>) -> String {
take_while1(is_encoded_text)(input) q.fold(
String::new(),
|mut acc, c| {
let dec = match c {
QuotedChunk::Safe(v) => Cow::Borrowed(*v),
QuotedChunk::Space => Cow::Borrowed(" "),
QuotedChunk::Encoded(v) => {
// The decoder takes a byte slice, so wrap the single byte in one.
let w = &[*v];
// The two ignored tuple fields returned by `decode` are discarded.
let (d, _, _) = enc.decode(w);
Cow::Owned(d.into_owned())
},
};
acc.push_str(dec.as_ref());
acc
})
} }
/// Returns `true` when `c` may appear in the `encoded-text` part of an
/// RFC 2047 encoded word.
///
/// RFC 2047 section 2 defines `encoded-text` as any printable ASCII character
/// other than `?` or SPACE. Excluding `?` matters for the parser: if `?` were
/// accepted, a greedy `take_while1(is_encoded_text)` would swallow the
/// terminating `?=` and the closing delimiter could never match.
fn is_encoded_text(c: char) -> bool {
    // `is_ascii_graphic` covers U+0021..=U+007E: ASCII, not a control
    // character, not whitespace.
    c.is_ascii_graphic() && c != '?'
}
/// One piece of a parsed Q-encoded payload.
///
/// - `Safe` holds a run of text taken verbatim from the input.
/// - `Encoded` holds a single raw byte that still has to be decoded with the
///   word's charset.
/// - `Space` stands for an underscore in the input, which is rendered as a
///   single space when decoding.
#[derive(PartialEq,Debug)] #[derive(PartialEq,Debug)]
pub enum QuotedChunk<'a> { pub enum QuotedChunk<'a> {
Safe(&'a str), Safe(&'a str),
Encoded(u8), Encoded(u8),
Space,
} }
//quoted_printable //quoted_printable
/// Splits a Q-encoded payload into a list of `QuotedChunk`s.
///
/// The post-change parser repeatedly tries, in order, `safe_char2`,
/// `encoded_space` and `hex_octet`, and stops at the first position where none
/// of them matches. Because it uses `many0`, an input with no matching chunk
/// yields an empty vector rather than an error.
pub fn ptext(input: &str) -> IResult<&str, Vec<QuotedChunk>> { pub fn ptext(input: &str) -> IResult<&str, Vec<QuotedChunk>> {
many0(alt((safe_char, hex_octet)))(input) many0(alt((safe_char2, encoded_space, hex_octet)))(input)
} }
/// Parses a non-empty run of characters accepted by the safe-character
/// predicate and wraps the matched slice in `QuotedChunk::Safe`.
///
/// The pre-change version used `is_safe_char`; the post-change version
/// (`safe_char2`) uses `is_safe_char2`.
fn safe_char(input: &str) -> IResult<&str, QuotedChunk> { fn safe_char2(input: &str) -> IResult<&str, QuotedChunk> {
map(take_while1(is_safe_char), |v| QuotedChunk::Safe(v))(input) map(take_while1(is_safe_char2), |v| QuotedChunk::Safe(v))(input)
}
/// Returns `true` when `c` may be copied literally out of a Q-encoded payload.
///
/// RFC2047 section 4.2
/// 8-bit values which correspond to printable ASCII characters other
/// than "=", "?", and "_" (underscore), MAY be represented as those
/// characters.
///
/// As written, the check accepts every ASCII character that is not a control
/// character and is not `_`, `?` or `=`.
///
/// NOTE(review): the space character passes this test, although the RFC text
/// quoted above speaks of printable characters — confirm whether a literal
/// space should be accepted here.
fn is_safe_char2(c: char) -> bool {
c.is_ascii() && !c.is_ascii_control() && c != '_' && c != '?' && c != '='
} }
fn is_safe_char(c: char) -> bool { fn is_safe_char(c: char) -> bool {
@ -53,6 +79,10 @@ fn is_safe_char(c: char) -> bool {
(c >= '\x3e' && c <= '\x7e') (c >= '\x3e' && c <= '\x7e')
} }
/// Recognises a single underscore and reports it as `QuotedChunk::Space`.
///
/// Fails with the error produced by `tag("_")` when the input does not start
/// with an underscore.
fn encoded_space(input: &str) -> IResult<&str, QuotedChunk> {
    let (remaining, _underscore) = tag("_")(input)?;
    Ok((remaining, QuotedChunk::Space))
}
fn hex_octet(input: &str) -> IResult<&str, QuotedChunk> { fn hex_octet(input: &str) -> IResult<&str, QuotedChunk> {
use nom; use nom;
use nom::error::*; use nom::error::*;
@ -80,12 +110,26 @@ mod tests {
Ok(("", vec![ Ok(("", vec![
QuotedChunk::Safe("Accus"), QuotedChunk::Safe("Accus"),
QuotedChunk::Encoded(0xe9), QuotedChunk::Encoded(0xe9),
QuotedChunk::Safe("_de_r"), QuotedChunk::Space,
QuotedChunk::Safe("de"),
QuotedChunk::Space,
QuotedChunk::Safe("r"),
QuotedChunk::Encoded(0xe9), QuotedChunk::Encoded(0xe9),
QuotedChunk::Safe("ception_(affich"), QuotedChunk::Safe("ception"),
QuotedChunk::Space,
QuotedChunk::Safe("(affich"),
QuotedChunk::Encoded(0xe9), QuotedChunk::Encoded(0xe9),
QuotedChunk::Safe(")"), QuotedChunk::Safe(")"),
])) ]))
); );
} }
/// A Q-encoded word with hex escapes and underscores decodes to the expected
/// accented text, and the whole input is consumed.
#[test]
fn test_decode_word() {
    let raw = "=?iso8859-1?Q?Accus=E9_de_r=E9ception_(affich=E9)?=";
    let decoded: String = "Accusé de réception (affiché)".into();

    // An empty remainder means the closing `?=` was matched.
    assert_eq!(encoded_word(raw), Ok(("", decoded)));
}
} }