Successfully decode "Q" encoding in header
This commit is contained in:
parent
d9471f443e
commit
76b921c9a3
1 changed file with 59 additions and 15 deletions
|
@ -8,44 +8,70 @@ use nom::{
|
||||||
sequence::{preceded, tuple},
|
sequence::{preceded, tuple},
|
||||||
multi::many0,
|
multi::many0,
|
||||||
};
|
};
|
||||||
|
use encoding_rs::Encoding;
|
||||||
|
|
||||||
use crate::fragments::mime;
|
use crate::fragments::mime;
|
||||||
|
|
||||||
pub fn encoded_word(input: &str) -> IResult<&str, Cow<str>> {
|
pub fn encoded_word(input: &str) -> IResult<&str, String> {
|
||||||
let (rest, (_, charset, _, enc, _, txt, _)) = tuple((
|
let (rest, (_, charset, _, enc, _, txt, _)) = tuple((
|
||||||
tag("=?"), mime::token, tag("?"), one_of("QBqb"), tag("?"), encoded_text, tag("?=")
|
tag("=?"), mime::token, tag("?"), one_of("QBqb"), tag("?"), ptext, tag("?=")
|
||||||
))(input)?;
|
))(input)?;
|
||||||
|
|
||||||
match enc {
|
let renc = Encoding::for_label(charset.as_bytes()).unwrap_or(encoding_rs::WINDOWS_1252);
|
||||||
|
|
||||||
|
let parsed = match enc {
|
||||||
// quoted printable
|
// quoted printable
|
||||||
'q'|'Q' => todo!(),
|
'q'|'Q' => decode_quoted_encoding(renc, txt.iter()),
|
||||||
// base64
|
// base64
|
||||||
'b'|'B' => todo!(),
|
'b'|'B' => todo!(),
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
}
|
};
|
||||||
|
|
||||||
|
Ok((rest, parsed))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn encoded_text(input: &str) -> IResult<&str, &str> {
|
fn decode_quoted_encoding<'a>(enc: &'static Encoding, q: impl Iterator<Item = &'a QuotedChunk<'a>>) -> String {
|
||||||
take_while1(is_encoded_text)(input)
|
q.fold(
|
||||||
|
String::new(),
|
||||||
|
|mut acc, c| {
|
||||||
|
let dec = match c {
|
||||||
|
QuotedChunk::Safe(v) => Cow::Borrowed(*v),
|
||||||
|
QuotedChunk::Space => Cow::Borrowed(" "),
|
||||||
|
QuotedChunk::Encoded(v) => {
|
||||||
|
let w = &[*v];
|
||||||
|
let (d, _, _) = enc.decode(w);
|
||||||
|
Cow::Owned(d.into_owned())
|
||||||
|
},
|
||||||
|
};
|
||||||
|
acc.push_str(dec.as_ref());
|
||||||
|
acc
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_encoded_text(c: char) -> bool {
|
|
||||||
c.is_ascii() && !c.is_ascii_control() && !c.is_ascii_whitespace()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(PartialEq,Debug)]
|
#[derive(PartialEq,Debug)]
|
||||||
pub enum QuotedChunk<'a> {
|
pub enum QuotedChunk<'a> {
|
||||||
Safe(&'a str),
|
Safe(&'a str),
|
||||||
Encoded(u8),
|
Encoded(u8),
|
||||||
|
Space,
|
||||||
}
|
}
|
||||||
|
|
||||||
//quoted_printable
|
//quoted_printable
|
||||||
pub fn ptext(input: &str) -> IResult<&str, Vec<QuotedChunk>> {
|
pub fn ptext(input: &str) -> IResult<&str, Vec<QuotedChunk>> {
|
||||||
many0(alt((safe_char, hex_octet)))(input)
|
many0(alt((safe_char2, encoded_space, hex_octet)))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn safe_char(input: &str) -> IResult<&str, QuotedChunk> {
|
fn safe_char2(input: &str) -> IResult<&str, QuotedChunk> {
|
||||||
map(take_while1(is_safe_char), |v| QuotedChunk::Safe(v))(input)
|
map(take_while1(is_safe_char2), |v| QuotedChunk::Safe(v))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// RFC2047 section 4.2
|
||||||
|
/// 8-bit values which correspond to printable ASCII characters other
|
||||||
|
/// than "=", "?", and "_" (underscore), MAY be represented as those
|
||||||
|
/// characters.
|
||||||
|
fn is_safe_char2(c: char) -> bool {
|
||||||
|
c.is_ascii() && !c.is_ascii_control() && c != '_' && c != '?' && c != '='
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_safe_char(c: char) -> bool {
|
fn is_safe_char(c: char) -> bool {
|
||||||
|
@ -53,6 +79,10 @@ fn is_safe_char(c: char) -> bool {
|
||||||
(c >= '\x3e' && c <= '\x7e')
|
(c >= '\x3e' && c <= '\x7e')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn encoded_space(input: &str) -> IResult<&str, QuotedChunk> {
|
||||||
|
map(tag("_"), |_| QuotedChunk::Space)(input)
|
||||||
|
}
|
||||||
|
|
||||||
fn hex_octet(input: &str) -> IResult<&str, QuotedChunk> {
|
fn hex_octet(input: &str) -> IResult<&str, QuotedChunk> {
|
||||||
use nom;
|
use nom;
|
||||||
use nom::error::*;
|
use nom::error::*;
|
||||||
|
@ -80,12 +110,26 @@ mod tests {
|
||||||
Ok(("", vec![
|
Ok(("", vec![
|
||||||
QuotedChunk::Safe("Accus"),
|
QuotedChunk::Safe("Accus"),
|
||||||
QuotedChunk::Encoded(0xe9),
|
QuotedChunk::Encoded(0xe9),
|
||||||
QuotedChunk::Safe("_de_r"),
|
QuotedChunk::Space,
|
||||||
|
QuotedChunk::Safe("de"),
|
||||||
|
QuotedChunk::Space,
|
||||||
|
QuotedChunk::Safe("r"),
|
||||||
QuotedChunk::Encoded(0xe9),
|
QuotedChunk::Encoded(0xe9),
|
||||||
QuotedChunk::Safe("ception_(affich"),
|
QuotedChunk::Safe("ception"),
|
||||||
|
QuotedChunk::Space,
|
||||||
|
QuotedChunk::Safe("(affich"),
|
||||||
QuotedChunk::Encoded(0xe9),
|
QuotedChunk::Encoded(0xe9),
|
||||||
QuotedChunk::Safe(")"),
|
QuotedChunk::Safe(")"),
|
||||||
]))
|
]))
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_decode_word() {
|
||||||
|
assert_eq!(
|
||||||
|
encoded_word("=?iso8859-1?Q?Accus=E9_de_r=E9ception_(affich=E9)?="),
|
||||||
|
Ok(("", "Accusé de réception (affiché)".into())),
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue