From 4e82941fd068cb27f6b75cb18444b4a660b7948d Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Thu, 13 Jul 2023 09:56:24 +0200 Subject: [PATCH] decode base64 headers --- Cargo.lock | 7 +++++ Cargo.toml | 1 + src/fragments/encoding.rs | 60 ++++++++++++++++++++++++++++----------- 3 files changed, 52 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4ce2f6f..fcfc840 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,6 +23,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "base64" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "604178f6c5c21f02dc555784810edfb88d34ac2c73b2eae109655649ee73ce3d" + [[package]] name = "bumpalo" version = "3.13.0" @@ -109,6 +115,7 @@ dependencies = [ name = "imf-codec" version = "0.1.0" dependencies = [ + "base64", "chardetng", "chrono", "encoding_rs", diff --git a/Cargo.toml b/Cargo.toml index 9e7946b..80a2e71 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ nom = "7" chrono = "0.4" chardetng = "0.1" encoding_rs = "0.8" +base64 = "0.21" [dev-dependencies] walkdir = "2" diff --git a/src/fragments/encoding.rs b/src/fragments/encoding.rs index 16fa723..22f139a 100644 --- a/src/fragments/encoding.rs +++ b/src/fragments/encoding.rs @@ -2,30 +2,42 @@ use std::borrow::Cow; use nom::{ IResult, branch::alt, - bytes::complete::{tag, take, take_while1}, - character::complete::{hex_digit1, one_of}, + bytes::complete::{tag, take, take_while1, take_while}, + character::complete::{one_of}, combinator::map, - sequence::{preceded, tuple}, + sequence::{preceded, terminated, tuple}, multi::many0, }; use encoding_rs::Encoding; +use base64::{Engine as _, engine::general_purpose}; use crate::fragments::mime; pub fn encoded_word(input: &str) -> IResult<&str, String> { - let (rest, (_, charset, _, enc, _, txt, _)) = tuple(( - tag("=?"), mime::token, tag("?"), one_of("QBqb"), tag("?"), ptext, tag("?=") - ))(input)?; + alt((encoded_word_quoted, encoded_word_base64))(input) +} - let renc = Encoding::for_label(charset.as_bytes()).unwrap_or(encoding_rs::WINDOWS_1252); +pub fn encoded_word_quoted(input: &str) -> IResult<&str, String> { + let (rest, (_, charset, _, _, _, txt, _)) = tuple(( + tag("=?"), mime::token, + tag("?"), one_of("Qq"), + tag("?"), ptext, + tag("?=")))(input)?; - let parsed = match enc { - // quoted printable - 'q'|'Q' => decode_quoted_encoding(renc, txt.iter()), - // base64 - 'b'|'B' => todo!(), - _ => unreachable!(), - }; + let renc = Encoding::for_label(charset.as_bytes()).unwrap_or(encoding_rs::WINDOWS_1252); + let parsed = decode_quoted_encoding(renc, txt.iter()); + Ok((rest, parsed)) +} + +pub fn encoded_word_base64(input: &str) -> IResult<&str, String> { + let (rest, (_, charset, _, _, _, txt, _)) = tuple(( + tag("=?"), mime::token, + tag("?"), one_of("Bb"), + tag("?"), btext, + tag("?=")))(input)?; + + let renc = Encoding::for_label(charset.as_bytes()).unwrap_or(encoding_rs::WINDOWS_1252); + let parsed = general_purpose::STANDARD_NO_PAD.decode(txt).map(|d| renc.decode(d.as_slice()).0.to_string()).unwrap_or("".into()); Ok((rest, parsed)) } @@ -61,6 +73,7 @@ pub fn ptext(input: &str) -> IResult<&str, Vec> { many0(alt((safe_char2, encoded_space, hex_octet)))(input) } + fn safe_char2(input: &str) -> IResult<&str, QuotedChunk> { map(take_while1(is_safe_char2), |v| QuotedChunk::Safe(v))(input) } @@ -74,17 +87,17 @@ fn is_safe_char2(c: char) -> bool { c.is_ascii() && !c.is_ascii_control() && c != '_' && c != '?' && c != '=' } +/* fn is_safe_char(c: char) -> bool { (c >= '\x21' && c <= '\x3c') || (c >= '\x3e' && c <= '\x7e') -} +}*/ fn encoded_space(input: &str) -> IResult<&str, QuotedChunk> { map(tag("_"), |_| QuotedChunk::Space)(input) } fn hex_octet(input: &str) -> IResult<&str, QuotedChunk> { - use nom; use nom::error::*; let (rest, hstr) = preceded(tag("="), take(2usize))(input)?; @@ -96,7 +109,13 @@ fn hex_octet(input: &str) -> IResult<&str, QuotedChunk> { } //base64 (maybe use a crate) +pub fn btext(input: &str) -> IResult<&str, &str> { + terminated(take_while(is_bchar), many0(tag("=")))(input) +} +fn is_bchar(c: char) -> bool { + c.is_ascii_alphanumeric() || c == '+' || c == '/' +} #[cfg(test)] mod tests { @@ -132,4 +151,13 @@ mod tests { Ok(("", "Accusé de réception (affiché)".into())), ); } + + // =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= + #[test] + fn test_decode_word_b64() { + assert_eq!( + encoded_word("=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?="), + Ok(("", "If you can read this yo".into())) + ); + } }