decode base64 headers

This commit is contained in:
Quentin 2023-07-13 09:56:24 +02:00
parent 76b921c9a3
commit 4e82941fd0
Signed by: quentin
GPG key ID: E9602264D639FF68
3 changed files with 52 additions and 16 deletions

7
Cargo.lock generated
View file

@ -23,6 +23,12 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "base64"
version = "0.21.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "604178f6c5c21f02dc555784810edfb88d34ac2c73b2eae109655649ee73ce3d"
[[package]] [[package]]
name = "bumpalo" name = "bumpalo"
version = "3.13.0" version = "3.13.0"
@ -109,6 +115,7 @@ dependencies = [
name = "imf-codec" name = "imf-codec"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"base64",
"chardetng", "chardetng",
"chrono", "chrono",
"encoding_rs", "encoding_rs",

View file

@ -20,6 +20,7 @@ nom = "7"
chrono = "0.4" chrono = "0.4"
chardetng = "0.1" chardetng = "0.1"
encoding_rs = "0.8" encoding_rs = "0.8"
base64 = "0.21"
[dev-dependencies] [dev-dependencies]
walkdir = "2" walkdir = "2"

View file

@ -2,30 +2,42 @@ use std::borrow::Cow;
use nom::{ use nom::{
IResult, IResult,
branch::alt, branch::alt,
bytes::complete::{tag, take, take_while1}, bytes::complete::{tag, take, take_while1, take_while},
character::complete::{hex_digit1, one_of}, character::complete::{one_of},
combinator::map, combinator::map,
sequence::{preceded, tuple}, sequence::{preceded, terminated, tuple},
multi::many0, multi::many0,
}; };
use encoding_rs::Encoding; use encoding_rs::Encoding;
use base64::{Engine as _, engine::general_purpose};
use crate::fragments::mime; use crate::fragments::mime;
pub fn encoded_word(input: &str) -> IResult<&str, String> { pub fn encoded_word(input: &str) -> IResult<&str, String> {
let (rest, (_, charset, _, enc, _, txt, _)) = tuple(( alt((encoded_word_quoted, encoded_word_base64))(input)
tag("=?"), mime::token, tag("?"), one_of("QBqb"), tag("?"), ptext, tag("?=") }
))(input)?;
pub fn encoded_word_quoted(input: &str) -> IResult<&str, String> {
let (rest, (_, charset, _, _, _, txt, _)) = tuple((
tag("=?"), mime::token,
tag("?"), one_of("Qq"),
tag("?"), ptext,
tag("?=")))(input)?;
let renc = Encoding::for_label(charset.as_bytes()).unwrap_or(encoding_rs::WINDOWS_1252); let renc = Encoding::for_label(charset.as_bytes()).unwrap_or(encoding_rs::WINDOWS_1252);
let parsed = decode_quoted_encoding(renc, txt.iter());
Ok((rest, parsed))
}
let parsed = match enc { pub fn encoded_word_base64(input: &str) -> IResult<&str, String> {
// quoted printable let (rest, (_, charset, _, _, _, txt, _)) = tuple((
'q'|'Q' => decode_quoted_encoding(renc, txt.iter()), tag("=?"), mime::token,
// base64 tag("?"), one_of("Bb"),
'b'|'B' => todo!(), tag("?"), btext,
_ => unreachable!(), tag("?=")))(input)?;
};
let renc = Encoding::for_label(charset.as_bytes()).unwrap_or(encoding_rs::WINDOWS_1252);
let parsed = general_purpose::STANDARD_NO_PAD.decode(txt).map(|d| renc.decode(d.as_slice()).0.to_string()).unwrap_or("".into());
Ok((rest, parsed)) Ok((rest, parsed))
} }
@ -61,6 +73,7 @@ pub fn ptext(input: &str) -> IResult<&str, Vec<QuotedChunk>> {
many0(alt((safe_char2, encoded_space, hex_octet)))(input) many0(alt((safe_char2, encoded_space, hex_octet)))(input)
} }
fn safe_char2(input: &str) -> IResult<&str, QuotedChunk> { fn safe_char2(input: &str) -> IResult<&str, QuotedChunk> {
map(take_while1(is_safe_char2), |v| QuotedChunk::Safe(v))(input) map(take_while1(is_safe_char2), |v| QuotedChunk::Safe(v))(input)
} }
@ -74,17 +87,17 @@ fn is_safe_char2(c: char) -> bool {
c.is_ascii() && !c.is_ascii_control() && c != '_' && c != '?' && c != '=' c.is_ascii() && !c.is_ascii_control() && c != '_' && c != '?' && c != '='
} }
/*
fn is_safe_char(c: char) -> bool { fn is_safe_char(c: char) -> bool {
(c >= '\x21' && c <= '\x3c') || (c >= '\x21' && c <= '\x3c') ||
(c >= '\x3e' && c <= '\x7e') (c >= '\x3e' && c <= '\x7e')
} }*/
fn encoded_space(input: &str) -> IResult<&str, QuotedChunk> { fn encoded_space(input: &str) -> IResult<&str, QuotedChunk> {
map(tag("_"), |_| QuotedChunk::Space)(input) map(tag("_"), |_| QuotedChunk::Space)(input)
} }
fn hex_octet(input: &str) -> IResult<&str, QuotedChunk> { fn hex_octet(input: &str) -> IResult<&str, QuotedChunk> {
use nom;
use nom::error::*; use nom::error::*;
let (rest, hstr) = preceded(tag("="), take(2usize))(input)?; let (rest, hstr) = preceded(tag("="), take(2usize))(input)?;
@ -96,7 +109,13 @@ fn hex_octet(input: &str) -> IResult<&str, QuotedChunk> {
} }
//base64 (maybe use a crate) //base64 (maybe use a crate)
/// Parses the payload of a base64 encoded-word.
///
/// Consumes a (possibly empty) run of base64 alphabet characters, then any
/// number of trailing `=` characters. Only the alphabet run is returned; the
/// `=` characters are consumed and discarded.
pub fn btext(input: &str) -> IResult<&str, &str> {
    // First the alphabet characters, which form the returned value...
    let (after_payload, payload) = take_while(is_bchar)(input)?;
    // ...then zero or more `=` signs, which are dropped.
    let (rest, _padding) = many0(tag("="))(after_payload)?;
    Ok((rest, payload))
}
/// Returns `true` when `c` is an ASCII letter, an ASCII digit, `+` or `/`.
///
/// The `=` character is not accepted here.
fn is_bchar(c: char) -> bool {
    matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '+' | '/')
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
@ -132,4 +151,13 @@ mod tests {
Ok(("", "Accusé de réception (affiché)".into())), Ok(("", "Accusé de réception (affiché)".into())),
); );
} }
// A `B`-encoded word labelled ISO-8859-1 must be consumed entirely and yield
// the decoded text.
#[test]
fn test_decode_word_b64() {
    let decoded = encoded_word("=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=");
    assert_eq!(
        decoded,
        Ok(("", String::from("If you can read this yo")))
    );
}
} }