From decb4209d9d75d4227c56c966392a67146eb02de Mon Sep 17 00:00:00 2001
From: Quentin Dufour
Date: Mon, 10 Jul 2023 11:16:16 +0200
Subject: [PATCH] wip encoding

---
 src/fragments/encoding.rs | 117 ++++++++++++++++++++++++++++++++++++++
 src/fragments/mod.rs      |   2 +
 2 files changed, 119 insertions(+)
 create mode 100644 src/fragments/encoding.rs

diff --git a/src/fragments/encoding.rs b/src/fragments/encoding.rs
new file mode 100644
index 0000000..75ea491
--- /dev/null
+++ b/src/fragments/encoding.rs
@@ -0,0 +1,117 @@
+use std::borrow::Cow;
+use nom::{
+    IResult,
+    branch::alt,
+    bytes::complete::{tag, take_while1, take_while_m_n},
+    character::complete::one_of,
+    combinator::map,
+    sequence::{preceded, tuple},
+    multi::many0,
+};
+
+use crate::fragments::mime;
+
+/// Parses an RFC 2047 encoded-word: `=?charset?encoding?encoded-text?=`.
+///
+/// The encoding marker is `Q`/`q` (quoted-printable) or `B`/`b` (base64).
+///
+/// # Panics
+///
+/// Work in progress: payload decoding is not implemented yet, so any
+/// syntactically valid encoded-word currently hits `todo!()`.
+pub fn encoded_word(input: &str) -> IResult<&str, Cow<str>> {
+    let (rest, (_, charset, _, enc, _, txt, _)) = tuple((
+        tag("=?"), mime::token, tag("?"), one_of("QBqb"), tag("?"), encoded_text, tag("?=")
+    ))(input)?;
+
+    match enc {
+        // quoted printable
+        'q'|'Q' => todo!(),
+        // base64
+        'b'|'B' => todo!(),
+        // `one_of("QBqb")` cannot yield any other character
+        _ => unreachable!(),
+    }
+}
+
+/// Matches the non-empty payload of an encoded-word.
+fn encoded_text(input: &str) -> IResult<&str, &str> {
+    take_while1(is_encoded_text)(input)
+}
+
+/// RFC 2047 encoded-text character: printable ASCII other than `?` or space.
+///
+/// `?` must be rejected, otherwise the payload swallows the closing `?=`.
+fn is_encoded_text(c: char) -> bool {
+    c.is_ascii_graphic() && c != '?'
+}
+
+/// One piece of quoted-printable text.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum QuotedChunk<'a> {
+    /// A run of characters that stand for themselves.
+    Safe(&'a str),
+    /// A byte written as `=XX`, with `XX` two hexadecimal digits.
+    Encoded(u8),
+}
+
+//quoted_printable
+/// Splits quoted-printable text into literal runs and `=XX` octets.
+///
+/// Parsing stops at the first character that starts neither kind of
+/// chunk; that character is left in the remaining input.
+pub fn ptext(input: &str) -> IResult<&str, Vec<QuotedChunk>> {
+    many0(alt((safe_char, hex_octet)))(input)
+}
+
+/// Matches a non-empty run of safe characters.
+fn safe_char(input: &str) -> IResult<&str, QuotedChunk> {
+    map(take_while1(is_safe_char), QuotedChunk::Safe)(input)
+}
+
+/// Printable ASCII (0x21..=0x7e) except `=` (0x3d), which starts an escape.
+fn is_safe_char(c: char) -> bool {
+    matches!(c, '\x21'..='\x3c' | '\x3e'..='\x7e')
+}
+
+/// Matches `=` followed by exactly two hexadecimal digits.
+///
+/// The digit count is fixed: a greedy match would also eat hex-looking text
+/// after the escape (`=E9ce` in `=E9ception`) and overflow the `u8`.
+fn hex_octet(input: &str) -> IResult<&str, QuotedChunk> {
+    use nom::error::{Error, ErrorKind};
+
+    let (rest, hstr) =
+        preceded(tag("="), take_while_m_n(2, 2, |c: char| c.is_ascii_hexdigit()))(input)?;
+
+    let parsed = u8::from_str_radix(hstr, 16)
+        .map_err(|_| nom::Err::Error(Error::new(input, ErrorKind::Verify)))?;
+
+    Ok((rest, QuotedChunk::Encoded(parsed)))
+}
+
+//base64 (maybe use a crate)
+
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // =?iso8859-1?Q?Accus=E9_de_r=E9ception_(affich=E9)?=
+
+    #[test]
+    fn test_ptext() {
+        assert_eq!(
+            ptext("Accus=E9_de_r=E9ception_(affich=E9)"),
+            Ok(("", vec![
+                QuotedChunk::Safe("Accus"),
+                QuotedChunk::Encoded(0xe9),
+                QuotedChunk::Safe("_de_r"),
+                QuotedChunk::Encoded(0xe9),
+                QuotedChunk::Safe("ception_(affich"),
+                QuotedChunk::Encoded(0xe9),
+                QuotedChunk::Safe(")"),
+            ]))
+        );
+    }
+}
diff --git a/src/fragments/mod.rs b/src/fragments/mod.rs
index a1a1b01..a14cb1c 100644
--- a/src/fragments/mod.rs
+++ b/src/fragments/mod.rs
@@ -17,4 +17,6 @@ mod mailbox;
 pub mod section;
 pub mod trace;
 
+// MIME related
 pub mod mime;
+pub mod encoding;