From 0e23e491998eb8ad888cc04218c6fd3ab040f05d Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Fri, 16 Jun 2023 10:50:37 +0200 Subject: [PATCH] rescue logic --- src/header.rs | 40 +++++++++++++++++++++++++++++++++++----- src/model.rs | 3 +-- src/parse.rs | 3 +++ 3 files changed, 39 insertions(+), 7 deletions(-) diff --git a/src/header.rs b/src/header.rs index 960881d..f5a2ac2 100644 --- a/src/header.rs +++ b/src/header.rs @@ -1,10 +1,9 @@ use nom::{ IResult, branch::alt, - bytes::complete::take_while1, - bytes::complete::tag, + bytes::complete::{is_not, take_while1, take_while, tag}, character::complete::space0, - combinator::{map, opt}, + combinator::{map, opt, recognize}, multi::{many0, many1, fold_many0, separated_list1}, sequence::{terminated, preceded, pair, tuple}, }; @@ -25,7 +24,7 @@ use crate::{datetime, trace, model}; /// See: https://www.rfc-editor.org/rfc/rfc5322.html#section-2.2 pub fn section(input: &str) -> IResult<&str, HeaderSection> { let (input, headers) = fold_many0( - alt((header_field, unknown_field)), + alt((header_field, unknown_field, rescue)), HeaderSection::default, |mut section, head| { match head { @@ -109,6 +108,11 @@ pub fn section(input: &str) -> IResult<&str, HeaderSection> { HeaderField::Optional(name, body) => { section.optional.insert(name, body); } + + // Rescue + HeaderField::Rescue(x) => { + section.unparsed.push(x); + } }; section } @@ -149,7 +153,10 @@ pub enum HeaderField<'a> { ReturnPath(Option), // 3.6.8. Optional Fields - Optional(&'a str, String) + Optional(&'a str, String), + + // None + Rescue(&'a str), } /// Parse one known header field @@ -285,6 +292,21 @@ fn field_name(input: &str) -> IResult<&str, &str> { )(input) } +/// Rescue rule +/// +/// Something went wrong while parsing headers, +/// trying to fix parsing by consuming +/// one unfolded header line. +/// +/// ```abnf +/// rescue = *(*any FWS) *any CRLF +fn rescue(input: &str) -> IResult<&str, HeaderField> { + map(recognize(pair( + many0(pair(is_not("\r\n"), fws)), + pair(is_not("\r\n"), perm_crlf, + ))), |x| HeaderField::Rescue(x))(input) +} + #[cfg(test)] mod tests { use super::*; @@ -447,6 +469,14 @@ mod tests { fn test_invalid_field_name() { assert!(header_field("Unknown: unknown\r\n").is_err()); } + + #[test] + fn test_rescue() { + assert_eq!( + rescue("Héron: élan\r\n\tnoël: test\r\n"), + Ok(("", HeaderField::Rescue("Héron: élan\r\n\tnoël: test\r\n"))), + ); + } } diff --git a/src/model.rs b/src/model.rs index a18c434..6bce998 100644 --- a/src/model.rs +++ b/src/model.rs @@ -101,6 +101,5 @@ pub struct HeaderSection<'a> { // 3.6.8. Optional Fields pub optional: HashMap<&'a str, String>, - //pub failed: HashMap<&'a str, String>, - //pub garbage: &'a str, + pub unparsed: Vec<&'a str>, } diff --git a/src/parse.rs b/src/parse.rs index 72a6278..adc3880 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -20,6 +20,9 @@ Subject: Re: Saying Hello Comments: A simple message Comments: Not that complicated Keywords: hello, world +Héron: Raté + Raté raté +raté raté Keywords: salut, le, monde Message-ID: <3456@example.net> In-Reply-To: <1234@local.machine.example>