implement the guess charset parser
This commit is contained in:
parent
ddf6311925
commit
2bc62edba8
2 changed files with 52 additions and 0 deletions
|
@ -0,0 +1,51 @@
|
|||
use std::borrow::Cow;
|
||||
use chardetng::EncodingDetector;
|
||||
use encoding_rs::Encoding;
|
||||
|
||||
use crate::multipass::segment::Segment;
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct GuessCharset<'a> {
|
||||
pub header: Cow<'a, str>,
|
||||
pub encoding: &'static Encoding,
|
||||
pub malformed: bool,
|
||||
pub body: &'a [u8],
|
||||
}
|
||||
|
||||
// Named arguments for the `EncodingDetector` calls below, so the call sites
// read as prose instead of bare `true` / `None` literals.

/// Second argument of `EncodingDetector::feed`: the header is handed to the
/// detector in a single call, so that call is flagged as the last one.
const IS_LAST_BUFFER: bool = true;

/// First argument of `EncodingDetector::guess`: no top-level-domain hint is
/// supplied to the detector.
const NO_TLD: Option<&[u8]> = None;

/// Second argument of `EncodingDetector::guess`: UTF-8 is an acceptable guess.
const ALLOW_UTF8: bool = true;
|
||||
|
||||
impl<'a> From<Segment<'a>> for GuessCharset<'a> {
|
||||
fn from(seg: Segment<'a>) -> Self {
|
||||
// Create detector
|
||||
let mut detector = EncodingDetector::new();
|
||||
detector.feed(&seg.header, IS_LAST_BUFFER);
|
||||
|
||||
// Get encoding
|
||||
let enc: &Encoding = detector.guess(NO_TLD, ALLOW_UTF8);
|
||||
let (header, encoding, malformed) = enc.decode(&seg.header);
|
||||
|
||||
GuessCharset { header, encoding, malformed, body: seg.body }
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A plain ASCII header is expected to decode as UTF-8 without the
    /// malformed flag, with the body left as-is.
    #[test]
    fn test_charset() {
        let input = Segment {
            body: b"Hello world!",
            header: b"From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n",
        };

        let expected = GuessCharset {
            header: Cow::Borrowed("From: hello@world.com\r\nDate: 12 Mar 1997 07:33:25 Z\r\n"),
            encoding: encoding_rs::UTF_8,
            malformed: false,
            body: b"Hello world!",
        };

        assert_eq!(GuessCharset::from(input), expected);
    }
}
|
|
@ -1 +1,2 @@
|
|||
pub mod segment;
|
||||
pub mod guess_charset;
|
||||
|
|
Loading…
Reference in a new issue