Skip to content

Commit

Permalink
Add .eml example with odd behavior (#88)
Browse files Browse the repository at this point in the history
* Add .eml example with odd behavior

* Parse malformed, non-multipart messages that consist of a single text/plain part on a best-effort basis, treating the text as if it were not malformed
  • Loading branch information
sftse authored Jan 25, 2025
1 parent ed16429 commit e97d14a
Show file tree
Hide file tree
Showing 6 changed files with 150 additions and 7 deletions.
8 changes: 5 additions & 3 deletions resources/eml/malformed/012.crlf.json
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,13 @@
"is_encoding_problem": false,
"body": {
"Message": {
"html_body": [],
"text_body": [],
"attachments": [
"html_body": [
0
],
"text_body": [
0
],
"attachments": [],
"parts": [
{
"headers": [
Expand Down
8 changes: 5 additions & 3 deletions resources/eml/malformed/012.json
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,13 @@
"is_encoding_problem": false,
"body": {
"Message": {
"html_body": [],
"text_body": [],
"attachments": [
"html_body": [
0
],
"text_body": [
0
],
"attachments": [],
"parts": [
{
"headers": [
Expand Down
58 changes: 58 additions & 0 deletions resources/eml/malformed/018.crlf.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
{
"html_body": [
0
],
"text_body": [
0
],
"attachments": [],
"parts": [
{
"headers": [
{
"name": "content_type",
"value": {
"ContentType": {
"c_type": "text",
"c_subtype": "plain",
"attributes": [
[
"charset",
"Windows-1252"
]
]
}
},
"offset_field": 0,
"offset_start": 13,
"offset_end": 52
},
{
"name": "content_transfer_encoding",
"value": {
"Text": "quoted-printable"
},
"offset_field": 52,
"offset_start": 78,
"offset_end": 97
},
{
"name": "mime_version",
"value": {
"Text": "1.0"
},
"offset_field": 97,
"offset_start": 110,
"offset_end": 116
}
],
"is_encoding_problem": true,
"body": {
"Text": "Best\r\nEnviado desde mi BlackBerry=AE de Vodafone=\r\n<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<!DOCTYPE plist PUBLIC \"-//Apple//DTD PLIST 1.0//EN\" \"http://www.apple.com/DTDs/PropertyList-1.0.dtd\">\r\n<plist version=\"1.0\">\r\n<dict>\r\n</dict>\r\n</plist>"
},
"offset_header": 0,
"offset_body": 118,
"offset_end": 361
}
]
}
13 changes: 13 additions & 0 deletions resources/eml/malformed/018.eml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Content-Type: text/plain;
charset="Windows-1252"
Content-Transfer-Encoding: quoted-printable
MIME-Version: 1.0

Best
Enviado desde mi BlackBerry=AE de Vodafone=
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
</dict>
</plist>
58 changes: 58 additions & 0 deletions resources/eml/malformed/018.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
{
"html_body": [
0
],
"text_body": [
0
],
"attachments": [],
"parts": [
{
"headers": [
{
"name": "content_type",
"value": {
"ContentType": {
"c_type": "text",
"c_subtype": "plain",
"attributes": [
[
"charset",
"Windows-1252"
]
]
}
},
"offset_field": 0,
"offset_start": 13,
"offset_end": 52
},
{
"name": "content_transfer_encoding",
"value": {
"Text": "quoted-printable"
},
"offset_field": 52,
"offset_start": 78,
"offset_end": 97
},
{
"name": "mime_version",
"value": {
"Text": "1.0"
},
"offset_field": 97,
"offset_start": 110,
"offset_end": 116
}
],
"is_encoding_problem": true,
"body": {
"Text": "Best\nEnviado desde mi BlackBerry=AE de Vodafone=\n<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE plist PUBLIC \"-//Apple//DTD PLIST 1.0//EN\" \"http://www.apple.com/DTDs/PropertyList-1.0.dtd\">\n<plist version=\"1.0\">\n<dict>\n</dict>\n</plist>"
},
"offset_header": 0,
"offset_body": 118,
"offset_end": 354
}
]
}
12 changes: 11 additions & 1 deletion src/parsers/message.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,9 @@ impl MessageParser {
let mut is_encoding_problem = offset_end == usize::MAX;
if is_encoding_problem {
encoding = Encoding::None;
mime_type = MimeType::TextOther;
if mime_type != MimeType::TextPlain {
mime_type = MimeType::TextOther;
}
is_inline = false;
is_text = true;

Expand Down Expand Up @@ -278,6 +280,14 @@ impl MessageParser {
&& (mime_type == MimeType::Inline
|| content_type.map_or(true, |c| !c.has_attribute("name"))));

// If the message consists of a single text/plain part that has an encoding
// problem, still treat that part as inline text: see malformed/018.eml
let is_inline = is_inline
|| state.parts == 1
&& state.mime_type == MimeType::Message
&& mime_type == MimeType::TextPlain
&& is_encoding_problem;

let (add_to_html, add_to_text) =
if let MimeType::MultipartAlternative = state.mime_type {
match mime_type {
Expand Down

0 comments on commit e97d14a

Please sign in to comment.