Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

N°4281 - Add RFC-2045-Compliant Way of Handling Filenames in Attachments #16

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
108 changes: 63 additions & 45 deletions classes/rawemailmessage.class.inc.php
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,25 @@ class RawEmailMessage
*/
protected $iNextId;

/**
 * @var string Regex fragment (without delimiters) complying with RFC 2045 regarding the
 *   grammar of Content-Type header filenames:
 *     (1) Allow all chars for the filename, if the filename is quoted with double quotes
 *     (2) Allow all chars for the filename, if the filename is quoted with single quotes
 *     (3) If the filename is not quoted, allow only ASCII chars and exclude the following
 *         chars from the set, referenced as "tspecials" in the RFC:
 *            <ALL-CTL-CHARS-INCL-DEL>
 *            <CHAR-SPACE>
 *            ()<>@,;:\"/[]?=
 *   To keep the regex as simple as possible, the _allowed_ chars are listed
 *   with their corresponding hexval.
 *
 *   The pattern MUST stay in a nowdoc (or a single quoted string): in a heredoc or a double
 *   quoted string PHP itself expands the \xNN escapes, so that \x2B\x2D\x2E reaches PCRE as
 *   the literal sequence "+-." which is read as the range 0x2B-0x2E and therefore wrongly
 *   allows the comma (0x2C). With a nowdoc PCRE receives the escapes untouched and treats
 *   each of them as a single literal character.
 *
 *   Whatever alternative matches, the filename (without its quotes) is the last captured group.
 *
 * @since 3.7.1 N°4281
 */
public const FILENAME_REGEX = <<<'REGEX'
(("([^"]+)")|('([^']+)')|([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+))
REGEX;

/**
* Construct a new message from the full text version of it (equivalent to the content of a .eml file)
*
Expand Down Expand Up @@ -173,68 +192,51 @@ public function GetMessageId()
public function GetAttachments(&$aAttachments = null, $aPart = null, &$index = 1)
{
static $iAttachmentCount = 0;
if ($aAttachments === null)
{
if ($aAttachments === null) {
$aAttachments = array();
}
if ($aPart === null)
{
if ($aPart === null) {
$aPart = $this->aParts;
} //Init for recursion

if ($aPart['type'] == 'simple')
{
if ($this->IsAttachment($aPart['headers']))
{
if ($aPart['type'] == 'simple') {
if ($this->IsAttachment($aPart['headers'])) {
$sFileName = '';
$sContentDisposition = $this->GetHeader('content-disposition', $aPart['headers']);
if (($sContentDisposition != '') && (preg_match('/filename="([^"]+)"/', $sContentDisposition, $aMatches)))
{
$sFileName = $aMatches[1];
}
else
{
if (($sContentDisposition != '') && (preg_match('/filename=([^"]+)/', $sContentDisposition, $aMatches))) // same but without quotes
{
$sFileName = $aMatches[1];
}
if ($sContentDisposition != '') {
$sFileName = static::GetAttachmentFilename($sContentDisposition);
}

$bInline = true;
if (stripos($sContentDisposition, 'attachment;') !== false)
{
if (stripos($sContentDisposition, 'attachment;') !== false) {
$bInline = false;
}


$sType = '';
$sContentId = $this->GetHeader('content-id', $aPart['headers']);
if (($sContentId != '') && (preg_match('/^<(.+)>$/', $sContentId, $aMatches)))
{
if (($sContentId != '') && (preg_match('/^<(.+)>$/', $sContentId, $aMatches))) {
$sContentId = $aMatches[1];
}
else
{
} else {
$sContentId = 'itop_'.$iAttachmentCount;
$iAttachmentCount++;
}
$sContentType = $this->GetHeader('content-type', $aPart['headers']);
if (($sContentType != '') && (preg_match('/^([^;]+)/', $sContentType, $aMatches)))
{
if (($sContentType != '') && (preg_match('/^([^;]+)/', $sContentType, $aMatches))) {
$sType = $aMatches[1];
}
if (empty($sFileName) && preg_match('/name="([^"]+)"/', $sContentType, $aMatches))
{
$sFileName = $aMatches[1];
if (empty($sFileName)) {
$sContentTypeFilename = static::GetAttachmentFilename($sContentType);
if ($sContentTypeFilename !== '') {
$sFileName = $sContentTypeFilename;
}
}
if (empty($sFileName))
{

if (empty($sFileName)) {
// generate a name based on the type of the file...
$aTypes = explode('/', $sType);
$aTypes = explode('/', $sType);
piRGoif marked this conversation as resolved.
Show resolved Hide resolved
$sFileExtension = $aTypes[1];
// map the type to a useful extension if needed
switch ($aTypes[1])
{
switch ($aTypes[1]) {
case 'rfc822':
// Special case for messages: use the .eml extension
$sFileExtension = 'eml';
Expand All @@ -243,25 +245,41 @@ public function GetAttachments(&$aAttachments = null, $aPart = null, &$index = 1
$sFileName = sprintf('%s%03d.%s', $aTypes[0], $index, $sFileExtension); // i.e. image001.jpg
}
$aAttachments['part_'.$aPart['part_id']] = array(
'filename' => $sFileName,
'mimeType' => $sType,
'filename' => $sFileName,
'mimeType' => $sType,
'content-id' => $sContentId,
'content' => $this->DecodePart($aPart['headers'], $aPart['body']),
'inline' => $bInline,
'content' => $this->DecodePart($aPart['headers'], $aPart['body']),
'inline' => $bInline,
);
}
}
else
{
foreach ($aPart['parts'] as $aSubPart)
{
} else {
foreach ($aPart['parts'] as $aSubPart) {
$aAttachments = array_merge($aAttachments, $this->GetAttachments($aAttachments, $aSubPart, $index));
}
}

return $aAttachments;
}

/**
 * Extract the attachment filename from a header value, following the RFC 2045 filename syntax.
 *
 * The first "name=" parameter found is used. As the search is not anchored on the left, this
 * covers both the "name=" parameter of a Content-Type header and the "filename=" parameter of
 * a Content-Disposition header.
 *
 * @param string $sHeaderValue Raw value of the header to search
 *
 * @return string The filename without its surrounding quotes, empty string if none found
 * @uses static::FILENAME_REGEX
 *
 * @since 3.7.1 N°4281
 */
public static function GetAttachmentFilename($sHeaderValue)
{
	// RFC 2045 (section 5.1) states that parameter names are not case sensitive, hence the "i"
	// modifier: "Name=" or "FILENAME=" must be recognized as well.
	// The cast prevents passing null to preg_match() when the header is missing.
	if (preg_match('/name='.static::FILENAME_REGEX.'/i', (string)$sHeaderValue, $aMatches) !== 1) {
		return '';
	}

	// preg_match() does not report trailing groups that did not participate in the match, so
	// the last entry is always the innermost group of the alternative that matched, i.e. the
	// filename without its quotes.
	return end($aMatches);
}

/**
* Create a new RawEmailMessage object by reading the content of the given file
*
Expand Down
83 changes: 83 additions & 0 deletions test/TestRegexAttachmentNames.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
<?php
/*
* @copyright Copyright (C) 2010-2021 Combodo SARL
* @license http://opensource.org/licenses/AGPL-3.0
*/


namespace Combodo\iTop\Test\UnitTest\Module\CombodoEmailSynchro;

use Combodo\iTop\Test\UnitTest\ItopTestCase;
use RawEmailMessage;

/**
 * Class TestRegexAttachmentNames
 *
 * Checks which part of a raw header parameter value is captured as the attachment filename
 * by {@see RawEmailMessage::FILENAME_REGEX}.
 *
 * @package Combodo\iTop\Test\UnitTest\CombodoEmailSynchro
 */
class TestRegexAttachmentNames extends ItopTestCase
{
	public function setUp(): void
	{
		parent::setUp();
		require_once(APPROOT.'env-production/combodo-email-synchro/classes/rawemailmessage.class.inc.php');
	}

	/**
	 * @dataProvider AttachmentFilenameProvider
	 * @covers RawEmailMessage::GetAttachments
	 *
	 * @param string $sInput Raw value following "name=" in the header
	 * @param string $sExpectedAttachmentName Expected filename, empty string when nothing must match
	 */
	public function testNormalizeAttachmentName(string $sInput, string $sExpectedAttachmentName)
	{
		$aMatches = array();
		// The constant is a regex fragment: wrap it in explicit delimiters, the same way the
		// production code does, instead of relying on PHP accepting the outermost parentheses
		// of the fragment as bracket style delimiters.
		$sPattern = '/'.RawEmailMessage::FILENAME_REGEX.'/';

		if (preg_match($sPattern, $sInput, $aMatches) === 1) {
			// Trailing groups that did not match are not reported: the last entry is the filename
			$sNormalizedAttachmentName = end($aMatches);
			$this->assertEquals($sExpectedAttachmentName, $sNormalizedAttachmentName, "Attachmentname for '".bin2hex($sInput)."' doesn't match. Got'".bin2hex($sNormalizedAttachmentName)."', expected '$sExpectedAttachmentName'.");
		} else {
			// No match at all is only acceptable when no filename was expected. Without this
			// check, an input that fails to match would pass whatever the expectation is.
			$this->assertSame('', $sExpectedAttachmentName, "No filename found in '".bin2hex($sInput)."', expected '$sExpectedAttachmentName'.");
		}
	}

	public function AttachmentFilenameProvider()
	{
		return [
			'All allowed Chars' => [
				"!#$%&'*+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz{|}~",
				"!#$%&'*+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz{|}~",
			],
			// Regression guard for x2C (comma): it is not in the list of allowed chars, but it
			// gets accepted as soon as the character class reaches PCRE with a literal "+-."
			// sequence (which happens when PHP expands the \x2B\x2D\x2E escapes itself, e.g.
			// in a heredoc), because PCRE then reads it as the range x2B-x2E.
			'All not allowed Chars (from the ASCII Table)' => [
				"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x17\x18\x19\x1A\x1B\x1C\x1E\x1F\x20\x22\x28\x29\x2C\x2F\x3A\x3B\x3C\x3D\x3E\x3F\x40\x5B\x5C\x5D\x7F",
				"",
			],
			'Single Quotes delimit filename' => [
				"'End 'before'",
				"End ",
			],
			'Double Quotes delimit filename' => [
				'"End "before"',
				'End ',
			],
			'Empty String' => [
				'',
				'',
			],
			'Name with a Space' => [
				'OQL Queries',
				'OQL',
			],
			// Same regression guard as for the not allowed chars: an unquoted name stops at the comma.
			'Name with a Comma' => [
				'OQL,Queries',
				'OQL',
			],
		];
	}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
Return-Path: <[email protected]>
Received: from www192.your-server.de
by www192.your-server.de with LMTP
id cANPJDAmYmNaVQEAUXDXKw
(envelope-from <[email protected]>); Wed, 02 Nov 2022 09:11:28 +0100
Envelope-to: [email protected]
Delivery-date: Wed, 02 Nov 2022 09:11:28 +0100
Authentication-Results: www192.your-server.de;
iprev=pass (localhost) smtp.remote-ip=127.0.0.1;
spf=pass smtp.mailfrom=itomig.de;
dkim=pass header.d=itomig.de header.s=default2012 header.a=rsa-sha256;
dmarc=skipped
Received: from localhost ([127.0.0.1] helo=www192.your-server.de)
by www192.your-server.de with esmtps (TLS1.3) tls TLS_AES_256_GCM_SHA384
(Exim 4.94.2)
(envelope-from <[email protected]>)
id 1oq8qO-000NG0-7r
for [email protected]; Wed, 02 Nov 2022 09:11:28 +0100
DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=itomig.de;
s=default2012; h=Subject:From:To:MIME-Version:Date:Message-ID:Content-Type:
Sender:Reply-To:Cc:Content-Transfer-Encoding:Content-ID:Content-Description:
Resent-Date:Resent-From:Resent-Sender:Resent-To:Resent-Cc:Resent-Message-ID:
In-Reply-To:References; bh=qfsa/c5xPqZ3d+6wNfzUExTW0ZNkPkALI3o0hp2R85A=; b=r5
zbg+7XSo/HBfaFAqqdBD0gDiOIa8ujhqmikWoXps5jf9oMJVJ6af5ynLdfZPbxkpWss5t6os7LhO/
w/ZdvjR8sOpvIX5nkR3DJxnG76Z28p0aFtpBz1OSIXpWe7LmSj8mBwsXBuh6jTLLH2xRGY3gBEIU9
fkOlRbJQ6lU3qhS9Xa9cgSGECPfhIvZzdM/Rb0vdvvERCmrpvr9TylqFb+3RA05pQJBOvG3i/ih7O
5a9RytEsd5rpN1IabPoMV6UJFhKaxPY4hMG4qdgzI2GQZAcoK5kNm/IrUxeHQ8KLy5HigJ1I11e9e
kzqzy6hT0ZKrELFcqh6Y8L6EzgtC+aOg==;
Received: from [95.90.147.54] (helo=[192.168.0.103])
by www192.your-server.de with esmtpsa (TLS1.3) tls TLS_AES_256_GCM_SHA384
(Exim 4.94.2)
(envelope-from <[email protected]>)
id 1oq8qO-000NFs-3t
for [email protected]; Wed, 02 Nov 2022 09:11:28 +0100
Content-Type: multipart/mixed; boundary="------------8PrwbWLJ6Ep94lElB18RfVAR"
Message-ID: <[email protected]>
Date: Wed, 2 Nov 2022 09:11:25 +0100
MIME-Version: 1.0
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101
Thunderbird/102.2.2
Content-Language: en-US
To: [email protected]
From: Martin Raenker <[email protected]>
Subject: test for attachments with unquoted name
X-Authenticated-Sender: [email protected]
X-Virus-Scanned: Clear (ClamAV 0.103.7/26707/Tue Nov 1 21:23:26 2022)
X-local-sign: yes
X-DKIM-Status: pass [(itomig.de) - 127.0.0.1]
Delivered-To: [email protected]

This is a multi-part message in MIME format.
--------------8PrwbWLJ6Ep94lElB18RfVAR
Content-Type: text/plain; charset=UTF-8; format=flowed
Content-Transfer-Encoding: 7bit

testbody
--------------8PrwbWLJ6Ep94lElB18RfVAR
Content-Type: application/octet-stream; charset=UTF-8; name="example_attachment_mail.csv"
Content-Disposition: attachment;
Content-Transfer-Encoding: base64

InRlc3QiLCJ0b3N0Igo=

--------------8PrwbWLJ6Ep94lElB18RfVAR
Content-Type: application/octet-stream; charset=UTF-8; name='example_attachment_mail.csv'
Content-Disposition: attachment;
Content-Transfer-Encoding: base64

InRlc3QiLCJ0b3N0Igo=

--------------8PrwbWLJ6Ep94lElB18RfVAR
Content-Type: application/octet-stream; charset=UTF-8; name=example_attachment_mail.csv
Content-Disposition: attachment;
Content-Transfer-Encoding: base64

InRlc3QiLCJ0b3N0Igo=

--------------8PrwbWLJ6Ep94lElB18RfVAR
Content-Type: application/octet-stream; charset=UTF-8;
Content-Disposition: attachment; filename="example_attachment_mail.csv"
Content-Transfer-Encoding: base64

InRlc3QiLCJ0b3N0Igo=

--------------8PrwbWLJ6Ep94lElB18RfVAR
Content-Type: application/octet-stream; charset=UTF-8;
Content-Disposition: attachment; filename='example_attachment_mail.csv'
Content-Transfer-Encoding: base64

InRlc3QiLCJ0b3N0Igo=

--------------8PrwbWLJ6Ep94lElB18RfVAR
Content-Type: application/octet-stream; charset=UTF-8;
Content-Disposition: attachment; filename=example_attachment_mail.csv
Content-Transfer-Encoding: base64

InRlc3QiLCJ0b3N0Igo=