diff --git a/classes/rawemailmessage.class.inc.php b/classes/rawemailmessage.class.inc.php index 27119e5..b4ff3e5 100644 --- a/classes/rawemailmessage.class.inc.php +++ b/classes/rawemailmessage.class.inc.php @@ -66,6 +66,25 @@ class RawEmailMessage */ protected $iNextId; + /** + * @var string This Regex complies with RFC 2045 regarding the Grammar of + * Content Type Headers Filenames: + * (1) Allow all chars for the filename, if the filename is quoted with double quotes + * (2) Allow all chars for the filename, if the filename is quoted with single quotes + * (3) If the filename is not quoted, allow only ASCII chars and exclude the following + * chars from the set, referenced as "specials" in the RFC: + * + * + * ()<>@,;:\"/[]?= + * To keep the regex as simple as possible, the _allowed_ chars are listed + * with their corresponding hexval. + * + * @since 3.7.1 N°4281 + */ + public const FILENAME_REGEX = <<aParts; } //Init for recursion - if ($aPart['type'] == 'simple') - { - if ($this->IsAttachment($aPart['headers'])) - { + if ($aPart['type'] == 'simple') { + if ($this->IsAttachment($aPart['headers'])) { $sFileName = ''; $sContentDisposition = $this->GetHeader('content-disposition', $aPart['headers']); - if (($sContentDisposition != '') && (preg_match('/filename="([^"]+)"/', $sContentDisposition, $aMatches))) - { - $sFileName = $aMatches[1]; - } - else - { - if (($sContentDisposition != '') && (preg_match('/filename=([^"]+)/', $sContentDisposition, $aMatches))) // same but without quotes - { - $sFileName = $aMatches[1]; - } + if ($sContentDisposition != '') { + $sFileName = static::GetAttachmentFilename($sContentDisposition); } $bInline = true; - if (stripos($sContentDisposition, 'attachment;') !== false) - { + if (stripos($sContentDisposition, 'attachment;') !== false) { $bInline = false; } - $sType = ''; $sContentId = $this->GetHeader('content-id', $aPart['headers']); - if (($sContentId != '') && (preg_match('/^<(.+)>$/', $sContentId, $aMatches))) - { + if (($sContentId != '') && (preg_match('/^<(.+)>$/', $sContentId, $aMatches))) { $sContentId = $aMatches[1]; - } - else - { + } else { $sContentId = 'itop_'.$iAttachmentCount; $iAttachmentCount++; } $sContentType = $this->GetHeader('content-type', $aPart['headers']); - if (($sContentType != '') && (preg_match('/^([^;]+)/', $sContentType, $aMatches))) - { + if (($sContentType != '') && (preg_match('/^([^;]+)/', $sContentType, $aMatches))) { $sType = $aMatches[1]; } - if (empty($sFileName) && preg_match('/name="([^"]+)"/', $sContentType, $aMatches)) - { - $sFileName = $aMatches[1]; + if (empty($sFileName)) { + $sContentTypeFilename = static::GetAttachmentFilename($sContentType); + if ($sContentTypeFilename !== '') { + $sFileName = $sContentTypeFilename; + } } - if (empty($sFileName)) - { + + if (empty($sFileName)) { // generate a name based on the type of the file... $aTypes = explode('/', $sType); $sFileExtension = $aTypes[1]; // map the type to a useful extension if needed - switch ($aTypes[1]) - { + switch ($aTypes[1]) { case 'rfc822': // Special case for messages: use the .eml extension $sFileExtension = 'eml'; @@ -243,18 +245,15 @@ public function GetAttachments(&$aAttachments = null, $aPart = null, &$index = 1 $sFileName = sprintf('%s%03d.%s', $aTypes[0], $index, $sFileExtension); // i.e. image001.jpg } $aAttachments['part_'.$aPart['part_id']] = array( - 'filename' => $sFileName, - 'mimeType' => $sType, + 'filename' => $sFileName, + 'mimeType' => $sType, 'content-id' => $sContentId, - 'content' => $this->DecodePart($aPart['headers'], $aPart['body']), - 'inline' => $bInline, + 'content' => $this->DecodePart($aPart['headers'], $aPart['body']), + 'inline' => $bInline, ); } - } - else - { - foreach ($aPart['parts'] as $aSubPart) - { + } else { + foreach ($aPart['parts'] as $aSubPart) { $aAttachments = array_merge($aAttachments, $this->GetAttachments($aAttachments, $aSubPart, $index)); } } @@ -262,6 +261,25 @@ public function GetAttachments(&$aAttachments = null, $aPart = null, &$index = 1 return $aAttachments; } + /** + * Allow to handle RFC 2045 filename syntax in attachments + * + * @param string $sHeaderValue + * + * @return string empty string if none found + * @uses static::FILENAME_REGEX + * + * @since 3.7.1 N°4281 + */ + public static function GetAttachmentFilename($sHeaderValue) + { + if (preg_match('/name='.static::FILENAME_REGEX.'/', $sHeaderValue, $aMatches)) { + return end($aMatches); + } + + return ''; + } + /** * Create a new RawEmailMessage object by reading the content of the given file * diff --git a/test/TestRegexAttachmentNames.php b/test/TestRegexAttachmentNames.php new file mode 100644 index 0000000..198c2a9 --- /dev/null +++ b/test/TestRegexAttachmentNames.php @@ -0,0 +1,83 @@ +assertEquals($sExpectedAttachmentName, $sNormalizedAttachmentName, "Attachmentname for '".bin2hex($sInput)."' doesn't match. Got'".bin2hex($sNormalizedAttachmentName)."', expected '$sExpectedAttachmentName'."); + } else { + $this->AssertNull($sNormalizedAttachmentName); + } + } + + public function AttachmentFilenameProvider() + { + return [ + 'All allowed Chars' => [ + "!#$%&'*+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz{|}~", + "!#$%&'*+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz{|}~", + ], + // Something is odd here: x2C (comma) is not in the list of allowed + // chars in {@see RawEmailMessage::FILENAME_REGEX}, but nevertheless is + // not filtered by the Regex, resulting in this test case is failing. + // I have no clue, as to why this is happening. + 'All not allowed Chars (from the ASCII Table)' => [ + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x17\x18\x19\x1A\x1B\x1C\x1E\x1F\x20\x22\x28\x29\x2C\x2F\x3A\x3B\x3C\x3D\x3E\x3F\x40\x5B\x5C\x5D\x7F", + "", + ], + 'Single Quotes delimit filename' => [ + "'End 'before'", + "End ", + ], + 'Double Quotes delimit filename' => [ + '"End "before"', + 'End ', + ], + 'Empty String' => [ + '', + '', + ], + 'Name with a Space' => [ + 'OQL Queries', + 'OQL', + ], + // Same problem as in test for not allowed chars: The comma somehow slips through. + 'Name with a Comma' => [ + 'OQL,Queries', + 'OQL', + ], + ]; + } +} diff --git a/test/emailsSample/email_with_and_without_quotes_around_attachment_name.eml b/test/emailsSample/email_with_and_without_quotes_around_attachment_name.eml new file mode 100644 index 0000000..6dd4f06 --- /dev/null +++ b/test/emailsSample/email_with_and_without_quotes_around_attachment_name.eml @@ -0,0 +1,97 @@ +Return-Path: +Received: from www192.your-server.de + by www192.your-server.de with LMTP + id cANPJDAmYmNaVQEAUXDXKw + (envelope-from ); Wed, 02 Nov 2022 09:11:28 +0100 +Envelope-to: martin.raenker@itomig.de +Delivery-date: Wed, 02 Nov 2022 09:11:28 +0100 +Authentication-Results: www192.your-server.de; + iprev=pass (localhost) smtp.remote-ip=127.0.0.1; + spf=pass smtp.mailfrom=itomig.de; + dkim=pass header.d=itomig.de header.s=default2012 header.a=rsa-sha256; + dmarc=skipped +Received: from localhost ([127.0.0.1] helo=www192.your-server.de) + by www192.your-server.de with esmtps (TLS1.3) tls TLS_AES_256_GCM_SHA384 + (Exim 4.94.2) + (envelope-from ) + id 1oq8qO-000NG0-7r + for martin.raenker@itomig.de; Wed, 02 Nov 2022 09:11:28 +0100 +DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=itomig.de; + s=default2012; h=Subject:From:To:MIME-Version:Date:Message-ID:Content-Type: + Sender:Reply-To:Cc:Content-Transfer-Encoding:Content-ID:Content-Description: + Resent-Date:Resent-From:Resent-Sender:Resent-To:Resent-Cc:Resent-Message-ID: + In-Reply-To:References; bh=qfsa/c5xPqZ3d+6wNfzUExTW0ZNkPkALI3o0hp2R85A=; b=r5 + zbg+7XSo/HBfaFAqqdBD0gDiOIa8ujhqmikWoXps5jf9oMJVJ6af5ynLdfZPbxkpWss5t6os7LhO/ + w/ZdvjR8sOpvIX5nkR3DJxnG76Z28p0aFtpBz1OSIXpWe7LmSj8mBwsXBuh6jTLLH2xRGY3gBEIU9 + fkOlRbJQ6lU3qhS9Xa9cgSGECPfhIvZzdM/Rb0vdvvERCmrpvr9TylqFb+3RA05pQJBOvG3i/ih7O + 5a9RytEsd5rpN1IabPoMV6UJFhKaxPY4hMG4qdgzI2GQZAcoK5kNm/IrUxeHQ8KLy5HigJ1I11e9e + kzqzy6hT0ZKrELFcqh6Y8L6EzgtC+aOg==; +Received: from [95.90.147.54] (helo=[192.168.0.103]) + by www192.your-server.de with esmtpsa (TLS1.3) tls TLS_AES_256_GCM_SHA384 + (Exim 4.94.2) + (envelope-from ) + id 1oq8qO-000NFs-3t + for martin.raenker@itomig.de; Wed, 02 Nov 2022 09:11:28 +0100 +Content-Type: multipart/mixed; boundary="------------8PrwbWLJ6Ep94lElB18RfVAR" +Message-ID: <2525d616-c82b-fa1e-05c6-065e9f91f46d@itomig.de> +Date: Wed, 2 Nov 2022 09:11:25 +0100 +MIME-Version: 1.0 +User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 + Thunderbird/102.2.2 +Content-Language: en-US +To: martin.raenker@itomig.de +From: Martin Raenker +Subject: test for attachments with unquoted name +X-Authenticated-Sender: martin.raenker@itomig.de +X-Virus-Scanned: Clear (ClamAV 0.103.7/26707/Tue Nov 1 21:23:26 2022) +X-local-sign: yes +X-DKIM-Status: pass [(itomig.de) - 127.0.0.1] +Delivered-To: itomig-martin.raenker@itomig.de + +This is a multi-part message in MIME format. +--------------8PrwbWLJ6Ep94lElB18RfVAR +Content-Type: text/plain; charset=UTF-8; format=flowed +Content-Transfer-Encoding: 7bit + +testbody +--------------8PrwbWLJ6Ep94lElB18RfVAR +Content-Type: application/octet-stream; charset=UTF-8; name="example_attachment_mail.csv" +Content-Disposition: attachment; +Content-Transfer-Encoding: base64 + +InRlc3QiLCJ0b3N0Igo= + +--------------8PrwbWLJ6Ep94lElB18RfVAR +Content-Type: application/octet-stream; charset=UTF-8; name='example_attachment_mail.csv' +Content-Disposition: attachment; +Content-Transfer-Encoding: base64 + +InRlc3QiLCJ0b3N0Igo= + +--------------8PrwbWLJ6Ep94lElB18RfVAR +Content-Type: application/octet-stream; charset=UTF-8; name=example_attachment_mail.csv +Content-Disposition: attachment; +Content-Transfer-Encoding: base64 + +InRlc3QiLCJ0b3N0Igo= + +--------------8PrwbWLJ6Ep94lElB18RfVAR +Content-Type: application/octet-stream; charset=UTF-8; +Content-Disposition: attachment; filename="example_attachment_mail.csv" +Content-Transfer-Encoding: base64 + +InRlc3QiLCJ0b3N0Igo= + +--------------8PrwbWLJ6Ep94lElB18RfVAR +Content-Type: application/octet-stream; charset=UTF-8; +Content-Disposition: attachment; filename='example_attachment_mail.csv' +Content-Transfer-Encoding: base64 + +InRlc3QiLCJ0b3N0Igo= + +--------------8PrwbWLJ6Ep94lElB18RfVAR +Content-Type: application/octet-stream; charset=UTF-8; +Content-Disposition: attachment; filename=example_attachment_mail.csv +Content-Transfer-Encoding: base64 + +InRlc3QiLCJ0b3N0Igo=