From fdf4380ff570deb2d28f697ee2a15c325d20b77f Mon Sep 17 00:00:00 2001 From: Frederik Bosch Date: Tue, 29 Sep 2020 12:19:00 +0200 Subject: [PATCH] Quote encoding (#85) * improve encoding of e-mail creation, extraction and quotation --- .gitattributes | 3 +- src/AlternativeText.php | 58 +++++++++++++- src/MessageBodyCollection.php | 78 +++++++++++++++---- src/Mime/PlainTextPart.php | 5 +- src/MimeMessageFactory.php | 17 ++-- .../Integration/MessageBodyCollectionTest.php | 2 +- .../AlternativeText/different-encoding.html | 10 +++ .../AlternativeText/different-encoding.txt | 1 + test/Stub/AlternativeText/encoding.html | 9 +++ test/Stub/AlternativeText/encoding.txt | 1 + .../attached-html-and-text.eml | 19 +++-- .../forward-html-and-text.eml | 5 +- .../full-formatted-message.eml | 7 +- .../MessageBodyCollection/html-and-text.eml | 7 +- test/Stub/MessageBodyCollection/html-only.eml | 2 +- .../quoted-html-and-text.eml | 5 +- .../reply-all-html-and-text.eml | 5 +- .../reply-html-and-text.eml | 5 +- .../reply-to-all-html-and-text.eml | 5 +- .../reply-to-html-and-text.eml | 5 +- test/Stub/MessageBodyCollection/text-only.eml | 2 +- test/Stub/Quote/base64-quoted.eml | 5 +- .../Quote/correct-encoding-quoted.crlf.txt | 4 + test/Stub/Quote/correct-encoding-quoted.html | 1 + test/Stub/Quote/correct-encoding.eml | 22 ++++++ .../Quote/double-quoted-html-and-text.eml | 5 +- test/Stub/Quote/html-and-text-quoted.html | 2 +- test/Stub/Quote/no-body-tag-quoted.html | 2 +- .../Quote/non-utf8-encoding-quoted.crlf.html | 3 + .../Quote/non-utf8-encoding-quoted.crlf.txt | 4 + test/Stub/Quote/non-utf8-encoding.eml | 31 ++++++++ test/Unit/AlternativeTextTest.php | 4 +- test/Unit/MessageBodyCollectionTest.php | 6 +- test/Unit/Quotation/FixedQuotationTest.php | 52 +++++++++++++ 34 files changed, 327 insertions(+), 65 deletions(-) create mode 100644 test/Stub/AlternativeText/different-encoding.html create mode 100644 test/Stub/AlternativeText/different-encoding.txt create mode 100644 test/Stub/AlternativeText/encoding.html create mode 100644 test/Stub/AlternativeText/encoding.txt create mode 100644 test/Stub/Quote/correct-encoding-quoted.crlf.txt create mode 100644 test/Stub/Quote/correct-encoding-quoted.html create mode 100644 test/Stub/Quote/correct-encoding.eml create mode 100644 test/Stub/Quote/non-utf8-encoding-quoted.crlf.html create mode 100644 test/Stub/Quote/non-utf8-encoding-quoted.crlf.txt create mode 100644 test/Stub/Quote/non-utf8-encoding.eml diff --git a/.gitattributes b/.gitattributes index d7d295df..df452fca 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,4 @@ # Declare files that will always have CRLF line endings on checkout. *.eml text eol=crlf -*.crlf.txt text eol=crlf \ No newline at end of file +*.crlf.txt text eol=crlf +*.crlf.html text eol=crlf diff --git a/src/AlternativeText.php b/src/AlternativeText.php index 754525ce..772f33ea 100644 --- a/src/AlternativeText.php +++ b/src/AlternativeText.php @@ -5,6 +5,8 @@ final class AlternativeText { + private const DEFAULT_CHARSET = ''; + /** * @var string */ @@ -58,7 +60,33 @@ private function normalizeSpace(string $string): string } /** - * @param string $html AlternativeText + * @param string $text + * @param string $charset + * @return AlternativeText + */ + public static function fromEncodedText(string $text, string $charset): AlternativeText + { + if ($charset === '') { + return new self($text); + } + + $charset = \strtoupper($charset); + if ($charset === 'UTF-8' || $charset === 'UTF8') { + return new self($text); + } + + $converted = \iconv($charset, 'UTF-8', $text); + if ($converted === false) { + throw new \InvalidArgumentException( + 'The encoded text cannot be converted to UTF-8. Is the charset ' . $charset . ' correct?' + ); + } + + return new self($converted); + } + + /** + * @param string $html * @return AlternativeText */ public static function fromHtml(string $html): AlternativeText @@ -67,6 +95,7 @@ public static function fromHtml(string $html): AlternativeText return new self($html); } + $html = self::ensureHtmlCharset($html); $html = \preg_replace('/\h\h+/', ' ', (string)$html); $html = \preg_replace('/\v/', '', (string)$html); $text = new self((string)$html); @@ -374,4 +403,31 @@ private function wrap(string $unwrappedText, int $width = 75): string return \implode('', $result); } + + /** + * @param string $html + * @return string + */ + private static function ensureHtmlCharset(string $html): string + { + if ($html === '') { + return ''; + } + + if (\strpos($html, 'content="text/html') !== false || \strpos($html, 'charset="') !== false) { + return $html; + } + + $headCloseStart = \strpos($html, ''); + if ($headCloseStart !== false) { + return \substr_replace($html, self::DEFAULT_CHARSET, $headCloseStart, 0); + } + + $bodyOpenStart = \strpos($html, '' . self::DEFAULT_CHARSET . '', $bodyOpenStart, 0); + } + + return '' . self::DEFAULT_CHARSET . '' . $html . ''; + } } diff --git a/src/MessageBodyCollection.php b/src/MessageBodyCollection.php index 3f512e01..b39a252a 100644 --- a/src/MessageBodyCollection.php +++ b/src/MessageBodyCollection.php @@ -4,7 +4,6 @@ namespace Genkgo\Mail; use Genkgo\Mail\Header\Cc; -use Genkgo\Mail\Header\ContentTransferEncoding; use Genkgo\Mail\Header\ContentType; use Genkgo\Mail\Header\GenericHeader; use Genkgo\Mail\Header\HeaderName; @@ -25,6 +24,8 @@ final class MessageBodyCollection { + private const DEFAULT_CHARSET = ''; + /** * @var string */ @@ -50,8 +51,8 @@ final class MessageBodyCollection */ public function __construct(string $html = '') { - $this->html = $html; - $this->text = AlternativeText::fromHtml($html); + $this->html = self::ensureHtmlCharset($html); + $this->text = AlternativeText::fromHtml($this->html); } /** @@ -61,8 +62,8 @@ public function __construct(string $html = '') public function withHtml(string $html): self { $clone = clone $this; - $clone->html = $html; - $clone->text = AlternativeText::fromHtml($html); + $clone->html = self::ensureHtmlCharset($html); + $clone->text = AlternativeText::fromHtml($clone->html); return $clone; } @@ -317,7 +318,7 @@ private function createMessageHumanReadable(): PartInterface private function createMessageText(): PartInterface { if ($this->text->isEmpty() && $this->html === '') { - return new PlainTextPart(''); + return new PlainTextPart('', 'us-ascii'); } if ($this->text->isEmpty()) { @@ -325,14 +326,14 @@ private function createMessageText(): PartInterface } if ($this->html === '') { - return new PlainTextPart((string)$this->text); + return new PlainTextPart((string)$this->text, 'UTF-8'); } return (new MultiPart( Boundary::newRandom(), new ContentType('multipart/alternative') )) - ->withPart(new PlainTextPart((string)$this->text)) + ->withPart(new PlainTextPart((string)$this->text, 'UTF-8')) ->withPart(new HtmlPart($this->html)); } @@ -349,13 +350,23 @@ public static function extract(MessageInterface $message): MessageBodyCollection } catch (\InvalidArgumentException $e) { foreach ($message->getHeader('Content-Type') as $header) { $contentType = $header->getValue()->getRaw(); + try { + $charset = $header->getValue()->getParameter('charset')->getValue(); + } catch (\UnexpectedValueException $e) { + $charset = ''; + } + if ($contentType === 'text/html') { - $collection->html = \rtrim((string)self::decodeMessageBody($message)); + $collection->html = self::ensureHtmlCharset( + \rtrim((string)self::decodeMessageBody($message)), + $charset + ); } if ($contentType === 'text/plain') { - $collection->text = new AlternativeText( - \rtrim((string)self::decodeMessageBody($message)) + $collection->text = AlternativeText::fromEncodedText( + \rtrim((string)self::decodeMessageBody($message)), + $charset ); } } @@ -370,16 +381,27 @@ public static function extract(MessageInterface $message): MessageBodyCollection private function extractFromMimePart(MultiPartInterface $parts): void { foreach ($parts->getParts() as $part) { - $contentType = $part->getHeader('Content-Type')->getValue()->getRaw(); + $header = $part->getHeader('Content-Type'); + $contentType = $header->getValue()->getRaw(); + + try { + $charset = $header->getValue()->getParameter('charset')->getValue(); + } catch (\UnexpectedValueException $e) { + $charset = ''; + } + $hasDisposition = $part->hasHeader('Content-Disposition'); if (!$hasDisposition && $contentType === 'text/html') { - $this->html = (string)new MimeBodyDecodedStream($part); + $this->html = self::ensureHtmlCharset( + (string)new MimeBodyDecodedStream($part), + $charset + ); continue; } if (!$hasDisposition && $contentType === 'text/plain') { - $this->text = new AlternativeText((string)new MimeBodyDecodedStream($part)); + $this->text = AlternativeText::fromEncodedText((string)new MimeBodyDecodedStream($part), $charset); continue; } @@ -482,4 +504,32 @@ private static function decodeMessageBody(MessageInterface $message): StreamInte return $message->getBody(); } + + /** + * @param string $html + * @param string $charset + * @return string + */ + private static function ensureHtmlCharset(string $html, string $charset = 'UTF-8'): string + { + if ($html === '') { + return ''; + } + + if (\strpos($html, 'content="text/html') !== false || \strpos($html, 'charset="') !== false) { + return $html; + } + + $headCloseStart = \strpos($html, ''); + if ($headCloseStart !== false) { + return \substr_replace($html, \sprintf(self::DEFAULT_CHARSET, $charset), $headCloseStart, 0); + } + + $bodyOpenStart = \strpos($html, '' . \sprintf(self::DEFAULT_CHARSET, $charset) . '', $bodyOpenStart, 0); + } + + return '' . \sprintf(self::DEFAULT_CHARSET, $charset) . '' . $html . ''; + } } diff --git a/src/Mime/PlainTextPart.php b/src/Mime/PlainTextPart.php index 7916a2fd..828acf96 100644 --- a/src/Mime/PlainTextPart.php +++ b/src/Mime/PlainTextPart.php @@ -18,15 +18,16 @@ final class PlainTextPart implements PartInterface /** * @param string $text + * @param string $charset */ - public function __construct(string $text) + public function __construct(string $text, string $charset = '') { $stream = new OptimalTransferEncodedTextStream($text); $encoding = $stream->getMetadata(['transfer-encoding'])['transfer-encoding']; $this->decoratedPart = (new GenericPart()) ->withBody($stream) - ->withHeader(new ContentType('text/plain')) + ->withHeader(new ContentType('text/plain', $charset)) ->withHeader(new ContentTransferEncoding($encoding)); } diff --git a/src/MimeMessageFactory.php b/src/MimeMessageFactory.php index 6c75913f..3dcda079 100644 --- a/src/MimeMessageFactory.php +++ b/src/MimeMessageFactory.php @@ -132,15 +132,16 @@ private function pickOptimalCharset(PartInterface $part): PartInterface return $part; } } catch (\UnexpectedValueException $e) { + return $part->withHeader( + new ParsedHeader( + new HeaderName('Content-Type'), + $contentTypeHeader->withParameter( + new HeaderValueParameter('charset', 'us-ascii') + ) + ) + ); } - return $part->withHeader( - new ParsedHeader( - new HeaderName('Content-Type'), - $contentTypeHeader->withParameter( - new HeaderValueParameter('charset', 'us-ascii') - ) - ) - ); + return $part; } } diff --git a/test/Integration/MessageBodyCollectionTest.php b/test/Integration/MessageBodyCollectionTest.php index e6d82604..82a06336 100644 --- a/test/Integration/MessageBodyCollectionTest.php +++ b/test/Integration/MessageBodyCollectionTest.php @@ -19,7 +19,7 @@ final class MessageBodyCollectionTest extends AbstractTestCase public function it_can_parse_a_formatted_message_string_into_a_generic_message(): void { $message = (new MessageBodyCollection()) - ->withHtml('

Hello World

') + ->withHtml('

€ 0? Hello World?

') ->withAttachment( ResourceAttachment::fromString( 'Attachment text', diff --git a/test/Stub/AlternativeText/different-encoding.html b/test/Stub/AlternativeText/different-encoding.html new file mode 100644 index 00000000..e3db52cd --- /dev/null +++ b/test/Stub/AlternativeText/different-encoding.html @@ -0,0 +1,10 @@ + + + + + Title + + +

T[oXgʂł́A

+ + diff --git a/test/Stub/AlternativeText/different-encoding.txt b/test/Stub/AlternativeText/different-encoding.txt new file mode 100644 index 00000000..272cf468 --- /dev/null +++ b/test/Stub/AlternativeText/different-encoding.txt @@ -0,0 +1 @@ +サーバリスト画面では、 diff --git a/test/Stub/AlternativeText/encoding.html b/test/Stub/AlternativeText/encoding.html new file mode 100644 index 00000000..23238cf3 --- /dev/null +++ b/test/Stub/AlternativeText/encoding.html @@ -0,0 +1,9 @@ + + + + Title + + +

zażółć gęślą jaźń

+ + diff --git a/test/Stub/AlternativeText/encoding.txt b/test/Stub/AlternativeText/encoding.txt new file mode 100644 index 00000000..8d9f6626 --- /dev/null +++ b/test/Stub/AlternativeText/encoding.txt @@ -0,0 +1 @@ +zażółć gęślą jaźń \ No newline at end of file diff --git a/test/Stub/MessageBodyCollection/attached-html-and-text.eml b/test/Stub/MessageBodyCollection/attached-html-and-text.eml index 12b5f21b..d7958756 100644 --- a/test/Stub/MessageBodyCollection/attached-html-and-text.eml +++ b/test/Stub/MessageBodyCollection/attached-html-and-text.eml @@ -8,16 +8,17 @@ Content-Type: multipart/alternative; boundary=GenkgoMailV2Part1088cfc2cd8d --GenkgoMailV2Part1088cfc2cd8d -Content-Type: text/plain; charset=us-ascii +Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit Reply text --GenkgoMailV2Part1088cfc2cd8d -Content-Type: text/html; charset=us-ascii +Content-Type: text/html; charset=UTF-8 Content-Transfer-Encoding: 7bit -

Reply text

+

Reply text

--GenkgoMailV2Part1088cfc2cd8d-- --GenkgoMailV2Partd00da1fe277a @@ -30,9 +31,11 @@ PHlvdUBleGFtcGxlLmNvbT4NCkNjOiBvdGhlciA8b3RoZXJAZXhhbXBsZS5jb20+DQpNSU1FLVZl cnNpb246IDEuMA0KQ29udGVudC1UeXBlOiBtdWx0aXBhcnQvYWx0ZXJuYXRpdmU7IGJvdW5kYXJ5 PUdlbmtnb01haWxWMlBhcnQxODdlMjhiZjNjYjQNCg0KVGhpcyBpcyBhIG11bHRpcGFydCBtZXNz YWdlIGluIE1JTUUgZm9ybWF0Lg0KDQotLUdlbmtnb01haWxWMlBhcnQxODdlMjhiZjNjYjQNCkNv -bnRlbnQtVHlwZTogdGV4dC9wbGFpbjsgY2hhcnNldD11cy1hc2NpaQ0KQ29udGVudC1UcmFuc2Zl -ci1FbmNvZGluZzogN2JpdA0KDQpIZWxsbyBXb3JsZA0KDQotLUdlbmtnb01haWxWMlBhcnQxODdl -MjhiZjNjYjQNCkNvbnRlbnQtVHlwZTogdGV4dC9odG1sOyBjaGFyc2V0PXVzLWFzY2lpDQpDb250 -ZW50LVRyYW5zZmVyLUVuY29kaW5nOiA3Yml0DQoNCjxodG1sPjxib2R5PjxwPkhlbGxvIFdvcmxk -PC9wPjwvYm9keT48L2h0bWw+DQotLUdlbmtnb01haWxWMlBhcnQxODdlMjhiZjNjYjQtLQ0K +bnRlbnQtVHlwZTogdGV4dC9wbGFpbjsgY2hhcnNldD1VVEYtOA0KQ29udGVudC1UcmFuc2Zlci1F +bmNvZGluZzogN2JpdA0KDQpIZWxsbyBXb3JsZA0KDQotLUdlbmtnb01haWxWMlBhcnQxODdlMjhi +ZjNjYjQNCkNvbnRlbnQtVHlwZTogdGV4dC9odG1sOyBjaGFyc2V0PVVURi04DQpDb250ZW50LVRy +YW5zZmVyLUVuY29kaW5nOiA3Yml0DQoNCjxodG1sPjxoZWFkPjxtZXRhIGh0dHAtZXF1aXY9IkNv +bnRlbnQtVHlwZSIgY29udGVudD0idGV4dC9odG1sOw0KY2hhcnNldD1VVEYtOCIvPjwvaGVhZD48 +Ym9keT48cD5IZWxsbyBXb3JsZDwvcD48L2JvZHk+PC9odG1sPg0KLS1HZW5rZ29NYWlsVjJQYXJ0 +MTg3ZTI4YmYzY2I0LS0NCg== --GenkgoMailV2Partd00da1fe277a-- diff --git a/test/Stub/MessageBodyCollection/forward-html-and-text.eml b/test/Stub/MessageBodyCollection/forward-html-and-text.eml index 73db8fb8..82dcea28 100644 --- a/test/Stub/MessageBodyCollection/forward-html-and-text.eml +++ b/test/Stub/MessageBodyCollection/forward-html-and-text.eml @@ -14,9 +14,10 @@ me (unknown): >Hello World --boundary -Content-Type: text/html; charset=us-ascii +Content-Type: text/html; charset=UTF-8 Content-Transfer-Encoding: 7bit -

Reply text

me (unknown):

Reply text

me (unknown):

Hello World

--boundary-- diff --git a/test/Stub/MessageBodyCollection/full-formatted-message.eml b/test/Stub/MessageBodyCollection/full-formatted-message.eml index eee4484e..40f333c9 100644 --- a/test/Stub/MessageBodyCollection/full-formatted-message.eml +++ b/test/Stub/MessageBodyCollection/full-formatted-message.eml @@ -15,16 +15,17 @@ Content-Type: multipart/alternative; boundary=GenkgoMailV2Part8c6b95fae17e --GenkgoMailV2Part8c6b95fae17e -Content-Type: text/plain; charset=us-ascii +Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit Hello World --GenkgoMailV2Part8c6b95fae17e -Content-Type: text/html; charset=us-ascii +Content-Type: text/html; charset=UTF-8 Content-Transfer-Encoding: 7bit -

Hello World

+

Hello World

--GenkgoMailV2Part8c6b95fae17e-- --GenkgoMailV2Partecbe5baa2673 diff --git a/test/Stub/MessageBodyCollection/html-and-text.eml b/test/Stub/MessageBodyCollection/html-and-text.eml index df490ff9..be9b95cb 100644 --- a/test/Stub/MessageBodyCollection/html-and-text.eml +++ b/test/Stub/MessageBodyCollection/html-and-text.eml @@ -8,14 +8,15 @@ Content-Type: multipart/alternative; boundary=GenkgoMailV2Part187e28bf3cb4 This is a multipart message in MIME format. --GenkgoMailV2Part187e28bf3cb4 -Content-Type: text/plain; charset=us-ascii +Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit Hello World --GenkgoMailV2Part187e28bf3cb4 -Content-Type: text/html; charset=us-ascii +Content-Type: text/html; charset=UTF-8 Content-Transfer-Encoding: 7bit -

Hello World

+

Hello World

--GenkgoMailV2Part187e28bf3cb4-- diff --git a/test/Stub/MessageBodyCollection/html-only.eml b/test/Stub/MessageBodyCollection/html-only.eml index 070450a0..c6ad6d32 100644 --- a/test/Stub/MessageBodyCollection/html-only.eml +++ b/test/Stub/MessageBodyCollection/html-only.eml @@ -2,7 +2,7 @@ Subject: Hello World To: me Cc: other MIME-Version: 1.0 -Content-Type: text/html; charset=us-ascii +Content-Type: text/html; charset=UTF-8 Content-Transfer-Encoding: 7bit

Hello World

diff --git a/test/Stub/MessageBodyCollection/quoted-html-and-text.eml b/test/Stub/MessageBodyCollection/quoted-html-and-text.eml index 3931d608..f79252f4 100644 --- a/test/Stub/MessageBodyCollection/quoted-html-and-text.eml +++ b/test/Stub/MessageBodyCollection/quoted-html-and-text.eml @@ -13,9 +13,10 @@ me (unknown): >Hello World --boundary -Content-Type: text/html; charset=us-ascii +Content-Type: text/html; charset=UTF-8 Content-Transfer-Encoding: 7bit -

Reply text

me (unknown):

Reply text

me (unknown):

Hello World

--boundary-- diff --git a/test/Stub/MessageBodyCollection/reply-all-html-and-text.eml b/test/Stub/MessageBodyCollection/reply-all-html-and-text.eml index c41d33c1..2edf4434 100644 --- a/test/Stub/MessageBodyCollection/reply-all-html-and-text.eml +++ b/test/Stub/MessageBodyCollection/reply-all-html-and-text.eml @@ -16,9 +16,10 @@ me (unknown): >Hello World --boundary -Content-Type: text/html; charset=us-ascii +Content-Type: text/html; charset=UTF-8 Content-Transfer-Encoding: 7bit -

Reply text

me (unknown):

Reply text

me (unknown):

Hello World

--boundary-- diff --git a/test/Stub/MessageBodyCollection/reply-html-and-text.eml b/test/Stub/MessageBodyCollection/reply-html-and-text.eml index c5845e28..23a2c3ec 100644 --- a/test/Stub/MessageBodyCollection/reply-html-and-text.eml +++ b/test/Stub/MessageBodyCollection/reply-html-and-text.eml @@ -15,9 +15,10 @@ me (unknown): >Hello World --boundary -Content-Type: text/html; charset=us-ascii +Content-Type: text/html; charset=UTF-8 Content-Transfer-Encoding: 7bit -

Reply text

me (unknown):

Reply text

me (unknown):

Hello World

--boundary-- diff --git a/test/Stub/MessageBodyCollection/reply-to-all-html-and-text.eml b/test/Stub/MessageBodyCollection/reply-to-all-html-and-text.eml index 9d20c855..c3fda013 100644 --- a/test/Stub/MessageBodyCollection/reply-to-all-html-and-text.eml +++ b/test/Stub/MessageBodyCollection/reply-to-all-html-and-text.eml @@ -15,9 +15,10 @@ me (unknown): >Hello World --boundary -Content-Type: text/html; charset=us-ascii +Content-Type: text/html; charset=UTF-8 Content-Transfer-Encoding: 7bit -

Reply text

me (unknown):

Reply text

me (unknown):

Hello World

--boundary-- diff --git a/test/Stub/MessageBodyCollection/reply-to-html-and-text.eml b/test/Stub/MessageBodyCollection/reply-to-html-and-text.eml index 9d20c855..c3fda013 100644 --- a/test/Stub/MessageBodyCollection/reply-to-html-and-text.eml +++ b/test/Stub/MessageBodyCollection/reply-to-html-and-text.eml @@ -15,9 +15,10 @@ me (unknown): >Hello World --boundary -Content-Type: text/html; charset=us-ascii +Content-Type: text/html; charset=UTF-8 Content-Transfer-Encoding: 7bit -

Reply text

me (unknown):

Reply text

me (unknown):

Hello World

--boundary-- diff --git a/test/Stub/MessageBodyCollection/text-only.eml b/test/Stub/MessageBodyCollection/text-only.eml index f5d38d4b..fc2a9b10 100644 --- a/test/Stub/MessageBodyCollection/text-only.eml +++ b/test/Stub/MessageBodyCollection/text-only.eml @@ -2,7 +2,7 @@ Subject: Hello World To: me Cc: other MIME-Version: 1.0 -Content-Type: text/plain; charset=us-ascii +Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit Hello World diff --git a/test/Stub/Quote/base64-quoted.eml b/test/Stub/Quote/base64-quoted.eml index 18912054..27588a47 100644 --- a/test/Stub/Quote/base64-quoted.eml +++ b/test/Stub/Quote/base64-quoted.eml @@ -23,8 +23,9 @@ me (Jan 1, 2017, 12:00:00 AM): Content-Type: text/html; charset=UTF-8 Content-Transfer-Encoding: quoted-printable -

Hello Universe

me (Jan 1, 2017, 12:00:00 AM):

Lorem Ipsum
Lorem +

Hello Universe

me (Jan 1, 2017, 12:00:00 AM):= +

Lorem Ipsum
Lorem Ipsum

me (Jan 1, 2015, 12:00:00 AM):

Hello Universe

me (Jan 1, 2017, 12:00:00 AM):

Hel= lo diff --git a/test/Stub/Quote/correct-encoding-quoted.crlf.txt b/test/Stub/Quote/correct-encoding-quoted.crlf.txt new file mode 100644 index 00000000..08b0895b --- /dev/null +++ b/test/Stub/Quote/correct-encoding-quoted.crlf.txt @@ -0,0 +1,4 @@ +Hello Universe € + +me (Jan 1, 2017, 12:00:00 AM): +>Hello World diff --git a/test/Stub/Quote/correct-encoding-quoted.html b/test/Stub/Quote/correct-encoding-quoted.html new file mode 100644 index 00000000..c76d4eb1 --- /dev/null +++ b/test/Stub/Quote/correct-encoding-quoted.html @@ -0,0 +1 @@ +Universe TitleHello Universe €

me (Jan 1, 2017, 12:00:00 AM):

Hello World

diff --git a/test/Stub/Quote/correct-encoding.eml b/test/Stub/Quote/correct-encoding.eml new file mode 100644 index 00000000..92741da1 --- /dev/null +++ b/test/Stub/Quote/correct-encoding.eml @@ -0,0 +1,22 @@ +Date: Sun, 01 Jan 2017 00:00:00 +0000 +Subject: Hello World +From: me +To: you +Cc: other +MIME-Version: 1.0 +Content-Type: multipart/alternative; boundary=GenkgoMailV2Part187e28bf3cb4 + +This is a multipart message in MIME format. + +--GenkgoMailV2Part187e28bf3cb4 +Content-Type: text/plain; charset=us-ascii +Content-Transfer-Encoding: 7bit + +Hello World + +--GenkgoMailV2Part187e28bf3cb4 +Content-Type: text/html; charset=us-ascii +Content-Transfer-Encoding: 7bit + +

Hello World

+--GenkgoMailV2Part187e28bf3cb4-- diff --git a/test/Stub/Quote/double-quoted-html-and-text.eml b/test/Stub/Quote/double-quoted-html-and-text.eml index 1b160008..606b477f 100644 --- a/test/Stub/Quote/double-quoted-html-and-text.eml +++ b/test/Stub/Quote/double-quoted-html-and-text.eml @@ -17,10 +17,11 @@ me (Jan 1, 2015, 12:00:00 AM): >>Hello World --GenkgoMailV2Part09d54671f9f5 -Content-Type: text/html; charset=us-ascii +Content-Type: text/html; charset=UTF-8 Content-Transfer-Encoding: 7bit -Quoted Quoted

Lorem Ipsum
Lorem +Quoted Quoted

Lorem Ipsum
Lorem Ipsum

me (Jan 1, 2015, 12:00:00 AM):

Hello Universe

me (Jan 1, 2017, 12:00:00 AM):

Hello World

diff --git a/test/Stub/Quote/html-and-text-quoted.html b/test/Stub/Quote/html-and-text-quoted.html index 5bab8a66..835b60ca 100644 --- a/test/Stub/Quote/html-and-text-quoted.html +++ b/test/Stub/Quote/html-and-text-quoted.html @@ -1 +1 @@ -Universe TitleHello Universe

me (Jan 1, 2017, 12:00:00 AM):

Hello World

+Universe TitleHello Universe

me (Jan 1, 2017, 12:00:00 AM):

Hello World

diff --git a/test/Stub/Quote/no-body-tag-quoted.html b/test/Stub/Quote/no-body-tag-quoted.html index cf84661f..aabb043f 100644 --- a/test/Stub/Quote/no-body-tag-quoted.html +++ b/test/Stub/Quote/no-body-tag-quoted.html @@ -1 +1 @@ -

Hello Universe

me (Jan 1, 2017, 12:00:00 AM):

Hello World

+

Hello Universe

me (Jan 1, 2017, 12:00:00 AM):

Hello World

diff --git a/test/Stub/Quote/non-utf8-encoding-quoted.crlf.html b/test/Stub/Quote/non-utf8-encoding-quoted.crlf.html new file mode 100644 index 00000000..79b361d8 --- /dev/null +++ b/test/Stub/Quote/non-utf8-encoding-quoted.crlf.html @@ -0,0 +1,3 @@ +Universe TitleHello Universe €

me (Jan 1, 2017, 12:00:00 AM):

+

サーバリスト画面では、

+
diff --git a/test/Stub/Quote/non-utf8-encoding-quoted.crlf.txt b/test/Stub/Quote/non-utf8-encoding-quoted.crlf.txt new file mode 100644 index 00000000..04bbfd8c --- /dev/null +++ b/test/Stub/Quote/non-utf8-encoding-quoted.crlf.txt @@ -0,0 +1,4 @@ +Hello Universe € + +me (Jan 1, 2017, 12:00:00 AM): +>サーバリスト画面では、 diff --git a/test/Stub/Quote/non-utf8-encoding.eml b/test/Stub/Quote/non-utf8-encoding.eml new file mode 100644 index 00000000..70192dd4 --- /dev/null +++ b/test/Stub/Quote/non-utf8-encoding.eml @@ -0,0 +1,31 @@ +Date: Sun, 01 Jan 2017 00:00:00 +0000 +Subject: Hello World +From: me +To: you +Cc: other +MIME-Version: 1.0 +Content-Type: multipart/alternative; boundary=GenkgoMailV2Part187e28bf3cb4 + +This is a multipart message in MIME format. + +--GenkgoMailV2Part187e28bf3cb4 +Content-Type: text/plain; charset=Shift-JIS +Content-Transfer-Encoding: 7bit + +T[oXgʂł́A + +--GenkgoMailV2Part187e28bf3cb4 +Content-Type: text/html; charset=Shift-JIS +Content-Transfer-Encoding: 7bit + + + + + + Title + + +

T[oXgʂł́A

+ + +--GenkgoMailV2Part187e28bf3cb4-- diff --git a/test/Unit/AlternativeTextTest.php b/test/Unit/AlternativeTextTest.php index cc5049ca..c4616e18 100644 --- a/test/Unit/AlternativeTextTest.php +++ b/test/Unit/AlternativeTextTest.php @@ -18,7 +18,7 @@ public function it_converts_html_to_plain_text($htmlFile, $txtFile): void $text = \file_get_contents(__DIR__ . '/../Stub/AlternativeText/' . $txtFile); $alternativeText = AlternativeText::fromHtml($html); - $this->assertEquals($text, (string) $alternativeText); + $this->assertEquals(\trim($text), \trim((string) $alternativeText)); } /** @@ -42,6 +42,8 @@ public function provideHtmlFiles(): array ['simple.html', 'simple.crlf.txt'], ['error.html', 'error.txt'], ['bug_59.html', 'bug_59.txt'], + ['encoding.html', 'encoding.txt'], + ['different-encoding.html', 'different-encoding.txt'], ]; } } diff --git a/test/Unit/MessageBodyCollectionTest.php b/test/Unit/MessageBodyCollectionTest.php index c075e81e..1d1dce6b 100644 --- a/test/Unit/MessageBodyCollectionTest.php +++ b/test/Unit/MessageBodyCollectionTest.php @@ -255,7 +255,7 @@ public function it_extracts_body_from_full_formatted_messages(): void ); $body = MessageBodyCollection::extract($message); - $this->assertSame('

Hello World

', $body->getHtml()); + $this->assertSame("

Hello World

", $body->getHtml()); $this->assertSame('Hello World', (string)$body->getText()); $this->assertCount(1, $body->getEmbeddedImages()); $this->assertCount(1, $body->getAttachments()); @@ -271,7 +271,7 @@ public function it_extracts_body_from_html_only_messages(): void ); $body = MessageBodyCollection::extract($message); - $this->assertSame('

Hello World

', $body->getHtml()); + $this->assertSame("

Hello World

", $body->getHtml()); $this->assertSame('', (string)$body->getText()); $this->assertCount(0, $body->getEmbeddedImages()); $this->assertCount(0, $body->getAttachments()); @@ -303,7 +303,7 @@ public function it_extracts_body_from_html_and_text_messages(): void ); $body = MessageBodyCollection::extract($message); - $this->assertSame('

Hello World

', $body->getHtml()); + $this->assertSame("

Hello World

", $body->getHtml()); $this->assertSame('Hello World', (string)$body->getText()); $this->assertCount(0, $body->getEmbeddedImages()); $this->assertCount(0, $body->getAttachments()); diff --git a/test/Unit/Quotation/FixedQuotationTest.php b/test/Unit/Quotation/FixedQuotationTest.php index 6e0b07cf..ecc0321c 100644 --- a/test/Unit/Quotation/FixedQuotationTest.php +++ b/test/Unit/Quotation/FixedQuotationTest.php @@ -159,4 +159,56 @@ public function it_quotes_base64_content(): void $this->replaceBoundaries((string)$quotation->quote($reply, $original)->createMessage()) ); } + + /** + * @test + */ + public function it_quotes_special_characters_correct_encoding(): void + { + $quotation = new FixedQuotation(); + + $original = GenericMessage::fromString( + \file_get_contents(__DIR__ . '/../../Stub/Quote/correct-encoding.eml') + ); + + $reply = new MessageBodyCollection( + 'Universe TitleHello Universe €' + ); + + $this->assertSame( + \trim(\file_get_contents(__DIR__ . '/../../Stub/Quote/correct-encoding-quoted.html')), + $quotation->quote($reply, $original)->getHtml() + ); + + $this->assertSame( + \trim(\file_get_contents(__DIR__ . '/../../Stub/Quote/correct-encoding-quoted.crlf.txt')), + (string)$quotation->quote($reply, $original)->getText() + ); + } + + /** + * @test + */ + public function it_quotes_non_utf8_encoding(): void + { + $quotation = new FixedQuotation(); + + $original = GenericMessage::fromString( + \file_get_contents(__DIR__ . '/../../Stub/Quote/non-utf8-encoding.eml') + ); + + $reply = new MessageBodyCollection( + 'Universe TitleHello Universe €' + ); + + $this->assertSame( + \trim(\file_get_contents(__DIR__ . '/../../Stub/Quote/non-utf8-encoding-quoted.crlf.html')), + $quotation->quote($reply, $original)->getHtml() + ); + + $this->assertSame( + \trim(\file_get_contents(__DIR__ . '/../../Stub/Quote/non-utf8-encoding-quoted.crlf.txt')), + (string)$quotation->quote($reply, $original)->getText() + ); + } }