Skip to content

Commit

Permalink
Quote encoding (#85)
Browse files Browse the repository at this point in the history
* improve encoding of e-mail creation, extraction and quotation
  • Loading branch information
frederikbosch authored Sep 29, 2020
1 parent 31b88f9 commit fdf4380
Show file tree
Hide file tree
Showing 34 changed files with 327 additions and 65 deletions.
3 changes: 2 additions & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Declare files that will always have CRLF line endings on checkout.
*.eml text eol=crlf
*.crlf.txt text eol=crlf
*.crlf.txt text eol=crlf
*.crlf.html text eol=crlf
58 changes: 57 additions & 1 deletion src/AlternativeText.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

final class AlternativeText
{
private const DEFAULT_CHARSET = '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>';

/**
* @var string
*/
Expand Down Expand Up @@ -58,7 +60,33 @@ private function normalizeSpace(string $string): string
}

/**
 * Create an alternative text from plain text that is encoded in the given charset.
 *
 * An empty charset and UTF-8 (spelled `UTF-8` or `UTF8`, in any letter case) are
 * passed through untouched; text in any other charset is converted to UTF-8 first.
 *
 * @param string $text
 * @param string $charset name of the charset the text is encoded in, may be empty
 * @return AlternativeText
 * @throws \InvalidArgumentException when the text cannot be converted to UTF-8
 */
public static function fromEncodedText(string $text, string $charset): AlternativeText
{
    $charset = \strtoupper($charset);
    if ($charset === '' || $charset === 'UTF-8' || $charset === 'UTF8') {
        return new self($text);
    }

    // iconv() reports an unknown charset or an illegal byte sequence through a PHP
    // notice/warning in addition to returning false. Swallow that diagnostic for the
    // duration of the call, so a failure always surfaces as the exception below, also
    // when the application converts PHP errors into exceptions.
    \set_error_handler(
        static function (): bool {
            return true;
        }
    );

    try {
        $converted = \iconv($charset, 'UTF-8', $text);
    } finally {
        \restore_error_handler();
    }

    if ($converted === false) {
        throw new \InvalidArgumentException(
            'The encoded text cannot be converted to UTF-8. Is the charset ' . $charset . ' correct?'
        );
    }

    return new self($converted);
}

/**
* @param string $html
* @return AlternativeText
*/
public static function fromHtml(string $html): AlternativeText
Expand All @@ -67,6 +95,7 @@ public static function fromHtml(string $html): AlternativeText
return new self($html);
}

$html = self::ensureHtmlCharset($html);
$html = \preg_replace('/\h\h+/', ' ', (string)$html);
$html = \preg_replace('/\v/', '', (string)$html);
$text = new self((string)$html);
Expand Down Expand Up @@ -374,4 +403,31 @@ private function wrap(string $unwrappedText, int $width = 75): string

return \implode('', $result);
}

/**
 * Make sure the HTML carries a charset declaration.
 *
 * HTML that already contains a `content="text/html…` or `charset="…` declaration is
 * returned unchanged. Otherwise the default charset meta tag is inserted just before
 * `</head>`, or in a new `<head>` placed before `<body`, or, when neither tag is
 * present, the HTML is wrapped in a complete document skeleton.
 *
 * @param string $html
 * @return string the HTML including a charset declaration, or an empty string for empty input
 */
private static function ensureHtmlCharset(string $html): string
{
    if ($html === '') {
        return '';
    }

    // tag and attribute names in HTML are case-insensitive, so every lookup below is too
    if (\stripos($html, 'content="text/html') !== false || \stripos($html, 'charset="') !== false) {
        return $html;
    }

    $headCloseStart = \stripos($html, '</head>');
    if ($headCloseStart !== false) {
        // length 0: insert in front of the closing tag without replacing anything
        return \substr_replace($html, self::DEFAULT_CHARSET, $headCloseStart, 0);
    }

    $bodyOpenStart = \stripos($html, '<body');
    if ($bodyOpenStart !== false) {
        return \substr_replace($html, '<head>' . self::DEFAULT_CHARSET . '</head>', $bodyOpenStart, 0);
    }

    return '<html><head>' . self::DEFAULT_CHARSET . '</head><body>' . $html . '</body></html>';
}
}
78 changes: 64 additions & 14 deletions src/MessageBodyCollection.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
namespace Genkgo\Mail;

use Genkgo\Mail\Header\Cc;
use Genkgo\Mail\Header\ContentTransferEncoding;
use Genkgo\Mail\Header\ContentType;
use Genkgo\Mail\Header\GenericHeader;
use Genkgo\Mail\Header\HeaderName;
Expand All @@ -25,6 +24,8 @@

final class MessageBodyCollection
{
private const DEFAULT_CHARSET = '<meta http-equiv="Content-Type" content="text/html; charset=%s"/>';

/**
* @var string
*/
Expand All @@ -50,8 +51,8 @@ final class MessageBodyCollection
*/
public function __construct(string $html = '')
{
$this->html = $html;
$this->text = AlternativeText::fromHtml($html);
$this->html = self::ensureHtmlCharset($html);
$this->text = AlternativeText::fromHtml($this->html);
}

/**
Expand All @@ -61,8 +62,8 @@ public function __construct(string $html = '')
public function withHtml(string $html): self
{
$clone = clone $this;
$clone->html = $html;
$clone->text = AlternativeText::fromHtml($html);
$clone->html = self::ensureHtmlCharset($html);
$clone->text = AlternativeText::fromHtml($clone->html);
return $clone;
}

Expand Down Expand Up @@ -317,22 +318,22 @@ private function createMessageHumanReadable(): PartInterface
private function createMessageText(): PartInterface
{
if ($this->text->isEmpty() && $this->html === '') {
return new PlainTextPart('');
return new PlainTextPart('', 'us-ascii');
}

if ($this->text->isEmpty()) {
return new HtmlPart($this->html);
}

if ($this->html === '') {
return new PlainTextPart((string)$this->text);
return new PlainTextPart((string)$this->text, 'UTF-8');
}

return (new MultiPart(
Boundary::newRandom(),
new ContentType('multipart/alternative')
))
->withPart(new PlainTextPart((string)$this->text))
->withPart(new PlainTextPart((string)$this->text, 'UTF-8'))
->withPart(new HtmlPart($this->html));
}

Expand All @@ -349,13 +350,23 @@ public static function extract(MessageInterface $message): MessageBodyCollection
} catch (\InvalidArgumentException $e) {
foreach ($message->getHeader('Content-Type') as $header) {
$contentType = $header->getValue()->getRaw();
try {
$charset = $header->getValue()->getParameter('charset')->getValue();
} catch (\UnexpectedValueException $e) {
$charset = '';
}

if ($contentType === 'text/html') {
$collection->html = \rtrim((string)self::decodeMessageBody($message));
$collection->html = self::ensureHtmlCharset(
\rtrim((string)self::decodeMessageBody($message)),
$charset
);
}

if ($contentType === 'text/plain') {
$collection->text = new AlternativeText(
\rtrim((string)self::decodeMessageBody($message))
$collection->text = AlternativeText::fromEncodedText(
\rtrim((string)self::decodeMessageBody($message)),
$charset
);
}
}
Expand All @@ -370,16 +381,27 @@ public static function extract(MessageInterface $message): MessageBodyCollection
private function extractFromMimePart(MultiPartInterface $parts): void
{
foreach ($parts->getParts() as $part) {
$contentType = $part->getHeader('Content-Type')->getValue()->getRaw();
$header = $part->getHeader('Content-Type');
$contentType = $header->getValue()->getRaw();

try {
$charset = $header->getValue()->getParameter('charset')->getValue();
} catch (\UnexpectedValueException $e) {
$charset = '';
}

$hasDisposition = $part->hasHeader('Content-Disposition');

if (!$hasDisposition && $contentType === 'text/html') {
$this->html = (string)new MimeBodyDecodedStream($part);
$this->html = self::ensureHtmlCharset(
(string)new MimeBodyDecodedStream($part),
$charset
);
continue;
}

if (!$hasDisposition && $contentType === 'text/plain') {
$this->text = new AlternativeText((string)new MimeBodyDecodedStream($part));
$this->text = AlternativeText::fromEncodedText((string)new MimeBodyDecodedStream($part), $charset);
continue;
}

Expand Down Expand Up @@ -482,4 +504,32 @@ private static function decodeMessageBody(MessageInterface $message): StreamInte

return $message->getBody();
}

/**
 * Make sure the HTML carries a charset declaration.
 *
 * HTML that already contains a `content="text/html…` or `charset="…` declaration is
 * returned unchanged. Otherwise a meta tag declaring `$charset` is inserted just before
 * `</head>`, or in a new `<head>` placed before `<body`, or, when neither tag is
 * present, the HTML is wrapped in a complete document skeleton.
 *
 * @param string $html
 * @param string $charset charset to declare; an empty string is treated as UTF-8
 * @return string the HTML including a charset declaration, or an empty string for empty input
 */
private static function ensureHtmlCharset(string $html, string $charset = 'UTF-8'): string
{
    if ($html === '') {
        return '';
    }

    // tag and attribute names in HTML are case-insensitive, so every lookup below is too
    if (\stripos($html, 'content="text/html') !== false || \stripos($html, 'charset="') !== false) {
        return $html;
    }

    // callers hand over an empty string when the Content-Type header has no charset
    // parameter; declaring `charset=` without a value would be an invalid meta tag
    if ($charset === '') {
        $charset = 'UTF-8';
    }

    // the charset may be taken from a message header, so escape it before it ends up
    // inside an attribute value of the generated markup
    $meta = \sprintf(
        self::DEFAULT_CHARSET,
        \htmlspecialchars($charset, \ENT_QUOTES | \ENT_SUBSTITUTE, 'UTF-8')
    );

    $headCloseStart = \stripos($html, '</head>');
    if ($headCloseStart !== false) {
        // length 0: insert in front of the closing tag without replacing anything
        return \substr_replace($html, $meta, $headCloseStart, 0);
    }

    $bodyOpenStart = \stripos($html, '<body');
    if ($bodyOpenStart !== false) {
        return \substr_replace($html, '<head>' . $meta . '</head>', $bodyOpenStart, 0);
    }

    return '<html><head>' . $meta . '</head><body>' . $html . '</body></html>';
}
}
5 changes: 3 additions & 2 deletions src/Mime/PlainTextPart.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,16 @@ final class PlainTextPart implements PartInterface

/**
* @param string $text
* @param string $charset
*/
public function __construct(string $text)
public function __construct(string $text, string $charset = '')
{
$stream = new OptimalTransferEncodedTextStream($text);
$encoding = $stream->getMetadata(['transfer-encoding'])['transfer-encoding'];

$this->decoratedPart = (new GenericPart())
->withBody($stream)
->withHeader(new ContentType('text/plain'))
->withHeader(new ContentType('text/plain', $charset))
->withHeader(new ContentTransferEncoding($encoding));
}

Expand Down
17 changes: 9 additions & 8 deletions src/MimeMessageFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -132,15 +132,16 @@ private function pickOptimalCharset(PartInterface $part): PartInterface
return $part;
}
} catch (\UnexpectedValueException $e) {
return $part->withHeader(
new ParsedHeader(
new HeaderName('Content-Type'),
$contentTypeHeader->withParameter(
new HeaderValueParameter('charset', 'us-ascii')
)
)
);
}

return $part->withHeader(
new ParsedHeader(
new HeaderName('Content-Type'),
$contentTypeHeader->withParameter(
new HeaderValueParameter('charset', 'us-ascii')
)
)
);
return $part;
}
}
2 changes: 1 addition & 1 deletion test/Integration/MessageBodyCollectionTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ final class MessageBodyCollectionTest extends AbstractTestCase
public function it_can_parse_a_formatted_message_string_into_a_generic_message(): void
{
$message = (new MessageBodyCollection())
->withHtml('<html><body><p>Hello World</p></body></html>')
->withHtml('<html><body><p>€ 0? Hello World?</p></body></html>')
->withAttachment(
ResourceAttachment::fromString(
'Attachment text',
Expand Down
10 changes: 10 additions & 0 deletions test/Stub/AlternativeText/different-encoding.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=shift_jis">
<title>Title</title>
</head>
<body>
<p>サーバリスト画面では、</p>
</body>
</html>
1 change: 1 addition & 0 deletions test/Stub/AlternativeText/different-encoding.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
サーバリスト画面では、
9 changes: 9 additions & 0 deletions test/Stub/AlternativeText/encoding.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<!DOCTYPE html>
<html lang="en">
<head>
<title>Title</title>
</head>
<body>
<p>zażółć gęślą jaźń</p>
</body>
</html>
1 change: 1 addition & 0 deletions test/Stub/AlternativeText/encoding.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
zażółć gęślą jaźń
19 changes: 11 additions & 8 deletions test/Stub/MessageBodyCollection/attached-html-and-text.eml
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,17 @@ Content-Type: multipart/alternative; boundary=GenkgoMailV2Part1088cfc2cd8d
--GenkgoMailV2Part1088cfc2cd8d
Content-Type: text/plain; charset=us-ascii
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 7bit
Reply text
--GenkgoMailV2Part1088cfc2cd8d
Content-Type: text/html; charset=us-ascii
Content-Type: text/html; charset=UTF-8
Content-Transfer-Encoding: 7bit

<html><body><p>Reply text</p></body></html>
<html><head><meta http-equiv="Content-Type" content="text/html;
charset=UTF-8"/></head><body><p>Reply text</p></body></html>
--GenkgoMailV2Part1088cfc2cd8d--

--GenkgoMailV2Partd00da1fe277a
Expand All @@ -30,9 +31,11 @@ PHlvdUBleGFtcGxlLmNvbT4NCkNjOiBvdGhlciA8b3RoZXJAZXhhbXBsZS5jb20+DQpNSU1FLVZl
cnNpb246IDEuMA0KQ29udGVudC1UeXBlOiBtdWx0aXBhcnQvYWx0ZXJuYXRpdmU7IGJvdW5kYXJ5
PUdlbmtnb01haWxWMlBhcnQxODdlMjhiZjNjYjQNCg0KVGhpcyBpcyBhIG11bHRpcGFydCBtZXNz
YWdlIGluIE1JTUUgZm9ybWF0Lg0KDQotLUdlbmtnb01haWxWMlBhcnQxODdlMjhiZjNjYjQNCkNv
bnRlbnQtVHlwZTogdGV4dC9wbGFpbjsgY2hhcnNldD11cy1hc2NpaQ0KQ29udGVudC1UcmFuc2Zl
ci1FbmNvZGluZzogN2JpdA0KDQpIZWxsbyBXb3JsZA0KDQotLUdlbmtnb01haWxWMlBhcnQxODdl
MjhiZjNjYjQNCkNvbnRlbnQtVHlwZTogdGV4dC9odG1sOyBjaGFyc2V0PXVzLWFzY2lpDQpDb250
ZW50LVRyYW5zZmVyLUVuY29kaW5nOiA3Yml0DQoNCjxodG1sPjxib2R5PjxwPkhlbGxvIFdvcmxk
PC9wPjwvYm9keT48L2h0bWw+DQotLUdlbmtnb01haWxWMlBhcnQxODdlMjhiZjNjYjQtLQ0K
bnRlbnQtVHlwZTogdGV4dC9wbGFpbjsgY2hhcnNldD1VVEYtOA0KQ29udGVudC1UcmFuc2Zlci1F
bmNvZGluZzogN2JpdA0KDQpIZWxsbyBXb3JsZA0KDQotLUdlbmtnb01haWxWMlBhcnQxODdlMjhi
ZjNjYjQNCkNvbnRlbnQtVHlwZTogdGV4dC9odG1sOyBjaGFyc2V0PVVURi04DQpDb250ZW50LVRy
YW5zZmVyLUVuY29kaW5nOiA3Yml0DQoNCjxodG1sPjxoZWFkPjxtZXRhIGh0dHAtZXF1aXY9IkNv
bnRlbnQtVHlwZSIgY29udGVudD0idGV4dC9odG1sOw0KY2hhcnNldD1VVEYtOCIvPjwvaGVhZD48
Ym9keT48cD5IZWxsbyBXb3JsZDwvcD48L2JvZHk+PC9odG1sPg0KLS1HZW5rZ29NYWlsVjJQYXJ0
MTg3ZTI4YmYzY2I0LS0NCg==
--GenkgoMailV2Partd00da1fe277a--
5 changes: 3 additions & 2 deletions test/Stub/MessageBodyCollection/forward-html-and-text.eml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@ me (unknown):
>Hello World
--boundary
Content-Type: text/html; charset=us-ascii
Content-Type: text/html; charset=UTF-8
Content-Transfer-Encoding: 7bit

<html><body><p>Reply text</p><p>me (unknown):</p><blockquote
<html><head><meta http-equiv="Content-Type" content="text/html;
charset=UTF-8"></head><body><p>Reply text</p><p>me (unknown):</p><blockquote
type="cite"><p>Hello World</p></blockquote></body></html>
--boundary--
7 changes: 4 additions & 3 deletions test/Stub/MessageBodyCollection/full-formatted-message.eml
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,17 @@ Content-Type: multipart/alternative; boundary=GenkgoMailV2Part8c6b95fae17e
--GenkgoMailV2Part8c6b95fae17e
Content-Type: text/plain; charset=us-ascii
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 7bit
Hello World
--GenkgoMailV2Part8c6b95fae17e
Content-Type: text/html; charset=us-ascii
Content-Type: text/html; charset=UTF-8
Content-Transfer-Encoding: 7bit

<html><body><p>Hello World</p></body></html>
<html><head><meta http-equiv="Content-Type" content="text/html;
charset=UTF-8"/></head><body><p>Hello World</p></body></html>
--GenkgoMailV2Part8c6b95fae17e--

--GenkgoMailV2Partecbe5baa2673
Expand Down
Loading

0 comments on commit fdf4380

Please sign in to comment.