Skip to content

Commit

Permalink
Merge pull request #134 from Masterminds/ampersand-in-urls
Browse files Browse the repository at this point in the history
Raw & in attributes
  • Loading branch information
goetas authored Sep 1, 2017
2 parents b8afbae + 2a38f56 commit 39e2a7a
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 13 deletions.
10 changes: 7 additions & 3 deletions src/HTML5/Parser/Tokenizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -1074,8 +1074,10 @@ protected function decodeCharacterReference($inAttribute = false)
}
$entity = CharacterReference::lookupDecimal($numeric);
}
} // String entity.
else {
} elseif ($tok === '=' && $inAttribute) {
return '&';
} else { // String entity.

// Attempt to consume a string up to a ';'.
// [a-zA-Z0-9]+;
$cname = $this->scanner->getAsciiAlphaNum();
Expand All @@ -1085,7 +1087,9 @@ protected function decodeCharacterReference($inAttribute = false)
// and continue on as the & is not part of an entity. The & will
// be converted to &amp; elsewhere.
if ($entity == null) {
$this->parseError("No match in entity table for '%s'", $cname);
if (!$inAttribute || strlen($cname) === 0) {
$this->parseError("No match in entity table for '%s'", $cname);
}
$this->scanner->unconsume($this->scanner->position() - $start);
return '&';
}
Expand Down
74 changes: 74 additions & 0 deletions test/HTML5/Parser/DOMTreeBuilderTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,80 @@ public function testDocument()
$this->assertEquals('http://www.w3.org/1999/xhtml', $doc->documentElement->namespaceURI);
}

/**
 * Checks that a bare "&" inside an attribute value is parsed without errors.
 *
 * The fixture holds four img elements whose src values are "a&b", "a&=",
 * "a&=c" and "a&=9". After parsing, the test asserts that $this->errors is
 * empty and that the serialized document writes each ampersand as "&amp;".
 */
public function testBareAmpersand()
{
$html = "<!doctype html>
<html>
<body>
<img src='a&b' />
<img src='a&=' />
<img src='a&=c' />
<img src='a&=9' />
</body>
</html>";
$doc = $this->parse($html);

// None of the four attribute values may have been reported as an error.
// NOTE(review): presumably parse() is what populates $this->errors; confirm in the test class.
$this->assertEmpty($this->errors);
// The ampersands must survive as literal characters, escaped on output.
$this->assertXmlStringEqualsXmlString('
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml"><body>
<img src="a&amp;b"/>
<img src="a&amp;="/>
<img src="a&amp;=c"/>
<img src="a&amp;=9"/>
</body>
</html>', $doc->saveXML());
}

/**
 * Checks the attribute values in which a bare "&" is still reported.
 *
 * The fixture holds two img elements whose src values are "a&" (ampersand
 * at the end of the value) and "a&+" (ampersand followed by "+"). The test
 * expects exactly two entries in $this->errors, and expects the serialized
 * document to keep both ampersands, written as "&amp;".
 */
public function testBareAmpersandNotAllowedInAttributes()
{
$html = "<!doctype html>
<html>
<body>
<img src='a&' />
<img src='a&+' />
</body>
</html>";
$doc = $this->parse($html);

// Two errors are expected for the two img elements.
// NOTE(review): presumably one error per element; the count alone does not prove the split.
$this->assertCount(2, $this->errors);
// Despite the errors, the ampersands are kept as literal characters.
$this->assertXmlStringEqualsXmlString('
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml"><body>
<img src="a&amp;"/>
<img src="a&amp;+"/>
</body>
</html>', $doc->saveXML());
}
/**
 * Checks how a bare "&" in body text (outside any attribute) is reported.
 *
 * The fixture puts six lines of text in the body: "a&b", "a&=", "a&=c",
 * "a&=9", "a&+" and "a& -- valid". The test expects exactly five entries
 * in $this->errors, and expects every ampersand to be written as "&amp;"
 * in the serialized document.
 */
public function testBareAmpersandNotAllowedInBody()
{
$html = "<!doctype html>
<html>
<body>
a&b
a&=
a&=c
a&=9
a&+
a& -- valid
</body>
</html>";
$doc = $this->parse($html);

// Six text lines are parsed but only five errors are expected.
// NOTE(review): presumably the line the fixture labels "valid" is the one
// that raises no error; the count alone does not identify it.
$this->assertCount(5, $this->errors);
// Errors or not, each ampersand is kept as a literal character.
$this->assertXmlStringEqualsXmlString('
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml"><body>
a&amp;b
a&amp;=
a&amp;=c
a&amp;=9
a&amp;+
a&amp; -- valid
</body>
</html>', $doc->saveXML());
}

public function testStrangeCapitalization()
{
$html = "<!doctype html>
Expand Down
27 changes: 17 additions & 10 deletions test/HTML5/Parser/TokenizerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -622,13 +622,27 @@ public function testTagAttributes()
),
false
),
"<foo a='blue&red'>" => array(
'foo',
array(
'a' => 'blue&red'
),
false
),
"<foo a='blue&amp;red'>" => array(
'foo',
array(
'a' => 'blue&red'
),
false
),
"<foo a='blue&&amp;&red'>" => array(
'foo',
array(
'a' => 'blue&&&red'
),
false
),
"<foo a='blue&&amp;red'>" => array(
'foo',
array(
Expand Down Expand Up @@ -725,18 +739,11 @@ public function testTagAttributes()

// Cause a parse error.
$bad = array(
// This will emit an entity lookup failure for &red.
"<foo a='blue&red'>" => array(
// This will emit an entity lookup failure for &+dark.
"<foo a='blue&+dark'>" => array(
'foo',
array(
'a' => 'blue&red'
),
false
),
"<foo a='blue&&amp;&red'>" => array(
'foo',
array(
'a' => 'blue&&&red'
'a' => 'blue&+dark'
),
false
),
Expand Down

0 comments on commit 39e2a7a

Please sign in to comment.