Skip to content

Commit

Permalink
Merge pull request #55 from xp-framework/feature/heredoc
Browse files Browse the repository at this point in the history
Add support for heredoc (and its nowdoc variant)
  • Loading branch information
thekid authored Jan 26, 2025
2 parents d8b7370 + af447f3 commit a7b7b95
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 25 deletions.
66 changes: 41 additions & 25 deletions src/main/php/lang/ast/Tokens.class.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
class Tokens {
const DELIMITERS = " \r\n\t'\$\"`=,;.:?!(){}[]#+-*/|&^@%~<>";
const OPERATORS = [
'<' => ['<=>', '<<=', '<=', '<<', '<>', '<?'],
'<' => ['<=>', '<<=', '<<<', '<=', '<<', '<>', '<?'],
'>' => ['>>=', '>=', '>>'],
'=' => ['===', '=>', '=='],
'!' => ['!==', '!='],
Expand All @@ -22,7 +22,7 @@ class Tokens {
'+' => ['+=', '++'],
'-' => ['-=', '--', '->'],
'*' => ['**=', '*=', '**'],
'/' => ['/='],
'/' => ['/=', '//', '/*'],
'~' => ['~='],
'%' => ['%='],
'?' => ['?->', '??=', '?:', '??'],
Expand Down Expand Up @@ -106,10 +106,10 @@ public function iterator($language) {
$end= '\\'.$token;
do {
$chunk= $next($end);
if (null === $chunk) {
throw new FormatException('Unclosed string literal starting at line '.$line);
} else if ('\\' === $chunk) {
if ('\\' === $chunk) {
$string.= $chunk.$next($end);
} else if (null === $chunk) {
throw new FormatException('Unclosed string literal starting at line '.$line);
} else {
$string.= $chunk;
}
Expand Down Expand Up @@ -162,43 +162,59 @@ public function iterator($language) {
goto number;
}
$offset-= strlen($t);
} else if ('/' === $token) {
$t= $next(self::DELIMITERS);
if ('/' === $t) {
yield new Token(null, 'comment', '//'.$next("\r\n"), $line);
continue;
} else if ('*' === $t) {
$comment= '';
do {
$chunk= $next('/');
$comment.= $chunk;
} while (null !== $chunk && '*' !== $chunk[strlen($chunk) - 1]);
$comment.= $next('/');
yield new Token(null, '*' === $comment[0] ? 'apidoc' : 'comment', '/*'.$comment, $line);
$line+= substr_count($comment, "\n");
continue;
}
null === $t || $offset-= strlen($t);
}

// Handle combined operators. First, ensure we have enough bytes in our buffer
// Our longest operator is 3 characters, hardcode this here.
if (self::OPERATORS[$token]) {
if ($combined= self::OPERATORS[$token]) {
$offset--;
while ($offset + 3 > $length && $this->in->available()) {
$buffer.= $this->in->read(8192);
$length= strlen($buffer);
}
foreach (self::OPERATORS[$token] as $operator) {
foreach ($combined as $operator) {
if ($offset + strlen($operator) > $length) continue;
if (0 === substr_compare($buffer, $operator, $offset, strlen($operator))) {
$token= $operator;
break;
}
}
$offset+= strlen($token);
}

// Distinguish single- and multiline comments as well as heredoc from operators
if ('//' === $token) {
yield new Token(null, 'comment', '//'.$next("\r\n"), $line);
continue;
} else if ('/*' === $token) {
$comment= '';
do {
$chunk= $next('/');
$comment.= $chunk;
} while (null !== $chunk && '*' !== $chunk[strlen($chunk) - 1]);
$comment.= $next('/');
yield new Token(null, '*' === $comment[0] ? 'apidoc' : 'comment', '/*'.$comment, $line);
$line+= substr_count($comment, "\n");
continue;
} else if ('<<<' === $token) {
$label= $next("\r\n");
$end= trim($label, '"\'');
$l= strlen($end);
$string= "<<<{$label}";

heredoc: $token= $next("\r\n");
if (0 === substr_compare($token, $end, $p= strspn($token, ' '), $l)) {
$p+= $l;
$offset-= strlen($token) - $p;
yield new Token($language->symbol('(literal)'), 'heredoc', $string.substr($token, 0, $p), $line);
$line+= substr_count($string, "\n");
continue;
} else if (null === $token) {
throw new FormatException('Unclosed heredoc literal starting at line '.$line);
}
$string.= $token;
goto heredoc;
}
}
yield new Token($language->symbol($token), 'operator', $token, $line);
} else {
yield new Token($language->symbols[$token] ?? $language->symbol('(name)'), 'name', $token, $line);
Expand Down
5 changes: 5 additions & 0 deletions src/test/php/lang/ast/unittest/TokensTest.class.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ public function unclosed_string_literals($input) {
(new Tokens($input))->iterator($this->language)->current();
}

/**
 * Fetching the first token from input that opens a heredoc but ends before
 * any closing label is expected to raise a FormatException whose message
 * matches /Unclosed heredoc literal/.
 */
#[Test, Expect(class: FormatException::class, message: '/Unclosed heredoc literal/'), Values(['<<<EOD', "<<<EOD\n", "<<<EOD\nLine 1"])]
public function unclosed_heredoc_literals($input) {
  $tokens= new Tokens($input);
  $iterator= $tokens->iterator($this->language);

  // Asking for the current element is what triggers tokenization of the input
  $iterator->current();
}

#[Test, Values(['0', '1', '1_000_000_000'])]
public function integer_literal($input) {
$this->assertTokens([['integer' => str_replace('_', '', $input)]], new Tokens($input));
Expand Down
52 changes: 52 additions & 0 deletions src/test/php/lang/ast/unittest/parse/LiteralsTest.class.php
Original file line number Diff line number Diff line change
Expand Up @@ -86,4 +86,56 @@ public function dangling_comma_in_key_value_map($declaration) {
$pair= [new Literal('"key"', self::LINE), new Literal('"value"', self::LINE)];
$this->assertParsed([new ArrayLiteral([$pair], self::LINE)], $declaration);
}

/**
 * A multi-line heredoc is expected to parse to a single literal carrying the
 * complete source text. Runs with a bare, a double-quoted and a single-quoted
 * (nowdoc) opening label; the closing label is always the bare `EOD`.
 */
#[Test, Values(['EOD', '"EOD"', "'EOD'"])]
public function heredoc($label) {
  $opening= "<<<{$label}\n";
  $content= "Line 1\n"."Line 2\n"."\n"."Line 4\n";
  $source= $opening.$content."EOD";

  $expected= [new Literal($source, self::LINE)];
  $this->assertParsed($expected, $source.';');
}

/**
 * Same as the plain heredoc case, but the content lines and the closing
 * label are indented. The expected literal keeps the source text verbatim,
 * indentation included.
 */
#[Test]
public function heredoc_indentation() {
  $lines= [
    "<<<EOD\n",
    " Line 1\n",
    " Line 2\n",
    "\n",
    " Line 4\n",
    " EOD",
  ];
  $source= implode('', $lines);

  $expected= [new Literal($source, self::LINE)];
  $this->assertParsed($expected, $source.';');
}

/**
 * A single-quoted string spanning three lines is followed by `null` on the
 * line after it ends. The string literal is expected at self::LINE and the
 * `null` literal at self::LINE + 3.
 */
#[Test]
public function line_number_after_multiline_string() {
  $literal= "'<html>\n"." ...\n"."</html>'";

  $expected= [
    new Literal($literal, self::LINE),
    new Literal('null', self::LINE + 3),
  ];
  $this->assertParsed($expected, $literal.";\nnull;");
}

/**
 * A heredoc spanning three lines is followed by `null` on the line after its
 * closing label. The heredoc literal is expected at self::LINE and the
 * `null` literal at self::LINE + 3.
 */
#[Test]
public function line_number_after_heredoc() {
  $heredoc= "<<<EOD\n"." Line 1\n"." EOD";

  $first= new Literal($heredoc, self::LINE);
  $second= new Literal('null', self::LINE + 3);
  $this->assertParsed([$first, $second], $heredoc.";\nnull;");
}
}

0 comments on commit a7b7b95

Please sign in to comment.