Skip to content

Commit

Permalink
charData - add Zero-Width Non-Joiner, Zero-Width Joiner, & Word-Joiner
Browse files Browse the repository at this point in the history
textAnsi - don't replace confusable chars (other than whitespace & separators)..  color/highlight is enough
  • Loading branch information
bkdotcom committed Jun 6, 2024
1 parent 5f936f9 commit 809501b
Show file tree
Hide file tree
Showing 20 changed files with 361 additions and 164 deletions.
76 changes: 70 additions & 6 deletions dev/charData.php
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<?php
<?php // @phpcs:ignore SlevomatCodingStandard.Files.FileLength.FileTooLong

/**
* Define characters that will be highlighted / replaced
Expand Down Expand Up @@ -179,110 +179,174 @@
),

"\xC2\xA0" => array(
'class' => 'char-ws',
'codePoint' => '00A0',
'desc' => 'NBSP',
'replaceWith' => '\u{00a0}',
'similarTo' => ' ',
),
"\xE1\x9A\x80" => array(
'class' => 'char-ws',
'codePoint' => '1680',
'desc' => 'Ogham Space Mark',
'replaceWith' => '\u{1680}',
'similarTo' => ' ',
),
"\xE1\xA0\x8E" => array(
'class' => 'char-ws',
'codePoint' => '180E',
'desc' => 'Mongolian Vowel Separator',
'desc' => 'Mongolian Vowel Separator', // not included in Separator Category (Other, Format)
'replaceWith' => '\u{180e}',
'similarTo' => '',
),
"\xE2\x80\x80" => array(
'class' => 'char-ws',
'codePoint' => '2000',
'desc' => 'En Quad',
'replaceWith' => '\u{2000}',
'similarTo' => ' ',
),
"\xE2\x80\x81" => array(
'class' => 'char-ws',
'codePoint' => '2001',
'desc' => 'Em Quad',
'replaceWith' => '\u{2001}',
'similarTo' => ' ',
),
"\xE2\x80\x82" => array(
'class' => 'char-ws',
'codePoint' => '2002',
'desc' => 'En Space',
'replaceWith' => '\u{2002}',
'similarTo' => ' ',
),
"\xE2\x80\x83" => array(
'class' => 'char-ws',
'codePoint' => '2003',
'desc' => 'Em Space',
'replaceWith' => '\u{2003}',
'similarTo' => ' ',
),
"\xE2\x80\x84" => array(
'class' => 'char-ws',
'codePoint' => '2004',
'desc' => 'Three-Per-Em Space',
'desc' => 'Three-Per-Em (thick) Space',
'replaceWith' => '\u{2004}',
'similarTo' => ' ',
),
"\xE2\x80\x85" => array(
'class' => 'char-ws',
'codePoint' => '2005',
'desc' => 'Four-Per-Em Space',
'desc' => 'Four-Per-Em (mid) Space',
'replaceWith' => '\u{2005}',
'similarTo' => ' ',
),
"\xE2\x80\x86" => array(
'class' => 'char-ws',
'codePoint' => '2006',
'desc' => 'Six-Per-Em Space',
'replaceWith' => '\u{2006}',
'similarTo' => ' ',
),
"\xE2\x80\x87" => array(
'class' => 'char-ws',
'codePoint' => '2007',
'desc' => 'Figure Space',
'replaceWith' => '\u{2007}',
'similarTo' => ' ',
),
"\xE2\x80\x88" => array(
'class' => 'char-ws',
'codePoint' => '2008',
'desc' => 'Punctuation Space',
'replaceWith' => '\u{2008}',
'similarTo' => ' ',
),
"\xE2\x80\x89" => array(
'class' => 'char-ws',
'codePoint' => '2009',
'desc' => 'Thin Space',
'replaceWith' => '\u{2009}',
'similarTo' => ' ',
),
"\xE2\x80\x8A" => array(
'class' => 'char-ws',
'codePoint' => '200A',
'desc' => 'Hair Space',
'replaceWith' => '\u{200a}',
'similarTo' => ' ',
),
"\xE2\x80\x8B" => array(
'class' => 'char-ws',
'codePoint' => '200B',
'desc' => 'Zero Width Space', // not included in Separator Category
'desc' => 'Zero Width Space', // not included in Separator Category (Other, Format)
'replaceWith' => '\u{200b}',
'similarTo' => '',
),
"\xE2\x80\x8C" => array(
'class' => 'char-ws',
'codePoint' => '200C',
'desc' => 'Zero Width Non-Joiner', // not included in Separator Category (Other, Format)
'replaceWith' => '\u{200c}',
'similarTo' => '',
),
"\xE2\x80\x8D" => array(
'class' => 'char-ws',
'codePoint' => '200D',
'desc' => 'Zero Width Joiner', // not included in Separator Category (Other, Format)
'replaceWith' => '\u{200d}',
'similarTo' => '',
),
"\xE2\x80\xA8" => array(
'class' => 'char-ws',
'codePoint' => '2028',
'desc' => 'Line Separator',
'replaceWith' => '\u{2028}',
'similarTo' => "\n",
),
"\xE2\x80\xA9" => array(
'class' => 'char-ws',
'codePoint' => '2029',
'desc' => 'Paragraph Separator',
'replaceWith' => '\u{2029}',
'similarTo' => "\n",
),
"\xE2\x80\xAF" => array(
'class' => 'char-ws',
'codePoint' => '202F',
'desc' => 'Narrow No-Break Space',
'replaceWith' => '\u{202f}',
'similarTo' => ' ',
),
"\xE2\x81\x9F" => array(
'class' => 'char-ws',
// key "\xE2\x81\x9F" is the UTF-8 encoding of U+205F (202F is Narrow No-Break Space)
'codePoint' => '205F',
'desc' => 'Medium Mathematical Space',
'replaceWith' => '\u{205f}',
'similarTo' => ' ',
),
"\xE2\x81\xA0" => array(
'class' => 'char-ws',
'codePoint' => '2060',
'desc' => 'Word Joiner', // Not included in Separator Category (Other, Format)
'replaceWith' => '\u{2060}',
'similarTo' => '',
),
"\xE3\x80\x80" => array(
'class' => 'char-ws',
'codePoint' => '3000',
'desc' => 'Ideographic Space',
'replaceWith' => '\u{3000}',
'similarTo' => ' ',
),
"\xEF\xBB\xBF" => array(
'class' => 'char-ws',
'codePoint' => 'FEFF',
'desc' => 'BOM / Zero Width No-Break Space', // not included in Separator Category
'desc' => 'BOM / Zero Width No-Break Space', // not included in Separator Category (Other, Format)
'replaceWith' => '\u{feff}',
'similarTo' => '',
),

"\xEF\xBF\xBD" => array(
'codePoint' => 'FFFD',
'desc' => 'Replacement Character',
Expand Down
3 changes: 1 addition & 2 deletions src/Debug/Abstraction/Object/Methods.php
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,8 @@ public function toString(Abstraction $abs)
}
$val = null;
try {
$val = $obj->__toString();
/** @var Abstraction|string */
$val = $this->abstracter->crate($val, $abs['debugMethod'], $abs['hist']);
$val = $this->abstracter->crate($obj->__toString(), $abs['debugMethod'], $abs['hist']);
} catch (Exception $e) {
// yes, __toString can throw exception..
// example: SplFileObject->__toString will throw exception if file doesn't exist
Expand Down
5 changes: 3 additions & 2 deletions src/Debug/Dump/AbstractValue.php
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ public function __construct(Dumper $dumper)
$this->optionStackPush(array(
'addQuotes' => true,
'charHighlight' => true,
'charReplace' => true,
));
}

Expand Down Expand Up @@ -118,7 +119,7 @@ public function dump($val, $opts = array())
*
* @param string $str String to search for chars
*
* @return array
* @return list<string>
*/
public function findChars($str)
{
Expand Down Expand Up @@ -338,7 +339,7 @@ abstract protected function dumpString($val, Abstraction $abs = null);

/**
* escape hex and unicode escape sequences
* this allows use to differentiate between '\u{03c5}' and a replaced "\u{03c5}"
* this allows us to differentiate between '\u{03c5}' and a replaced "\u{03c5}"
*
* @param string $val string value
*
Expand Down
42 changes: 27 additions & 15 deletions src/Debug/Dump/Base/Value.php
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,31 @@ protected function getObject()
return $this->lazyObject;
}

/**
 * Build the text used to display a found character
 *
 * Chars whose first byte is below 0x80 are always returned as a `\x##` hex escape.
 * Other chars are returned as a `\u{####}` escape when `$charReplace` is truthy or when
 * the char's `charData` class contains "char-ws"; otherwise the char is returned untouched.
 *
 * @param string $char        single (possibly multi-byte) character
 * @param bool   $charReplace whether to replace the char with its code-point escape
 *
 * @return string `\x##`, `\u{####}`, or the original char
 */
protected function charReplacement($char, $charReplace)
{
    $firstByte = \ord($char);
    if ($firstByte < 0x80) {
        // single-byte chars are replaced regardless of $charReplace
        return '\\x' . \str_pad(\dechex($firstByte), 2, '0', STR_PAD_LEFT);
    }
    // "whitespace" chars are replaced regardless of $charReplace
    $isWhitespace = isset($this->charData[$char]['class'])
        && \strpos($this->charData[$char]['class'], 'char-ws') !== false;
    if (!$isWhitespace && !$charReplace) {
        return $char;
    }
    $hex = \dechex(Utf8::ord($char));
    return '\\u{' . \str_pad($hex, 4, '0', STR_PAD_LEFT) . '}';
}

/**
* Highlight confusable and other characters
*
Expand All @@ -291,24 +316,11 @@ protected function getObject()
protected function highlightChars($str)
{
    $chars = $this->findChars($str);
    // read the option once; it is passed unchanged to every charReplacement() call
    $charReplace = $this->optionGet('charReplace');
    foreach ($chars as $char) {
        // charReplacement() requires both arguments; the former one-argument call
        // was immediately overwritten and has been dropped
        $replacement = $this->charReplacement($char, $charReplace);
        $str = \str_replace($char, $replacement, $str);
    }
    return $str;
}

/**
 * Get the escape-sequence representation of a character
 *
 * @param string $char single multi-byte character
 *
 * @return string `\x##` when the first byte is below 0x80, otherwise `\u{####}`
 */
protected function charReplacement($char)
{
    $firstByte = \ord($char);
    if ($firstByte < 0x80) {
        return '\\x' . \str_pad(\dechex($firstByte), 2, '0', STR_PAD_LEFT);
    }
    $hex = \dechex(Utf8::ord($char));
    return '\\u{' . \str_pad($hex, 4, '0', STR_PAD_LEFT) . '}';
}
}
2 changes: 1 addition & 1 deletion src/Debug/Dump/TextAnsi.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class TextAnsi extends Text
'escapeCodes' => array(
'arrayKey' => "\e[38;5;83m", // yellow
'binary' => "\e[30;48;5;250m", // black foreground / grey background
'char' => "\e[38;5;208m", // orange
'char' => "\e[34;48;5;14m", // blue foreground / light-blue background
'excluded' => "\e[38;5;9m", // red
'false' => "\e[91m", // red
'keyword' => "\e[38;5;45m", // blue
Expand Down
17 changes: 16 additions & 1 deletion src/Debug/Dump/TextAnsi/Value.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
use bdk\Debug\Abstraction\Object\Abstraction as ObjectAbstraction;
use bdk\Debug\Abstraction\Type;
use bdk\Debug\Dump\Text\Value as TextValue;
use bdk\Debug\Dump\TextAnsi as Dumper;
use bdk\Debug\Dump\TextAnsi\TextAnsiObject;
use bdk\Debug\Utility\Utf8;

Expand All @@ -30,6 +31,19 @@ class Value extends TextValue
/** @var TextAnsiObject */
protected $lazyObject;

/**
 * Constructor
 *
 * Runs the parent constructor, then pushes an option set with `charReplace` disabled.
 *
 * @param Dumper $dumper "parent" dump class
 */
public function __construct(Dumper $dumper)
{
    parent::__construct($dumper); // sets debug and dumper
    $ansiOptions = array('charReplace' => false);
    $this->optionStackPush($ansiOptions);
}

/**
* Get escape reset sequence
*
Expand Down Expand Up @@ -356,9 +370,10 @@ protected function getObject()
protected function highlightChars($str)
{
$chars = $this->findChars($str);
$charReplace = $this->optionGet('charReplace');
foreach ($chars as $char) {
$replacement = $this->cfg['escapeCodes']['char']
. $this->charReplacement($char)
. $this->charReplacement($char, $charReplace)
. $this->escapeReset;
$str = \str_replace($char, $replacement, $str);
}
Expand Down
Loading

0 comments on commit 809501b

Please sign in to comment.