-
-
Notifications
You must be signed in to change notification settings - Fork 139
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[PHP 8.4][Intl] Add
grapheme_str_split
Add a polyfill for the `grapheme_str_split` function added in PHP 8.4. It requires PHP 7.3 or later, because the polyfill is based on the `\X` regex escape, which only works properly with PCRE2, and PCRE2 [only comes with PHP 7.3+](https://php.watch/versions/7.3/pcre2). Further, there are some cases in which the polyfill cannot split complex characters (such as two consecutive country flag Emojis). This has since been fixed in [PCRE2Project/pcre2#410](PCRE2Project/pcre2#410); however, that fix will likely only make it into PHP 8.4. References: - [RFC: Grapheme cluster for `str_split` function: `grapheme_str_split`](https://wiki.php.net/rfc/grapheme_str_split) - [PHP.Watch: PHP 8.4: New `grapheme_str_split` function](https://php.watch/versions/8.4/grapheme_str_split)
- Loading branch information
Showing
11 changed files
with
145 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,7 @@ | |
* - grapheme_strrpos - Find position (in grapheme units) of last occurrence of a string | ||
* - grapheme_strstr - Returns part of haystack string from the first occurrence of needle to the end of haystack | ||
* - grapheme_substr - Return part of a string | ||
* - grapheme_str_split - Splits a string into an array of individual graphemes or chunks of graphemes | ||
* | ||
* @author Nicolas Grekas <[email protected]> | ||
* | ||
|
@@ -191,6 +192,38 @@ public static function grapheme_strstr($s, $needle, $beforeNeedle = false) | |
return mb_strstr($s, $needle, $beforeNeedle, 'UTF-8'); | ||
} | ||
|
||
/**
 * Splits a string into an array of grapheme clusters, or of chunks of $len clusters each.
 *
 * @param string $s   The string to split
 * @param int    $len The number of grapheme clusters per returned element
 *
 * @return array|false The list of chunks, an empty array when $s is empty, or false when no
 *                     grapheme cluster could be matched (and, before PHP 8.0, when $len is out of range)
 *
 * @throws \ValueError On PHP 8.0+ when $len is lower than 1 or greater than 1073741823
 */
public static function grapheme_str_split($s, $len = 1)
{
    // The length must be strictly positive, as the error message states; letting 0 through
    // would hand it to array_chunk(), which rejects it with an unrelated error.
    if ($len < 1 || $len > 1073741823) {
        if (80000 > \PHP_VERSION_ID) {
            return false;
        }

        throw new \ValueError('grapheme_str_split(): Argument #2 ($length) must be greater than 0 and less than or equal to 1073741823.');
    }

    if ('' === $s) {
        return [];
    }

    preg_match_all('/('.SYMFONY_GRAPHEME_CLUSTER_RX.')/u', $s, $matches);

    if (empty($matches[0])) {
        return false;
    }

    // One cluster per element: the full matches are already the expected result.
    if (1 === $len) {
        return $matches[0];
    }

    // Group the clusters $len at a time and glue each group back into a string.
    return array_map(
        static function (array $chunk) {
            return implode('', $chunk);
        },
        array_chunk($matches[0], $len)
    );
}
|
||
private static function grapheme_position($s, $needle, $offset, $mode) | ||
{ | ||
$needle = (string) $needle; | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
<?php | ||
|
||
/* | ||
* This file is part of the Symfony package. | ||
* | ||
* (c) Fabien Potencier <[email protected]> | ||
* | ||
* For the full copyright and license information, please view the LICENSE | ||
* file that was distributed with this source code. | ||
*/ | ||
|
||
use Symfony\Polyfill\Php84 as p; | ||
|
||
// Declare the global function only when grapheme_substr() is available and
// grapheme_str_split() has not been defined yet.
if (function_exists('grapheme_substr') && !function_exists('grapheme_str_split')) {
    /**
     * Forwards to the Php84 polyfill implementation.
     */
    function grapheme_str_split(string $string, int $length = 1)
    {
        return p\Php84::grapheme_str_split($string, $length);
    }
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,8 @@ | |
|
||
namespace Symfony\Polyfill\Php84; | ||
|
||
use Symfony\Polyfill\Intl\Grapheme\Grapheme; | ||
|
||
/** | ||
* @author Ayesh Karunaratne <[email protected]> | ||
* @author Pierre Ambroise <[email protected]> | ||
|
@@ -169,4 +171,37 @@ private static function mb_internal_trim(string $regex, string $string, ?string | |
|
||
return mb_convert_encoding($string, $encoding, 'UTF-8'); | ||
} | ||
|
||
/**
 * Splits a string into an array of grapheme clusters, or of chunks of $length clusters each.
 *
 * @param string $string The string to split
 * @param int    $length The number of grapheme clusters per returned element
 *
 * @return array|false The list of chunks, an empty array when $string is empty, or false when
 *                     no grapheme cluster could be matched
 *
 * @throws \ValueError When $length is lower than 1 or greater than 1073741823
 */
public static function grapheme_str_split(string $string, int $length = 1)
{
    // The length must be strictly positive, as the error message states; letting 0 through
    // would hand it to array_chunk(), which rejects it with an unrelated error.
    if ($length < 1 || $length > 1073741823) {
        throw new \ValueError('grapheme_str_split(): Argument #2 ($length) must be greater than 0 and less than or equal to 1073741823.');
    }

    if ('' === $string) {
        return [];
    }

    $pcreVersion = (float) \PCRE_VERSION;

    // Use PCRE's own \X for versions in [8.32, 10) or >= 10.39; any other version gets the
    // explicit pattern below (presumably because \X is not reliable there).
    $regex = ($pcreVersion < 10 ? $pcreVersion >= 8.32 : $pcreVersion >= 10.39)
        ? '\X'
        : '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';

    preg_match_all('/'.$regex.'/u', $string, $matches);

    if (empty($matches[0])) {
        return false;
    }

    // One cluster per element: the full matches are already the expected result.
    if (1 === $length) {
        return $matches[0];
    }

    // Group the clusters $length at a time and glue each group back into a string.
    return array_map(
        static function (array $chunk) {
            return implode('', $chunk);
        },
        array_chunk($matches[0], $length)
    );
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters