diff --git a/app/Services/MetaData.php b/app/Services/MetaData.php index 7961e3ef5..707c1e863 100644 --- a/app/Services/MetaData.php +++ b/app/Services/MetaData.php @@ -5,8 +5,10 @@ namespace App\Services; use DOMDocument; -use GuzzleHttp\Exception\RequestException; +use GuzzleHttp\Exception\TransferException; +use GuzzleHttp\Psr7\Exception\MalformedUriException; use Illuminate\Http\Client\ConnectionException; +use Illuminate\Http\Client\HttpClientException; use Illuminate\Support\Collection; use Illuminate\Support\Facades\Cache; use Illuminate\Support\Facades\Http; @@ -97,12 +99,13 @@ private function getData(): Collection $response = Http::get($this->url); if ($response->ok()) { - $data = $this->parse($response->body()); + $data = $this->parse( + $response->body() + ); } - } catch (ConnectionException) { - // Catch but not capture the exception - } catch (RequestException) { - // Catch but not capture the exception + } catch (HttpClientException|MalformedUriException|TransferException) { + // Catch but not capture all base exceptions for: + // Laravel Http Client, Guzzle, and PSR-7 } return $data; diff --git a/app/Services/ParsableContentProviders/LinkProviderParsable.php b/app/Services/ParsableContentProviders/LinkProviderParsable.php index 636cca3d3..24789f366 100644 --- a/app/Services/ParsableContentProviders/LinkProviderParsable.php +++ b/app/Services/ParsableContentProviders/LinkProviderParsable.php @@ -15,50 +15,121 @@ */ public function parse(string $content): string { - return (string) preg_replace_callback( - '/(<(a|code|pre)\s+[^>]*>.*?<\/\2>)|(?|[^<>]*<\/))/is', - function (array $matches): string { - if ($matches[1] !== '') { - return $matches[1]; - } - - $humanUrl = Str::of($matches[0]) - ->replaceMatches('/^https?:\/\//', '') - ->rtrim('/') - ->toString(); - - $isMail = (bool) preg_match('/^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/', $humanUrl); - $isHttp = Str::startsWith($matches[0], ['http://', 'https://']); - - if ((! $isMail) && (! $isHttp)) { - return $matches[0]; - } - - $url = $isHttp ? $matches[0] : 'https://'.$matches[0]; - - $url = $isMail ? 'mailto:'.$humanUrl : $url; - - $linkHtml = ''.$humanUrl.''; - - if (! $isMail && $url) { - $service = new MetaData($url); - $metadata = $service->fetch(); - - if ($metadata->isNotEmpty() && ($metadata->has('image') || $metadata->has('html'))) { - $trimmed = trim( - view('components.link-preview-card', [ - 'data' => $metadata, - 'url' => $url, - ])->render() - ); - - return $linkHtml.' '.preg_replace('//', '', $trimmed); - } - } - - return $linkHtml; - }, - str_replace('&', '&', $content) + $tokens = $this->tokenize($content); + + if ($tokens === false) { + return $content; + } + + $processedTokens = array_map( + fn (string $token): string => $this->processToken($token), + $tokens); + + return implode('', $processedTokens); + } + + /** + * Split the content into tokens based on spaces and newlines. + * + * @return list|false + */ + private function tokenize(string $content): array|false + { + return preg_split('/(\s|
)/', $content, -1, PREG_SPLIT_DELIM_CAPTURE); + } + + /** + * Process a single token and convert valid URLs into HTML links. + */ + private function processToken(string $token): string + { + $allowableAttachedCharacters = '{([)]}'; + + $trimmedToken = trim($token, $allowableAttachedCharacters); + + if ($trimmedToken === '' || $trimmedToken === '0') { + return $token; + } + + if (filter_var($trimmedToken, FILTER_VALIDATE_EMAIL)) { + $trimmedToken = "mailto:{$trimmedToken}"; + } elseif (! $this->isValidUrl($trimmedToken)) { + return $token; + } + + $humanUrl = Str::of($trimmedToken) + ->replaceMatches('/^(https?:\/\/|mailto:)/', '') + ->rtrim('/') + ->toString(); + + $linkHtml = "{$humanUrl}"; + + $service = new MetaData($trimmedToken); + $metadata = $service->fetch(); + if ($metadata->isNotEmpty() && ($metadata->has('image') || $metadata->has('html'))) { + $trimmedPreviewCard = trim( + view('components.link-preview-card', [ + 'data' => $metadata, + 'url' => $trimmedToken, + ])->render() + ); + + $linkHtml .= $trimmedPreviewCard; + } + + $leading = $this->getCharacters($token, $allowableAttachedCharacters, 'leading'); + $trailing = $this->getCharacters($token, $allowableAttachedCharacters, 'trailing'); + + return $leading.$linkHtml.$trailing; + } + + /** + * Extract leading or trailing punctuation/characters from a token. + */ + private function getCharacters(string $token, string $allowableCharacters, string $direction): string + { + $pattern = match ($direction) { + 'leading' => '/^(['.preg_quote($allowableCharacters, '/').']+)/', + 'trailing' => '/(['.preg_quote($allowableCharacters, '/').']+)$/', + default => '', + }; + + if (preg_match($pattern, $token, $matches)) { + return $matches[1]; + } + + return ''; + } + + /** + * Validate if a token is a valid URL. + */ + private function isValidUrl(string $token): bool + { + $urlComponents = parse_url($token); + if ($urlComponents === false || ! filter_var($token, FILTER_VALIDATE_URL)) { + return false; + } + + $scheme = $urlComponents['scheme'] ?? null; + $host = $urlComponents['host'] ?? null; + if (! in_array($scheme, ['http', 'https'], true) || ! filter_var($host, FILTER_VALIDATE_DOMAIN, FILTER_FLAG_HOSTNAME)) { + return false; + } + + foreach (['path', 'query', 'fragment'] as $part) { + if (isset($urlComponents[$part]) && preg_match('/[\s<>{}[\]]/', $urlComponents[$part])) { + return false; + } + } + + if (isset($urlComponents['port']) && (preg_match('/^\d{1,5}$/', (string) $urlComponents['port']) === 0 || preg_match('/^\d{1,5}$/', (string) $urlComponents['port']) === false)) { + return false; + } + + return (bool) preg_match( + '/((https?:\/\/)?((localhost)|((?:\d{1,3}\.){3}\d{1,3})|[\w\-._@:%+~#=]{1,256}(\.[a-zA-Z]{2,})+)(:\d+)?(\/[\w\-._@:%+~#=\/]*)?(\?[\w\-._@:%+~#=\/&]*)?)/i', + $token ); } } diff --git a/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____htt___example_com______htt___example_com__.snap b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____htt___example_com______htt___example_com__.snap new file mode 100644 index 000000000..3abc5bb69 --- /dev/null +++ b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____htt___example_com______htt___example_com__.snap @@ -0,0 +1 @@ +htt://example.com \ No newline at end of file diff --git "a/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http____2001_0db8_85a3_0000_0\342\200\246_7334_______http____2001_0db8_85a3_0000_0____7334___.snap" "b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http____2001_0db8_85a3_0000_0\342\200\246_7334_______http____2001_0db8_85a3_0000_0____7334___.snap" new file mode 100644 index 000000000..90249be18 --- /dev/null +++ "b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http____2001_0db8_85a3_0000_0\342\200\246_7334_______http____2001_0db8_85a3_0000_0____7334___.snap" @@ -0,0 +1 @@ +http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334] \ No newline at end of file diff --git "a/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http____2001_0db8_85a3_0000_0\342\200\246__8080______http____2001_0db8_85a3_0000_0_____8080__.snap" "b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http____2001_0db8_85a3_0000_0\342\200\246__8080______http____2001_0db8_85a3_0000_0_____8080__.snap" new file mode 100644 index 000000000..b978eb583 --- /dev/null +++ "b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http____2001_0db8_85a3_0000_0\342\200\246__8080______http____2001_0db8_85a3_0000_0_____8080__.snap" @@ -0,0 +1 @@ +http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080 \ No newline at end of file diff --git a/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http____example_com______http____example_com__.snap b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http____example_com______http____example_com__.snap new file mode 100644 index 000000000..9b155777c --- /dev/null +++ b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http____example_com______http____example_com__.snap @@ -0,0 +1 @@ +http://.example.com \ No newline at end of file diff --git a/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http____example_com______http____example_com____2.snap b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http____example_com______http____example_com____2.snap new file mode 100644 index 000000000..1d7732053 --- /dev/null +++ b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http____example_com______http____example_com____2.snap @@ -0,0 +1 @@ +http:///example.com \ No newline at end of file diff --git a/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___exa__mple_com______http___exa__mple_com__.snap b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___exa__mple_com______http___exa__mple_com__.snap new file mode 100644 index 000000000..1d609a229 --- /dev/null +++ b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___exa__mple_com______http___exa__mple_com__.snap @@ -0,0 +1 @@ +http://exa_mple.com \ No newline at end of file diff --git a/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___example______http___example__.snap b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___example______http___example__.snap new file mode 100644 index 000000000..3022ccae9 --- /dev/null +++ b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___example______http___example__.snap @@ -0,0 +1 @@ +http://example \ No newline at end of file diff --git a/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___example__com______http___example__com__.snap b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___example__com______http___example__com__.snap new file mode 100644 index 000000000..0504b0b2b --- /dev/null +++ b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___example__com______http___example__com__.snap @@ -0,0 +1 @@ +http://example..com \ No newline at end of file diff --git a/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___example_com______http___example_com__.snap b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___example_com______http___example_com__.snap new file mode 100644 index 000000000..fd0492f34 --- /dev/null +++ b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___example_com______http___example_com__.snap @@ -0,0 +1 @@ +http://example=com \ No newline at end of file diff --git a/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___example_com_abcd______http___example_com_abcd__.snap b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___example_com_abcd______http___example_com_abcd__.snap new file mode 100644 index 000000000..cb7bd4657 --- /dev/null +++ b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___example_com_abcd______http___example_com_abcd__.snap @@ -0,0 +1 @@ +http://example.com:abcd \ No newline at end of file diff --git a/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___example_com_this___that______http___example_com_this___that__.snap b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___example_com_this___that______http___example_com_this___that__.snap new file mode 100644 index 000000000..0937a5a06 --- /dev/null +++ b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___example_com_this___that______http___example_com_this___that__.snap @@ -0,0 +1 @@ +http://example.com?this<>=that \ No newline at end of file diff --git "a/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___example_com_this_that_\342\200\246__that______http___example_com_this_that______that__.snap" "b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___example_com_this_that_\342\200\246__that______http___example_com_this_that______that__.snap" new file mode 100644 index 000000000..3b05fc087 --- /dev/null +++ "b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___example_com_this_that_\342\200\246__that______http___example_com_this_that______that__.snap" @@ -0,0 +1 @@ +http://example.com?this=that#this<>=that \ No newline at end of file diff --git "a/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___example_com_\360\237\221\215______http___example_com_\360\237\221\215__.snap" "b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___example_com_\360\237\221\215______http___example_com_\360\237\221\215__.snap" new file mode 100644 index 000000000..10a285b96 --- /dev/null +++ "b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http___example_com_\360\237\221\215______http___example_com_\360\237\221\215__.snap" @@ -0,0 +1 @@ +http://example.com/👍 \ No newline at end of file diff --git a/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http__example_com______http__example_com__.snap b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http__example_com______http__example_com__.snap new file mode 100644 index 000000000..2e0b70fe5 --- /dev/null +++ b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http__example_com______http__example_com__.snap @@ -0,0 +1 @@ +http//example.com \ No newline at end of file diff --git a/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http__example_com______http__example_com____2.snap b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http__example_com______http__example_com____2.snap new file mode 100644 index 000000000..e88d1b77a --- /dev/null +++ b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____http__example_com______http__example_com____2.snap @@ -0,0 +1 @@ +http:/example.com \ No newline at end of file diff --git a/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____protocol___example_com______protocol___example_com__.snap b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____protocol___example_com______protocol___example_com__.snap new file mode 100644 index 000000000..bc712b6a2 --- /dev/null +++ b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____protocol___example_com______protocol___example_com__.snap @@ -0,0 +1 @@ +protocol://example.com \ No newline at end of file diff --git a/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____www_example_com______www_example_com__.snap b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____www_example_com______www_example_com__.snap new file mode 100644 index 000000000..8642ba7b9 --- /dev/null +++ b/tests/.pest/snapshots/Unit/Services/ContentProvidersTest/malformed_links_are_correctly_handled_by_content_parser_with_data_set____www_example_com______www_example_com__.snap @@ -0,0 +1 @@ +www.example.com \ No newline at end of file diff --git a/tests/.pest/snapshots/Unit/Services/ContentTest/only_links_with_images_or_html_oEmbeds_are_parsed.snap b/tests/.pest/snapshots/Unit/Services/ContentTest/only_links_with_images_or_html_oEmbeds_are_parsed.snap index a48a6e741..b0d91f5bf 100644 --- a/tests/.pest/snapshots/Unit/Services/ContentTest/only_links_with_images_or_html_oEmbeds_are_parsed.snap +++ b/tests/.pest/snapshots/Unit/Services/ContentTest/only_links_with_images_or_html_oEmbeds_are_parsed.snap @@ -1,9 +1,9 @@ -Sure, here is the link: laravel.com
laravel.com. Let me know if you have any questions. \ No newline at end of file diff --git a/tests/Unit/Services/ContentProvidersTest.php b/tests/Unit/Services/ContentProvidersTest.php index 25709fe0a..9fc7f463d 100644 --- a/tests/Unit/Services/ContentProvidersTest.php +++ b/tests/Unit/Services/ContentProvidersTest.php @@ -200,6 +200,29 @@ ], ]); +test('malformed links are correctly handled by content parser', function (string $content) { + $provider = new App\Services\ParsableContentProviders\LinkProviderParsable(); + expect($provider->parse($content))->toMatchSnapshot(); +})->with([ + 'http://example..com', + 'htt://example.com', + 'protocol://example.com', + 'http//example.com', + 'http://exa_mple.com', + 'http://example', + 'http://.example.com', + 'http://example=com', + 'www.example.com', + 'http:/example.com', + 'http:///example.com', + 'http://example.com?this<>=that', + 'http://example.com?this=that#this<>=that', + 'http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]', + 'http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080', + 'http://example.com:abcd', + 'http://example.com/👍', +]); + test('only http or https urls are converted to links', function (string $content, string $parsed) { $provider = new App\Services\ParsableContentProviders\LinkProviderParsable(); diff --git a/tests/Unit/Services/MetaDataTest.php b/tests/Unit/Services/MetaDataTest.php index 716280a9c..26f4a80ee 100644 --- a/tests/Unit/Services/MetaDataTest.php +++ b/tests/Unit/Services/MetaDataTest.php @@ -3,8 +3,11 @@ declare(strict_types=1); use App\Services\MetaData; +use GuzzleHttp\Exception\TransferException; use GuzzleHttp\Promise\RejectedPromise; +use GuzzleHttp\Psr7\Exception\MalformedUriException; use Illuminate\Http\Client\ConnectionException; +use Illuminate\Http\Client\HttpClientException; use Illuminate\Http\UploadedFile; use Illuminate\Support\Facades\Cache; use Illuminate\Support\Facades\Http; @@ -261,3 +264,21 @@ expect(round(MetaData::CARD_WIDTH / MetaData::CARD_HEIGHT, 2)) ->toBe(round(16 / 9, 2)); }); + +it('handles all exceptions', function (Exception $exception) { + $url = 'https://laravel.com'; + + Http::fake([ + $url => fn ($request) => new RejectedPromise($exception), + ]); + + $service = new MetaData($url); + $data = $service->fetch(); + + expect($data->isEmpty())->toBeTrue(); +})->with([ + new ConnectionException('Connection error'), + new MalformedUriException('Malformed URI'), + new HttpClientException('Not Found'), + new TransferException('Transfer error'), +]);