Skip to content

Commit

Permalink
[TASK] Improve code quality to phpstan level 9
Browse files Browse the repository at this point in the history
  • Loading branch information
schliesser committed May 12, 2024
1 parent 1273e84 commit efb30c7
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 12 deletions.
4 changes: 4 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,7 @@ indent_size = 2
# ChangeLog files
[{ChangeLog,ChangeLog.txt}]
indent_style = tab

# NEON-Files
[*.neon]
indent_size = 2
7 changes: 5 additions & 2 deletions .phpstan.neon
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
parameters:
level: 6
level: 9
paths:
- .
- 'Classes'
- 'Configuration'
- 'Tests'
- 'ext_emconf.php'
ignoreErrors:
- '#Variable \$_EXTKEY might not be defined.#'
55 changes: 47 additions & 8 deletions Classes/Command/CrawlSitemapCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@

namespace Schliesser\Sitecrawler\Command;

use InvalidArgumentException;
use JsonException;
use Schliesser\Sitecrawler\Exception\Exception;
use Schliesser\Sitecrawler\Exception\InvalidFormatException;
use Schliesser\Sitecrawler\Exception\InvalidHeadersException;
use Schliesser\Sitecrawler\Exception\InvalidUrlException;
use Schliesser\Sitecrawler\Helper\Error;
use Symfony\Component\Console\Command\Command;
Expand Down Expand Up @@ -55,15 +59,19 @@ protected function configure(): void
}

/**
* @throws \JsonException
* @throws JsonException
* @throws InvalidFormatException
* @throws InvalidUrlException
* @throws InvalidHeadersException
*/
protected function execute(InputInterface $input, OutputInterface $output): int
{
$io = new SymfonyStyle($input, $output);

$url = (string)$input->getArgument('url');
$url = $input->getArgument('url');
if (!is_string($url)) {
throw new InvalidArgumentException('Argument "url" must be a string!', 1715513484);
}
$io->writeln('Sitemap url: ' . $url, OutputInterface::VERBOSITY_VERBOSE);

// Validate input url
Expand All @@ -72,8 +80,15 @@ protected function execute(InputInterface $input, OutputInterface $output): int
}

// Set headers from argument
if ($input->getArgument('headers')) {
$this->requestHeaders = array_merge($this->requestHeaders, json_decode($input->getArgument('headers'), true, 512, JSON_THROW_ON_ERROR));
if ($headers = $input->getArgument('headers')) {
if (!is_string($headers)) {
throw new InvalidArgumentException('Argument "headers" must be a json string!', 1715513588);
}
$headerArray = json_decode($headers, true, 512, JSON_THROW_ON_ERROR);
if (!is_array($headerArray)) {
throw new InvalidHeadersException('Invalid header json given!', 1715514805);
}
$this->requestHeaders = array_merge($this->requestHeaders, $headerArray);
$io->writeln('Headers: ' . var_export($this->requestHeaders, true), OutputInterface::VERBOSITY_DEBUG);
}

Expand All @@ -100,6 +115,9 @@ protected function execute(InputInterface $input, OutputInterface $output): int

// Return url list as txt/json when format option is set
if ($format = $input->getOption('list')) {
if (!is_string($format)) {
throw new InvalidArgumentException('Argument "list" must be a string!', 1715514158);
}
switch (strtolower($format)) {
case 'json':
$io->write(json_encode(['urls' => $this->urls, 'sitemaps' => $this->sitemaps], JSON_THROW_ON_ERROR));
Expand Down Expand Up @@ -162,17 +180,24 @@ protected function processUrlList(OutputInterface $output): void
$progressBar->finish();
}

/**
* @throws JsonException
* @throws Exception
*/
protected function processUrl(string $url): void
{
$urlData = parse_url($url);
$robotsUrl = false;

// Read the robots.txt file if the URL's path is /robots.txt
if ($urlData['path'] === '/robots.txt') {
if (isset($urlData['path']) && $urlData['path'] === '/robots.txt') {
$robotsUrl = true;
} elseif ((empty($urlData['path']) || $urlData['path'] === '/') && empty($urlData['query'])) {
// No path / empty path: use robots.txt file
// robots.txt must always be located at the site root
if (empty($urlData['scheme']) || empty($urlData['host'])) {
throw new InvalidUrlException('Missing Scheme and Host in url: "' . $url . '"', 1715515452);
}
$url = $urlData['scheme'] . '://' . $urlData['host'] . (isset($urlData['port']) ? ':' . $urlData['port'] : '') . '/robots.txt';
$robotsUrl = true;
}
Expand All @@ -189,12 +214,15 @@ protected function processUrl(string $url): void

/**
* Fetch sitemap from url, parse xml and create list with urls
*
* @throws JsonException
* @throws Exception
*/
protected function getUrlListFromSitemap(string $url): void
{
$arr = $this->getArrayFromUrl($url);

if (isset($arr['sitemap']) && is_array($arr['sitemap']) && !empty($arr['sitemap'])) {
if (!empty($arr['sitemap']) && is_array($arr['sitemap'])) {
// Check for single entry
if (isset($arr['sitemap']['loc'])) {
$this->addSitemap((string)$arr['sitemap']['loc']);
Expand All @@ -204,7 +232,7 @@ protected function getUrlListFromSitemap(string $url): void
$this->addSitemap((string)$sitemap['loc']);
}
}
} elseif (isset($arr['url']) && is_array($arr['url']) && !empty($arr['url'])) {
} elseif (!empty($arr['url']) && is_array($arr['url'])) {
// Check for single entry
if (isset($arr['url']['loc'])) {
$this->addUrl((string)$arr['url']['loc']);
Expand Down Expand Up @@ -240,6 +268,9 @@ protected function readRobotsTxt(string $robotsTxtUrl): array

/**
* @return mixed[]
*
* @throws JsonException
* @throws Exception
*/
protected function getArrayFromUrl(string $url): array
{
Expand All @@ -258,11 +289,19 @@ protected function getArrayFromUrl(string $url): array
}

// Convert SimpleXML Objects to associative array
return json_decode(json_encode($xml), true) ?: [];
$array = json_decode(json_encode($xml, JSON_THROW_ON_ERROR) ?: '', true, 512, JSON_THROW_ON_ERROR);
if (!is_array($array)) {
throw new Exception('Failed to transform xml data', 1715515053);
}

return $array;
}

/**
* Validate url and parse sitemap content
*
* @throws Exception
* @throws JsonException
*/
protected function addSitemap(string $url): void
{
Expand Down
7 changes: 7 additions & 0 deletions Classes/Exception/InvalidHeadersException.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<?php

namespace Schliesser\Sitecrawler\Exception;

/**
 * Exception type for invalid request headers.
 *
 * Declares no members of its own; it only extends the Exception class of
 * this namespace so that header problems can be told apart by type.
 * In the crawl command it is thrown when the "headers" argument decodes
 * to something other than an array.
 */
class InvalidHeadersException extends Exception
{
}
4 changes: 3 additions & 1 deletion Tests/Functional/Command/CrawlSitemapCommandTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
use Schliesser\Sitecrawler\Exception\InvalidUrlException;
use Symfony\Component\Console\Exception\RuntimeException;
use Symfony\Component\Console\Tester\CommandTester;
use Throwable;
use TYPO3\TestingFramework\Core\Functional\FunctionalTestCase;

class CrawlSitemapCommandTest extends FunctionalTestCase
Expand All @@ -29,10 +30,11 @@ protected function setUp(): void
* @test
*
* @param string[] $parameters
* @param class-string<Throwable>|null $expectedError
*
* @dataProvider commandDataProvider
*/
public function crawlSitemapCommandTest(array $parameters, string $expectedOutput, string $expectedError = ''): void
public function crawlSitemapCommandTest(array $parameters, string $expectedOutput, ?string $expectedError = null): void
{
$arguments = [];
if (!empty($parameters)) {
Expand Down
2 changes: 1 addition & 1 deletion ext_emconf.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
'version' => '2.0.1',
'constraints' => [
'depends' => [
'typo3' => '10.4.0-12.4.99',
'typo3' => '12.4.0-13.4.99',
],
],
];

0 comments on commit efb30c7

Please sign in to comment.