Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for validation of XML submission files #2437

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,11 @@ DB_PASSWORD=secret
# to some value other than 'sync'.
#REMOTE_WORKERS=false

# Whether or not submission XML files are validated before being processed.
# Enabling this setting will trigger a scan through XML files uploaded in
# each submission in order to preemptively reject malformed files.
#VALIDATE_XML_SUBMISSIONS=true

# Should we show the most recent submission time for a project or subproject?
# Disabling this feature can improve rendering performance of index.php
# for projects with lots of subproject builds.
Expand Down
60 changes: 29 additions & 31 deletions app/Console/Commands/ValidateXml.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use App\Utils\SubmissionUtils;
use Illuminate\Console\Command;
use DOMDocument;
use App\Exceptions\CDashXMLValidationException;

class ValidateXml extends Command
{
Expand All @@ -26,66 +26,64 @@ public function handle(): int
{
// parse all input files from command line
$xml_files_args = $this->argument('xml_file');
$schemas_dir = base_path()."/app/Validators/Schemas";

// process each of the input files
$has_errors = false;
$has_skipped = false;
foreach ($xml_files_args as $input_xml_file) {
// determine the file type by peeking at its contents
if (!file_exists($input_xml_file)) {
$this->error("ERROR: Input file does not exist: '{$input_xml_file}'");
$has_skipped = true;
continue;
}
$xml_file_handle = fopen($input_xml_file, 'r');
if ($xml_file_handle === false) {
$this->error("ERROR: Could not open file: '{$input_xml_file}'");
$has_errors = true;
$has_skipped = true;
continue;
}
$xml_type = SubmissionUtils::get_xml_type($xml_file_handle)['xml_type'];
fclose($xml_file_handle);

// verify we identified a valid xml type
if ($xml_type === '') {
$this->error("ERROR: Could not determine submission"
." file type for: '{$input_xml_file}'");
try {
$xml_schema = SubmissionUtils::get_xml_type($xml_file_handle, $input_xml_file)['xml_schema'];
} catch (CDashXMLValidationException $e) {
foreach ($e->getDecodedMessage() as $error) {
$this->error($error);
}
$has_errors = true;
continue;
} finally {
fclose($xml_file_handle);
}

// verify we can find a corresponding schema file
$schema_file = "{$schemas_dir}/{$xml_type}.xsd";
if (!file_exists($schema_file)) {
$this->error("ERROR: Could not find schema file '{$schema_file}'"
." corresonding to input: '{$input_xml_file}'");
$has_errors = true;
if (!isset($xml_schema)) {
$this->warn("WARNING: Skipped input file '{$input_xml_file}' as validation"
." of this file format is currently not supported.");
$has_skipped = true;
continue;
}

// let us control the failures so we can continue
// parsing all the files instead of crashing midway
libxml_use_internal_errors(true);

// load the input file to be validated
$xml = new DOMDocument();
$xml->load($input_xml_file, LIBXML_PARSEHUGE);

// run the validator and collect errors if there are any
if (!$xml->schemaValidate($schema_file)) {
$errors = libxml_get_errors();
foreach ($errors as $error) {
if ($error->level > 2) {
$this->error("ERROR: {$error->message} in {$error->file},"
." line: {$error->line}, column: {$error->column}");
}
try {
SubmissionUtils::validate_xml($input_xml_file, $xml_schema);
} catch (CDashXMLValidationException $e) {
foreach ($e->getDecodedMessage() as $error) {
$this->error($error);
}
libxml_clear_errors();
$has_errors = true;
continue;
}

$this->line("Validated file: {$input_xml_file}.");
}

// finally, report the results
if ($has_errors) {
$this->error("FAILED: Some XML file checks did not pass!");
return Command::FAILURE;
} elseif ($has_skipped) {
$this->error("FAILED: Some XML file checks were skipped!");
return Command::FAILURE;
} else {
$this->line("SUCCESS: All XML file checks passed.");
return Command::SUCCESS;
Expand Down
31 changes: 31 additions & 0 deletions app/Exceptions/CDashXMLValidationException.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
<?php

namespace App\Exceptions;

use Exception;
use RuntimeException;

class CDashXMLValidationException extends RuntimeException
{
/**
* @param array<int,string> $message
*/
public function __construct(array $message = [], int $code = 0, Exception $previous = null)
{
$encoded_msg = json_encode($message);
$encoded_msg = $encoded_msg===false ? "" : $encoded_msg;
parent::__construct($encoded_msg, $code, $previous);
}

/**
* @return array<int,string>
*/
public function getDecodedMessage(bool $assoc = false): array
{
$decoded_msg = json_decode($this->getMessage(), $assoc);
if (!isset($decoded_msg) || is_bool($decoded_msg)) {
$decoded_msg = ["An XML validation error has occurred!"];
}
return $decoded_msg;
}
}
2 changes: 1 addition & 1 deletion app/Jobs/ProcessSubmission.php
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ private function doSubmit($filename, $projectid, $buildid = null,
}

// Parse the XML file
$handler = ctest_parse($filehandle, $projectid, $expected_md5);
$handler = ctest_parse($filehandle, $filename, $projectid, $expected_md5);
fclose($filehandle);
unset($filehandle);

Expand Down
63 changes: 61 additions & 2 deletions app/Utils/SubmissionUtils.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,22 @@
use CDash\Database;
use CDash\Model\Build;
use CDash\Model\BuildUpdate;
use DOMDocument;
use App\Exceptions\CDashXMLValidationException;

class SubmissionUtils
{

/**
* Figure out what type of XML file this is
* @return array<string,mixed>
* @throws CDashXMLValidationException
*/
public static function get_xml_type(mixed $filehandle): array
public static function get_xml_type(mixed $filehandle, string $xml_file): array
{
$file = '';
$handler = null;
$schemas_dir = base_path()."/app/Validators/Schemas";
$schema_file = null;
// read file contents until we recognize its elements
while ($file === '' && !feof($filehandle)) {
$content = fread($filehandle, 8192);
Expand All @@ -30,56 +34,111 @@ public static function get_xml_type(mixed $filehandle): array
// Should be first otherwise confused with Build
$handler = \UpdateHandler::class;
$file = 'Update';
$schema_file = "{$schemas_dir}/{$file}.xsd";
} elseif (str_contains($content, '<Build')) {
$handler = \BuildHandler::class;
$file = 'Build';
$schema_file = "{$schemas_dir}/{$file}.xsd";
} elseif (str_contains($content, '<Configure')) {
$handler = \ConfigureHandler::class;
$file = 'Configure';
$schema_file = "{$schemas_dir}/{$file}.xsd";
} elseif (str_contains($content, '<Testing')) {
$handler = \TestingHandler::class;
$file = 'Test';
$schema_file = "{$schemas_dir}/{$file}.xsd";
} elseif (str_contains($content, '<CoverageLog')) {
// Should be before coverage
$handler = \CoverageLogHandler::class;
$file = 'CoverageLog';
$schema_file = "{$schemas_dir}/{$file}.xsd";
} elseif (str_contains($content, '<Coverage')) {
$handler = \CoverageHandler::class;
$file = 'Coverage';
$schema_file = "{$schemas_dir}/{$file}.xsd";
} elseif (str_contains($content, '<report')) {
$handler = \CoverageJUnitHandler::class;
$file = 'CoverageJUnit';
} elseif (str_contains($content, '<Notes')) {
$handler = \NoteHandler::class;
$file = 'Notes';
$schema_file = "{$schemas_dir}/{$file}.xsd";
} elseif (str_contains($content, '<DynamicAnalysis')) {
$handler = \DynamicAnalysisHandler::class;
$file = 'DynamicAnalysis';
$schema_file = "{$schemas_dir}/{$file}.xsd";
} elseif (str_contains($content, '<Project')) {
$handler = \ProjectHandler::class;
$file = 'Project';
$schema_file = "{$schemas_dir}/{$file}.xsd";
} elseif (str_contains($content, '<Upload')) {
$handler = \UploadHandler::class;
$file = 'Upload';
$schema_file = "{$schemas_dir}/{$file}.xsd";
} elseif (str_contains($content, '<testsuite')) {
$handler = \TestingJUnitHandler::class;
$file = 'TestJUnit';
} elseif (str_contains($content, '<Done')) {
$handler = \DoneHandler::class;
$file = 'Done';
$schema_file = "{$schemas_dir}/{$file}.xsd";
}
}

// restore the file descriptor to beginning of file
rewind($filehandle);

// perform minimal error checking as a sanity check
if ($file === '') {
throw new CDashXMLValidationException(["ERROR: Could not determine submission"
." file type for: '{$xml_file}'"]);
}
if (isset($schema_file) && !file_exists($schema_file)) {
throw new CDashXMLValidationException(["ERROR: Could not find schema file '{$schema_file}'"
." to validate input file: '{$xml_file}'"]);
}

return [
'file_handle' => $filehandle,
'xml_handler' => $handler,
'xml_type' => $file,
'xml_schema' => $schema_file,
];
}

/**
* Validate the given XML file based on its type
* @throws CDashXMLValidationException
*/
public static function validate_xml(string $xml_file, string $schema_file): void
{
$errors = [];

// let us control the failures so we can continue
// parsing files instead of crashing midway
libxml_use_internal_errors(true);

// load the input file to be validated
$xml = new DOMDocument();
$xml->load($xml_file, LIBXML_PARSEHUGE);

// run the validator and collect errors if there are any
if (!$xml->schemaValidate($schema_file)) {
$validation_errors = libxml_get_errors();
foreach ($validation_errors as $error) {
if ($error->level === LIBXML_ERR_ERROR || $error->level === LIBXML_ERR_FATAL) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line is what's causing the local testing of mine to fail. If I remove the LIBXML_ERR_ERROR part of the check, the submissions are successful.

My submission is from CTest 3.22.1 while the testing is 3.25.1. Otherwise the configuration files seem to be the same structure. If I submit via CTest, it fails, but copying the content over to the Docker machine and running the manual validation passes.

@williamjallen, I'm happy to demonstrate if you'd like to see my setup

Copy link
Member

@josephsnyder josephsnyder Sep 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm unable to replicate the issue above on further testing. I'm not quite sure what happened

Forgot to reintroduce the environment variable, failures are still around. Sorry!

$errors[] = "ERROR: {$error->message} in {$error->file},"
." line: {$error->line}, column: {$error->column}";
}
}
libxml_clear_errors();
}

if (count($errors) !== 0) {
throw new CDashXMLValidationException($errors);
}
}

/** Add a new build */
public static function add_build(Build $build)
{
Expand Down
27 changes: 25 additions & 2 deletions app/cdash/include/ctestparser.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Facades\Storage;
use App\Utils\SubmissionUtils;
use App\Exceptions\CDashXMLValidationException;

class CDashParseException extends RuntimeException
{
Expand Down Expand Up @@ -148,7 +149,7 @@ function parse_put_submission($filehandler, $projectid, $expected_md5)
}

/** Main function to parse the incoming xml from ctest */
function ctest_parse($filehandle, $projectid, $expected_md5 = '')
function ctest_parse($filehandle, string $filename, $projectid, $expected_md5 = '')
{
// Check if this is a new style PUT submission.
try {
Expand All @@ -168,10 +169,32 @@ function ctest_parse($filehandle, $projectid, $expected_md5 = '')
}

// Figure out what type of XML file this is.
$xml_info = SubmissionUtils::get_xml_type($filehandle);
try {
$xml_info = SubmissionUtils::get_xml_type($filehandle, $filename);
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that the path passed to validate_xml() had to be modified to storage_path("app/".$filename), should this path being passed to get_xml_type() be changed as well? If so, I think these two are the only usages of the $filename function argument, so better yet, modify the value being passed to ctest_parse() from app/Jobs/ProcessSubmission.php.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sbelsk, the $xml_file variable within the get_xml_type only seems to be used in the error conditions at the end. if no file type and schema file are found. The content reading is all done using the $filehandle object. I'm not sure if $filehandle will ever not be valid yet.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. Might still be worth changing it for the logging in lines 191-193 ctest_parse() in case the XML validation fails.

} catch (CDashXMLValidationException $e) {
foreach ($e->getDecodedMessage() as $error) {
Log::error($error);
}
return false;
}
$filehandle = $xml_info['file_handle'];
$handler_ref = $xml_info['xml_handler'];
$file = $xml_info['xml_type'];
$schema_file = $xml_info['xml_schema'];

// If validation is enabled and if this file has a corresponding schema, validate it
\Log::critical(((bool) config('cdash.validate_xml_submissions')));
\Log::critical($schema_file);
if (((bool) config('cdash.validate_xml_submissions')) === true && isset($schema_file)) {
try {
SubmissionUtils::validate_xml($filename, $schema_file);
} catch (CDashXMLValidationException $e) {
foreach ($e->getDecodedMessage() as $error) {
Log::error("Validating $filename: ".$error);
}
abort(400, "Xml validation failed: rejected file $filename");
}
}

$handler = isset($handler_ref) ? new $handler_ref($projectid) : null;

Expand Down
5 changes: 5 additions & 0 deletions app/cdash/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -605,5 +605,10 @@ set_tests_properties(misassignedconfigure PROPERTIES DEPENDS /Feature/SubProject
add_laravel_test(/Feature/RemoteWorkers)
set_tests_properties(/Feature/RemoteWorkers PROPERTIES DEPENDS install_3)

add_laravel_test(/Unit/app/Console/Command/ValidateXmlCommandTest)

add_php_test(submissionvalidation)
set_tests_properties(submissionvalidation PROPERTIES DEPENDS install_3)

add_subdirectory(ctest)
add_subdirectory(autoremovebuilds)
Loading
Loading