diff --git a/README.rst b/README.rst index 88ceb82..a5678b0 100644 --- a/README.rst +++ b/README.rst @@ -64,7 +64,10 @@ To validate files, you need to instantiate a validator (I love OO). # if there are any error messages, they are retrievable through this call submission_file_validator.get_messages() - + # the error messages can be printed + submission_file_validator.print_errors(submission_file_path) + + Data file validation is exactly the same. .. code:: python @@ -79,6 +82,9 @@ Data file validation is exactly the same. # if there are any error messages, they are retrievable through this call data_file_validator.get_messages() + # the error messages can be printed + data_file_validator.print_errors('data.yaml') + Optionally, if you have already loaded the YAML object, then you can pass it through as a data object. You must also pass through the file_path since this is used as a key diff --git a/hepdata_validator/data_file_validator.py b/hepdata_validator/data_file_validator.py index 282958e..4b237a3 100644 --- a/hepdata_validator/data_file_validator.py +++ b/hepdata_validator/data_file_validator.py @@ -66,41 +66,62 @@ def load_custom_schema(self, type, schema_file_path=None): raise UnsupportedDataSchemaException( message="There is no schema defined for the '{0}' data type.".format(type)) - def validate(self, file_path): - try: - default_data_schema = json.load( - open(self.default_schema_file, 'r')) + def validate(self, **kwargs): + """ + Validates a data file + + :param file_path: path to file to be loaded. + :param data: pre loaded YAML object (optional). + :return: Bool to indicate the validity of the file. + """ + + default_data_schema = json.load(open(self.default_schema_file, 'r')) + + # even though we are using the yaml package to load, + # it supports JSON and YAML + data = kwargs.pop("data", None) + file_path = kwargs.pop("file_path", None) + + if file_path is None: + raise LookupError("file_path argument must be supplied") + + if data is None: try: - data = yaml.load_all(open(file_path, 'r'), Loader=yaml.CLoader) + # We try to load using the CLoader for speed improvements. + try: + data = yaml.load(open(file_path, 'r'), Loader=yaml.CLoader) + except ScannerError as se: + self.add_validation_message(ValidationMessage(file=file_path, message= + 'There was a problem parsing the file.\n' + str(se))) + return False except: #pragma: no cover - data = yaml.load_all(open(file_path, 'r')) #pragma: no cover + try: # pragma: no cover + data = yaml.load(open(file_path, 'r')) # pragma: no cover + except ScannerError as se: # pragma: no cover + self.add_validation_message( + ValidationMessage(file=file_path, message= + 'There was a problem parsing the file.\n' + str(se))) # pragma: no cover + return False - for data_item in data: - if data_item is None: - continue - try: - if 'type' in data_item: - custom_schema = self.load_custom_schema(data_item['type']) - json_validate(data_item, custom_schema) - else: - json_validate(data_item, default_data_schema) + try: - except ValidationError as ve: - self.add_validation_message( - ValidationMessage(file=file_path, - message=ve.message + ' in ' + str(ve.instance))) - if self.has_errors(file_path): - return False + if 'type' in data: + custom_schema = self.load_custom_schema(data['type']) + json_validate(data, custom_schema) else: - return True - except ScannerError as se: + json_validate(data, default_data_schema) + + except ValidationError as ve: + self.add_validation_message( ValidationMessage(file=file_path, - message='There was a problem parsing the file. ' - 'This can be because you forgot spaces ' - 'after colons in your YAML file for instance.\n{0}'.format(se.__repr__())) - ) + message=ve.message + ' in ' + str(ve.instance))) + + if self.has_errors(file_path): + return False + else: + return True class UnsupportedDataSchemaException(Exception): diff --git a/hepdata_validator/submission_file_validator.py b/hepdata_validator/submission_file_validator.py index 7643577..849bb76 100644 --- a/hepdata_validator/submission_file_validator.py +++ b/hepdata_validator/submission_file_validator.py @@ -14,21 +14,20 @@ class SubmissionFileValidator(Validator): """ base_path = os.path.dirname(__file__) default_schema_file = base_path + '/schemas/submission_schema.json' - additonal_info_schema = base_path + '/schemas/additional_info_schema.json' + additional_info_schema = base_path + '/schemas/additional_info_schema.json' def validate(self, **kwargs): """ Validates a submission file + :param file_path: path to file to be loaded. :param data: pre loaded YAML object (optional). :return: Bool to indicate the validity of the file. """ try: - submission_file_schema = json.load( - open(self.default_schema_file, 'r')) + submission_file_schema = json.load(open(self.default_schema_file, 'r')) - additional_file_section_schema = json.load( - open(self.additonal_info_schema, 'r')) + additional_file_section_schema = json.load(open(self.additional_info_schema, 'r')) # even though we are using the yaml package to load, # it supports JSON and YAML @@ -57,18 +56,18 @@ def validate(self, **kwargs): except ValidationError as ve: self.add_validation_message( ValidationMessage(file=file_path, - message=ve.message + ' in ' + str(ve.instance))) + message=ve.message + ' in ' + str(ve.instance))) if self.has_errors(file_path): return False else: return True - except ScannerError as se: - self.add_validation_message( - ValidationMessage(file=file_path, - message='There was a problem parsing the file. ' - 'This can be because you forgot spaces ' - 'after colons in your YAML file for instance. ' - 'Diagnostic information follows.\n' + str(se)) - ) - return False + + except ScannerError as se: # pragma: no cover + self.add_validation_message( # pragma: no cover + ValidationMessage(file=file_path, message= + 'There was a problem parsing the file. ' + 'This can be because you forgot spaces ' + 'after colons in your YAML file for instance. ' + 'Diagnostic information follows.\n' + str(se))) + return False \ No newline at end of file diff --git a/hepdata_validator/version.py b/hepdata_validator/version.py index 9f6500e..4695ce8 100644 --- a/hepdata_validator/version.py +++ b/hepdata_validator/version.py @@ -27,4 +27,4 @@ from __future__ import absolute_import, print_function -__version__ = "0.1.13" +__version__ = "0.1.14" diff --git a/testsuite/test_data/invalid_data_file.yaml b/testsuite/test_data/invalid_data_file.yaml index 26d9f19..0eced3e 100644 --- a/testsuite/test_data/invalid_data_file.yaml +++ b/testsuite/test_data/invalid_data_file.yaml @@ -3,6 +3,4 @@ type: "different" some_variables: - header:{name: SQRT(S), units: GEV} values: - - value: 7000 - ---- \ No newline at end of file + - value: 7000 \ No newline at end of file diff --git a/testsuite/test_data/valid_file_custom.yaml b/testsuite/test_data/valid_file_custom.yaml index ded4e43..cf71fab 100644 --- a/testsuite/test_data/valid_file_custom.yaml +++ b/testsuite/test_data/valid_file_custom.yaml @@ -3,6 +3,4 @@ type: "different" some_variables: - header: {name: SQRT(S), units: GEV} values: - - value: 7000 - ---- \ No newline at end of file + - value: 7000 \ No newline at end of file