Skip to content

Commit

Permalink
data file validator: allow data to be passed (needed by hepdata-converter)
Browse files Browse the repository at this point in the history

Signed-off-by: Graeme Watt <[email protected]>
  • Loading branch information
GraemeWatt committed Oct 22, 2016
1 parent 464862e commit d892918
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 50 deletions.
8 changes: 7 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,10 @@ To validate files, you need to instantiate a validator (I love OO).
# if there are any error messages, they are retrievable through this call
submission_file_validator.get_messages()
# the error messages can be printed
submission_file_validator.print_errors(submission_file_path)
Data file validation is exactly the same.

.. code:: python
Expand All @@ -79,6 +82,9 @@ Data file validation is exactly the same.
# if there are any error messages, they are retrievable through this call
data_file_validator.get_messages()
# the error messages can be printed
data_file_validator.print_errors('data.yaml')
Optionally, if you have already loaded the YAML object, you can pass it in
as a data object. You must also pass in the file_path, since this is used as a key
Expand Down
75 changes: 48 additions & 27 deletions hepdata_validator/data_file_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,41 +66,62 @@ def load_custom_schema(self, type, schema_file_path=None):
raise UnsupportedDataSchemaException(
message="There is no schema defined for the '{0}' data type.".format(type))

def validate(self, file_path):
try:
default_data_schema = json.load(
open(self.default_schema_file, 'r'))
def validate(self, **kwargs):
"""
Validates a data file
:param file_path: path to file to be loaded.
:param data: pre-loaded YAML object (optional).
:return: Bool to indicate the validity of the file.
"""

default_data_schema = json.load(open(self.default_schema_file, 'r'))

# even though we are using the yaml package to load,
# it supports JSON and YAML
data = kwargs.pop("data", None)
file_path = kwargs.pop("file_path", None)

if file_path is None:
raise LookupError("file_path argument must be supplied")

if data is None:

try:
data = yaml.load_all(open(file_path, 'r'), Loader=yaml.CLoader)
# We try to load using the CLoader for speed improvements.
try:
data = yaml.load(open(file_path, 'r'), Loader=yaml.CLoader)
except ScannerError as se:
self.add_validation_message(ValidationMessage(file=file_path, message=
'There was a problem parsing the file.\n' + str(se)))
return False
except: #pragma: no cover
data = yaml.load_all(open(file_path, 'r')) #pragma: no cover
try: # pragma: no cover
data = yaml.load(open(file_path, 'r')) # pragma: no cover
except ScannerError as se: # pragma: no cover
self.add_validation_message(
ValidationMessage(file=file_path, message=
'There was a problem parsing the file.\n' + str(se))) # pragma: no cover
return False

for data_item in data:
if data_item is None:
continue
try:
if 'type' in data_item:
custom_schema = self.load_custom_schema(data_item['type'])
json_validate(data_item, custom_schema)
else:
json_validate(data_item, default_data_schema)
try:

except ValidationError as ve:
self.add_validation_message(
ValidationMessage(file=file_path,
message=ve.message + ' in ' + str(ve.instance)))
if self.has_errors(file_path):
return False
if 'type' in data:
custom_schema = self.load_custom_schema(data['type'])
json_validate(data, custom_schema)
else:
return True
except ScannerError as se:
json_validate(data, default_data_schema)

except ValidationError as ve:

self.add_validation_message(
ValidationMessage(file=file_path,
message='There was a problem parsing the file. '
'This can be because you forgot spaces '
'after colons in your YAML file for instance.\n{0}'.format(se.__repr__()))
)
message=ve.message + ' in ' + str(ve.instance)))

if self.has_errors(file_path):
return False
else:
return True


class UnsupportedDataSchemaException(Exception):
Expand Down
29 changes: 14 additions & 15 deletions hepdata_validator/submission_file_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,20 @@ class SubmissionFileValidator(Validator):
"""
base_path = os.path.dirname(__file__)
default_schema_file = base_path + '/schemas/submission_schema.json'
additonal_info_schema = base_path + '/schemas/additional_info_schema.json'
additional_info_schema = base_path + '/schemas/additional_info_schema.json'

def validate(self, **kwargs):
"""
Validates a submission file
:param file_path: path to file to be loaded.
:param data: pre-loaded YAML object (optional).
:return: Bool to indicate the validity of the file.
"""
try:
submission_file_schema = json.load(
open(self.default_schema_file, 'r'))
submission_file_schema = json.load(open(self.default_schema_file, 'r'))

additional_file_section_schema = json.load(
open(self.additonal_info_schema, 'r'))
additional_file_section_schema = json.load(open(self.additional_info_schema, 'r'))

# even though we are using the yaml package to load,
# it supports JSON and YAML
Expand Down Expand Up @@ -57,18 +56,18 @@ def validate(self, **kwargs):
except ValidationError as ve:
self.add_validation_message(
ValidationMessage(file=file_path,
message=ve.message + ' in ' + str(ve.instance)))
message=ve.message + ' in ' + str(ve.instance)))

if self.has_errors(file_path):
return False
else:
return True
except ScannerError as se:
self.add_validation_message(
ValidationMessage(file=file_path,
message='There was a problem parsing the file. '
'This can be because you forgot spaces '
'after colons in your YAML file for instance. '
'Diagnostic information follows.\n' + str(se))
)
return False

except ScannerError as se: # pragma: no cover
self.add_validation_message( # pragma: no cover
ValidationMessage(file=file_path, message=
'There was a problem parsing the file. '
'This can be because you forgot spaces '
'after colons in your YAML file for instance. '
'Diagnostic information follows.\n' + str(se)))
return False
2 changes: 1 addition & 1 deletion hepdata_validator/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@

from __future__ import absolute_import, print_function

__version__ = "0.1.13"
__version__ = "0.1.14"
4 changes: 1 addition & 3 deletions testsuite/test_data/invalid_data_file.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,4 @@ type: "different"
some_variables:
- header:{name: SQRT(S), units: GEV}
values:
- value: 7000

---
- value: 7000
4 changes: 1 addition & 3 deletions testsuite/test_data/valid_file_custom.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,4 @@ type: "different"
some_variables:
- header: {name: SQRT(S), units: GEV}
values:
- value: 7000

---
- value: 7000

0 comments on commit d892918

Please sign in to comment.