From ace4feb13f603f220a7f08f9b63e3e7996141b1c Mon Sep 17 00:00:00 2001 From: Gianna Jordan <61707471+GiaJordan@users.noreply.github.com> Date: Thu, 27 Oct 2022 12:15:09 -0700 Subject: [PATCH 01/36] update validation and error gen objects with schemagenerator --- schematic/models/GE_Helpers.py | 6 +- schematic/models/validate_attribute.py | 339 ++++++++++++++++++------- schematic/models/validate_manifest.py | 13 +- 3 files changed, 266 insertions(+), 92 deletions(-) diff --git a/schematic/models/GE_Helpers.py b/schematic/models/GE_Helpers.py index 928fa5121..83598a501 100644 --- a/schematic/models/GE_Helpers.py +++ b/schematic/models/GE_Helpers.py @@ -23,6 +23,7 @@ from great_expectations.data_context.types.resource_identifiers import ExpectationSuiteIdentifier from schematic.models.validate_attribute import GenerateError +from schematic.schemas.generator import SchemaGenerator from schematic.utils.validate_utils import rule_in_rule_list logger = logging.getLogger(__name__) @@ -350,7 +351,8 @@ def generate_errors( validation_results: Dict, validation_types: Dict, errors: List, - warnings: List + warnings: List, + sg: SchemaGenerator, ): """ Purpose: @@ -413,6 +415,7 @@ def generate_errors( row_num = row+2, attribute_name = errColumn, invalid_entry = value, + sg = sg, ) ) elif validation_types[rule.split(" ")[0]]['type']=='regex_validation': @@ -427,6 +430,7 @@ def generate_errors( module_to_call = 'match', attribute_name = errColumn, invalid_entry = value, + sg = sg, ) ) elif validation_types[rule.split(" ")[0]]['type']=='content_validation': diff --git a/schematic/models/validate_attribute.py b/schematic/models/validate_attribute.py index 9ab4ec88e..1fafa866f 100644 --- a/schematic/models/validate_attribute.py +++ b/schematic/models/validate_attribute.py @@ -26,7 +26,7 @@ logger = logging.getLogger(__name__) class GenerateError: - def generate_schema_error(row_num: str, attribute_name: str, error_msg: str)-> List[str]: + def generate_schema_error(row_num: str, attribute_name: str, error_msg: str, sg: SchemaGenerator,)-> List[str]: ''' Purpose: Process error messages generated from schema Input: @@ -34,6 +34,7 @@ def generate_schema_error(row_num: str, attribute_name: str, error_msg: str)-> L - attribute_name: the attribute the error occurred on. - error_msg: Error message ''' + error_col = attribute_name # Attribute name error_row = row_num # index row of the manifest where the error presented. error_message = error_msg @@ -46,7 +47,7 @@ def generate_schema_error(row_num: str, attribute_name: str, error_msg: str)-> L def generate_list_error( list_string: str, row_num: str, attribute_name: str, list_error: str, - invalid_entry:str, + invalid_entry:str, sg: SchemaGenerator, ) -> List[str]: """ Purpose: @@ -60,18 +61,43 @@ def generate_list_error( Logging.error. Errors: List[str] Error details for further storage. """ + + error_list = [] + warning_list = [] + + #Determine which, if any, message to raise + raises = GenerateError.get_message_level( + attribute_name = attribute_name, + sg = sg, + invalid_entry = invalid_entry, + ) + + #if a message needs to be raised, get the approrpiate function to do so + if raises: + logLevel = getattr(logging,raises) + else: + return error_list, warning_list + if list_error == "not_comma_delimited": error_str = ( f"For attribute {attribute_name} in row {row_num} it does not " f"appear as if you provided a comma delimited string. Please check " f"your entry ('{list_string}'') and try again." ) - logging.error(error_str) + logLevel(error_str) error_row = row_num # index row of the manifest where the error presented. error_col = attribute_name # Attribute name error_message = error_str error_val = invalid_entry - return [error_row, error_col, error_message, error_val] + #return error and empty list for warnings + + if raises == 'error': + error_list = [error_row, error_col, error_message, error_val] + #return warning and empty list for errors + elif raises == 'warning': + warning_list = [error_row, error_col, error_message, error_val] + + return error_list, warning_list def generate_regex_error( val_rule: str, @@ -79,7 +105,8 @@ def generate_regex_error( row_num: str, module_to_call: str, attribute_name: str, - invalid_entry:str, + invalid_entry: str, + sg: SchemaGenerator, ) -> List[str]: """ Purpose: @@ -95,19 +122,43 @@ def generate_regex_error( Logging.error. Errors: List[str] Error details for further storage. """ + error_list = [] + warning_list = [] + + #Determine which, if any, message to raise + raises = GenerateError.get_message_level( + attribute_name = attribute_name, + sg = sg, + invalid_entry = invalid_entry, + ) + + #if a message needs to be raised, get the approrpiate function to do so + if raises: + logLevel = getattr(logging,raises) + else: + return error_list, warning_list + regex_error_string = ( f"For the attribute {attribute_name}, on row {row_num}, the string is not properly formatted. " f'It should follow the following re.{module_to_call} pattern "{reg_expression}".' ) - logging.error(regex_error_string) + logLevel(regex_error_string) error_row = row_num # index row of the manifest where the error presented. error_col = attribute_name # Attribute name error_message = regex_error_string error_val = invalid_entry - return [error_row, error_col, error_message, error_val] + + #return error and empty list for warnings + if raises == 'error': + error_list = [error_row, error_col, error_message, error_val] + #return warning and empty list for errors + elif raises == 'warning': + warning_list = [error_row, error_col, error_message, error_val] + + return error_list, warning_list def generate_type_error( - val_rule: str, row_num: str, attribute_name: str, invalid_entry:str, + val_rule: str, row_num: str, attribute_name: str, invalid_entry:str, sg: SchemaGenerator, ) -> List[str]: """ Purpose: @@ -121,20 +172,46 @@ def generate_type_error( Logging.error. Errors: List[str] Error details for further storage. """ + + error_list = [] + warning_list = [] + + #Determine which, if any, message to raise + raises = GenerateError.get_message_level( + attribute_name = attribute_name, + sg = sg, + invalid_entry = invalid_entry, + ) + + #if a message needs to be raised, get the approrpiate function to do so + if raises: + logLevel = getattr(logging,raises) + else: + return error_list, warning_list + type_error_str = ( f"On row {row_num} the attribute {attribute_name} " f"does not contain the proper value type {val_rule}." ) - logging.error(type_error_str) + logLevel(type_error_str) error_row = row_num # index row of the manifest where the error presented. error_col = attribute_name # Attribute name error_message = type_error_str error_val = invalid_entry - return [error_row, error_col, error_message, error_val] + + + #return error and empty list for warnings + if raises == 'error': + error_list = [error_row, error_col, error_message, error_val] + #return warning and empty list for errors + elif raises == 'warning': + warning_list = [error_row, error_col, error_message, error_val] + + return error_list, warning_list def generate_url_error( url: str, url_error: str, row_num: str, attribute_name: str, argument: str, - invalid_entry:str, + invalid_entry:str, sg: SchemaGenerator, ) -> List[str]: """ Purpose: @@ -159,6 +236,24 @@ def generate_url_error( Logging.error. Errors: List[str] Error details for further storage. """ + + error_list = [] + warning_list = [] + + #Determine which, if any, message to raise + raises = GenerateError.get_message_level( + attribute_name = attribute_name, + sg = sg, + invalid_entry = invalid_entry, + ) + + #if a message needs to be raised, get the approrpiate function to do so + if raises: + logLevel = getattr(logging,raises) + else: + return error_list, warning_list + + error_row = row_num # index row of the manifest where the error presented. error_col = attribute_name # Attribute name if url_error == "invalid_url": @@ -167,7 +262,7 @@ def generate_url_error( f"conform to the standards of a URL. Please make sure you are entering a real, working URL " f"as required by the Schema." ) - logging.error(invalid_url_error_string) + logLevel(invalid_url_error_string) error_message = invalid_url_error_string error_val = invalid_entry elif url_error == "arg_error": @@ -175,7 +270,7 @@ def generate_url_error( f"For the attribute '{attribute_name}', on row {row_num}, the URL provided ({url}) does not " f"conform to the schema specifications and does not contain the required element: {argument}." ) - logging.error(arg_error_string) + logLevel(arg_error_string) error_message = arg_error_string error_val = f"URL Error: Argument Error" elif url_error == "random_entry": @@ -183,18 +278,28 @@ def generate_url_error( f"For the attribute '{attribute_name}', on row {row_num}, the input provided ('{url}'') does not " f"look like a URL, please check input and try again." ) - logging.error(random_entry_error_str) + logLevel(random_entry_error_str) error_message = random_entry_error_str error_val = f"URL Error: Random Entry" - return [error_row, error_col, error_message, error_val] + + #return error and empty list for warnings + if raises == 'error': + error_list = [error_row, error_col, error_message, error_val] + #return warning and empty list for errors + elif raises == 'warning': + warning_list = [error_row, error_col, error_message, error_val] + + return error_list, warning_list def generate_cross_warning( val_rule: str, attribute_name: str, + sg: SchemaGenerator, matching_manifests = [], missing_manifest_ID = None, invalid_entry = None, row_num = None, + ) -> List[str]: """ Purpose: @@ -211,7 +316,23 @@ def generate_cross_warning( Logging.error. Errors: List[str] Error details for further storage. """ + error_list = [] + warning_list = [] + #Determine which, if any, message to raise + raises = GenerateError.get_message_level( + val_rule=val_rule, + attribute_name = attribute_name, + sg = sg, + invalid_entry = invalid_entry, + ) + + #if a message needs to be raised, get the approrpiate function to do so + if raises: + logLevel = getattr(logging,raises) + else: + return error_list, warning_list + if val_rule.__contains__('matchAtLeast'): cross_error_str = ( f"Value(s) {invalid_entry} from row(s) {row_num} of the attribute {attribute_name} in the source manifest are missing." ) @@ -232,13 +353,21 @@ def generate_cross_warning( f"Value(s) {invalid_entry} from row(s) {row_num} of the attribute {attribute_name} in the source manifest are not present in only one other manifest. " ) - logging.warning(cross_error_str) + logLevel(cross_error_str) error_row = row_num # index row of the manifest where the error presented. error_col = attribute_name # Attribute name error_message = cross_error_str error_val = invalid_entry #Value from source manifest missing from targets - return [error_row, error_col, error_message, error_val] + #return error and empty list for warnings + if raises == 'error': + error_list = [error_row, error_col, error_message, error_val] + #return warning and empty list for errors + elif raises == 'warning': + warning_list = [error_row, error_col, error_message, error_val] + + return error_list, warning_list + def generate_content_error( val_rule: str, @@ -270,11 +399,12 @@ def generate_content_error( warning_list = [] error_col = attribute_name # Attribute name - #Determine whether to raise a warning or error + #Determine which, if any, message to raise raises = GenerateError.get_message_level( val_rule=val_rule, attribute_name = attribute_name, sg = sg, + invalid_entry = error_val, ) #if a message needs to be raised, get the approrpiate function to do so @@ -328,9 +458,10 @@ def generate_content_error( return error_list, warning_list def get_message_level( - val_rule: str, sg: SchemaGenerator, attribute_name: str, + invalid_entry, + val_rule: str = None, ) -> str: """ Purpose: @@ -348,24 +479,26 @@ def get_message_level( 'error' or 'warning' """ - - rule_parts = val_rule.split(" ") - - #See if the node is required, if it is and the column is missing then a requirement error will be raised later; no error or waring logged here if recommended and required but missing - if val_rule.startswith('recommended') and sg.is_node_required(node_display_name=attribute_name): - level = None - - #if not required, use the message level specified in the rule - elif rule_parts[-1].lower() == 'error': - level = 'error' + if val_rule: + rule_parts = val_rule.split(" ") + + #See if the node is required, if it is and the column is missing then a requirement error will be raised later; no error or waring logged here if recommended and required but missing + if val_rule.startswith('recommended') and sg.is_node_required(node_display_name=attribute_name): + level = None + + #if not required, use the message level specified in the rule + elif rule_parts[-1].lower() == 'error': + level = 'error' - elif rule_parts[-1].lower() == 'warning': - level = 'warning' - - #if no level specified, the default level is warning + elif rule_parts[-1].lower() == 'warning': + level = 'warning' + + #if no level specified, the default level is warning + else: + level = 'warning' else: - level = 'warning' - + level = 'error' + return level class ValidateAttribute(object): @@ -411,7 +544,7 @@ def get_target_manifests(target_component, project_scope: List): return synStore, target_manifest_IDs, target_dataset_IDs def list_validation( - self, val_rule: str, manifest_col: pd.core.series.Series + self, val_rule: str, manifest_col: pd.core.series.Series, sg: SchemaGenerator, ) -> (List[List[str]], List[List[str]], pd.core.series.Series): """ Purpose: @@ -444,15 +577,18 @@ def list_validation( for i, list_string in enumerate(manifest_col): if not re.fullmatch(csv_re,list_string): list_error = "not_comma_delimited" - errors.append( - GenerateError.generate_list_error( + vr_errors, vr_warnings = GenerateError.generate_list_error( list_string, row_num=str(i + 2), attribute_name=manifest_col.name, list_error=list_error, - invalid_entry=manifest_col[i] + invalid_entry=manifest_col[i], + sg = sg, ) - ) + if vr_errors: + errors.append(vr_errors) + if vr_warnings: + warnings.append(vr_warnings) # Convert string to list. @@ -461,7 +597,7 @@ def list_validation( return errors, warnings, manifest_col def regex_validation( - self, val_rule: str, manifest_col: pd.core.series.Series + self, val_rule: str, manifest_col: pd.core.series.Series, sg: SchemaGenerator, ) -> (List[List[str]], List[List[str]]): """ Purpose: @@ -513,16 +649,19 @@ def regex_validation( if not bool(module_to_call(reg_expression, re_to_check)) and bool( re_to_check ): - errors.append( - GenerateError.generate_regex_error( + vr_errors, vr_warnings = GenerateError.generate_regex_error( val_rule, reg_expression, row_num=str(i + 2), module_to_call=reg_exp_rules[1], attribute_name=manifest_col.name, - invalid_entry=manifest_col[i] + invalid_entry=manifest_col[i], + sg = sg, ) - ) + if vr_errors: + errors.append(vr_errors) + if vr_warnings: + warnings.append(vr_warnings) # Validating single re's else: @@ -531,21 +670,24 @@ def regex_validation( if not bool(module_to_call(reg_expression, re_to_check)) and bool( re_to_check ): - errors.append( - GenerateError.generate_regex_error( + vr_errors, vr_warnings = GenerateError.generate_regex_error( val_rule, reg_expression, row_num=str(i + 2), module_to_call=reg_exp_rules[1], attribute_name=manifest_col.name, - invalid_entry=manifest_col[i] + invalid_entry=manifest_col[i], + sg = sg, ) - ) + if vr_errors: + errors.append(vr_errors) + if vr_warnings: + warnings.append(vr_warnings) return errors, warnings def type_validation( - self, val_rule: str, manifest_col: pd.core.series.Series + self, val_rule: str, manifest_col: pd.core.series.Series, sg: SchemaGenerator, ) -> (List[List[str]], List[List[str]]): """ Purpose: @@ -577,28 +719,34 @@ def type_validation( if val_rule == "num": for i, value in enumerate(manifest_col): if bool(value) and not isinstance(value, specified_type[val_rule]): - errors.append( - GenerateError.generate_type_error( + vr_errors, vr_warnings = GenerateError.generate_type_error( val_rule, row_num=str(i + 2), attribute_name=manifest_col.name, - invalid_entry=str(manifest_col[i]) + invalid_entry=str(manifest_col[i]), + sg = sg, ) - ) + if vr_errors: + errors.append(vr_errors) + if vr_warnings: + warnings.append(vr_warnings) elif val_rule in ["int", "float", "str"]: for i, value in enumerate(manifest_col): if bool(value) and not isinstance(value, specified_type[val_rule]): - errors.append( - GenerateError.generate_type_error( + vr_errors, vr_warnings = GenerateError.generate_type_error( val_rule, row_num=str(i + 2), attribute_name=manifest_col.name, - invalid_entry=str(manifest_col[i]) + invalid_entry=str(manifest_col[i]), + sg = sg, ) - ) + if vr_errors: + errors.append(vr_errors) + if vr_warnings: + warnings.append(vr_warnings) return errors, warnings - def url_validation(self, val_rule: str, manifest_col: str) -> (List[List[str]], List[List[str]]): + def url_validation(self, val_rule: str, manifest_col: str, sg: SchemaGenerator,) -> (List[List[str]], List[List[str]]): """ Purpose: Validate URL's submitted for a particular attribute in a manifest. @@ -630,16 +778,19 @@ def url_validation(self, val_rule: str, manifest_col: str) -> (List[List[str]], # url_error = "random_entry" valid_url = False - errors.append( - GenerateError.generate_url_error( + vr_errors, vr_warnings = GenerateError.generate_url_error( url, url_error=url_error, row_num=str(i + 2), attribute_name=manifest_col.name, argument=url_args, - invalid_entry=manifest_col[i] + invalid_entry=manifest_col[i], + sg = sg, ) - ) + if vr_errors: + errors.append(vr_errors) + if vr_warnings: + warnings.append(vr_warnings) else: # add scheme to the URL if not currently added. if not urlparse(url).scheme: @@ -654,36 +805,42 @@ def url_validation(self, val_rule: str, manifest_col: str) -> (List[List[str]], except: valid_url = False url_error = "invalid_url" - errors.append( - GenerateError.generate_url_error( + vr_errors, vr_warnings = GenerateError.generate_url_error( url, url_error=url_error, row_num=str(i + 2), attribute_name=manifest_col.name, argument=url_args, - invalid_entry=manifest_col[i] + invalid_entry=manifest_col[i], + sg = sg, ) - ) + if vr_errors: + errors.append(vr_errors) + if vr_warnings: + warnings.append(vr_warnings) if valid_url == True: # If the URL works, check to see if it contains the proper arguments # as specified in the schema. for arg in url_args: if arg not in url: url_error = "arg_error" - errors.append( - GenerateError.generate_url_error( + vr_errors, vr_warnings = GenerateError.generate_url_error( url, url_error=url_error, row_num=str(i + 2), attribute_name=manifest_col.name, argument=arg, - invalid_entry=manifest_col[i] + invalid_entry=manifest_col[i], + sg = sg, ) - ) + if vr_errors: + errors.append(vr_errors) + if vr_warnings: + warnings.append(vr_warnings) return errors, warnings def cross_validation( - self, val_rule: str, manifest_col: pd.core.series.Series, project_scope: List, + self, val_rule: str, manifest_col: pd.core.series.Series, project_scope: List, sg: SchemaGenerator, ) -> List[List[str]]: """ Purpose: @@ -763,25 +920,31 @@ def cross_validation( if val_rule.__contains__('matchAtLeastOne') and not missing_values.empty: missing_rows = missing_values.index.to_numpy() + 2 - warnings.append( - GenerateError.generate_cross_warning( + vr_errors, vr_warnings = GenerateError.generate_cross_warning( val_rule = val_rule, row_num = str(list(missing_rows)), attribute_name = source_attribute, invalid_entry = str(missing_values.values.tolist()), + sg = sg, ) - ) + if vr_errors: + errors.append(vr_errors) + if vr_warnings: + warnings.append(vr_warnings) elif val_rule.__contains__('matchExactlyOne') and (duplicated_values.any() or missing_values.any()): invalid_values = pd.merge(duplicated_values,missing_values,how='outer') invalid_rows = pd.merge(duplicated_values,missing_values,how='outer',left_index=True,right_index=True).index.to_numpy() + 2 - warnings.append( - GenerateError.generate_cross_warning( + vr_errors, vr_warnings = GenerateError.generate_cross_warning( val_rule = val_rule, row_num = str(list(invalid_rows)), attribute_name = source_attribute, - invalid_entry = str(pd.Series(invalid_values.squeeze()).values.tolist()) + invalid_entry = str(pd.Series(invalid_values.squeeze()).values.tolist()), + sg = sg, ) - ) + if vr_errors: + errors.append(vr_errors) + if vr_warnings: + warnings.append(vr_warnings) @@ -798,23 +961,29 @@ def cross_validation( missing_values=list(set(missing_values)) #print(missing_rows,missing_values) - warnings.append( - GenerateError.generate_cross_warning( + vr_errors, vr_warnings = GenerateError.generate_cross_warning( val_rule = val_rule, row_num = str(missing_rows), attribute_name = source_attribute, invalid_entry = str(missing_values), missing_manifest_ID = missing_manifest_IDs, + sg = sg, ) - ) + if vr_errors: + errors.append(vr_errors) + if vr_warnings: + warnings.append(vr_warnings) elif val_rule.__contains__('matchExactlyOne') and len(present_manifest_log) != 1: - warnings.append( - GenerateError.generate_cross_warning( + vr_errors, vr_warnings = GenerateError.generate_cross_warning( val_rule = val_rule, attribute_name = source_attribute, matching_manifests = present_manifest_log, + sg = sg, ) - ) + if vr_errors: + errors.append(vr_errors) + if vr_warnings: + warnings.append(vr_warnings) return errors, warnings diff --git a/schematic/models/validate_manifest.py b/schematic/models/validate_manifest.py index 4795fb38a..1286e4216 100644 --- a/schematic/models/validate_manifest.py +++ b/schematic/models/validate_manifest.py @@ -160,6 +160,7 @@ def validate_manifest_rules( warnings = warnings, validation_results = validation_results, validation_types = validation_types, + sg = sg, ) else: logging.info("Great Expetations suite will not be utilized.") @@ -196,16 +197,16 @@ def validate_manifest_rules( if validation_type == "list": vr_errors, vr_warnings, manifest_col = validation_method( - self, rule, manifest[col] + self, rule, manifest[col], sg, ) manifest[col] = manifest_col elif validation_type.lower().startswith("match"): vr_errors, vr_warnings = validation_method( - self, rule, manifest[col], project_scope, + self, rule, manifest[col], project_scope, sg, ) else: vr_errors, vr_warnings = validation_method( - self, rule, manifest[col] + self, rule, manifest[col], sg, ) # Check for validation rule errors and add them to other errors. if vr_errors: @@ -215,7 +216,7 @@ def validate_manifest_rules( return manifest, errors, warnings - def validate_manifest_values(self, manifest, jsonSchema + def validate_manifest_values(self, manifest, jsonSchema, sg ) -> (List[List[str]], List[List[str]]): errors = [] @@ -245,7 +246,7 @@ def validate_manifest_values(self, manifest, jsonSchema col_index = error[1] attr_name = col_attr[col_index] errorMsg = error[2] - GenerateError.generate_schema_error(row_num = row_num, attribute_name = attr_name, error_msg = errorMsg) + GenerateError.generate_schema_error(row_num = row_num, attribute_name = attr_name, error_msg = errorMsg, sg = sg) return errors, warnings @@ -258,7 +259,7 @@ def validate_all(self, errors, warnings, manifest, manifestPath, sg, jsonSchema, if vmr_warnings: warnings.extend(vmr_warnings) - vmv_errors, vmv_warnings = vm.validate_manifest_values(manifest, jsonSchema) + vmv_errors, vmv_warnings = vm.validate_manifest_values(manifest, jsonSchema, sg) if vmv_errors: errors.extend(vmv_errors) if vmv_warnings: From 77373173728e85189b4813b255f646da9d8eeaec Mon Sep 17 00:00:00 2001 From: Gianna Jordan <61707471+GiaJordan@users.noreply.github.com> Date: Tue, 8 Nov 2022 10:16:45 -0700 Subject: [PATCH 02/36] update GE error handling --- schematic/models/GE_Helpers.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/schematic/models/GE_Helpers.py b/schematic/models/GE_Helpers.py index 83598a501..4d9bf5d7e 100644 --- a/schematic/models/GE_Helpers.py +++ b/schematic/models/GE_Helpers.py @@ -409,15 +409,17 @@ def generate_errors( #call functions to generate error messages and add to error list if validation_types[rule.split(" ")[0]]['type']=='type_validation': for row, value in zip(indices,values): - errors.append( - GenerateError.generate_type_error( + vr_errors, vr_warnings = GenerateError.generate_type_error( val_rule = rule, row_num = row+2, attribute_name = errColumn, invalid_entry = value, sg = sg, ) - ) + if vr_errors: + errors.append(vr_errors) + if vr_warnings: + warnings.append(vr_warnings) elif validation_types[rule.split(" ")[0]]['type']=='regex_validation': expression=result_dict['expectation_config']['kwargs']['regex'] @@ -434,22 +436,22 @@ def generate_errors( ) ) elif validation_types[rule.split(" ")[0]]['type']=='content_validation': - content_errors, content_warnings = GenerateError.generate_content_error( + vr_errors, vr_warnings = GenerateError.generate_content_error( val_rule = rule, attribute_name = errColumn, row_num = list(np.array(indices)+2), error_val = values, sg = self.sg ) - if content_errors: - errors.append(content_errors) + if vr_errors: + errors.append(vr_errors) if rule.startswith('protectAges'): - self.censor_ages(content_errors,errColumn) + self.censor_ages(vr_errors,errColumn) pass - elif content_warnings: - warnings.append(content_warnings) + if vr_warnings: + warnings.append(vr_warnings) if rule.startswith('protectAges'): - self.censor_ages(content_warnings,errColumn) + self.censor_ages(vr_warnings,errColumn) pass return errors, warnings From 0ad704b9f0dbff6ba613ebc4e2deec802e1397e9 Mon Sep 17 00:00:00 2001 From: Gianna Jordan <61707471+GiaJordan@users.noreply.github.com> Date: Tue, 8 Nov 2022 10:17:09 -0700 Subject: [PATCH 03/36] update tests for new error/warning output runcombos --- tests/test_validation.py | 118 ++++++++++++++++++++++++--------------- 1 file changed, 72 insertions(+), 46 deletions(-) diff --git a/tests/test_validation.py b/tests/test_validation.py index a6b2d5eaa..0fa6b6be5 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -70,38 +70,43 @@ def test_invalid_manifest(self,helpers,sg,metadataModel): val_rule = 'num', row_num = 3, attribute_name = 'Check Num', - invalid_entry = 'c' - ) in errors + invalid_entry = 'c', + sg = sg, + )[0] in errors assert GenerateError.generate_type_error( val_rule = 'int', row_num = 3, attribute_name = 'Check Int', - invalid_entry = 5.63 - ) in errors + invalid_entry = 5.63, + sg = sg, + )[0] in errors assert GenerateError.generate_type_error( val_rule = 'str', row_num = 3, attribute_name = 'Check String', - invalid_entry = 94 - ) in errors + invalid_entry = 94, + sg = sg, + )[0] in errors assert GenerateError.generate_list_error( list_string = 'invalid list values', row_num = '3', attribute_name = 'Check List', list_error = "not_comma_delimited", - invalid_entry = 'invalid list values' - ) in errors + invalid_entry = 'invalid list values', + sg = sg, + )[0] in errors assert GenerateError.generate_list_error( list_string = 'ab cd ef', row_num = '3', attribute_name = 'Check Regex List', list_error = "not_comma_delimited", - invalid_entry = 'ab cd ef' - ) in errors + invalid_entry = 'ab cd ef', + sg = sg, + )[0] in errors assert GenerateError.generate_regex_error( val_rule = 'regex', @@ -109,8 +114,9 @@ def test_invalid_manifest(self,helpers,sg,metadataModel): row_num = '3', attribute_name = 'Check Regex Format', module_to_call = 'match', - invalid_entry = 'm' - ) in errors + invalid_entry = 'm', + sg = sg, + )[0] in errors assert GenerateError.generate_regex_error( val_rule = 'regex', @@ -118,8 +124,9 @@ def test_invalid_manifest(self,helpers,sg,metadataModel): row_num = '3', attribute_name = 'Check Regex Single', module_to_call = 'search', - invalid_entry = 'q' - ) in errors + invalid_entry = 'q', + sg = sg, + )[0] in errors assert GenerateError.generate_url_error( url = 'http://googlef.com/', @@ -127,8 +134,9 @@ def test_invalid_manifest(self,helpers,sg,metadataModel): row_num = '3', attribute_name = 'Check URL', argument = None, - invalid_entry = 'http://googlef.com/' - ) in errors + invalid_entry = 'http://googlef.com/', + sg = sg, + )[0] in errors assert GenerateError.generate_content_error( val_rule = 'unique error', @@ -167,34 +175,39 @@ def test_invalid_manifest(self,helpers,sg,metadataModel): attribute_name='Check Match at Least', invalid_entry = '[7163]', missing_manifest_ID = ['syn27600110', 'syn29381803'], - ) in warnings + sg = sg, + )[1] in warnings assert GenerateError.generate_cross_warning( val_rule = 'matchAtLeastOne MockComponent.checkMatchatLeastvalues value', row_num = '[3]', attribute_name = 'Check Match at Least values', invalid_entry = '[51100]', - ) in warnings + sg = sg, + )[1] in warnings assert \ GenerateError.generate_cross_warning( val_rule = 'matchExactlyOne', attribute_name='Check Match Exactly', - matching_manifests = ['syn29862078', 'syn27648165'] - ) in warnings \ + matching_manifests = ['syn29862078', 'syn27648165'], + sg = sg, + )[1] in warnings \ or \ GenerateError.generate_cross_warning( val_rule = 'matchExactlyOne', attribute_name='Check Match Exactly', - matching_manifests = ['syn29862066', 'syn27648165'] - ) in warnings + matching_manifests = ['syn29862066', 'syn27648165'], + sg = sg, + )[1] in warnings assert GenerateError.generate_cross_warning( val_rule = 'matchExactlyOne MockComponent.checkMatchExactlyvalues MockComponent.checkMatchExactlyvalues value', row_num = '[2, 3, 4]', attribute_name='Check Match Exactly values', invalid_entry = '[71738, 98085, 210065]', - ) in warnings + sg = sg, + )[1] in warnings @@ -213,38 +226,43 @@ def test_in_house_validation(self,helpers,sg,metadataModel): val_rule = 'num', row_num = '3', attribute_name = 'Check Num', - invalid_entry = 'c' - ) in errors + invalid_entry = 'c', + sg = sg, + )[0] in errors assert GenerateError.generate_type_error( val_rule = 'int', row_num = '3', attribute_name = 'Check Int', - invalid_entry = '5.63' - ) in errors + invalid_entry = '5.63', + sg = sg, + )[0] in errors assert GenerateError.generate_type_error( val_rule = 'str', row_num = '3', attribute_name = 'Check String', - invalid_entry = '94' - ) in errors + invalid_entry = '94', + sg = sg, + )[0] in errors assert GenerateError.generate_list_error( list_string = 'invalid list values', row_num = '3', attribute_name = 'Check List', list_error = "not_comma_delimited", - invalid_entry = 'invalid list values' - ) in errors + invalid_entry = 'invalid list values', + sg = sg, + )[0] in errors assert GenerateError.generate_list_error( list_string = 'ab cd ef', row_num = '3', attribute_name = 'Check Regex List', list_error = "not_comma_delimited", - invalid_entry = 'ab cd ef' - ) in errors + invalid_entry = 'ab cd ef', + sg = sg, + )[0] in errors assert GenerateError.generate_regex_error( val_rule = 'regex', @@ -252,8 +270,9 @@ def test_in_house_validation(self,helpers,sg,metadataModel): row_num = '3', attribute_name = 'Check Regex Single', module_to_call = 'search', - invalid_entry = 'q' - ) in errors + invalid_entry = 'q', + sg = sg, + )[0] in errors assert GenerateError.generate_regex_error( val_rule = 'regex', @@ -261,8 +280,9 @@ def test_in_house_validation(self,helpers,sg,metadataModel): row_num = '3', attribute_name = 'Check Regex Format', module_to_call = 'match', - invalid_entry = 'm' - ) in errors + invalid_entry = 'm', + sg = sg, + )[0] in errors assert GenerateError.generate_url_error( url = 'http://googlef.com/', @@ -270,8 +290,9 @@ def test_in_house_validation(self,helpers,sg,metadataModel): row_num = '3', attribute_name = 'Check URL', argument = None, - invalid_entry = 'http://googlef.com/' - ) in errors + invalid_entry = 'http://googlef.com/', + sg = sg, + )[0] in errors #Check Warnings @@ -281,34 +302,39 @@ def test_in_house_validation(self,helpers,sg,metadataModel): attribute_name='Check Match at Least', invalid_entry = '[7163]', missing_manifest_ID = ['syn27600110', 'syn29381803'], - ) in warnings + sg = sg, + )[1] in warnings assert GenerateError.generate_cross_warning( val_rule = 'matchAtLeastOne MockComponent.checkMatchatLeastvalues value', row_num = '[3]', attribute_name = 'Check Match at Least values', invalid_entry = '[51100]', - ) in warnings + sg = sg, + )[1] in warnings assert \ GenerateError.generate_cross_warning( val_rule = 'matchExactlyOne', attribute_name='Check Match Exactly', - matching_manifests = ['syn29862078', 'syn27648165'] - ) in warnings \ + matching_manifests = ['syn29862078', 'syn27648165'], + sg = sg, + )[1] in warnings \ or \ GenerateError.generate_cross_warning( val_rule = 'matchExactlyOne', attribute_name='Check Match Exactly', - matching_manifests = ['syn29862066', 'syn27648165'] - ) in warnings + matching_manifests = ['syn29862066', 'syn27648165'], + sg = sg, + )[1] in warnings assert GenerateError.generate_cross_warning( val_rule = 'matchExactlyOne MockComponent.checkMatchExactlyvalues MockComponent.checkMatchExactlyvalues value', row_num = '[2, 3, 4]', attribute_name='Check Match Exactly values', invalid_entry = '[71738, 98085, 210065]', - ) in warnings + sg = sg, + )[1] in warnings @pytest.mark.rule_combos(reason = 'This introduces a great number of tests covering every possible rule combination that are only necessary on occasion.') From fe6fee29908076b831f24debd68dae8ec87d73bf Mon Sep 17 00:00:00 2001 From: Gianna Jordan <61707471+GiaJordan@users.noreply.github.com> Date: Tue, 8 Nov 2022 11:17:52 -0700 Subject: [PATCH 04/36] fix error gen for GE --- schematic/models/GE_Helpers.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/schematic/models/GE_Helpers.py b/schematic/models/GE_Helpers.py index 4d9bf5d7e..4cc5c6919 100644 --- a/schematic/models/GE_Helpers.py +++ b/schematic/models/GE_Helpers.py @@ -416,16 +416,14 @@ def generate_errors( invalid_entry = value, sg = sg, ) - if vr_errors: - errors.append(vr_errors) - if vr_warnings: - warnings.append(vr_warnings) + if vr_errors: + errors.append(vr_errors) + if vr_warnings: + warnings.append(vr_warnings) elif validation_types[rule.split(" ")[0]]['type']=='regex_validation': expression=result_dict['expectation_config']['kwargs']['regex'] - for row, value in zip(indices,values): - errors.append( - GenerateError.generate_regex_error( + vr_errors, vr_warnings = GenerateError.generate_regex_error( val_rule= rule, reg_expression = expression, row_num = row+2, @@ -434,7 +432,10 @@ def generate_errors( invalid_entry = value, sg = sg, ) - ) + if vr_errors: + errors.append(vr_errors) + if vr_warnings: + warnings.append(vr_warnings) elif validation_types[rule.split(" ")[0]]['type']=='content_validation': vr_errors, vr_warnings = GenerateError.generate_content_error( val_rule = rule, From 6443c4ff476f52a1a0592b49755e9fd5d8e6c0c1 Mon Sep 17 00:00:00 2001 From: Gianna Jordan <61707471+GiaJordan@users.noreply.github.com> Date: Tue, 8 Nov 2022 12:01:39 -0700 Subject: [PATCH 05/36] Update test model --- tests/data/example.model.csv | 32 +++++++++---------- tests/data/example.model.jsonld | 54 ++++++++++----------------------- 2 files changed, 32 insertions(+), 54 deletions(-) diff --git a/tests/data/example.model.csv b/tests/data/example.model.csv index b6edd8843..b9bb1e027 100644 --- a/tests/data/example.model.csv +++ b/tests/data/example.model.csv @@ -19,20 +19,20 @@ CSV/TSV,,,Genome Build,,FALSE,ValidValue,,, Genome Build,,"GRCh37, GRCh38, GRCm38, GRCm39",,,TRUE,DataProperty,,, Genome FASTA,,,,,TRUE,DataProperty,,, MockComponent,,,"Component, Check List, Check Regex List, Check Regex Single, Check Regex Format, Check Num, Check Float, Check Int, Check String, Check URL,Check Match at Least, Check Match at Least values, Check Match Exactly, Check Match Exactly values, Check Recommended, Check Ages, Check Unique, Check Range",,FALSE,DataType,,, -Check List,,"ab, cd, ef, gh",,,FALSE,DataProperty,,,list strict -Check Regex List,,,,,FALSE,DataProperty,,,list strict::regex match [a-f] -Check Regex Single,,,,,FALSE,DataProperty,,,regex search [a-f] -Check Regex Format,,,,,FALSE,DataProperty,,,regex match [a-f] -Check Num,,,,,FALSE,DataProperty,,,num -Check Float,,,,,FALSE,DataProperty,,,float -Check Int,,,,,FALSE,DataProperty,,,int -Check String,,,,,FALSE,DataProperty,,,str -Check URL,,,,,FALSE,DataProperty,,,url -Check Match at Least,,,,,FALSE,DataProperty,,,matchAtLeastOne Patient.PatientID set -Check Match Exactly,,,,,FALSE,DataProperty,,,matchExactlyOne MockComponent.checkMatchExactly set -Check Match at Least values,,,,,FALSE,DataProperty,,,matchAtLeastOne MockComponent.checkMatchatLeastvalues value -Check Match Exactly values,,,,,FALSE,DataProperty,,,matchExactlyOne MockComponent.checkMatchExactlyvalues value +Check List,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list strict +Check Regex List,,,,,TRUE,DataProperty,,,list strict::regex match [a-f] +Check Regex Single,,,,,TRUE,DataProperty,,,regex search [a-f] +Check Regex Format,,,,,TRUE,DataProperty,,,regex match [a-f] +Check Num,,,,,TRUE,DataProperty,,,num +Check Float,,,,,TRUE,DataProperty,,,float +Check Int,,,,,TRUE,DataProperty,,,int +Check String,,,,,TRUE,DataProperty,,,str +Check URL,,,,,TRUE,DataProperty,,,url +Check Match at Least,,,,,TRUE,DataProperty,,,matchAtLeastOne Patient.PatientID set +Check Match Exactly,,,,,TRUE,DataProperty,,,matchExactlyOne MockComponent.checkMatchExactly set +Check Match at Least values,,,,,TRUE,DataProperty,,,matchAtLeastOne MockComponent.checkMatchatLeastvalues value +Check Match Exactly values,,,,,TRUE,DataProperty,,,matchExactlyOne MockComponent.checkMatchExactlyvalues value Check Recommended,,,,,FALSE,DataProperty,,,recommended -Check Ages,,,,,FALSE,DataProperty,,,protectAges -Check Unique,,,,,FALSE,DataProperty,,,unique error -Check Range,,,,,FALSE,DataProperty,,,inRange 50 100 error +Check Ages,,,,,TRUE,DataProperty,,,protectAges +Check Unique,,,,,TRUE,DataProperty,,,unique error +Check Range,,,,,TRUE,DataProperty,,,inRange 50 100 error diff --git a/tests/data/example.model.jsonld b/tests/data/example.model.jsonld index 8d468a30e..e64e0f1d1 100644 --- a/tests/data/example.model.jsonld +++ b/tests/data/example.model.jsonld @@ -2188,28 +2188,6 @@ "list strict" ] }, - { - "@id": "bts:ClinicalData", - "@type": "rdfs:Class", - "rdfs:comment": "TBD", - "rdfs:label": "ClinicalData", - "rdfs:subClassOf": [ - { - "@id": "bts:ValidValue" - } - ], - "schema:isPartOf": { - "@id": "http://schema.biothings.io" - }, - "sms:displayName": "Clinical Data", - "sms:required": "sms:false", - "sms:requiresDependency": [ - { - "@id": "bts:FamilyHistory" - } - ], - "sms:validationRules": [] - }, { "@id": "bts:Biospecimen", "@type": "rdfs:Class", @@ -2590,7 +2568,7 @@ } ], "sms:displayName": "Check List", - "sms:required": "sms:false", + "sms:required": "sms:true", "sms:validationRules": [ "list strict" ] @@ -2609,7 +2587,7 @@ "@id": "http://schema.biothings.io" }, "sms:displayName": "Check Regex List", - "sms:required": "sms:false", + "sms:required": "sms:true", "sms:validationRules": [ "list strict", "regex match [a-f]" @@ -2629,7 +2607,7 @@ "@id": "http://schema.biothings.io" }, "sms:displayName": "Check Regex Single", - "sms:required": "sms:false", + "sms:required": "sms:true", "sms:validationRules": [ "regex search [a-f]" ] @@ -2648,7 +2626,7 @@ "@id": "http://schema.biothings.io" }, "sms:displayName": "Check Regex Format", - "sms:required": "sms:false", + "sms:required": "sms:true", "sms:validationRules": [ "regex match [a-f]" ] @@ -2667,7 +2645,7 @@ "@id": "http://schema.biothings.io" }, "sms:displayName": "Check Num", - "sms:required": "sms:false", + "sms:required": "sms:true", "sms:validationRules": [ "num" ] @@ -2686,7 +2664,7 @@ "@id": "http://schema.biothings.io" }, "sms:displayName": "Check Float", - "sms:required": "sms:false", + "sms:required": "sms:true", "sms:validationRules": [ "float" ] @@ -2705,7 +2683,7 @@ "@id": "http://schema.biothings.io" }, "sms:displayName": "Check Int", - "sms:required": "sms:false", + "sms:required": "sms:true", "sms:validationRules": [ "int" ] @@ -2724,7 +2702,7 @@ "@id": "http://schema.biothings.io" }, "sms:displayName": "Check String", - "sms:required": "sms:false", + "sms:required": "sms:true", "sms:validationRules": [ "str" ] @@ -2743,7 +2721,7 @@ "@id": "http://schema.biothings.io" }, "sms:displayName": "Check URL", - "sms:required": "sms:false", + "sms:required": "sms:true", "sms:validationRules": [ "url" ] @@ -2762,7 +2740,7 @@ "@id": "http://schema.biothings.io" }, "sms:displayName": "Check Match at Least", - "sms:required": "sms:false", + "sms:required": "sms:true", "sms:validationRules": [ "matchAtLeastOne Patient.PatientID set" ] @@ -2781,7 +2759,7 @@ "@id": "http://schema.biothings.io" }, "sms:displayName": "Check Match Exactly", - "sms:required": "sms:false", + "sms:required": "sms:true", "sms:validationRules": [ "matchExactlyOne MockComponent.checkMatchExactly set" ] @@ -2800,7 +2778,7 @@ "@id": "http://schema.biothings.io" }, "sms:displayName": "Check Match at Least values", - "sms:required": "sms:false", + "sms:required": "sms:true", "sms:validationRules": [ "matchAtLeastOne MockComponent.checkMatchatLeastvalues value" ] @@ -2819,7 +2797,7 @@ "@id": "http://schema.biothings.io" }, "sms:displayName": "Check Match Exactly values", - "sms:required": "sms:false", + "sms:required": "sms:true", "sms:validationRules": [ "matchExactlyOne MockComponent.checkMatchExactlyvalues value" ] @@ -2857,7 +2835,7 @@ "@id": "http://schema.biothings.io" }, "sms:displayName": "Check Ages", - "sms:required": "sms:false", + "sms:required": "sms:true", "sms:validationRules": [ "protectAges" ] @@ -2876,7 +2854,7 @@ "@id": "http://schema.biothings.io" }, "sms:displayName": "Check Unique", - "sms:required": "sms:false", + "sms:required": "sms:true", "sms:validationRules": [ "unique error" ] @@ -2895,7 +2873,7 @@ "@id": "http://schema.biothings.io" }, "sms:displayName": "Check Range", - "sms:required": "sms:false", + "sms:required": "sms:true", "sms:validationRules": [ "inRange 50 100 error" ] From 70c61b2a4e54201c67dc438eeab8384711bca6ce Mon Sep 17 00:00:00 2001 From: Gianna Jordan <61707471+GiaJordan@users.noreply.github.com> Date: Tue, 8 Nov 2022 12:19:53 -0700 Subject: [PATCH 06/36] update GE object to allow for rules with arguments --- schematic/models/GE_Helpers.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/schematic/models/GE_Helpers.py b/schematic/models/GE_Helpers.py index 4cc5c6919..9c0c1cfa0 100644 --- a/schematic/models/GE_Helpers.py +++ b/schematic/models/GE_Helpers.py @@ -160,7 +160,7 @@ def build_expectation_suite(self,): if validation_rules: #iterate through all validation rules for an attribute for rule in validation_rules: - + base_rule = rule.split(" ")[0] #check if rule has an implemented expectation if rule_in_rule_list(rule,self.unimplemented_expectations): @@ -170,8 +170,9 @@ def build_expectation_suite(self,): args["column"] = col args["result_format"] = "COMPLETE" + #Validate num - if rule=='num': + if base_rule=='num': args["mostly"]=1.0 args["type_list"]=['int','int64', 'float', 'float64'] meta={ @@ -183,7 +184,7 @@ def build_expectation_suite(self,): } #Validate float - elif rule=='float': + elif base_rule=='float': args["mostly"]=1.0 args["type_list"]=['float', 'float64'] meta={ @@ -195,7 +196,7 @@ def build_expectation_suite(self,): } #Validate int - elif rule=='int': + elif base_rule=='int': args["mostly"]=1.0 args["type_list"]=['int','int64'] meta={ @@ -207,7 +208,7 @@ def build_expectation_suite(self,): } #Validate string - elif rule=='str': + elif base_rule=='str': args["mostly"]=1.0 args["type_"]='str' meta={ @@ -218,7 +219,7 @@ def build_expectation_suite(self,): "validation_rule": rule } - elif rule.startswith("recommended"): + elif base_rule==("recommended"): args["mostly"]=0.0000000001 args["regex_list"]=['^$'] meta={ @@ -229,7 +230,7 @@ def build_expectation_suite(self,): "validation_rule": rule } - elif rule.startswith("protectAges"): + elif base_rule==("protectAges"): #Function to convert to different age limit formats min_age, max_age = self.get_age_limits() @@ -244,7 +245,7 @@ def build_expectation_suite(self,): "validation_rule": rule } - elif rule.startswith("unique"): + elif base_rule==("unique"): args["mostly"]=1.0 meta={ "notes": { @@ -254,7 +255,7 @@ def build_expectation_suite(self,): "validation_rule": rule } - elif rule.startswith("inRange"): + elif base_rule==("inRange"): args["mostly"]=1.0 args["min_value"]=float(rule.split(" ")[1]) args["max_value"]=float(rule.split(" ")[2]) From 8cdbe217473fd4181618184b0ed802c66bbad2c7 Mon Sep 17 00:00:00 2001 From: Gianna Jordan <61707471+GiaJordan@users.noreply.github.com> Date: Tue, 8 Nov 2022 12:21:22 -0700 Subject: [PATCH 07/36] allow error/warning arguments for rest of rules --- schematic/utils/validate_rules_utils.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/schematic/utils/validate_rules_utils.py b/schematic/utils/validate_rules_utils.py index 6dedeb807..d53bb64d3 100644 --- a/schematic/utils/validate_rules_utils.py +++ b/schematic/utils/validate_rules_utils.py @@ -20,53 +20,53 @@ def validation_rule_info(): ''' rule_dict = { "int": { - 'arguments':(None, None), + 'arguments':(1, 0), 'type': "type_validation", 'complementary_rules': ['inRange',]}, "float": { - 'arguments':(None, None), + 'arguments':(1, 0), 'type': "type_validation", 'complementary_rules': ['inRange',]}, "num": { - 'arguments':(None, None), + 'arguments':(1, 0), 'type': "type_validation", 'complementary_rules': ['inRange',]}, "str": { - 'arguments':(None, None), + 'arguments':(1, 0), 'type': "type_validation", 'complementary_rules': None}, "regex": { - 'arguments':(2, 2), + 'arguments':(3, 2), 'fixed_arg': ['strict'], 'type': "regex_validation", 'complementary_rules': ['list']}, "url" : { - 'arguments':(None, None), + 'arguments':(101, 0), 'type': "url_validation", 'complementary_rules': None}, "list": { - 'arguments':(1, 0), + 'arguments':(2, 0), 'type': "list_validation", 'complementary_rules': ['regex']}, "matchAtLeastOne": { - 'arguments':(2, 2), + 'arguments':(3, 2), 'type': "cross_validation", 'complementary_rules': None}, "matchExactlyOne": { - 'arguments':(2, 2), + 'arguments':(3, 2), 'type': "cross_validation", 'complementary_rules': None}, "recommended": { - 'arguments':(None, None), + 'arguments':(1, 0), 'type': "content_validation", 'complementary_rules': None}, From 2c2c33b2f83a2caa9a0b038b6018d7b4ca02d51c Mon Sep 17 00:00:00 2001 From: Gianna Jordan <61707471+GiaJordan@users.noreply.github.com> Date: Tue, 8 Nov 2022 12:21:56 -0700 Subject: [PATCH 08/36] add default message level to rule dict --- schematic/utils/validate_rules_utils.py | 39 ++++++++++++++++--------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/schematic/utils/validate_rules_utils.py b/schematic/utils/validate_rules_utils.py index d53bb64d3..3edcc358b 100644 --- a/schematic/utils/validate_rules_utils.py +++ b/schematic/utils/validate_rules_utils.py @@ -22,68 +22,81 @@ def validation_rule_info(): "int": { 'arguments':(1, 0), 'type': "type_validation", - 'complementary_rules': ['inRange',]}, + 'complementary_rules': ['inRange',], + 'default_message_level': 'error'}, "float": { 'arguments':(1, 0), 'type': "type_validation", - 'complementary_rules': ['inRange',]}, + 'complementary_rules': ['inRange',], + 'default_message_level': 'error'}, "num": { 'arguments':(1, 0), 'type': "type_validation", - 'complementary_rules': ['inRange',]}, + 'complementary_rules': ['inRange',], + 'default_message_level': 'error'}, "str": { 'arguments':(1, 0), 'type': "type_validation", - 'complementary_rules': None}, + 'complementary_rules': None, + 'default_message_level': 'error'}, "regex": { 'arguments':(3, 2), 'fixed_arg': ['strict'], 'type': "regex_validation", - 'complementary_rules': ['list']}, + 'complementary_rules': ['list'], + 'default_message_level': 'error'}, "url" : { 'arguments':(101, 0), 'type': "url_validation", - 'complementary_rules': None}, + 'complementary_rules': None, + 'default_message_level': 'error'}, "list": { 'arguments':(2, 0), 'type': "list_validation", - 'complementary_rules': ['regex']}, + 'complementary_rules': ['regex'], + 'default_message_level': 'error'}, "matchAtLeastOne": { 'arguments':(3, 2), 'type': "cross_validation", - 'complementary_rules': None}, + 'complementary_rules': None, + 'default_message_level': 'warning'}, "matchExactlyOne": { 'arguments':(3, 2), 'type': "cross_validation", - 'complementary_rules': None}, + 'complementary_rules': None, + 'default_message_level': 'warning'}, "recommended": { 'arguments':(1, 0), 'type': "content_validation", - 'complementary_rules': None}, + 'complementary_rules': None, + 'default_message_level': 'warning'}, "protectAges": { 'arguments':(1, 0), 'type': "content_validation", - 'complementary_rules': ['inRange',]}, + 'complementary_rules': ['inRange',], + 'default_message_level': 'warning'}, "unique": { 'arguments':(1, 0), 'type': "content_validation", - 'complementary_rules': None}, + 'complementary_rules': None, + 'default_message_level': 'error'}, "inRange": { 'arguments':(3, 2), 'type': "content_validation", - 'complementary_rules': ['int','float','num','protectAges']}, + 'complementary_rules': ['int','float','num','protectAges'], + 'default_message_level': 'error'}, } return rule_dict From 1eaec9cea2dc0f8b6646e60bea6f3d55bef28b66 Mon Sep 17 00:00:00 2001 From: Gianna Jordan <61707471+GiaJordan@users.noreply.github.com> Date: Tue, 8 Nov 2022 12:23:52 -0700 Subject: [PATCH 09/36] consistently pass through specified val_rule --- schematic/models/validate_attribute.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/schematic/models/validate_attribute.py b/schematic/models/validate_attribute.py index 1fafa866f..b286f2e0e 100644 --- a/schematic/models/validate_attribute.py +++ b/schematic/models/validate_attribute.py @@ -47,7 +47,7 @@ def generate_schema_error(row_num: str, attribute_name: str, error_msg: str, sg: def generate_list_error( list_string: str, row_num: str, attribute_name: str, list_error: str, - invalid_entry:str, sg: SchemaGenerator, + invalid_entry:str, sg: SchemaGenerator, val_rule: str, ) -> List[str]: """ Purpose: @@ -67,6 +67,7 @@ def generate_list_error( #Determine which, if any, message to raise raises = GenerateError.get_message_level( + val_rule = val_rule, attribute_name = attribute_name, sg = sg, invalid_entry = invalid_entry, @@ -127,6 +128,7 @@ def generate_regex_error( #Determine which, if any, message to raise raises = GenerateError.get_message_level( + val_rule = val_rule, attribute_name = attribute_name, sg = sg, invalid_entry = invalid_entry, @@ -178,6 +180,7 @@ def generate_type_error( #Determine which, if any, message to raise raises = GenerateError.get_message_level( + val_rule = val_rule, attribute_name = attribute_name, sg = sg, invalid_entry = invalid_entry, @@ -211,7 +214,7 @@ def generate_type_error( def generate_url_error( url: str, url_error: str, row_num: str, attribute_name: str, argument: str, - invalid_entry:str, sg: SchemaGenerator, + invalid_entry:str, sg: SchemaGenerator, val_rule: str, ) -> List[str]: """ Purpose: @@ -242,6 +245,7 @@ def generate_url_error( #Determine which, if any, message to raise raises = GenerateError.get_message_level( + val_rule = val_rule, attribute_name = attribute_name, sg = sg, invalid_entry = invalid_entry, @@ -321,7 +325,7 @@ def generate_cross_warning( #Determine which, if any, message to raise raises = GenerateError.get_message_level( - val_rule=val_rule, + val_rule = val_rule, attribute_name = attribute_name, sg = sg, invalid_entry = invalid_entry, @@ -461,7 +465,7 @@ def get_message_level( sg: SchemaGenerator, attribute_name: str, invalid_entry, - val_rule: str = None, + val_rule: str, ) -> str: """ Purpose: @@ -584,6 +588,7 @@ def list_validation( list_error=list_error, invalid_entry=manifest_col[i], sg = sg, + val_rule = val_rule, ) if vr_errors: errors.append(vr_errors) @@ -652,6 +657,8 @@ def regex_validation( vr_errors, vr_warnings = GenerateError.generate_regex_error( val_rule, reg_expression, + val_rule = val_rule, + reg_expression = reg_expression, row_num=str(i + 2), module_to_call=reg_exp_rules[1], attribute_name=manifest_col.name, @@ -673,6 +680,8 @@ def regex_validation( vr_errors, vr_warnings = GenerateError.generate_regex_error( val_rule, reg_expression, + val_rule = val_rule, + reg_expression = reg_expression, row_num=str(i + 2), module_to_call=reg_exp_rules[1], attribute_name=manifest_col.name, @@ -720,7 +729,7 @@ def type_validation( for i, value in enumerate(manifest_col): if bool(value) and not isinstance(value, specified_type[val_rule]): vr_errors, vr_warnings = GenerateError.generate_type_error( - val_rule, + val_rule = val_rule, row_num=str(i + 2), attribute_name=manifest_col.name, invalid_entry=str(manifest_col[i]), @@ -734,7 +743,7 @@ def type_validation( for i, value in enumerate(manifest_col): if bool(value) and not isinstance(value, specified_type[val_rule]): vr_errors, vr_warnings = GenerateError.generate_type_error( - val_rule, + val_rule = val_rule , row_num=str(i + 2), attribute_name=manifest_col.name, invalid_entry=str(manifest_col[i]), @@ -786,6 +795,7 @@ def url_validation(self, val_rule: str, manifest_col: str, sg: SchemaGenerator,) argument=url_args, invalid_entry=manifest_col[i], sg = sg, + val_rule = val_rule, ) if vr_errors: errors.append(vr_errors) @@ -813,6 +823,7 @@ def url_validation(self, val_rule: str, manifest_col: str, sg: SchemaGenerator,) argument=url_args, invalid_entry=manifest_col[i], sg = sg, + val_rule = val_rule, ) if vr_errors: errors.append(vr_errors) From 8377bee49be8a6016835b6186e92aa7f487f52ea Mon Sep 17 00:00:00 2001 From: Gianna Jordan <61707471+GiaJordan@users.noreply.github.com> Date: Tue, 8 Nov 2022 12:24:25 -0700 Subject: [PATCH 10/36] missed line from prev commit --- schematic/models/validate_attribute.py | 1 + 1 file changed, 1 insertion(+) diff --git a/schematic/models/validate_attribute.py b/schematic/models/validate_attribute.py index b286f2e0e..89bf957e2 100644 --- a/schematic/models/validate_attribute.py +++ b/schematic/models/validate_attribute.py @@ -843,6 +843,7 @@ def url_validation(self, val_rule: str, manifest_col: str, sg: SchemaGenerator,) argument=arg, invalid_entry=manifest_col[i], sg = sg, + val_rule = val_rule, ) if vr_errors: errors.append(vr_errors) From 2e1832aa09b01b92c2dc0d0fe78e2c2006c464cc Mon Sep 17 00:00:00 2001 From: Gianna Jordan <61707471+GiaJordan@users.noreply.github.com> Date: Tue, 8 Nov 2022 12:24:52 -0700 Subject: [PATCH 11/36] clean --- schematic/models/validate_attribute.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/schematic/models/validate_attribute.py b/schematic/models/validate_attribute.py index 89bf957e2..733d9dd5a 100644 --- a/schematic/models/validate_attribute.py +++ b/schematic/models/validate_attribute.py @@ -655,8 +655,6 @@ def regex_validation( re_to_check ): vr_errors, vr_warnings = GenerateError.generate_regex_error( - val_rule, - reg_expression, val_rule = val_rule, reg_expression = reg_expression, row_num=str(i + 2), @@ -678,8 +676,6 @@ def regex_validation( re_to_check ): vr_errors, vr_warnings = GenerateError.generate_regex_error( - val_rule, - reg_expression, val_rule = val_rule, reg_expression = reg_expression, row_num=str(i + 2), @@ -729,7 +725,7 @@ def type_validation( for i, value in enumerate(manifest_col): if bool(value) and not isinstance(value, specified_type[val_rule]): vr_errors, vr_warnings = GenerateError.generate_type_error( - val_rule = val_rule, + val_rule = val_rule , row_num=str(i + 2), attribute_name=manifest_col.name, invalid_entry=str(manifest_col[i]), @@ -743,7 +739,7 @@ def type_validation( for i, value in enumerate(manifest_col): if bool(value) and not isinstance(value, specified_type[val_rule]): vr_errors, vr_warnings = GenerateError.generate_type_error( - val_rule = val_rule , + val_rule = val_rule, row_num=str(i + 2), attribute_name=manifest_col.name, invalid_entry=str(manifest_col[i]), From 1c0164bc7e92437d9fb001d8b13c7e80a712a2e0 Mon Sep 17 00:00:00 2001 From: Gianna Jordan <61707471+GiaJordan@users.noreply.github.com> Date: Tue, 8 Nov 2022 12:33:21 -0700 Subject: [PATCH 12/36] fix manifest column name typo --- tests/data/mock_manifests/Valid_Test_Manifest.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data/mock_manifests/Valid_Test_Manifest.csv b/tests/data/mock_manifests/Valid_Test_Manifest.csv index 304d9d836..c87a8f6d3 100644 --- a/tests/data/mock_manifests/Valid_Test_Manifest.csv +++ b/tests/data/mock_manifests/Valid_Test_Manifest.csv @@ -1,4 +1,4 @@ -Component,Check List,Check Regex List,Check Regex Single,Check Regex Format, Check Num,Check Float,Check Int,Check String,Check URL,Check Match at Least,Check Match at Least values,Check Match Exactly,Check Match Exactly values,Check Recommended,Check Ages,Check Unique,Check Range +Component,Check List,Check Regex List,Check Regex Single,Check Regex Format,Check Num,Check Float,Check Int,Check String,Check URL,Check Match at Least,Check Match at Least values,Check Match Exactly,Check Match Exactly values,Check Recommended,Check Ages,Check Unique,Check Range MockComponent,"ab,cd","a,c,f",a,a,6,99.65,7,valid,https://www.google.com/,1738,1738,8085,8085,,6571,str1,75 MockComponent,"ab,cd","a,c,f",e,b,71,58.4,3,valid,https://www.google.com/,9965,9965,9965,9965,,6571,str2,80 MockComponent,"ab,cd","b,d,f",b,c,6.5,62.3,2,valid,https://www.google.com/,8085,8085,1738,1738,present,32849,str3,95 From bd5912d13e7f778d4ac88a0c50141a46b9f301a5 Mon Sep 17 00:00:00 2001 From: Gianna Jordan <61707471+GiaJordan@users.noreply.github.com> Date: Tue, 8 Nov 2022 12:47:11 -0700 Subject: [PATCH 13/36] update error method calls --- tests/test_validation.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_validation.py b/tests/test_validation.py index 0fa6b6be5..82abfac78 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -91,6 +91,7 @@ def test_invalid_manifest(self,helpers,sg,metadataModel): )[0] in errors assert GenerateError.generate_list_error( + val_rule = 'list strict', list_string = 'invalid list values', row_num = '3', attribute_name = 'Check List', @@ -100,6 +101,7 @@ def test_invalid_manifest(self,helpers,sg,metadataModel): )[0] in errors assert GenerateError.generate_list_error( + val_rule = 'list strict', list_string = 'ab cd ef', row_num = '3', attribute_name = 'Check Regex List', @@ -129,6 +131,7 @@ def test_invalid_manifest(self,helpers,sg,metadataModel): )[0] in errors assert GenerateError.generate_url_error( + val_rule = 'url', url = 'http://googlef.com/', url_error = 'invalid_url', row_num = '3', @@ -247,6 +250,7 @@ def test_in_house_validation(self,helpers,sg,metadataModel): )[0] in errors assert GenerateError.generate_list_error( + val_rule = 'list strict', list_string = 'invalid list values', row_num = '3', attribute_name = 'Check List', @@ -256,6 +260,7 @@ def test_in_house_validation(self,helpers,sg,metadataModel): )[0] in errors assert GenerateError.generate_list_error( + val_rule = 'list strict', list_string = 'ab cd ef', row_num = '3', attribute_name = 'Check Regex List', @@ -285,6 +290,7 @@ def test_in_house_validation(self,helpers,sg,metadataModel): )[0] in errors assert GenerateError.generate_url_error( + val_rule = 'url', url = 'http://googlef.com/', url_error = 'invalid_url', row_num = '3', From 39529fd8d2141866da31fce0c408a9754f03efd9 Mon Sep 17 00:00:00 2001 From: Gianna Jordan <61707471+GiaJordan@users.noreply.github.com> Date: Tue, 8 Nov 2022 12:48:40 -0700 Subject: [PATCH 14/36] logic to select message level for every rule --- schematic/models/validate_attribute.py | 48 +++++++++++++++++--------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/schematic/models/validate_attribute.py b/schematic/models/validate_attribute.py index 733d9dd5a..5cd217a26 100644 --- a/schematic/models/validate_attribute.py +++ b/schematic/models/validate_attribute.py @@ -19,6 +19,7 @@ from schematic.store.base import BaseStorage from schematic.schemas.generator import SchemaGenerator from schematic.utils.validate_utils import comma_separated_list_regex +from schematic.utils.validate_rules_utils import validation_rule_info import time from schematic.utils.validate_utils import parse_str_series_to_list @@ -480,29 +481,44 @@ def get_message_level( sg: schemaGenerator object attribute_name: str, attribute being validated Returns: - 'error' or 'warning' + 'error', 'warning' or None """ - if val_rule: - rule_parts = val_rule.split(" ") - - #See if the node is required, if it is and the column is missing then a requirement error will be raised later; no error or waring logged here if recommended and required but missing - if val_rule.startswith('recommended') and sg.is_node_required(node_display_name=attribute_name): - level = None - - #if not required, use the message level specified in the rule - elif rule_parts[-1].lower() == 'error': - level = 'error' + level = None + rule_parts = val_rule.split(" ") + # if node is not required, return None + # if node is recommended and requried, return None + # TODO: recommended and other rules + # if validion type is cross manifest, default to warning but parse + # if validation type is content, default to warning but parse + # if validation rule is other, default to error but parse + + + print(rule_parts) + + + rule_info = validation_rule_info() - elif rule_parts[-1].lower() == 'warning': + if not sg.is_node_required(node_display_name=attribute_name): + # raise warning if recommended but not required + if 'recommended' in val_rule: level = 'warning' - - #if no level specified, the default level is warning + # If not required or recommended raise nothing. + ## Redundant setting to None here again but including for clarity else: - level = 'warning' - else: + level = None + return level + + + # Parse rule for level, set to default if not specified + if rule_parts[-1].lower() == 'error': level = 'error' + elif rule_parts[-1].lower() == 'warning': + level = 'warning' + else: + level = rule_info[rule_parts[0]]['default_message_level'] + print(level) return level class ValidateAttribute(object): From af136edec8f4db2814f47f36f11536528923fed3 Mon Sep 17 00:00:00 2001 From: Gianna Jordan <61707471+GiaJordan@users.noreply.github.com> Date: Tue, 8 Nov 2022 12:53:02 -0700 Subject: [PATCH 15/36] switch to warning for non-required erroneous entries --- schematic/models/validate_attribute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schematic/models/validate_attribute.py b/schematic/models/validate_attribute.py index 5cd217a26..e5664b757 100644 --- a/schematic/models/validate_attribute.py +++ b/schematic/models/validate_attribute.py @@ -506,7 +506,7 @@ def get_message_level( # If not required or recommended raise nothing. ## Redundant setting to None here again but including for clarity else: - level = None + level = 'warning' return level From 1f3899a0d6e042e892947c02a8b4ea414cd9ed92 Mon Sep 17 00:00:00 2001 From: Gianna Jordan <61707471+GiaJordan@users.noreply.github.com> Date: Tue, 8 Nov 2022 12:55:38 -0700 Subject: [PATCH 16/36] remove invalid entry input --- schematic/models/validate_attribute.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/schematic/models/validate_attribute.py b/schematic/models/validate_attribute.py index e5664b757..8e8c1ae76 100644 --- a/schematic/models/validate_attribute.py +++ b/schematic/models/validate_attribute.py @@ -71,7 +71,6 @@ def generate_list_error( val_rule = val_rule, attribute_name = attribute_name, sg = sg, - invalid_entry = invalid_entry, ) #if a message needs to be raised, get the approrpiate function to do so @@ -132,7 +131,6 @@ def generate_regex_error( val_rule = val_rule, attribute_name = attribute_name, sg = sg, - invalid_entry = invalid_entry, ) #if a message needs to be raised, get the approrpiate function to do so @@ -184,7 +182,6 @@ def generate_type_error( val_rule = val_rule, attribute_name = attribute_name, sg = sg, - invalid_entry = invalid_entry, ) #if a message needs to be raised, get the approrpiate function to do so @@ -249,7 +246,6 @@ def generate_url_error( val_rule = val_rule, attribute_name = attribute_name, sg = sg, - invalid_entry = invalid_entry, ) #if a message needs to be raised, get the approrpiate function to do so @@ -329,7 +325,6 @@ def generate_cross_warning( val_rule = val_rule, attribute_name = attribute_name, sg = sg, - invalid_entry = invalid_entry, ) #if a message needs to be raised, get the approrpiate function to do so @@ -409,7 +404,6 @@ def generate_content_error( val_rule=val_rule, attribute_name = attribute_name, sg = sg, - invalid_entry = error_val, ) #if a message needs to be raised, get the approrpiate function to do so @@ -465,7 +459,6 @@ def generate_content_error( def get_message_level( sg: SchemaGenerator, attribute_name: str, - invalid_entry, val_rule: str, ) -> str: """ From c6488585d910fe32c523473ba6b65ffa02534f84 Mon Sep 17 00:00:00 2001 From: Gianna Jordan <61707471+GiaJordan@users.noreply.github.com> Date: Tue, 8 Nov 2022 13:01:58 -0700 Subject: [PATCH 17/36] docstring, comments, cleaning --- schematic/models/validate_attribute.py | 28 +++++++++----------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/schematic/models/validate_attribute.py b/schematic/models/validate_attribute.py index 8e8c1ae76..4a2b9317e 100644 --- a/schematic/models/validate_attribute.py +++ b/schematic/models/validate_attribute.py @@ -464,44 +464,35 @@ def get_message_level( """ Purpose: Determine whether an error or warning message should be logged and displayed + + if node is not required, + return warning + if node is recommended and requried, + return None + for other rules, parse possible, if not use default specified in validation_rule_info - Types of error/warning included: - - recommended - Raised when an attribute is empty and recommended but not required. - - unique - Raised when attribute values are not unique. - - protectAges - Raised when an attribute contains ages below 18YO or over 90YO that should be censored. Input: val_rule: str, defined in the schema. sg: schemaGenerator object attribute_name: str, attribute being validated Returns: 'error', 'warning' or None + # TODO: recommended and other rules """ level = None rule_parts = val_rule.split(" ") - # if node is not required, return None - # if node is recommended and requried, return None - # TODO: recommended and other rules - # if validion type is cross manifest, default to warning but parse - # if validation type is content, default to warning but parse - # if validation rule is other, default to error but parse - - - print(rule_parts) - - rule_info = validation_rule_info() if not sg.is_node_required(node_display_name=attribute_name): # raise warning if recommended but not required if 'recommended' in val_rule: level = 'warning' - # If not required or recommended raise nothing. - ## Redundant setting to None here again but including for clarity + # If not required or recommended raise warnings to notify else: level = 'warning' return level - + # Parse rule for level, set to default if not specified if rule_parts[-1].lower() == 'error': @@ -511,7 +502,6 @@ def get_message_level( else: level = rule_info[rule_parts[0]]['default_message_level'] - print(level) return level class ValidateAttribute(object): From fff8e5cb99f5b4cd85b4a1f733cb51359e678cc8 Mon Sep 17 00:00:00 2001 From: Gianna Jordan <61707471+GiaJordan@users.noreply.github.com> Date: Tue, 8 Nov 2022 15:01:47 -0700 Subject: [PATCH 18/36] raise nothing if required and not recommended --- schematic/models/validate_attribute.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/schematic/models/validate_attribute.py b/schematic/models/validate_attribute.py index 4a2b9317e..192310145 100644 --- a/schematic/models/validate_attribute.py +++ b/schematic/models/validate_attribute.py @@ -492,6 +492,8 @@ def get_message_level( else: level = 'warning' return level + elif sg.is_node_required(node_display_name=attribute_name) and 'recommended' in val_rule: + level = None # Parse rule for level, set to default if not specified From 9bbea7fc4a58a26ce0df529ff23022642bf2416e Mon Sep 17 00:00:00 2001 From: Gianna Jordan <61707471+GiaJordan@users.noreply.github.com> Date: Wed, 9 Nov 2022 14:43:50 -0700 Subject: [PATCH 19/36] make JSONSchema error generator conform to standard --- schematic/models/validate_attribute.py | 30 ++++++++++++++++++++++---- schematic/models/validate_manifest.py | 15 ++++++------- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/schematic/models/validate_attribute.py b/schematic/models/validate_attribute.py index 192310145..1eb43b0aa 100644 --- a/schematic/models/validate_attribute.py +++ b/schematic/models/validate_attribute.py @@ -27,7 +27,7 @@ logger = logging.getLogger(__name__) class GenerateError: - def generate_schema_error(row_num: str, attribute_name: str, error_msg: str, sg: SchemaGenerator,)-> List[str]: + def generate_schema_error(row_num: str, attribute_name: str, error_msg: str, invalid_entry: str, sg: SchemaGenerator,)-> List[str]: ''' Purpose: Process error messages generated from schema Input: @@ -35,6 +35,22 @@ def generate_schema_error(row_num: str, attribute_name: str, error_msg: str, sg: - attribute_name: the attribute the error occurred on. - error_msg: Error message ''' + error_list = [] + warning_list = [] + + #Determine which, if any, message to raise + raises = GenerateError.get_message_level( + val_rule = 'schema', + attribute_name = attribute_name, + sg = sg, + ) + + #if a message needs to be raised, get the approrpiate function to do so + if raises: + logLevel = getattr(logging,raises) + else: + return error_list, warning_list + error_col = attribute_name # Attribute name error_row = row_num # index row of the manifest where the error presented. @@ -43,8 +59,14 @@ def generate_schema_error(row_num: str, attribute_name: str, error_msg: str, sg: arg_error_string = ( f"For the attribute '{error_col}', on row {error_row}, {error_message}." ) - logging.error(arg_error_string) - return [error_row, error_col, error_message] + logLevel(arg_error_string) + + if raises == 'error': + error_list = [error_row, error_col, error_message, invalid_entry] + elif raises == 'warning': + warning_list = [error_row, error_col, error_message, invalid_entry] + + return error_list, warning_list def generate_list_error( list_string: str, row_num: str, attribute_name: str, list_error: str, @@ -497,7 +519,7 @@ def get_message_level( # Parse rule for level, set to default if not specified - if rule_parts[-1].lower() == 'error': + if rule_parts[-1].lower() == 'error' or rule_parts[0] == 'schema': level = 'error' elif rule_parts[-1].lower() == 'warning': level = 'warning' diff --git a/schematic/models/validate_manifest.py b/schematic/models/validate_manifest.py index 1286e4216..c79931b7a 100644 --- a/schematic/models/validate_manifest.py +++ b/schematic/models/validate_manifest.py @@ -238,15 +238,12 @@ def validate_manifest_values(self, manifest, jsonSchema, sg errorMsg = error.message[0:500] errorVal = error.instance if len(error.path) > 0 else "Wrong schema" - errors.append([errorRow, errorCol, errorMsg, errorVal]) - col_attr[errorCol] = errorColName - if errors: - for error in errors: - row_num = error[0] - col_index = error[1] - attr_name = col_attr[col_index] - errorMsg = error[2] - GenerateError.generate_schema_error(row_num = row_num, attribute_name = attr_name, error_msg = errorMsg, sg = sg) + val_errors, val_warnings = GenerateError.generate_schema_error(row_num = errorRow, attribute_name = errorColName, error_msg = errorMsg, invalid_entry = errorVal, sg = sg) + + if val_errors: + errors.append(val_errors) + if val_warnings: + warnings.append(val_warnings) return errors, warnings From b3164d4af419b1dbc0e57f02c1c6c497f7017eaf Mon Sep 17 00:00:00 2001 From: Gianna Jordan <61707471+GiaJordan@users.noreply.github.com> Date: Wed, 9 Nov 2022 14:45:03 -0700 Subject: [PATCH 20/36] clean up imports runcombos --- schematic/models/validate_attribute.py | 28 ++++++++++++-------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/schematic/models/validate_attribute.py b/schematic/models/validate_attribute.py index 1eb43b0aa..53b8c1916 100644 --- a/schematic/models/validate_attribute.py +++ b/schematic/models/validate_attribute.py @@ -1,28 +1,26 @@ import builtins -from jsonschema import ValidationError import logging - -import numpy as np -import pandas as pd import re import sys +import time from os import getenv - # allows specifying explicit variable types -from typing import Any, Dict, Optional, Text, List -from urllib.parse import urlparse -from urllib.request import urlopen, OpenerDirector, HTTPDefaultErrorHandler -from urllib.request import Request +from typing import Any, Dict, List, Optional, Text from urllib import error +from urllib.parse import urlparse +from urllib.request import (HTTPDefaultErrorHandler, OpenerDirector, Request, + urlopen) + +import numpy as np +import pandas as pd +from jsonschema import ValidationError -from schematic.store.synapse import SynapseStorage -from schematic.store.base import BaseStorage from schematic.schemas.generator import SchemaGenerator -from schematic.utils.validate_utils import comma_separated_list_regex +from schematic.store.base import BaseStorage +from schematic.store.synapse import SynapseStorage from schematic.utils.validate_rules_utils import validation_rule_info -import time - -from schematic.utils.validate_utils import parse_str_series_to_list +from schematic.utils.validate_utils import (comma_separated_list_regex, + parse_str_series_to_list) logger = logging.getLogger(__name__) From 89382dc9d979d779ca48a0e95a09086a3c5fa897 Mon Sep 17 00:00:00 2001 From: Gianna Jordan <61707471+GiaJordan@users.noreply.github.com> Date: Thu, 17 Nov 2022 09:52:47 -0700 Subject: [PATCH 21/36] change synapseutils import, add new function --- schematic/store/synapse.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 14be6c820..a42cc845b 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -14,7 +14,6 @@ import pandas as pd import re import synapseclient -import synapseutils from time import sleep from synapseclient import ( @@ -33,7 +32,8 @@ from synapseclient.table import build_table from synapseclient.annotations import from_synapse_annotations from synapseclient.core.exceptions import SynapseHTTPError, SynapseAuthenticationError, SynapseUnmetAccessRestrictions -import synapseutils +from synapseutils import walk +from synapseutils.copy_functions import changeFileMetaData import uuid @@ -301,7 +301,7 @@ def getFilesInStorageDataset( """ # select all files within a given storage dataset folder (top level folder in a Synapse storage project or folder marked with contentType = 'dataset') - walked_path = synapseutils.walk(self.syn, datasetId) + walked_path = walk(self.syn, datasetId) file_list = [] From a9e5fed1cc37ed1162d647877649c3af1c57633a Mon Sep 17 00:00:00 2001 From: Gianna Jordan <61707471+GiaJordan@users.noreply.github.com> Date: Thu, 17 Nov 2022 10:00:18 -0700 Subject: [PATCH 22/36] ensure manifest filename and downloadAs name match --- schematic/store/synapse.py | 1 + 1 file changed, 1 insertion(+) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index a42cc845b..0f5c1a5b4 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -821,6 +821,7 @@ def uplodad_manifest_file(self, manifest, metadataManifestPath, datasetId, restr ) manifest_synapse_file_id = self.syn.store(manifestSynapseFile, isRestricted = restrict_manifest).id + changeFileMetaData(syn = self.syn, entity = manifest_synapse_file_id, downloadAs = file_name_new) return manifest_synapse_file_id From 49a949137386f027daebea19c3288dbc820fc2dd Mon Sep 17 00:00:00 2001 From: Milen Nikolov Date: Thu, 17 Nov 2022 15:27:47 -0800 Subject: [PATCH 23/36] Blacklist hyphen in annotation key names to avoid a Synapse error when setting an annotation key containing a hyphen --- schematic/store/synapse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 14be6c820..259582141 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -831,7 +831,7 @@ def format_row_annotations(self, se, sg, row, entityId, useSchemaLabel, hideBlan # this could create a divergence between manifest column and annotations. this should be ok for most use cases. # columns with special characters are outside of the schema metadataSyn = {} - blacklist_chars = ['(', ')', '.', ' '] + blacklist_chars = ['(', ')', '.', ' ', '-'] for k, v in row.to_dict().items(): From 9577047e48b3200b5aed83aac8d3c39a876cd5fb Mon Sep 17 00:00:00 2001 From: linglp Date: Fri, 18 Nov 2022 11:15:12 -0500 Subject: [PATCH 24/36] add example test manifest --- tests/data/mock_manifests/Example_Test_BulkRNAseq.csv | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 tests/data/mock_manifests/Example_Test_BulkRNAseq.csv diff --git a/tests/data/mock_manifests/Example_Test_BulkRNAseq.csv b/tests/data/mock_manifests/Example_Test_BulkRNAseq.csv new file mode 100644 index 000000000..65c8cfb4f --- /dev/null +++ b/tests/data/mock_manifests/Example_Test_BulkRNAseq.csv @@ -0,0 +1,3 @@ +Filename,Sample ID,File Format,Component,Genome Build,Genome FASTA,Uuid,entityId +1,1,BAM,BulkRNA-seqAssay,GRCh37,,26937891-988a-41ef-810f-45eeb93bea15, +2,2,BAM,BulkRNA-seqAssay,GRCh38,,94ffefad-851e-4b02-b51d-0e0ca841ea9f, From 37bf22b95916d9662d23bb757bf02202e59d9ecf Mon Sep 17 00:00:00 2001 From: Milen Nikolov Date: Fri, 18 Nov 2022 08:42:06 -0800 Subject: [PATCH 25/36] Blacklist hyphen in table column names names to match annotation keys character convention --- schematic/store/synapse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 259582141..40ee95b6a 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -745,7 +745,7 @@ def upload_format_manifest_table(self, se, manifest, datasetId, table_name, rest # Rename the manifest columns to display names to match fileview table_info = self.get_table_info(datasetId) - blacklist_chars = ['(', ')', '.', ' '] + blacklist_chars = ['(', ')', '.', ' ', '-'] manifest_columns = manifest.columns.tolist() table_manifest=deepcopy(manifest) From df2ab2f0cadaff3f16d35093ab66e65c25eb86ff Mon Sep 17 00:00:00 2001 From: linglp Date: Fri, 18 Nov 2022 14:10:14 -0500 Subject: [PATCH 26/36] add endpoint for checking if a node is required --- api/routes.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/api/routes.py b/api/routes.py index ddc326526..24164c6da 100644 --- a/api/routes.py +++ b/api/routes.py @@ -673,3 +673,20 @@ def get_node_range( gen = SchemaGenerator(path_to_json_ld=schema_url) node_range = gen.get_node_range(node_label, return_display_names) return node_range + +def get_if_node_required(schema_url: str, node_display_name: str) -> bool: + """Check if the node is required + + Args: + schema_url (str): Data Model URL + node_display_name (str): display name + + Returns: + a boolean that indicates if a node is required + """ + gen = SchemaGenerator(path_to_json_ld=schema_url) + is_required = gen.is_node_required(node_display_name) + + return is_required + + From 5d36e945f6edccd2431e570829011409ead4433f Mon Sep 17 00:00:00 2001 From: Lingling <55448354+linglp@users.noreply.github.com> Date: Fri, 18 Nov 2022 14:12:21 -0500 Subject: [PATCH 27/36] delete example file --- tests/data/mock_manifests/Example_Test_BulkRNAseq.csv | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 tests/data/mock_manifests/Example_Test_BulkRNAseq.csv diff --git a/tests/data/mock_manifests/Example_Test_BulkRNAseq.csv b/tests/data/mock_manifests/Example_Test_BulkRNAseq.csv deleted file mode 100644 index 65c8cfb4f..000000000 --- a/tests/data/mock_manifests/Example_Test_BulkRNAseq.csv +++ /dev/null @@ -1,3 +0,0 @@ -Filename,Sample ID,File Format,Component,Genome Build,Genome FASTA,Uuid,entityId -1,1,BAM,BulkRNA-seqAssay,GRCh37,,26937891-988a-41ef-810f-45eeb93bea15, -2,2,BAM,BulkRNA-seqAssay,GRCh38,,94ffefad-851e-4b02-b51d-0e0ca841ea9f, From b9d838f4b382ad37740149a5a0587134a6acb1ef Mon Sep 17 00:00:00 2001 From: linglp Date: Fri, 18 Nov 2022 14:12:48 -0500 Subject: [PATCH 28/36] add endpoint to yaml spec --- api/openapi/api.yaml | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/api/openapi/api.yaml b/api/openapi/api.yaml index bbd4845c1..30b181a29 100644 --- a/api/openapi/api.yaml +++ b/api/openapi/api.yaml @@ -747,8 +747,39 @@ paths: description: Check schematic log. tags: - Schema Operation - - + /schemas/is_node_required: + get: + summary: Check if a node is required or not + description: Check if a node is required or not + operationId: api.routes.get_if_node_required + parameters: + - in: query + name: schema_url + schema: + type: string + description: Data Model URL + example: >- + https://raw.githubusercontent.com/Sage-Bionetworks/schematic/develop/tests/data/example.model.jsonld + required: true + - in: query + name: node_display_name + schema: + type: string + nullable: false + description: Display label of a node + example: Patient + required: true + responses: + "200": + description: csv file path or json + "500": + description: Check schematic log. + tags: + - Schema Operation + + + + /explorer/get_node_dependencies: get: summary: Get the immediate dependencies that are related to a given source node From 725163602b312edb421a803af1c1067dabeac99f Mon Sep 17 00:00:00 2001 From: linglp Date: Fri, 18 Nov 2022 14:22:33 -0500 Subject: [PATCH 29/36] change default to familyhistory --- api/openapi/api.yaml | 4 ++-- api/routes.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/api/openapi/api.yaml b/api/openapi/api.yaml index 30b181a29..4caba91ab 100644 --- a/api/openapi/api.yaml +++ b/api/openapi/api.yaml @@ -767,11 +767,11 @@ paths: type: string nullable: false description: Display label of a node - example: Patient + example: FamilyHistory required: true responses: "200": - description: csv file path or json + description: return a boolean "500": description: Check schematic log. tags: diff --git a/api/routes.py b/api/routes.py index 24164c6da..af6fdcbea 100644 --- a/api/routes.py +++ b/api/routes.py @@ -681,8 +681,9 @@ def get_if_node_required(schema_url: str, node_display_name: str) -> bool: schema_url (str): Data Model URL node_display_name (str): display name - Returns: - a boolean that indicates if a node is required + Returns: + True: If the given node is a "required" node. + False: If the given node is not a "required" (i.e., an "optional") node. """ gen = SchemaGenerator(path_to_json_ld=schema_url) is_required = gen.is_node_required(node_display_name) From 3e58a7f37d6837b1e93b08b54774f35c2386feab Mon Sep 17 00:00:00 2001 From: linglp Date: Fri, 18 Nov 2022 14:22:45 -0500 Subject: [PATCH 30/36] add tests --- tests/test_api.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_api.py b/tests/test_api.py index 28ed4b2dc..51ad55c10 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -199,7 +199,16 @@ def test_get_schema(self, client, data_model_jsonld): if os.path.exists(response_dt): os.remove(response_dt) + def test_if_node_required(test, client, data_model_jsonld): + params = { + "schema_url": data_model_jsonld, + "node_display_name": "FamilyHistory" + } + response = client.get("http://localhost:3001/v1/schemas/is_node_required", query_string = params) + response_dta = json.loads(response.data) + assert response.status_code == 200 + assert response_dta == True @pytest.mark.schematic_api class TestSchemaGeneratorOperation: From 92103270f9e31581f0291f9e75f6f0905aec41ec Mon Sep 17 00:00:00 2001 From: linglp Date: Fri, 18 Nov 2022 15:59:19 -0500 Subject: [PATCH 31/36] auto send slack message when release --- .github/workflows/publish.yml | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index f2818d0ed..8d5b854f8 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -92,9 +92,28 @@ jobs: # publish to pypi #---------------------------------------------- - name: Publish package to Pypi + id: publish-to-pypi if: steps.check-tag.outputs.match == 'true' env: PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} PYPI_USERNAME: __token__ run: | - poetry publish --build --username $PYPI_USERNAME --password $PYPI_TOKEN \ No newline at end of file + poetry publish --build --username $PYPI_USERNAME --password $PYPI_TOKEN + + #---------------------------------------------- + # post a message to slack + #---------------------------------------------- + + - name: Post to a Slack channel + if: steps.publish-to-pypi.outcome == 'success' + id: slack + uses: slackapi/slack-github-action@v1.23.0 + with: + # Slack channel id, channel name, or user id to post message. + # See also: https://api.slack.com/methods/chat.postMessage#channels + # You can pass in multiple channels to post to by providing a comma-delimited list of channel IDs. + channel-id: 'C01HSSMPQBG,C01ANC02U59' + # For posting a simple plain text message + slack-message: "Schematic has just been released. Check out new version: ${{ github.ref_name }}" + env: + SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} \ No newline at end of file From fc001619374ef61d5d351b1fe7f91062524de303 Mon Sep 17 00:00:00 2001 From: linglp Date: Fri, 18 Nov 2022 16:01:42 -0500 Subject: [PATCH 32/36] add comment --- .github/workflows/publish.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 8d5b854f8..e0658f7d5 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -112,6 +112,7 @@ jobs: # Slack channel id, channel name, or user id to post message. # See also: https://api.slack.com/methods/chat.postMessage#channels # You can pass in multiple channels to post to by providing a comma-delimited list of channel IDs. + # ibc-fair-data channel and data-curator-schematic channel channel-id: 'C01HSSMPQBG,C01ANC02U59' # For posting a simple plain text message slack-message: "Schematic has just been released. Check out new version: ${{ github.ref_name }}" From b3484735b33e731eafaffa2a9e40d736fea95b4b Mon Sep 17 00:00:00 2001 From: Lingling <55448354+linglp@users.noreply.github.com> Date: Fri, 18 Nov 2022 17:27:46 -0500 Subject: [PATCH 33/36] delete example file --- tests/data/mock_manifests/Example_Test_BulkRNAseq.csv | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 tests/data/mock_manifests/Example_Test_BulkRNAseq.csv diff --git a/tests/data/mock_manifests/Example_Test_BulkRNAseq.csv b/tests/data/mock_manifests/Example_Test_BulkRNAseq.csv deleted file mode 100644 index 65c8cfb4f..000000000 --- a/tests/data/mock_manifests/Example_Test_BulkRNAseq.csv +++ /dev/null @@ -1,3 +0,0 @@ -Filename,Sample ID,File Format,Component,Genome Build,Genome FASTA,Uuid,entityId -1,1,BAM,BulkRNA-seqAssay,GRCh37,,26937891-988a-41ef-810f-45eeb93bea15, -2,2,BAM,BulkRNA-seqAssay,GRCh38,,94ffefad-851e-4b02-b51d-0e0ca841ea9f, From 729ceb3fa55ca8ed09c60002c01be9ff4b8944e7 Mon Sep 17 00:00:00 2001 From: linglp Date: Tue, 22 Nov 2022 11:28:54 -0500 Subject: [PATCH 34/36] update file path and provide instructions related to use jupyter notebook to run cli --- schematic_cli_examples.ipynb | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/schematic_cli_examples.ipynb b/schematic_cli_examples.ipynb index 3079fdfc0..165f9a84c 100644 --- a/schematic_cli_examples.ipynb +++ b/schematic_cli_examples.ipynb @@ -13,7 +13,14 @@ "id": "a464face", "metadata": {}, "source": [ - "To read full documentation, please visit here: https://sage-schematic.readthedocs.io/en/develop/cli_reference.html" + "To read full documentation, please visit here: https://sage-schematic.readthedocs.io/en/develop/cli_reference.html\n", + "\n", + "To run the CLI commands in jupyter notebook, please add a code cell and add \"%%bash\" on top. For example: \n", + "\n", + "```\n", + "%%bash\n", + "schematic init --config ~/path/to/config.yml --auth service_account\n", + "```" ] }, { @@ -23,10 +30,10 @@ "source": [ "### Initalize schematic \n", "For token which uses OAuth: \n", - "* `schematic init --config config.yml --auth token`\n", + "* `schematic init --config ~/path/to/config.yml --auth token`\n", "\n", "For service account: \n", - "* `schematic init --config config.yml --auth service_account`" + "* `schematic init --config ~/path/to/config.yml --auth service_account`" ] }, { @@ -63,22 +70,22 @@ "tags": [] }, "source": [ - "Get an empty manifest as a CSV\n", - "- `schematic manifest --config config.yml get`\n", + "Get an empty manifest as a CSV by using the example data model\n", + "- `schematic manifest --config ~/path/to/config.yml get`\n", "\n", - "Get an empty manifest as a google sheet\n", - "- `schematic manifest --config config.yml get --sheet_url`\n", + "Get an empty manifest as a google sheet by using the example data model\n", + "- `schematic manifest --config ~/path/to/config.yml get --sheet_url`\n", "\n", "Get an existing manifest (syn28397250) and download it as a csv file (without customizing title)\n", - "- `schematic manifest --config config.yml get --dataset_id syn28268700`\n", + "- `schematic manifest --config ~/path/to/config.yml get --dataset_id syn28268700`\n", "\n", "*Note*: the dataset_id here has to be the parent folder syn id\n", "\n", "Get an existing manifest and download it as \"test.xlsx\"\n", - "- `schematic manifest --config config.yml get --output_xlsx 'test.xlsx' --dataset_id syn28268700`\n", + "- `schematic manifest --config ~/path/to/config.yml get --output_xlsx 'test.xlsx' --dataset_id syn28268700`\n", "\n", "Get an existing manifest and download it as \"test.csv\"\n", - "- `schematic manifest --config config.yml get --output_csv 'test.csv' --dataset_id syn28268700`\n", + "- `schematic manifest --config ~/path/to/config.yml get --output_csv 'test.csv' --dataset_id syn28268700`\n", "\n" ] }, @@ -97,7 +104,7 @@ "source": [ "Validate a manifest\n", "\n", - "* `schematic model --config config.yml validate --manifest_path tests/data/mock_manifests/Valid_Test_Manifest.csv --data_type MockComponent`" + "* `schematic model --config ~/path/to/config.yml validate --manifest_path tests/data/mock_manifests/Valid_Test_Manifest.csv --data_type MockComponent`" ] }, { @@ -115,7 +122,7 @@ "source": [ "Submit a manifest as a table \n", "\n", - "* `schematic model -c config.yml submit -mp /Users/lpeng/Downloads/Patient-Metadata-Manifest.csv -d syn23643250 -vc Patient -mrt table`\n" + "* `schematic model -c ~/path/to/config.yml submit -mp /path/to/Patient-Metadata-Manifest.csv -d syn23643250 -vc Patient -mrt table`\n" ] } ], From 3ba60359e025d2b2297212e8b3540d3ff5c8ee0e Mon Sep 17 00:00:00 2001 From: linglp Date: Tue, 22 Nov 2022 11:33:12 -0500 Subject: [PATCH 35/36] remind users to run schematic init --- schematic_cli_examples.ipynb | 1 + 1 file changed, 1 insertion(+) diff --git a/schematic_cli_examples.ipynb b/schematic_cli_examples.ipynb index 165f9a84c..9328fe881 100644 --- a/schematic_cli_examples.ipynb +++ b/schematic_cli_examples.ipynb @@ -103,6 +103,7 @@ "metadata": {}, "source": [ "Validate a manifest\n", + "* For first time users, navigate to the root of schematic repo and run `great_expectation init`\n", "\n", "* `schematic model --config ~/path/to/config.yml validate --manifest_path tests/data/mock_manifests/Valid_Test_Manifest.csv --data_type MockComponent`" ] From 5c663f6a45bc5c1ef53e726785b63d4374241c56 Mon Sep 17 00:00:00 2001 From: Lingling <55448354+linglp@users.noreply.github.com> Date: Tue, 22 Nov 2022 11:36:11 -0500 Subject: [PATCH 36/36] delete extra file delete extra file --- tests/data/mock_manifests/Example_Test_BulkRNAseq.csv | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 tests/data/mock_manifests/Example_Test_BulkRNAseq.csv diff --git a/tests/data/mock_manifests/Example_Test_BulkRNAseq.csv b/tests/data/mock_manifests/Example_Test_BulkRNAseq.csv deleted file mode 100644 index 65c8cfb4f..000000000 --- a/tests/data/mock_manifests/Example_Test_BulkRNAseq.csv +++ /dev/null @@ -1,3 +0,0 @@ -Filename,Sample ID,File Format,Component,Genome Build,Genome FASTA,Uuid,entityId -1,1,BAM,BulkRNA-seqAssay,GRCh37,,26937891-988a-41ef-810f-45eeb93bea15, -2,2,BAM,BulkRNA-seqAssay,GRCh38,,94ffefad-851e-4b02-b51d-0e0ca841ea9f,