Skip to content

Commit

Permalink
Merge pull request #1032 from Sage-Bionetworks/develop
Browse files Browse the repository at this point in the history
Release 22.11.3
  • Loading branch information
linglp authored Nov 22, 2022
2 parents 322cf42 + 1887b0a commit ad78fd4
Show file tree
Hide file tree
Showing 14 changed files with 604 additions and 290 deletions.
22 changes: 21 additions & 1 deletion .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,29 @@ jobs:
# publish to pypi
#----------------------------------------------
- name: Publish package to Pypi
id: publish-to-pypi
if: steps.check-tag.outputs.match == 'true'
env:
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
PYPI_USERNAME: __token__
run: |
poetry publish --build --username $PYPI_USERNAME --password $PYPI_TOKEN
poetry publish --build --username $PYPI_USERNAME --password $PYPI_TOKEN
#----------------------------------------------
# post a message to slack
#----------------------------------------------

- name: Post to a Slack channel
if: steps.publish-to-pypi.outcome == 'success'
id: slack
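uses: slackapi/slack-github-action@v1.23.0  # NOTE(review): action ref was garbled by e-mail obfuscation in this page capture; confirm the pinned version against the repository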
with:
# Slack channel id, channel name, or user id to post message.
# See also: https://api.slack.com/methods/chat.postMessage#channels
# You can pass in multiple channels to post to by providing a comma-delimited list of channel IDs.
# ibc-fair-data channel and data-curator-schematic channel
channel-id: 'C01HSSMPQBG,C01ANC02U59'
# For posting a simple plain text message
slack-message: "Schematic has just been released. Check out new version: ${{ github.ref_name }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
35 changes: 33 additions & 2 deletions api/openapi/api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -747,8 +747,39 @@ paths:
description: Check schematic log.
tags:
- Schema Operation


/schemas/is_node_required:
get:
summary: Check if a node is required or not
description: Check if a node is required or not
operationId: api.routes.get_if_node_required
parameters:
- in: query
name: schema_url
schema:
type: string
description: Data Model URL
example: >-
https://raw.githubusercontent.com/Sage-Bionetworks/schematic/develop/tests/data/example.model.jsonld
required: true
- in: query
name: node_display_name
schema:
type: string
nullable: false
description: Display label of a node
example: FamilyHistory
required: true
responses:
"200":
description: return a boolean
"500":
description: Check schematic log.
tags:
- Schema Operation




/explorer/get_node_dependencies:
get:
summary: Get the immediate dependencies that are related to a given source node
Expand Down
18 changes: 18 additions & 0 deletions api/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,3 +673,21 @@ def get_node_range(
gen = SchemaGenerator(path_to_json_ld=schema_url)
node_range = gen.get_node_range(node_label, return_display_names)
return node_range

def get_if_node_required(schema_url: str, node_display_name: str) -> bool:
    """Report whether a node of the data model is marked as required.

    Args:
        schema_url (str): Data Model URL
        node_display_name (str): display name of the node to look up

    Returns:
        bool: the result of ``SchemaGenerator.is_node_required`` for the
        node -- True for a "required" node, False for an "optional" one.
    """
    # Build a generator from the JSON-LD model and delegate the check to it.
    schema_generator = SchemaGenerator(path_to_json_ld=schema_url)
    return schema_generator.is_node_required(node_display_name)


56 changes: 32 additions & 24 deletions schematic/models/GE_Helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from great_expectations.data_context.types.resource_identifiers import ExpectationSuiteIdentifier

from schematic.models.validate_attribute import GenerateError
from schematic.schemas.generator import SchemaGenerator
from schematic.utils.validate_utils import rule_in_rule_list

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -159,7 +160,7 @@ def build_expectation_suite(self,):
if validation_rules:
#iterate through all validation rules for an attribute
for rule in validation_rules:

base_rule = rule.split(" ")[0]

#check if rule has an implemented expectation
if rule_in_rule_list(rule,self.unimplemented_expectations):
Expand All @@ -169,8 +170,9 @@ def build_expectation_suite(self,):
args["column"] = col
args["result_format"] = "COMPLETE"


#Validate num
if rule=='num':
if base_rule=='num':
args["mostly"]=1.0
args["type_list"]=['int','int64', 'float', 'float64']
meta={
Expand All @@ -182,7 +184,7 @@ def build_expectation_suite(self,):
}

#Validate float
elif rule=='float':
elif base_rule=='float':
args["mostly"]=1.0
args["type_list"]=['float', 'float64']
meta={
Expand All @@ -194,7 +196,7 @@ def build_expectation_suite(self,):
}

#Validate int
elif rule=='int':
elif base_rule=='int':
args["mostly"]=1.0
args["type_list"]=['int','int64']
meta={
Expand All @@ -206,7 +208,7 @@ def build_expectation_suite(self,):
}

#Validate string
elif rule=='str':
elif base_rule=='str':
args["mostly"]=1.0
args["type_"]='str'
meta={
Expand All @@ -217,7 +219,7 @@ def build_expectation_suite(self,):
"validation_rule": rule
}

elif rule.startswith("recommended"):
elif base_rule==("recommended"):
args["mostly"]=0.0000000001
args["regex_list"]=['^$']
meta={
Expand All @@ -228,7 +230,7 @@ def build_expectation_suite(self,):
"validation_rule": rule
}

elif rule.startswith("protectAges"):
elif base_rule==("protectAges"):
#Function to convert to different age limit formats
min_age, max_age = self.get_age_limits()

Expand All @@ -243,7 +245,7 @@ def build_expectation_suite(self,):
"validation_rule": rule
}

elif rule.startswith("unique"):
elif base_rule==("unique"):
args["mostly"]=1.0
meta={
"notes": {
Expand All @@ -253,7 +255,7 @@ def build_expectation_suite(self,):
"validation_rule": rule
}

elif rule.startswith("inRange"):
elif base_rule==("inRange"):
args["mostly"]=1.0
args["min_value"]=float(rule.split(" ")[1])
args["max_value"]=float(rule.split(" ")[2])
Expand Down Expand Up @@ -350,7 +352,8 @@ def generate_errors(
validation_results: Dict,
validation_types: Dict,
errors: List,
warnings: List
warnings: List,
sg: SchemaGenerator,
):
"""
Purpose:
Expand Down Expand Up @@ -407,45 +410,50 @@ def generate_errors(
#call functions to generate error messages and add to error list
if validation_types[rule.split(" ")[0]]['type']=='type_validation':
for row, value in zip(indices,values):
errors.append(
GenerateError.generate_type_error(
vr_errors, vr_warnings = GenerateError.generate_type_error(
val_rule = rule,
row_num = row+2,
attribute_name = errColumn,
invalid_entry = value,
sg = sg,
)
)
if vr_errors:
errors.append(vr_errors)
if vr_warnings:
warnings.append(vr_warnings)
elif validation_types[rule.split(" ")[0]]['type']=='regex_validation':
expression=result_dict['expectation_config']['kwargs']['regex']

for row, value in zip(indices,values):
errors.append(
GenerateError.generate_regex_error(
vr_errors, vr_warnings = GenerateError.generate_regex_error(
val_rule= rule,
reg_expression = expression,
row_num = row+2,
module_to_call = 'match',
attribute_name = errColumn,
invalid_entry = value,
sg = sg,
)
)
if vr_errors:
errors.append(vr_errors)
if vr_warnings:
warnings.append(vr_warnings)
elif validation_types[rule.split(" ")[0]]['type']=='content_validation':
content_errors, content_warnings = GenerateError.generate_content_error(
vr_errors, vr_warnings = GenerateError.generate_content_error(
val_rule = rule,
attribute_name = errColumn,
row_num = list(np.array(indices)+2),
error_val = values,
sg = self.sg
)
if content_errors:
errors.append(content_errors)
if vr_errors:
errors.append(vr_errors)
if rule.startswith('protectAges'):
self.censor_ages(content_errors,errColumn)
self.censor_ages(vr_errors,errColumn)
pass
elif content_warnings:
warnings.append(content_warnings)
if vr_warnings:
warnings.append(vr_warnings)
if rule.startswith('protectAges'):
self.censor_ages(content_warnings,errColumn)
self.censor_ages(vr_warnings,errColumn)
pass

return errors, warnings
Expand Down
Loading

0 comments on commit ad78fd4

Please sign in to comment.