Merge pull request #1032 from Sage-Bionetworks/develop

Release 22.11.3
Sage-Bionetworks · Nov 22, 2022 · ad78fd4 · ad78fd4
2 parents 322cf42 + 1887b0a
commit ad78fd4
Show file tree

Hide file tree

Showing 14 changed files with 604 additions and 290 deletions.
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -92,9 +92,29 @@ jobs:
       #    publish to pypi
       #----------------------------------------------  
       - name: Publish package to Pypi
+        id: publish-to-pypi
         if: steps.check-tag.outputs.match == 'true'
         env:
           PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
           PYPI_USERNAME: __token__
         run: |
-          poetry publish --build --username $PYPI_USERNAME --password $PYPI_TOKEN
+          poetry publish --build --username $PYPI_USERNAME --password $PYPI_TOKEN
+
+      #----------------------------------------------
+      #    post a message to slack
+      #----------------------------------------------  
+
+      - name: Post to a Slack channel
+        if: steps.publish-to-pypi.outcome == 'success'
+        id: slack
+        uses: slackapi/slack-github-action@v1.23.0
+        with:
+          # Slack channel id, channel name, or user id to post message.
+          # See also: https://api.slack.com/methods/chat.postMessage#channels
+          # You can pass in multiple channels to post to by providing a comma-delimited list of channel IDs.
+          # ibc-fair-data channel and data-curator-schematic channel
+          channel-id: 'C01HSSMPQBG,C01ANC02U59'
+          # For posting a simple plain text message
+          slack-message: "Schematic has just been released. Check out new version: ${{ github.ref_name }}"
+        env:
+          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
diff --git a/api/openapi/api.yaml b/api/openapi/api.yaml
@@ -747,8 +747,39 @@ paths:
           description: Check schematic log. 
       tags:
         - Schema Operation
-
-
+  /schemas/is_node_required:
+    get:
+      summary: Check if a node is required or not
+      description: Check if a node is required or not
+      operationId: api.routes.get_if_node_required
+      parameters:
+        - in: query
+          name: schema_url
+          schema:
+            type: string
+          description: Data Model URL
+          example: >-
+            https://raw.githubusercontent.com/Sage-Bionetworks/schematic/develop/tests/data/example.model.jsonld
+          required: true
+        - in: query
+          name: node_display_name
+          schema:
+            type: string
+            nullable: false
+          description: Display label of a node
+          example: FamilyHistory
+          required: true
+      responses:
+        "200":
+          description: return a boolean 
+        "500":
+          description: Check schematic log. 
+      tags:
+        - Schema Operation
+
+
+
+
   /explorer/get_node_dependencies:
     get:
       summary: Get the immediate dependencies that are related to a given source node

diff --git a/api/routes.py b/api/routes.py
@@ -673,3 +673,21 @@ def get_node_range(
     gen = SchemaGenerator(path_to_json_ld=schema_url)
     node_range = gen.get_node_range(node_label, return_display_names)
     return node_range
+
+def get_if_node_required(schema_url: str, node_display_name: str) -> bool:
+    """Check if the node is required
+
+    Args:
+        schema_url (str): Data Model URL
+        node_display_name (str): Display label of the node to check
+
+    Returns:
+        bool: True if the given node is a "required" node;
+            False if it is not (i.e., it is an "optional" node).
+    """
+    gen = SchemaGenerator(path_to_json_ld=schema_url)
+    is_required = gen.is_node_required(node_display_name)
+
+    return is_required
+
+
diff --git a/schematic/models/GE_Helpers.py b/schematic/models/GE_Helpers.py
@@ -23,6 +23,7 @@
 from great_expectations.data_context.types.resource_identifiers import ExpectationSuiteIdentifier
 
 from schematic.models.validate_attribute import GenerateError
+from schematic.schemas.generator import SchemaGenerator
 from schematic.utils.validate_utils import rule_in_rule_list
 
 logger = logging.getLogger(__name__)
@@ -159,7 +160,7 @@ def build_expectation_suite(self,):
             if validation_rules:
                 #iterate through all validation rules for an attribute
                 for rule in validation_rules:
-
+                    base_rule = rule.split(" ")[0]
 
                     #check if rule has an implemented expectation
                     if rule_in_rule_list(rule,self.unimplemented_expectations):
@@ -169,8 +170,9 @@ def build_expectation_suite(self,):
                     args["column"] = col
                     args["result_format"] = "COMPLETE"
 
+
                     #Validate num
-                    if rule=='num':
+                    if base_rule=='num':
                         args["mostly"]=1.0
                         args["type_list"]=['int','int64', 'float', 'float64']
                         meta={
@@ -182,7 +184,7 @@ def build_expectation_suite(self,):
                         }
 
                     #Validate float
-                    elif rule=='float':
+                    elif base_rule=='float':
                         args["mostly"]=1.0
                         args["type_list"]=['float', 'float64']
                         meta={
@@ -194,7 +196,7 @@ def build_expectation_suite(self,):
                         }
 
                     #Validate int
-                    elif rule=='int':
+                    elif base_rule=='int':
                         args["mostly"]=1.0
                         args["type_list"]=['int','int64']
                         meta={
@@ -206,7 +208,7 @@ def build_expectation_suite(self,):
                         }
 
                     #Validate string
-                    elif rule=='str':
+                    elif base_rule=='str':
                         args["mostly"]=1.0
                         args["type_"]='str'
                         meta={
@@ -217,7 +219,7 @@ def build_expectation_suite(self,):
                             "validation_rule": rule
                         }
 
-                    elif rule.startswith("recommended"):
+                    elif base_rule==("recommended"):
                         args["mostly"]=0.0000000001
                         args["regex_list"]=['^$']
                         meta={
@@ -228,7 +230,7 @@ def build_expectation_suite(self,):
                             "validation_rule": rule
                         }
 
-                    elif rule.startswith("protectAges"):
+                    elif base_rule==("protectAges"):
                         #Function to convert to different age limit formats
                         min_age, max_age = self.get_age_limits()
 
@@ -243,7 +245,7 @@ def build_expectation_suite(self,):
                             "validation_rule": rule
                         }
 
-                    elif rule.startswith("unique"):
+                    elif base_rule==("unique"):
                         args["mostly"]=1.0
                         meta={
                             "notes": {
@@ -253,7 +255,7 @@ def build_expectation_suite(self,):
                             "validation_rule": rule
                         }
 
-                    elif rule.startswith("inRange"):
+                    elif base_rule==("inRange"):
                         args["mostly"]=1.0
                         args["min_value"]=float(rule.split(" ")[1])
                         args["max_value"]=float(rule.split(" ")[2])
@@ -350,7 +352,8 @@ def generate_errors(
         validation_results: Dict,
         validation_types: Dict,
         errors: List,
-        warnings: List
+        warnings: List,
+        sg: SchemaGenerator,
         ):
         """
             Purpose:
@@ -407,45 +410,50 @@ def generate_errors(
                 #call functions to generate error messages and add to error list
                 if validation_types[rule.split(" ")[0]]['type']=='type_validation':
                     for row, value in zip(indices,values):
-                        errors.append(
-                            GenerateError.generate_type_error(
+                        vr_errors, vr_warnings = GenerateError.generate_type_error(
                                 val_rule = rule,
                                 row_num = row+2,
                                 attribute_name = errColumn,
                                 invalid_entry = value,
+                                sg = sg,
                             )
-                        )          
+                        if vr_errors:
+                            errors.append(vr_errors)  
+                        if vr_warnings:
+                            warnings.append(vr_warnings) 
                 elif validation_types[rule.split(" ")[0]]['type']=='regex_validation':
                     expression=result_dict['expectation_config']['kwargs']['regex']
-
                     for row, value in zip(indices,values):   
-                        errors.append(
-                            GenerateError.generate_regex_error(
+                        vr_errors, vr_warnings = GenerateError.generate_regex_error(
                                 val_rule= rule,
                                 reg_expression = expression,
                                 row_num = row+2,
                                 module_to_call = 'match',
                                 attribute_name = errColumn,
                                 invalid_entry = value,
+                                sg = sg,
                             )
-                        )    
+                        if vr_errors:
+                            errors.append(vr_errors)  
+                        if vr_warnings:
+                            warnings.append(vr_warnings)                          
                 elif validation_types[rule.split(" ")[0]]['type']=='content_validation':     
-                    content_errors, content_warnings = GenerateError.generate_content_error(
+                    vr_errors, vr_warnings = GenerateError.generate_content_error(
                                                             val_rule = rule, 
                                                             attribute_name = errColumn,
                                                             row_num = list(np.array(indices)+2),
                                                             error_val = values,  
                                                             sg = self.sg
                                                         )       
-                    if content_errors:
-                        errors.append(content_errors)  
+                    if vr_errors:
+                        errors.append(vr_errors)  
                         if rule.startswith('protectAges'):
-                            self.censor_ages(content_errors,errColumn)
+                            self.censor_ages(vr_errors,errColumn)
                             pass
-                    elif content_warnings:
-                        warnings.append(content_warnings)  
+                    if vr_warnings:
+                        warnings.append(vr_warnings)  
                         if rule.startswith('protectAges'):
-                            self.censor_ages(content_warnings,errColumn)
+                            self.censor_ages(vr_warnings,errColumn)
                             pass
 
         return errors, warnings