1.4 - State tracking for Phylum incomplete package data, fixed yarn p…

…arsing, cleanup (#5) * Adding state tracking for incomplete packages * fix if clause * fix input variables * fix input variables * fix path resolve() * enable tmate * fix paths; disable tmate * fix output declaration * update success and complete_succcess files with CORRECT files * updated testing files * enable tmate * update testing files with old and new reqs approach * fix string issue in .replace() for incompletes * disable tmate * Refactor support for yarn lockfile parsing Added parse_yarn module to support identification and parsing of yarn v1 and v2 lockfiles returning a list of tuples (pkg,ver) * remove IPython import * break out functions for lockfile submission and changes submission * fix return stmt to parse_yarn module * fix error message when looking for PREVIOUS_INCOMPLETE env var * add debug for parse_yarn * enable tmate * update to fix single package upgrade bug * disable tmate * clean up * update comment message to fix #5 (comment) * re-enable exit condition when environment variables cannot be identified: #5 (comment) * update comment text to generalize references to requirements.txt
phylum-dev · Mar 7, 2022 · 8efbc88 · 8efbc88
1 parent b37490e
commit 8efbc88
Show file tree

Hide file tree

Showing 10 changed files with 2,205 additions and 75 deletions.
diff --git a/action.yml b/action.yml
@@ -29,6 +29,14 @@ inputs:
     description: "Phylum version"
     required: false
     default: '0'
+  incomplete_package_strategy:
+    description: "Method for resolving incomplete packages"
+    required: false
+    default: "pass_with_comment"
+  invoke_test_matrix:
+    description: "Only used for testing"
+    required: false
+    default: false
 
 
 runs:
@@ -40,6 +48,20 @@ runs:
         phylum_token: ${{ inputs.phylum_token }}
         phylum_version: ${{ inputs.phylum_version }}
 
+    - name: Check for previous comment
+      uses: peter-evans/find-comment@v1
+      id: fc
+      with:
+        issue-number: ${{ github.event.pull_request.number }}
+        body-includes: INCOMPLETE
+
+    - name: Store result of id=fc in environment
+      shell: bash
+      if: "contains(steps.fc.outputs.comment-body, 'Phylum')"
+      run: |
+        echo "storing PREVIOUS_INCOMPLETE"
+        echo PREVIOUS_INCOMPLETE=1 >> $GITHUB_ENV
+
     - name: Check for existing project
       shell: bash
       run: |
@@ -100,11 +122,23 @@ runs:
         popd
 
 
+    # - name: tmate
+      # uses: mxschmitt/action-tmate@v3
+
+    - name: invoke test matrix
+      shell: bash
+      if: "contains(inputs.invoke_test_matrix, 'true')"
+      run: |
+        python $GITHUB_ACTION_PATH/test_matrix.py
+
     - name: python script analyze.py
       shell: bash
       if: "!contains(steps.get-prtype.outputs.prtype, 'NA')"
       run: python $GITHUB_ACTION_PATH/analyze.py "analyze" $GITHUB_REPOSITORY ${{ github.event.number }} ${{ inputs.vul_threshold }} ${{ inputs.mal_threshold }} ${{ inputs.eng_threshold }} ${{ inputs.lic_threshold }} ${{ inputs.aut_threshold }}
 
+    # - name: tmate
+      # uses: mxschmitt/action-tmate@v3
+
     - id: get-returncode
       shell: bash
       run: |
@@ -114,13 +148,7 @@ runs:
         ret="${ret//$'\r'/'%0A'}"
         echo "::set-output name=ret::$ret"
 
-    - name: return 5 for incomplete packages
-      shell: bash
-      if: "contains(steps.get-returncode.outputs.ret, '5')"
-      run: |
-        echo 'exiting with 5 for incomplete packages'
-        exit 5
-
+    # This will catch SUCCESS cases
     - name: return 0 for success
       shell: bash
       if: "contains(steps.get-returncode.outputs.ret, '0')"
@@ -129,7 +157,9 @@ runs:
         exit 0
 
     - id: get-comment-body
-      if: "contains(steps.get-returncode.outputs.ret, '1')"
+      # Build the comment body for any non-zero return code (1, 4, or 5)
+      # if: "contains(steps.get-returncode.outputs.ret, '1')"
+      if: "steps.get-returncode.outputs.ret > 0"
       shell: bash
       run: |
         body="$(cat ~/pr_comment.txt)"
@@ -139,12 +169,24 @@ runs:
         echo "::set-output name=body::$body"
 
     - name: Set comment
-      if: "contains(steps.get-returncode.outputs.ret, '1')"
+      # Post the comment for any non-zero return code (1, 4, or 5),
+      # rather than only for a return code of 1
+      #if: "contains(steps.get-returncode.outputs.ret, '1')"
+      if: "steps.get-returncode.outputs.ret > 0"
       uses: peter-evans/create-or-update-comment@v1
       with:
         issue-number: ${{ github.event.pull_request.number }}
         body: ${{ steps.get-comment-body.outputs.body }}
 
+    # This will catch INCOMPLETE and COMPLETE_SUCCESS
+    - name: handle ret values of 4 or 5
+      shell: bash
+      if: "steps.get-returncode.outputs.ret >= 4"
+      run: |
+        echo 'exiting with 0 for success - ret = ${{ steps.get-returncode.outputs.ret }}'
+        exit 0
+
+    # This will catch FAILURE and COMPLETE_FAILURE
     - name: return 1 for risk analysis failure
       shell: bash
       if: "contains(steps.get-returncode.outputs.ret, '1')"

diff --git a/analyze.py b/analyze.py
@@ -6,12 +6,7 @@
 from unidiff import PatchSet
 import pathlib
 from subprocess import run
-
-# TODO:
-# [DONE]    1. Clearly document which environment variables are used
-# [DONE]    2. Don't assume PRs are going into master branch, need to get the target
-# [DONE]        3. Add Gmefile support
-# [DONE]        4. Document file paths
+import parse_yarn
 
 ENV_KEYS = [
     "GITHUB_SHA", # for get_PR_diff; this is the SHA of the commit for the branch being merged
@@ -26,6 +21,35 @@
     "pr_comment": "/home/runner/pr_comment.txt",
 }
 
+'''
+    States on returncode
+    0 = No comment
+    1 = FAILED_COMMENT
+    5 = INCOMPLETE_COMMENT then:
+        4 = COMPLETE_SUCCESS_COMMENT
+        1 = COMPLETE_FAILED_COMMENT
+'''
+
+# Headers for distinct comment types
+DETAILS_DROPDOWN = "<details>\n<summary>Background</summary>\n<br />\nThis repository uses a GitHub Action to automatically analyze the risk of new dependencies added via Pull Request. An administrator of this repository has set score requirements for Phylum's five risk domains.<br /><br />\nIf you see this comment, one or more dependencies added to the package manager lockfile in this Pull Request have failed Phylum's risk analysis.\n</details>\n\n"
+
+INCOMPLETE_COMMENT = "## Phylum OSS Supply Chain Risk Analysis - INCOMPLETE\n\n"
+INCOMPLETE_COMMENT += "This pull request contains TKTK package versions Phylum has not yet processed, preventing a complete risk analysis. Phylum is processing these packages currently and should complete within 30 minutes. Please wait for at least 30 minutes, then re-run the GitHub Check pertaining to `phylum-analyze-pr-action`.\n\n"
+INCOMPLETE_COMMENT += DETAILS_DROPDOWN
+
+COMPLETE_FAILED_COMMENT = "## Phylum OSS Supply Chain Risk Analysis - COMPLETE\n\n"
+COMPLETE_FAILED_COMMENT += "The Phylum risk analysis is now complete.\n\n"
+COMPLETE_FAILED_COMMENT += DETAILS_DROPDOWN
+
+COMPLETE_SUCCESS_COMMENT = "## Phylum OSS Supply Chain Risk Analysis - COMPLETE\n\n"
+COMPLETE_SUCCESS_COMMENT += "The Phylum risk analysis is now complete and did not identify any issues for this PR.\n\n"
+COMPLETE_SUCCESS_COMMENT += DETAILS_DROPDOWN
+
+FAILED_COMMENT = "## Phylum OSS Supply Chain Risk Analysis\n\n"
+FAILED_COMMENT +=DETAILS_DROPDOWN
+
+
+
 class AnalyzePRForReqs():
     def __init__(self, repo, pr_num, vul, mal, eng, lic, aut):
         self.repo = repo
@@ -38,17 +62,21 @@ def __init__(self, repo, pr_num, vul, mal, eng, lic, aut):
         self.gbl_failed = False
         self.gbl_incomplete = False
         self.incomplete_pkgs = list()
+        self.previous_incomplete = False
         self.env = dict()
         self.get_env_vars()
 
+
     def get_env_vars(self):
         for key in ENV_KEYS:
             temp = os.environ.get(key)
             if temp is not None:
                 self.env[key] = temp
             else:
-                print(f"[ERROR] could not get value for os.environ.get({key})")
+                print(f"[ERROR] could not get value for required env variable os.environ.get({key})")
                 sys.exit(11)
+        if os.environ.get("PREVIOUS_INCOMPLETE"):
+            self.previous_incomplete = True
         return
 
     def new_get_PR_diff(self):
@@ -161,26 +189,15 @@ def parse_package_lock(self, changes):
                     ver = version_match.groups()[0]
                     pkg_ver.append((name,ver))
             cur +=1
+
+        print(f"[DEBUG]: pkg_ver length: {len(pkg_ver)}")
         return pkg_ver
 
     ''' Parse yarn.lock diff to generate a list of tuples of (package_name, version) '''
-    def parse_yarn_lock(self, changes):
-        cur = 0
-        name_pat        = re.compile(r"[\"]?(@?.*?)(?=@)")
-        version_pat     = re.compile(r".*version \"(.*?)\"")
-        resolved_pat    = re.compile(r".*resolved \"(.*?)\"")
-        integrity_pat   = re.compile(r".*integrity.*")
-        pkg_ver = list()
 
-        while cur < len(changes)-3:
-            if name_match := re.match(name_pat, changes[cur]):
-                if version_match := re.match(version_pat, changes[cur+1]):
-                    if resolved_match := re.match(resolved_pat, changes[cur+2]):
-                        if integrity_match := re.match(integrity_pat, changes[cur+3]):
-                            name = name_match.groups()[0]
-                            ver = version_match.groups()[0]
-                            pkg_ver.append((name,ver))
-            cur += 1
+    def parse_yarn_lock(self, changes):
+        pkg_ver = parse_yarn.parse_yarn_lock_changes(changes)
+        print(f"[DEBUG]: pkg_ver length: {len(pkg_ver)}")
         return pkg_ver
 
     def parse_gemfile_lock(self, changes):
@@ -194,6 +211,8 @@ def parse_gemfile_lock(self, changes):
                 ver = name_ver_match.groups()[1]
                 pkg_ver.append((name,ver))
             cur += 1
+
+        print(f"[DEBUG]: pkg_ver length: {len(pkg_ver)}")
         return pkg_ver
 
     def parse_requirements_txt(self, changes):
@@ -207,6 +226,8 @@ def parse_requirements_txt(self, changes):
                 ver = name_ver_match.groups()[1]
                 pkg_ver.append((name,ver))
             cur += 1
+
+        print(f"[DEBUG]: pkg_ver length: {len(pkg_ver)}")
         return pkg_ver
 
 
@@ -226,24 +247,7 @@ def generate_pkgver(self, changes, pr_type):
             pkg_ver_tup = self.parse_gemfile_lock(changes)
             return pkg_ver_tup
 
-        #  no_version = 0
-        #  pkg_ver = dict()
-        #  pkg_ver_tup = list()
-
-        #  for line in changes:
-            #  if line == '\n':
-                #  continue
-            #  if match := re.match(pat, line):
-                #  pkg,ver = match.groups()
-                #  pkg_ver[pkg] = ver
-                #  pkg_ver_tup.append((pkg,ver))
-            #  else:
-                #  no_version += 1
-
-        #  if no_version > 0:
-            #  print(f"[ERROR] Found entries that do not specify version, preventing analysis. Exiting")
-            #  sys.exit(11)
-
+        # shouldn't get here
         return pkg_ver_tup
 
     ''' Read phylum_analysis.json file '''
@@ -330,17 +334,10 @@ def check_risk_scores(self, package_json):
         else:
             return None
 
-    #TODO: generalize this
     def build_issues_list(self, package_json, issue_flags: list):
         issues = list()
         pkg_issues = package_json.get("issues")
-        # pkg_vulns = package_json.get("vulnerabilities")
 
-        #  if 'vul' in issue_flags:
-            #  for vuln in pkg_vulns:
-                #  risk_level = vuln.get("risk_level")
-                #  title = vuln.get("title")
-                #  issues.append(('VUL', risk_level,title))
 
         for flag in issue_flags:
             for pkg_issue in pkg_issues:
@@ -373,37 +370,45 @@ def run_analyze(self):
         pr_type = self.determine_pr_type(diff_data)
         changes = self.get_diff_hunks(diff_data, pr_type)
         pkg_ver = self.generate_pkgver(changes, pr_type)
-        # phylum_json = self.read_phylum_analysis('/home/runner/phylum_analysis.json')
         phylum_json = self.read_phylum_analysis(FILE_PATHS.get("phylum_analysis"))
         risk_data = self.parse_risk_data(phylum_json, pkg_ver)
         project_url = self.get_project_url(phylum_json)
         returncode = 0
 
-        # Write pr_comment.txt only if the analysis failed (self.gbl_result == 1)
-        if self.gbl_failed:
-            returncode += 1
-
-            header = "## Phylum OSS Supply Chain Risk Analysis\n\n"
-            header += "<details>\n<summary>Background</summary>\n<br />\nThis repository uses a GitHub Action to automatically analyze the risk of new dependencies added to requirements.txt via Pull Request. An administrator of this repository has set score requirements for Phylum's five risk domains.<br /><br />\nIf you see this comment, one or more dependencies added to the requirements.txt file in this Pull Request have failed Phylum's risk analysis.\n</details>\n\n"
-
-            # with open('/home/runner/pr_comment.txt','w') as outfile:
-            with open(FILE_PATHS.get("pr_comment"),'w') as outfile:
-                outfile.write(header)
-                for line in risk_data:
-                    if line:
-                        outfile.write(line)
-                outfile.write(f"\n[View this project in Phylum UI]({project_url})")
-                print(f"[DEBUG] pr_comment.txt: wrote {outfile.tell()} bytes")
+        output = ""
+        # Build the failure comment only if the analysis failed and no packages are incomplete (self.gbl_failed and not self.gbl_incomplete)
+        if self.gbl_failed == True and self.gbl_incomplete == False:
+            returncode = 1
+            # if a previous run was flagged incomplete (self.previous_incomplete), use the COMPLETE failure header; otherwise use the plain failure header
+            if self.previous_incomplete == True:
+                output = COMPLETE_FAILED_COMMENT
+            else:
+                output = FAILED_COMMENT
+
+            # write data from risk analysis
+            for line in risk_data:
+                if line:
+                    output += line
+
         # If any packages are incomplete, set the returncode to 5 so we know the results are incomplete
         if self.gbl_incomplete == True:
+            returncode = 5
             print(f"[DEBUG] {len(self.incomplete_pkgs)} packages were incomplete as of the analysis job")
-            returncode += 5
+            output = INCOMPLETE_COMMENT.replace("TKTK",str(len(self.incomplete_pkgs)))
+
+        if self.gbl_failed == False and self.gbl_incomplete == False and self.previous_incomplete == True:
+            returncode = 4
+            print(f"[DEBUG] failed=False incomplete=False previous_incomplete=True")
+            output = COMPLETE_SUCCESS_COMMENT
 
-        # with open('/home/runner/returncode.txt','w') as resultout:
         with open(FILE_PATHS.get("returncode"),'w') as resultout:
             resultout.write(str(returncode))
             print(f"[DEBUG] returncode: wrote {str(returncode)}")
 
+        with open(FILE_PATHS.get("pr_comment"),'w') as outfile:
+            outfile.write(output)
+            outfile.write(f"\n[View this project in Phylum UI]({project_url})")
+            print(f"[DEBUG] pr_comment.txt: wrote {outfile.tell()} bytes")
 
 if __name__ == "__main__":
     argv = sys.argv