Skip to content

Commit

Permalink
plugins/semgrep: add csmock semgrep plugin
Browse files Browse the repository at this point in the history
  • Loading branch information
rhyw committed Mar 21, 2024
1 parent 87d9761 commit 1215d78
Showing 1 changed file with 215 additions and 0 deletions.
215 changes: 215 additions & 0 deletions py/plugins/semgrep.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
# Copyright (C) 2024 Red Hat, Inc.
#
# This file is part of csmock.
#
# csmock is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# csmock is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with csmock. If not, see <http://www.gnu.org/licenses/>.

import os

# by default, do not send usage metrics to the Semgrep server
# (default value of the '--semgrep-metrics' option)
DEFAULT_SEMGREP_SEND_METRICS = "off"

# version of the semgrep CLI that gets installed with pip
SEMGREP_CLI_VERSION = "1.56.0"

# directory (relative to the chroot root) passed to 'semgrep scan' as the scan target
SEMGREP_SCAN_DIR = "/builddir/build/BUILD"

# file (relative to the chroot root) where 'semgrep scan' writes its SARIF report
SEMGREP_SCAN_OUTPUT = "/builddir/semgrep-scan-results.sarif"

# file (relative to the chroot root) in which the scan hook stores the chroot
# root path so that the filter hook can read it back later
SEMGREP_SCAN_CHROOT_ROOT_PATH = "/builddir/semgrep-chroot-root"

# file (relative to the chroot root) that receives stderr of 'semgrep scan'
SEMGREP_SCAN_LOG = "/builddir/semgrep-scan.log"


class PluginProps:
    """Properties of the Semgrep plugin, as returned by Plugin.get_props()."""

    def __init__(self):
        # human-readable summary of what the analyzer does
        self.description = (
            "A fast, open-source, static analysis engine for finding bugs, "
            "detecting dependency vulnerabilities, and enforcing code standards."
        )

        # the plugin is not flagged as stable
        self.stable = False


class Plugin:
    """csmock plugin that installs the semgrep CLI and scans the build tree.

    When enabled, handle_args() registers three hooks on ``props``:
    a pre-mock hook that installs semgrep with pip and clones the rules
    repository, a post-install hook that runs ``semgrep scan``, and
    a post-process hook that converts the SARIF report using csgrep.
    """

    def __init__(self):
        # the plugin stays inactive until enable() is called
        self.enabled = False

    def get_props(self):
        """Return a fresh PluginProps instance describing this plugin."""
        return PluginProps()

    def enable(self):
        """Mark the plugin as enabled so that handle_args() registers its hooks."""
        self.enabled = True

    @staticmethod
    def _semgrep_lib_dir(results):
        """Return the directory under results.tmpdir where pip installs semgrep."""
        return os.path.join(results.tmpdir, "semgrep_lib")

    @staticmethod
    def _semgrep_env_prefix(results):
        """Return the 'env ...' command prefix exposing the pip-installed semgrep.

        Both the version query and the scan use this single definition so
        that they cannot drift apart.
        """
        lib_dir = Plugin._semgrep_lib_dir(results)
        return f"env PATH={lib_dir}/bin:$PATH PYTHONPATH={lib_dir}"

    def init_parser(self, parser):
        """
        Initialize the argument parser for the Semgrep plugin.
        """
        parser.add_argument(
            "--semgrep-metrics",
            default=DEFAULT_SEMGREP_SEND_METRICS,
            help=(
                "configures whether usage metrics are sent to the Semgrep server, "
                f"default to ({DEFAULT_SEMGREP_SEND_METRICS})"
            ),
        )

        parser.add_argument(
            "--semgrep-rules-repo",
            help="semgrep rules repo, assuming rules are located under the 'rules' sub-directory",
        )

        parser.add_argument(
            "--semgrep-verbose",
            action="store_true",
            help="show more details about what rules are running, which files failed to parse, etc",
        )

        parser.add_argument(
            "--semgrep-scan-opts",
            help="space-separated list of additional options passed to the 'semgrep scan' command",
        )

    def handle_args(self, parser, args, props):
        """Validate the plugin options and register the semgrep hooks on props.

        Does nothing when the plugin is not enabled.  Reports a parser error
        when '--semgrep-rules-repo' was not given.
        """
        if not self.enabled:
            return

        if not args.semgrep_rules_repo:
            parser.error("'--semgrep-rules-repo' is required to run semgrep scan")
            # only reached if parser.error() returns instead of exiting
            return 1

        # install semgrep cli and download semgrep rules
        def fetch_semgrep_rules_hook(results, props):
            # target dir where semgrep cli and its dependencies are installed
            semgrep_lib_dir = self._semgrep_lib_dir(results)
            try:
                # make sure the lib directory exists
                os.makedirs(semgrep_lib_dir, mode=0o755, exist_ok=True)
            except OSError:
                results.error("failed to create semgrep lib directory")
                return 1

            # install semgrep cli using pip
            cmd = f"python3 -m pip install --target={semgrep_lib_dir} semgrep=={SEMGREP_CLI_VERSION}"
            ec = results.exec_cmd(cmd, shell=True)
            if 0 != ec:
                results.error("failed to install semgrep cli using pip")
                # nothing below can work without the semgrep cli
                return ec

            semgrep_rules_repo_dir = os.path.join(results.tmpdir, "semgrep_rules")
            repo_clone_cmd = [
                "git", "clone", "--depth", "1",
                args.semgrep_rules_repo,
                semgrep_rules_repo_dir
            ]
            ec = results.exec_cmd(repo_clone_cmd)
            if 0 != ec:
                results.error("failed to download semgrep rules")
                return ec

            # query version of semgrep
            cmd = self._semgrep_env_prefix(results) + " semgrep --version"
            ec, output = results.get_cmd_output(cmd)
            if 0 != ec:
                results.error("failed to query semgrep cli version", ec=ec)
                return ec

            # parse and record the version of semgrep cli
            version = output.rstrip("\n")
            results.ini_writer.append("analyzer-version-semgrep-cli", version)

            # get the results out of the chroot
            props.copy_out_files += [
                SEMGREP_SCAN_OUTPUT,
                SEMGREP_SCAN_LOG,
                SEMGREP_SCAN_CHROOT_ROOT_PATH,
            ]
            return 0

        props.pre_mock_hooks += [fetch_semgrep_rules_hook]

        def scan_hook(results, mock, props):
            semgrep_prefix = self._semgrep_env_prefix(results)
            # assuming semgrep rules are located under the 'rules' directory
            semgrep_rules_dir = os.path.join(results.tmpdir, "semgrep_rules/rules")

            # get the chroot root path
            ec, output = results.get_cmd_output(mock.get_mock_cmd(["--print-root-path"]), shell=False)
            if ec != 0:
                results.error("semgrep: failed to get chroot root path", ec=ec)
                # without the root path every path built below would be wrong
                return ec
            chroot_root_path = output.rstrip("/\n")

            # write the chroot root path to the SEMGREP_SCAN_CHROOT_ROOT_PATH
            with open(f"{chroot_root_path}{SEMGREP_SCAN_CHROOT_ROOT_PATH}", "w", encoding="utf-8") as f:
                f.write(chroot_root_path)

            # command to run semgrep scan
            semgrep_scan_cmd = semgrep_prefix + (
                f" semgrep scan --metrics={args.semgrep_metrics} --sarif"
                f" --config={semgrep_rules_dir}"
            )
            if args.semgrep_verbose:
                semgrep_scan_cmd += " --verbose"

            # append additional options passed to the 'semgrep scan' command
            if args.semgrep_scan_opts:
                semgrep_scan_cmd += f" {args.semgrep_scan_opts}"

            # eventually append the target directory to be scanned
            semgrep_scan_cmd += (
                f" --output={chroot_root_path}{SEMGREP_SCAN_OUTPUT} {chroot_root_path}{SEMGREP_SCAN_DIR}"
                f" 2>{chroot_root_path}{SEMGREP_SCAN_LOG}"
            )
            # run semgrep scan
            ec = results.exec_cmd(semgrep_scan_cmd, shell=True)

            # exit codes that the hook reports as errors; any other exit
            # code is not reported
            scan_errors = {
                123: "semgrep: Indiscriminate errors reported on standard error.",
                124: "semgrep: Command line parsing errors.",
                125: "semgrep: Unexpected internal errors (bugs).",
            }
            msg = scan_errors.get(ec)
            if msg is not None:
                results.error(msg)

            # the hook itself always reports success
            return 0

        # run semgrep scan after successful build
        props.post_install_hooks += [scan_hook]

        # convert the results into the csdiff's JSON format
        def filter_hook(results):
            src = results.dbgdir_raw + SEMGREP_SCAN_OUTPUT
            if not os.path.exists(src):
                return 0
            dst = f"{results.dbgdir_uni}/semgrep-scan-results.json"

            # read from SEMGREP_SCAN_CHROOT_ROOT_PATH to get the chroot root path
            root_path_file = f"{results.dbgdir_raw}{SEMGREP_SCAN_CHROOT_ROOT_PATH}"
            try:
                with open(root_path_file, "r", encoding="utf-8") as f:
                    chroot_root_path = f.read().rstrip("\n")

                # remove the `SEMGREP_SCAN_CHROOT_ROOT_PATH` file
                os.remove(root_path_file)
            except OSError:
                results.error("semgrep: failed to read chroot root path")
                return 1

            tmp_dir_basename = results.tmpdir.split("/")[-1]
            semgrep_rules_path_prefix = f"{tmp_dir_basename}/semgrep_rules/"
            # semgrep report has dot-separated rules path
            tmp_path = semgrep_rules_path_prefix.lstrip("/").replace("/", r"\.")
            # strip suspicious path prefix from the semgrep rules directory
            # depending on where the semgrep scan process is run, the raw report may or may not contain "/tmp"
            # in its rules path. The following sed command strips suspicious path prefixes by removing
            # any sequence of non left-square-bracket characters preceding '{tmp_path}'
            # (raw f-string: the backslash must reach sed unchanged)
            cmd = (
                f"csgrep {src} --mode=json --strip-path-prefix {chroot_root_path}{SEMGREP_SCAN_DIR}/"
                rf" | sed 's|[^\[]*{tmp_path}||' > {dst}"
            )

            return results.exec_cmd(cmd, shell=True)

        props.post_process_hooks += [filter_hook]

0 comments on commit 1215d78

Please sign in to comment.