From 5317fd98af3c2084a1d49c6fcf40a20b68b89129 Mon Sep 17 00:00:00 2001 From: Charles Coggins Date: Thu, 2 Jan 2025 10:45:52 -0600 Subject: [PATCH] feat: ensure git repository access as prerequisite (#518) This change adds a new base prerequisite such that the user account executing `git` has access to the repository. This is accomplished with a new function, `ensure_git_repo_access`, that checks for the two likely reasons to fail a git repo membership test: 1. Not actually in git repo, in which case nothing can be done. 2. The repository is owned by a different user and we don't have access to it. This is detected and remedied with a configuration change. References: https://confluence.atlassian.com/pages/viewpage.action?pageId=1167744132 https://confluence.atlassian.com/pages/viewpage.action?pageId=1384121844 https://git-scm.com/docs/git-config/2.35.2#Documentation/git-config.txt-safedirectory The second reason is encountered when the user account used to mount the container created from the `phylum-ci` image does not match the one owning the git repository. This happens for some CI environments and was encountered while attempting to add a non-root user to the `phylum-ci` image (#519). That effort was abandoned, but the feature here is still worth adding now since it adds functionality for operating in more environments. Additional changes made include: * Clarify command used to build from the `Dockerfile.slim` file * Update `docker_tests.sh` script to include the most basic test command * Fix typo: `git_curent_branch_name` --> `git_current_branch_name` * Add `is_in_git_repo` predicate as helper function in `git` module * Add more tests for the `git` module * Add type hints to `tests/unit/test_git.py` --- Dockerfile.slim | 2 +- scripts/docker_tests.sh | 2 + src/phylum/ci/ci_base.py | 7 +-- src/phylum/ci/ci_none.py | 4 +- src/phylum/ci/ci_precommit.py | 4 +- src/phylum/ci/git.py | 84 ++++++++++++++++++++++++++++++++++- tests/unit/test_git.py | 54 +++++++++++++++++----- 7 files changed, 137 insertions(+), 20 deletions(-) diff --git a/Dockerfile.slim b/Dockerfile.slim index 7382805e..e8dd21fc 100644 --- a/Dockerfile.slim +++ b/Dockerfile.slim @@ -73,7 +73,7 @@ # # Images built from this Dockerfile can be tested for basic functionality: # -# $ docker build --tag phylum-ci . +# $ docker build --tag phylum-ci --file Dockerfile.slim . # $ scripts/docker_tests.sh --image phylum-ci --slim ########################################################################################## diff --git a/scripts/docker_tests.sh b/scripts/docker_tests.sh index ba1ad55a..dbf75cc9 100755 --- a/scripts/docker_tests.sh +++ b/scripts/docker_tests.sh @@ -117,9 +117,11 @@ echo " [+] Running with UID:GID of: ${USER}:${GROUP}" if [ -z "${SLIM:-}" ]; then echo " [+] \`--slim\` option not specified. Running all tests ..." + docker run --rm --entrypoint entrypoint.sh "${IMAGE}" docker run --rm --entrypoint entrypoint.sh --user "${USER}:${GROUP}" "${IMAGE}" "${SLIM_COMMANDS}" docker run --rm --entrypoint entrypoint.sh --user "${USER}:${GROUP}" "${IMAGE}" "${MANIFEST_COMMANDS}" else echo " [+] \`--slim\` option specified. Skipping the lockfile generation tests ..." + docker run --rm --entrypoint entrypoint.sh "${IMAGE}" docker run --rm --entrypoint entrypoint.sh --user "${USER}:${GROUP}" "${IMAGE}" "${SLIM_COMMANDS}" fi diff --git a/src/phylum/ci/ci_base.py b/src/phylum/ci/ci_base.py index 6bf34d30..69fdfb48 100644 --- a/src/phylum/ci/ci_base.py +++ b/src/phylum/ci/ci_base.py @@ -36,7 +36,7 @@ ReturnCode, ) from phylum.ci.depfile import Depfile, Depfiles, DepfileType, parse_depfile -from phylum.ci.git import git_hash_object, git_repo_name, git_root_dir, git_worktree +from phylum.ci.git import ensure_git_repo_access, git_hash_object, git_repo_name, git_root_dir, git_worktree from phylum.console import console from phylum.constants import ENVVAR_NAME_TOKEN, MIN_CLI_VER_INSTALLED from phylum.exceptions import PhylumCalledProcessError, pprint_subprocess_error @@ -647,6 +647,7 @@ def _check_prerequisites(self) -> None: * A Phylum CLI version at least as new as the minimum supported version * Have `git` installed and available for use on the PATH * Operating within the context of a git repository + * User account executing `git` has access to the repository """ LOG.info("Confirming prerequisites ...") @@ -661,8 +662,8 @@ def _check_prerequisites(self) -> None: raise SystemExit(msg) LOG.debug("`git` binary found on the PATH") - # Referencing this property is enough to ensure the prerequisite - LOG.debug("Git repository root found: %s", self._git_root_dir) + ensure_git_repo_access() + LOG.debug("Git repository root: %s", self._git_root_dir) def _cmd_extender( self, diff --git a/src/phylum/ci/ci_none.py b/src/phylum/ci/ci_none.py index cf63302c..2730b404 100644 --- a/src/phylum/ci/ci_none.py +++ b/src/phylum/ci/ci_none.py @@ -12,7 +12,7 @@ from typing import Optional from phylum.ci.ci_base import CIBase -from phylum.ci.git import git_curent_branch_name, git_remote, git_set_remote_head +from phylum.ci.git import git_current_branch_name, git_remote, git_set_remote_head from phylum.exceptions import PhylumCalledProcessError, pprint_subprocess_error from phylum.logger import LOG @@ -35,7 +35,7 @@ def _check_prerequisites(self) -> None: @cached_property def phylum_label(self) -> str: """Get a custom label for use when submitting jobs for analysis.""" - current_branch = git_curent_branch_name() + current_branch = git_current_branch_name() label = f"{self.ci_platform_name}_{current_branch}_{self.depfile_hash_object}" label = re.sub(r"\s+", "-", label) return label diff --git a/src/phylum/ci/ci_precommit.py b/src/phylum/ci/ci_precommit.py index 0b9a3fd4..138fe280 100644 --- a/src/phylum/ci/ci_precommit.py +++ b/src/phylum/ci/ci_precommit.py @@ -18,7 +18,7 @@ from typing import Optional from phylum.ci.ci_base import CIBase -from phylum.ci.git import git_curent_branch_name +from phylum.ci.git import git_current_branch_name from phylum.exceptions import PhylumCalledProcessError, pprint_subprocess_error from phylum.logger import LOG, MARKUP @@ -106,7 +106,7 @@ def _check_prerequisites(self) -> None: @cached_property def phylum_label(self) -> str: """Get a custom label for use when submitting jobs for analysis.""" - current_branch = git_curent_branch_name() + current_branch = git_current_branch_name() label = f"{self.ci_platform_name}_{current_branch}_{self.depfile_hash_object}" label = re.sub(r"\s+", "-", label) return label diff --git a/src/phylum/ci/git.py b/src/phylum/ci/git.py index 5794bad8..9e203e46 100644 --- a/src/phylum/ci/git.py +++ b/src/phylum/ci/git.py @@ -10,7 +10,7 @@ from typing import Optional from phylum.exceptions import PhylumCalledProcessError, pprint_subprocess_error -from phylum.logger import LOG +from phylum.logger import LOG, MARKUP def git_base_cmd(git_c_path: Optional[Path] = None) -> list[str]: @@ -24,6 +24,86 @@ def git_base_cmd(git_c_path: Optional[Path] = None) -> list[str]: return ["git", "-C", str(git_c_path.resolve())] +def is_in_git_repo(git_c_path: Optional[Path] = None) -> bool: + """Predicate for determining if operating within the context of an accessible git repository. + + The optional `git_c_path` is used to tell `git` to run as if it were started in that + path instead of the current working directory, which is the default when not provided. + """ + base_cmd = git_base_cmd(git_c_path=git_c_path) + cmd = [*base_cmd, "rev-parse", "--show-toplevel"] + + # We want the return code here and don't want to raise when non-zero. + return not bool(subprocess.run(cmd, check=False, capture_output=True).returncode) # noqa: S603 + + +def ensure_git_repo_access(git_c_path: Optional[Path] = None) -> None: + """Ensure user account executing `git` has access to the repository. + + The optional `git_c_path` is used to tell `git` to run as if it were started in that + path instead of the current working directory, which is the default when not provided. + """ + if is_in_git_repo(git_c_path=git_c_path): + LOG.debug("Operating within git repository, with proper access") + return + + # There are two likely reasons to fail the git repo membership test: + # + # 1. Not actually in a git repo, in which case there is nothing that can be done. + # 2. The repository is owned by a different user and we don't have access to it. + # This can be remedied with a configuration change. References: + # https://confluence.atlassian.com/pages/viewpage.action?pageId=1167744132 + # https://confluence.atlassian.com/pages/viewpage.action?pageId=1384121844 + # https://git-scm.com/docs/git-config/2.35.2#Documentation/git-config.txt-safedirectory + + base_cmd = git_base_cmd(git_c_path=git_c_path) + cmd = [*base_cmd, "rev-parse", "--show-toplevel"] + + try: + _ = subprocess.run(cmd, check=True, text=True, capture_output=True, encoding="utf-8") # noqa: S603 + except subprocess.CalledProcessError as outer_err: + # Account for reason #1 + std_err: str = outer_err.stderr + if "not a git repository" in std_err: + msg = "Must be operating within the context of a git repository" + raise PhylumCalledProcessError(outer_err, msg) from outer_err + + # Account for reason #2 + # The error message states the command to use to update the configuration, so use it + start_of_cmd = "git config --global --add safe.directory" + if start_of_cmd in std_err: + msg = """ + This git repository is owned by a different user! + Adding repository directory to git global config as safe directory ...""" + LOG.warning(cleandoc(msg)) + start_of_cmd_idx = std_err.find(start_of_cmd) + cmd_msg = std_err[start_of_cmd_idx:] + cmd_list = shlex.split(cmd_msg) + # Ensure the `git` part of the command takes into account the optional `git_c_path` + conf_cmd = [*base_cmd, *cmd_list[1:]] + num_tokens = len(conf_cmd) + # Ensure the "" part of the command is only one token and that nothing comes after it: + # " config --global --add safe.directory " + expected_num_tokens = len(base_cmd) + 5 + if num_tokens != expected_num_tokens: + msg = f""" + {num_tokens} tokens provided but exactly {expected_num_tokens} were expected. + Bailing instead of executing this unexpected command: + [code]{shlex.join(conf_cmd)}[/] + Please report this as a bug if you believe the command is correct.""" + raise PhylumCalledProcessError(outer_err, cleandoc(msg)) from outer_err + LOG.debug("Executing command: [code]%s[/]", shlex.join(conf_cmd), extra=MARKUP) + try: + _ = subprocess.run(conf_cmd, check=True, text=True, capture_output=True, encoding="utf-8") # noqa: S603 + except subprocess.CalledProcessError as inner_err: + msg = "Unable to add repository to git global config as safe directory" + raise PhylumCalledProcessError(inner_err, msg) from inner_err + msg = f""" + Config updated. Undo for non-ephemeral environments with command: + [code]{shlex.join(conf_cmd).replace("--add", "--unset", 1)}[/]""" + LOG.warning(cleandoc(msg), extra=MARKUP) + + def git_remote(git_c_path: Optional[Path] = None) -> str: """Get the git remote and return it. @@ -181,7 +261,7 @@ def git_root_dir(git_c_path: Optional[Path] = None) -> Path: return Path(git_root).resolve() -def git_curent_branch_name(git_c_path: Optional[Path] = None) -> str: +def git_current_branch_name(git_c_path: Optional[Path] = None) -> str: """Get the current branch name and return it. The optional `git_c_path` is used to tell `git` to run as if it were started in that diff --git a/tests/unit/test_git.py b/tests/unit/test_git.py index cf17b082..4b6012dd 100644 --- a/tests/unit/test_git.py +++ b/tests/unit/test_git.py @@ -5,7 +5,14 @@ from dulwich import porcelain import pytest -from phylum.ci.git import git_branch_exists, git_fetch, git_repo_name +from phylum.ci.git import ( + ensure_git_repo_access, + git_branch_exists, + git_fetch, + git_repo_name, + git_root_dir, + is_in_git_repo, +) # Names of a git repository that will be cloned locally CLONED_LOCAL_REPO_NAMES = [ @@ -20,36 +27,36 @@ @pytest.mark.parametrize("repo_name", INITIALIZED_LOCAL_REPO_NAMES) -def test_initialized_local_repo_name(tmp_path, repo_name): +def test_initialized_local_repo_name(tmp_path: Path, repo_name: str) -> None: """Ensure a local repo, with no remotes, has it's name correctly identified.""" - repo_path: Path = tmp_path / repo_name + repo_path = tmp_path / repo_name porcelain.init(str(repo_path)) local_repo_name = git_repo_name(git_c_path=repo_path) assert local_repo_name == repo_name, "The local repo name should be found" @pytest.mark.parametrize("repo_name", CLONED_LOCAL_REPO_NAMES) -def test_cloned_local_repo_name(tmp_path, repo_name: str): +def test_cloned_local_repo_name(tmp_path: Path, repo_name: str) -> None: """Ensure a cloned local repo has it's name correctly identified.""" - init_repo_path: Path = tmp_path / repo_name - cloned_repo_path: Path = tmp_path / "cloned_local_repo_name" + init_repo_path = tmp_path / repo_name + cloned_repo_path = tmp_path / "cloned_local_repo_name" porcelain.init(str(init_repo_path)) porcelain.clone(source=str(init_repo_path), target=str(cloned_repo_path)) remote_repo_name = git_repo_name(git_c_path=cloned_repo_path) assert remote_repo_name == repo_name, "The cloned local repo name should be found" -def test_cloned_remote_repo_name(tmp_path): +def test_cloned_remote_repo_name(tmp_path: Path) -> None: """Ensure a cloned remote repo has it's name correctly identified.""" - repo_path: Path = tmp_path / "cloned_remote_repo_name" + repo_path = tmp_path / "cloned_remote_repo_name" porcelain.clone(source="https://github.com/phylum-dev/phylum-ci.git", target=str(repo_path), depth=1) remote_repo_name = git_repo_name(git_c_path=repo_path) assert remote_repo_name == "phylum-ci", "The cloned remote repo name should be found" -def test_fetch_default_remote_branch(tmp_path): +def test_fetch_default_remote_branch(tmp_path: Path) -> None: """Ensure `git_fetch` works with a cloned remote repo missing it's default remote branch ref.""" - repo_path: Path = tmp_path / "cloned_remote_repo_name" + repo_path = tmp_path / "cloned_remote_repo_name" porcelain.clone(source="https://github.com/phylum-dev/phylum-ci.git", target=str(repo_path), depth=1) default_remote_branch_ref = "refs/remotes/origin/main" assert git_branch_exists(default_remote_branch_ref, git_c_path=repo_path) @@ -58,3 +65,30 @@ def test_fetch_default_remote_branch(tmp_path): assert not git_branch_exists(default_remote_branch_ref, git_c_path=repo_path) git_fetch(repo="origin", ref="main", git_c_path=repo_path) assert git_branch_exists(default_remote_branch_ref, git_c_path=repo_path) + + +def test_is_in_git_repo(tmp_path: Path) -> None: + """Identify when the operating context is within a git repository or not.""" + repo_path = tmp_path / "maybe_a_git_repo" + repo_path.mkdir() + assert not is_in_git_repo(git_c_path=repo_path), "The path should not be a git repo yet" + porcelain.init(str(repo_path)) + assert is_in_git_repo(git_c_path=repo_path), "The path should be a git repo" + + +def test_raises_when_not_in_git_repo(tmp_path: Path) -> None: + """Ensure an exception is raised when operating outside of a git repository.""" + repo_path = tmp_path / "not_a_git_repo" + repo_path.mkdir() + with pytest.raises(SystemExit): + ensure_git_repo_access(git_c_path=repo_path) + + +def test_git_root_dir(tmp_path: Path) -> None: + """Ensure the correct git root directory is returned, regardless of depth.""" + repo_path = tmp_path / "toplevel" + nested_path = repo_path / "sub_dir_1" / "sub_dir_2" + nested_path.mkdir(parents=True) + porcelain.init(str(repo_path)) + assert git_root_dir(git_c_path=repo_path) == repo_path + assert git_root_dir(git_c_path=nested_path) == repo_path