Skip to content

Commit

Permalink
feat: ensure git repository access as prerequisite (#518)
Browse files Browse the repository at this point in the history
This change adds a new base prerequisite such that the user account
executing `git` has access to the repository. This is accomplished with
a new function, `ensure_git_repo_access`, that checks for the two likely
reasons to fail a git repo membership test:

1. Not actually in git repo, in which case nothing can be done.
2. The repository is owned by a different user and we don't have access
to it. This is detected and remedied with a configuration change.

References:

https://confluence.atlassian.com/pages/viewpage.action?pageId=1167744132
https://confluence.atlassian.com/pages/viewpage.action?pageId=1384121844
https://git-scm.com/docs/git-config/2.35.2#Documentation/git-config.txt-safedirectory

The second reason is encountered when the user account used to mount the
container created from the `phylum-ci` image does not match the one
owning the git repository. This happens for some CI environments and was
encountered while attempting to add a non-root user to the `phylum-ci`
image (#519). That effort was abandoned, but the feature here is still
worth adding now since it adds functionality for operating in more
environments.

Additional changes made include:

* Clarify command used to build from the `Dockerfile.slim` file
* Update `docker_tests.sh` script to include the most basic test command
* Fix typo: `git_curent_branch_name` --> `git_current_branch_name`
* Add `is_in_git_repo` predicate as helper function in `git` module
* Add more tests for the `git` module
* Add type hints to `tests/unit/test_git.py`
  • Loading branch information
maxrake authored Jan 2, 2025
1 parent ebd896e commit 5317fd9
Show file tree
Hide file tree
Showing 7 changed files with 137 additions and 20 deletions.
2 changes: 1 addition & 1 deletion Dockerfile.slim
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
#
# Images built from this Dockerfile can be tested for basic functionality:
#
# $ docker build --tag phylum-ci .
# $ docker build --tag phylum-ci --file Dockerfile.slim .
# $ scripts/docker_tests.sh --image phylum-ci --slim
##########################################################################################

Expand Down
2 changes: 2 additions & 0 deletions scripts/docker_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,11 @@ echo " [+] Running with UID:GID of: ${USER}:${GROUP}"

if [ -z "${SLIM:-}" ]; then
echo " [+] \`--slim\` option not specified. Running all tests ..."
docker run --rm --entrypoint entrypoint.sh "${IMAGE}"
docker run --rm --entrypoint entrypoint.sh --user "${USER}:${GROUP}" "${IMAGE}" "${SLIM_COMMANDS}"
docker run --rm --entrypoint entrypoint.sh --user "${USER}:${GROUP}" "${IMAGE}" "${MANIFEST_COMMANDS}"
else
echo " [+] \`--slim\` option specified. Skipping the lockfile generation tests ..."
docker run --rm --entrypoint entrypoint.sh "${IMAGE}"
docker run --rm --entrypoint entrypoint.sh --user "${USER}:${GROUP}" "${IMAGE}" "${SLIM_COMMANDS}"
fi
7 changes: 4 additions & 3 deletions src/phylum/ci/ci_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
ReturnCode,
)
from phylum.ci.depfile import Depfile, Depfiles, DepfileType, parse_depfile
from phylum.ci.git import git_hash_object, git_repo_name, git_root_dir, git_worktree
from phylum.ci.git import ensure_git_repo_access, git_hash_object, git_repo_name, git_root_dir, git_worktree
from phylum.console import console
from phylum.constants import ENVVAR_NAME_TOKEN, MIN_CLI_VER_INSTALLED
from phylum.exceptions import PhylumCalledProcessError, pprint_subprocess_error
Expand Down Expand Up @@ -647,6 +647,7 @@ def _check_prerequisites(self) -> None:
* A Phylum CLI version at least as new as the minimum supported version
* Have `git` installed and available for use on the PATH
* Operating within the context of a git repository
* User account executing `git` has access to the repository
"""
LOG.info("Confirming prerequisites ...")

Expand All @@ -661,8 +662,8 @@ def _check_prerequisites(self) -> None:
raise SystemExit(msg)
LOG.debug("`git` binary found on the PATH")

# Referencing this property is enough to ensure the prerequisite
LOG.debug("Git repository root found: %s", self._git_root_dir)
ensure_git_repo_access()
LOG.debug("Git repository root: %s", self._git_root_dir)

def _cmd_extender(
self,
Expand Down
4 changes: 2 additions & 2 deletions src/phylum/ci/ci_none.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from typing import Optional

from phylum.ci.ci_base import CIBase
from phylum.ci.git import git_curent_branch_name, git_remote, git_set_remote_head
from phylum.ci.git import git_current_branch_name, git_remote, git_set_remote_head
from phylum.exceptions import PhylumCalledProcessError, pprint_subprocess_error
from phylum.logger import LOG

Expand All @@ -35,7 +35,7 @@ def _check_prerequisites(self) -> None:
@cached_property
def phylum_label(self) -> str:
"""Get a custom label for use when submitting jobs for analysis."""
current_branch = git_curent_branch_name()
current_branch = git_current_branch_name()
label = f"{self.ci_platform_name}_{current_branch}_{self.depfile_hash_object}"
label = re.sub(r"\s+", "-", label)
return label
Expand Down
4 changes: 2 additions & 2 deletions src/phylum/ci/ci_precommit.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from typing import Optional

from phylum.ci.ci_base import CIBase
from phylum.ci.git import git_curent_branch_name
from phylum.ci.git import git_current_branch_name
from phylum.exceptions import PhylumCalledProcessError, pprint_subprocess_error
from phylum.logger import LOG, MARKUP

Expand Down Expand Up @@ -106,7 +106,7 @@ def _check_prerequisites(self) -> None:
@cached_property
def phylum_label(self) -> str:
"""Get a custom label for use when submitting jobs for analysis."""
current_branch = git_curent_branch_name()
current_branch = git_current_branch_name()
label = f"{self.ci_platform_name}_{current_branch}_{self.depfile_hash_object}"
label = re.sub(r"\s+", "-", label)
return label
Expand Down
84 changes: 82 additions & 2 deletions src/phylum/ci/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from typing import Optional

from phylum.exceptions import PhylumCalledProcessError, pprint_subprocess_error
from phylum.logger import LOG
from phylum.logger import LOG, MARKUP


def git_base_cmd(git_c_path: Optional[Path] = None) -> list[str]:
Expand All @@ -24,6 +24,86 @@ def git_base_cmd(git_c_path: Optional[Path] = None) -> list[str]:
return ["git", "-C", str(git_c_path.resolve())]


def is_in_git_repo(git_c_path: Optional[Path] = None) -> bool:
"""Predicate for determining if operating within the context of an accessible git repository.
The optional `git_c_path` is used to tell `git` to run as if it were started in that
path instead of the current working directory, which is the default when not provided.
"""
base_cmd = git_base_cmd(git_c_path=git_c_path)
cmd = [*base_cmd, "rev-parse", "--show-toplevel"]

# We want the return code here and don't want to raise when non-zero.
return not bool(subprocess.run(cmd, check=False, capture_output=True).returncode) # noqa: S603


def ensure_git_repo_access(git_c_path: Optional[Path] = None) -> None:
"""Ensure user account executing `git` has access to the repository.
The optional `git_c_path` is used to tell `git` to run as if it were started in that
path instead of the current working directory, which is the default when not provided.
"""
if is_in_git_repo(git_c_path=git_c_path):
LOG.debug("Operating within git repository, with proper access")
return

# There are two likely reasons to fail the git repo membership test:
#
# 1. Not actually in a git repo, in which case there is nothing that can be done.
# 2. The repository is owned by a different user and we don't have access to it.
# This can be remedied with a configuration change. References:
# https://confluence.atlassian.com/pages/viewpage.action?pageId=1167744132
# https://confluence.atlassian.com/pages/viewpage.action?pageId=1384121844
# https://git-scm.com/docs/git-config/2.35.2#Documentation/git-config.txt-safedirectory

base_cmd = git_base_cmd(git_c_path=git_c_path)
cmd = [*base_cmd, "rev-parse", "--show-toplevel"]

try:
_ = subprocess.run(cmd, check=True, text=True, capture_output=True, encoding="utf-8") # noqa: S603
except subprocess.CalledProcessError as outer_err:
# Account for reason #1
std_err: str = outer_err.stderr
if "not a git repository" in std_err:
msg = "Must be operating within the context of a git repository"
raise PhylumCalledProcessError(outer_err, msg) from outer_err

# Account for reason #2
# The error message states the command to use to update the configuration, so use it
start_of_cmd = "git config --global --add safe.directory"
if start_of_cmd in std_err:
msg = """
This git repository is owned by a different user!
Adding repository directory to git global config as safe directory ..."""
LOG.warning(cleandoc(msg))
start_of_cmd_idx = std_err.find(start_of_cmd)
cmd_msg = std_err[start_of_cmd_idx:]
cmd_list = shlex.split(cmd_msg)
# Ensure the `git` part of the command takes into account the optional `git_c_path`
conf_cmd = [*base_cmd, *cmd_list[1:]]
num_tokens = len(conf_cmd)
# Ensure the "<DIR>" part of the command is only one token and that nothing comes after it:
# "<GIT_BASE_CMD> config --global --add safe.directory <DIR>"
expected_num_tokens = len(base_cmd) + 5
if num_tokens != expected_num_tokens:
msg = f"""
{num_tokens} tokens provided but exactly {expected_num_tokens} were expected.
Bailing instead of executing this unexpected command:
[code]{shlex.join(conf_cmd)}[/]
Please report this as a bug if you believe the command is correct."""
raise PhylumCalledProcessError(outer_err, cleandoc(msg)) from outer_err
LOG.debug("Executing command: [code]%s[/]", shlex.join(conf_cmd), extra=MARKUP)
try:
_ = subprocess.run(conf_cmd, check=True, text=True, capture_output=True, encoding="utf-8") # noqa: S603
except subprocess.CalledProcessError as inner_err:
msg = "Unable to add repository to git global config as safe directory"
raise PhylumCalledProcessError(inner_err, msg) from inner_err
msg = f"""
Config updated. Undo for non-ephemeral environments with command:
[code]{shlex.join(conf_cmd).replace("--add", "--unset", 1)}[/]"""
LOG.warning(cleandoc(msg), extra=MARKUP)


def git_remote(git_c_path: Optional[Path] = None) -> str:
"""Get the git remote and return it.
Expand Down Expand Up @@ -181,7 +261,7 @@ def git_root_dir(git_c_path: Optional[Path] = None) -> Path:
return Path(git_root).resolve()


def git_curent_branch_name(git_c_path: Optional[Path] = None) -> str:
def git_current_branch_name(git_c_path: Optional[Path] = None) -> str:
"""Get the current branch name and return it.
The optional `git_c_path` is used to tell `git` to run as if it were started in that
Expand Down
54 changes: 44 additions & 10 deletions tests/unit/test_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,14 @@
from dulwich import porcelain
import pytest

from phylum.ci.git import git_branch_exists, git_fetch, git_repo_name
from phylum.ci.git import (
ensure_git_repo_access,
git_branch_exists,
git_fetch,
git_repo_name,
git_root_dir,
is_in_git_repo,
)

# Names of a git repository that will be cloned locally
CLONED_LOCAL_REPO_NAMES = [
Expand All @@ -20,36 +27,36 @@


@pytest.mark.parametrize("repo_name", INITIALIZED_LOCAL_REPO_NAMES)
def test_initialized_local_repo_name(tmp_path, repo_name):
def test_initialized_local_repo_name(tmp_path: Path, repo_name: str) -> None:
"""Ensure a local repo, with no remotes, has it's name correctly identified."""
repo_path: Path = tmp_path / repo_name
repo_path = tmp_path / repo_name
porcelain.init(str(repo_path))
local_repo_name = git_repo_name(git_c_path=repo_path)
assert local_repo_name == repo_name, "The local repo name should be found"


@pytest.mark.parametrize("repo_name", CLONED_LOCAL_REPO_NAMES)
def test_cloned_local_repo_name(tmp_path, repo_name: str):
def test_cloned_local_repo_name(tmp_path: Path, repo_name: str) -> None:
"""Ensure a cloned local repo has it's name correctly identified."""
init_repo_path: Path = tmp_path / repo_name
cloned_repo_path: Path = tmp_path / "cloned_local_repo_name"
init_repo_path = tmp_path / repo_name
cloned_repo_path = tmp_path / "cloned_local_repo_name"
porcelain.init(str(init_repo_path))
porcelain.clone(source=str(init_repo_path), target=str(cloned_repo_path))
remote_repo_name = git_repo_name(git_c_path=cloned_repo_path)
assert remote_repo_name == repo_name, "The cloned local repo name should be found"


def test_cloned_remote_repo_name(tmp_path):
def test_cloned_remote_repo_name(tmp_path: Path) -> None:
"""Ensure a cloned remote repo has it's name correctly identified."""
repo_path: Path = tmp_path / "cloned_remote_repo_name"
repo_path = tmp_path / "cloned_remote_repo_name"
porcelain.clone(source="https://github.com/phylum-dev/phylum-ci.git", target=str(repo_path), depth=1)
remote_repo_name = git_repo_name(git_c_path=repo_path)
assert remote_repo_name == "phylum-ci", "The cloned remote repo name should be found"


def test_fetch_default_remote_branch(tmp_path):
def test_fetch_default_remote_branch(tmp_path: Path) -> None:
"""Ensure `git_fetch` works with a cloned remote repo missing it's default remote branch ref."""
repo_path: Path = tmp_path / "cloned_remote_repo_name"
repo_path = tmp_path / "cloned_remote_repo_name"
porcelain.clone(source="https://github.com/phylum-dev/phylum-ci.git", target=str(repo_path), depth=1)
default_remote_branch_ref = "refs/remotes/origin/main"
assert git_branch_exists(default_remote_branch_ref, git_c_path=repo_path)
Expand All @@ -58,3 +65,30 @@ def test_fetch_default_remote_branch(tmp_path):
assert not git_branch_exists(default_remote_branch_ref, git_c_path=repo_path)
git_fetch(repo="origin", ref="main", git_c_path=repo_path)
assert git_branch_exists(default_remote_branch_ref, git_c_path=repo_path)


def test_is_in_git_repo(tmp_path: Path) -> None:
"""Identify when the operating context is within a git repository or not."""
repo_path = tmp_path / "maybe_a_git_repo"
repo_path.mkdir()
assert not is_in_git_repo(git_c_path=repo_path), "The path should not be a git repo yet"
porcelain.init(str(repo_path))
assert is_in_git_repo(git_c_path=repo_path), "The path should be a git repo"


def test_raises_when_not_in_git_repo(tmp_path: Path) -> None:
"""Ensure an exception is raised when operating outside of a git repository."""
repo_path = tmp_path / "not_a_git_repo"
repo_path.mkdir()
with pytest.raises(SystemExit):
ensure_git_repo_access(git_c_path=repo_path)


def test_git_root_dir(tmp_path: Path) -> None:
"""Ensure the correct git root directory is returned, regardless of depth."""
repo_path = tmp_path / "toplevel"
nested_path = repo_path / "sub_dir_1" / "sub_dir_2"
nested_path.mkdir(parents=True)
porcelain.init(str(repo_path))
assert git_root_dir(git_c_path=repo_path) == repo_path
assert git_root_dir(git_c_path=nested_path) == repo_path

0 comments on commit 5317fd9

Please sign in to comment.