Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add include/exclude filter support for pull-through caching #747

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/706.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Pull-through caching now respects the include/exclude filters on the upstream remote.
3 changes: 3 additions & 0 deletions docs/user/guides/publish.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ from the remote source and have Pulp store that package as orphaned content.
pulp python distribution update --name foo --remote bar
```

!!! note
Pull-through caching will respect the includes/excludes filters on the supplied remote.

!!! warning
Support for pull-through caching is provided as a tech preview in Pulp 3.
Functionality may not work or may be incomplete. Also, backwards compatibility when upgrading
Expand Down
52 changes: 31 additions & 21 deletions pulp_python/app/pypi/views.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import logging
import requests
import os

from rest_framework.viewsets import ViewSet
from rest_framework.response import Response
Expand All @@ -15,15 +16,16 @@
Http404,
HttpResponseForbidden,
HttpResponseBadRequest,
StreamingHttpResponse
StreamingHttpResponse,
HttpResponse,
)
from drf_spectacular.utils import extend_schema
from dynaconf import settings
from itertools import chain
from packaging.utils import canonicalize_name
from urllib.parse import urljoin, urlparse, urlunsplit
from pathlib import PurePath
from pypi_simple import parse_links_stream_response
from pypi_simple import ACCEPT_JSON_PREFERRED, ProjectPage

from pulpcore.plugin.viewsets import OperationPostponedResponse
from pulpcore.plugin.tasking import dispatch
Expand All @@ -45,6 +47,7 @@
python_content_to_json,
PYPI_LAST_SERIAL,
PYPI_SERIAL_CONSTANT,
get_remote_package_filter,
)

from pulp_python.app import tasks
Expand Down Expand Up @@ -232,27 +235,34 @@ def list(self, request, path):

def pull_through_package_simple(self, package, path, remote):
"""Gets the package's simple page from remote."""
# NOTE(review): this span is a rendered diff whose +/- markers and indentation were lost, so
# two implementations appear back to back. Presumably `parse_url` and the `requests.get`
# streaming path are the removed side and `parse_package` / `remote.get_downloader` the
# added side -- confirm against the raw diff before editing.
def parse_url(link):
parsed = urlparse(link.url)
digest, _, value = parsed.fragment.partition('=')
def parse_package(dis_package):
parsed = urlparse(dis_package.url)
# Keep only scheme, netloc and path; the remaining URL components are blanked out.
stripped_url = urlunsplit(chain(parsed[:3], ("", "")))
redirect = f'{path}/{link.text}?redirect={stripped_url}'
d_url = urljoin(self.base_content_url, redirect)
return link.text, d_url, value if digest == 'sha256' else ''
redirect_path = f'{path}/{dis_package.filename}?redirect={stripped_url}'
d_url = urljoin(self.base_content_url, redirect_path)
# Tuple of (filename, redirect URL, sha256 digest or "" when the page lists none).
return dis_package.filename, d_url, dis_package.digests.get("sha256", "")

# Reject the whole project up front when the remote's filters exclude it.
rfilter = get_remote_package_filter(remote)
if not rfilter.filter_project(package):
raise Http404(f"{package} does not exist.")

url = remote.get_remote_artifact_url(f'simple/{package}/')
kwargs = {}
if proxy_url := remote.proxy_url:
if remote.proxy_username or remote.proxy_password:
parsed_proxy = urlparse(proxy_url)
netloc = f"{remote.proxy_username}:{remote.proxy_password}@{parsed_proxy.netloc}"
proxy_url = urlunsplit((parsed_proxy.scheme, netloc, "", "", ""))
kwargs["proxies"] = {"http": proxy_url, "https": proxy_url}

response = requests.get(url, stream=True, **kwargs)
links = parse_links_stream_response(response)
packages = (parse_url(link) for link in links)
return StreamingHttpResponse(write_simple_detail(package, packages, streamed=True))
# NOTE(review): this assigns to and appends to `remote.headers` on the passed-in object, so
# the Accept entry is added again on every call that reuses the same instance -- confirm
# the remote is never saved or cached with these headers.
remote.headers = remote.headers or []
remote.headers.append({"Accept": ACCEPT_JSON_PREFERRED})
downloader = remote.get_downloader(url=url, max_retries=1)
try:
d = downloader.fetch()
except Exception:
# Every failure of fetch() (not only a missing project) is reported to the client as a 404.
raise Http404(f"Could not find {package}.")

# NOTE(review): exact string comparison -- a content-type that carries parameters (for
# example a charset suffix) would take the HTML branch; confirm upstream never sends one.
if d.headers["content-type"] == "application/vnd.pypi.simple.v1+json":
# NOTE(review): the handles returned by open() here and below are never closed explicitly.
page = ProjectPage.from_json_data(json.load(open(d.path, "rb")), base_url=remote.url)
else:
page = ProjectPage.from_html(package, open(d.path, "rb").read(), base_url=remote.url)
# Keep only the files whose version passes the remote's include/exclude filters.
packages = [
parse_package(p) for p in page.packages if rfilter.filter_release(package, p.version)
]
# The page is rendered fully in memory (no `streamed=True`) and returned as a plain response.
return HttpResponse(write_simple_detail(package, packages))

@extend_schema(operation_id="pypi_simple_package_read", summary="Get package simple page")
def retrieve(self, request, path, package):
Expand Down
78 changes: 77 additions & 1 deletion pulp_python/app/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from django.conf import settings
from jinja2 import Template
from packaging.utils import canonicalize_name
from packaging.version import parse
from packaging.requirements import Requirement
from packaging.version import parse, InvalidVersion


PYPI_LAST_SERIAL = "X-PYPI-LAST-SERIAL"
Expand Down Expand Up @@ -317,3 +318,78 @@ def write_simple_detail(project_name, project_packages, streamed=False):
detail = Template(simple_detail_template)
context = {"project_name": project_name, "project_packages": project_packages}
return detail.stream(**context) if streamed else detail.render(**context)


class PackageIncludeFilter:
    """Filter projects and releases against a remote's ``includes``/``excludes`` lists.

    Every entry of those lists is parsed as a requirement string. Entries without a
    version specifier are grouped under ``"full"`` (they address the whole project);
    entries with a specifier are grouped under ``"range"``.
    """

    def __init__(self, remote):
        """Parse the include and exclude entries of ``remote`` (after ``remote.cast()``)."""
        self.remote = remote.cast()
        self._filter_includes = self._parse_packages(self.remote.includes)
        self._filter_excludes = self._parse_packages(self.remote.excludes)

    def _parse_packages(self, packages):
        """Group requirement strings by kind and canonical project name.

        Args:
            packages: iterable of requirement strings such as ``"django"`` or ``"django>=3"``.

        Returns:
            A nested mapping ``{"full" | "range": {canonical_name: [Requirement, ...]}}``.
        """
        config = defaultdict(lambda: defaultdict(list))
        for value in packages:
            requirement = Requirement(value)
            requirement.name = canonicalize_name(requirement.name)
            if requirement.specifier:
                # Allow pre-release versions to match the specifier as well.
                requirement.specifier.prereleases = True
                kind = "range"
            else:
                kind = "full"
            config[kind][requirement.name].append(requirement)
        return config

    def filter_project(self, project_name):
        """Return whether ``project_name`` is allowed through the remote's filters.

        A project is rejected when an include list exists and does not mention it, or
        when it is excluded without a version specifier. Exclude entries that carry a
        specifier are not considered here; ``filter_release`` handles them.
        """
        project_name = canonicalize_name(project_name)
        include_full = self._filter_includes.get("full", {})
        include_range = self._filter_includes.get("range", {})
        included = include_full.keys() | include_range.keys()
        if included and project_name not in included:
            return False

        return project_name not in self._filter_excludes.get("full", {})

    def filter_release(self, project_name, version):
        """Return whether ``version`` of ``project_name`` is allowed through the filters.

        Args:
            project_name: project name; it is canonicalized before any lookup.
            version: version string of the release. A value that cannot be parsed as a
                version, including a non-string such as ``None``, is rejected.

        Returns:
            ``True`` when the project passes ``filter_project``, the version matches at
            least one include specifier for the project (if any exist), and it matches
            no exclude specifier for the project.
        """
        project_name = canonicalize_name(project_name)
        if not self.filter_project(project_name):
            return False

        try:
            version = parse(version)
        except (InvalidVersion, TypeError):
            # ``parse`` raises TypeError rather than InvalidVersion for a non-string,
            # e.g. ``None`` when no version could be determined for a file. Treat that
            # like any other unparsable version instead of letting the error escape.
            return False

        include_range = self._filter_includes.get("range", {})
        if project_name in include_range:
            if not any(version in req.specifier for req in include_range[project_name]):
                return False

        exclude_range = self._filter_excludes.get("range", {})
        # ``.get`` with a default avoids creating an empty entry in the defaultdict.
        if any(version in req.specifier for req in exclude_range.get(project_name, ())):
            return False

        return True


_remote_filters = {}


def get_remote_package_filter(remote):
    """Return the ``PackageIncludeFilter`` for ``remote``, reusing a cached one when valid.

    Filters are cached in the module-level ``_remote_filters`` dict, keyed by
    ``remote.pulp_id`` and stored together with the ``remote.pulp_last_updated`` value
    seen when they were built. A cached filter is returned only while that value still
    equals the remote's current one; otherwise a new filter is built and replaces it.
    """
    cached_entry = _remote_filters.get(remote.pulp_id)
    if cached_entry:
        cached_stamp, cached_filter = cached_entry
        if cached_stamp == remote.pulp_last_updated:
            return cached_filter

    # Cache miss or stale entry: rebuild and remember the filter with its timestamp.
    fresh_filter = PackageIncludeFilter(remote)
    _remote_filters[remote.pulp_id] = (remote.pulp_last_updated, fresh_filter)
    return fresh_filter
41 changes: 41 additions & 0 deletions pulp_python/tests/functional/api/test_full_mirror.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@
from pulp_python.tests.functional.constants import (
PYPI_URL,
PYTHON_XS_FIXTURE_CHECKSUMS,
PYTHON_SM_PROJECT_SPECIFIER,
PYTHON_SM_FIXTURE_RELEASES,
)

from pypi_simple import ProjectPage
from packaging.version import parse
from urllib.parse import urljoin, urlsplit


Expand Down Expand Up @@ -54,6 +57,44 @@ def test_pull_through_simple(python_remote_factory, python_distribution_factory,
assert PYTHON_XS_FIXTURE_CHECKSUMS[package.filename] == package.digests["sha256"]


@pytest.mark.parallel
def test_pull_through_filter(python_remote_factory, python_distribution_factory):
    """Check that pull-through caching honours the remote's includes/excludes filters."""
    # Simple include: the listed project is served, any other project is a 404.
    include_remote = python_remote_factory(url=PYPI_URL, includes=["shelf-reader"])
    include_distro = python_distribution_factory(remote=include_remote.pulp_href)

    response = requests.get(f"{include_distro.base_url}simple/pulpcore/")
    assert response.status_code == 404
    assert response.json() == {'detail': 'pulpcore does not exist.'}

    response = requests.get(f"{include_distro.base_url}simple/shelf-reader/")
    assert response.status_code == 200

    # Includes with version specifiers: only the expected release files are listed.
    specifier_remote = python_remote_factory(includes=PYTHON_SM_PROJECT_SPECIFIER)
    specifier_distro = python_distribution_factory(remote=specifier_remote.pulp_href)
    for package, releases in PYTHON_SM_FIXTURE_RELEASES.items():
        page_url = f"{specifier_distro.base_url}simple/{package}/"
        project_page = ProjectPage.from_response(requests.get(page_url), package)
        stable_files = set()
        for listed in project_page.packages:
            if not parse(listed.version).is_prerelease:
                stable_files.add(listed.filename)
        assert stable_files == set(releases)

    # Excludes: the excluded project is filtered out before the upstream is contacted.
    exclude_remote = python_remote_factory(includes=[], excludes=["django"])
    exclude_distro = python_distribution_factory(remote=exclude_remote.pulp_href)

    response = requests.get(f"{exclude_distro.base_url}simple/django/")
    assert response.status_code == 404
    assert response.json() == {'detail': 'django does not exist.'}

    # Not excluded, so the upstream lookup happens and reports its own failure message.
    response = requests.get(f"{exclude_distro.base_url}simple/pulpcore/")
    assert response.status_code == 404
    assert response.json() == {'detail': 'Could not find pulpcore.'}

    response = requests.get(f"{exclude_distro.base_url}simple/shelf-reader/")
    assert response.status_code == 200


@pytest.mark.parallel
def test_pull_through_with_repo(
python_repo_with_sync, python_remote_factory, python_distribution_factory
Expand Down
Loading