Commit
Merge branch 'master' into issues/4521-duplicate-ci-tests
adamnovak authored Jul 20, 2023
2 parents 9b2cb3c + aaa451b commit 9ce59c9
Showing 20 changed files with 287 additions and 91 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -22,3 +22,4 @@ tmp/
.coverage*
.mypy_cache
/contrib/admin/.issue_cache/
.docker_cache/
8 changes: 5 additions & 3 deletions .gitlab-ci.yml
@@ -20,6 +20,8 @@ before_script:
echo "{\"registry-mirrors\": [\"${DOCKER_HUB_MIRROR}\"], \"insecure-registries\": [\"${DOCKER_HUB_MIRROR##*://}\"]}" | sudo tee /etc/docker/daemon.json
export SINGULARITY_DOCKER_HUB_MIRROR="${DOCKER_HUB_MIRROR}"
fi
# Restart or start the Docker daemon
- stopdocker || true
- startdocker || true
- docker info
- cat /etc/hosts
@@ -29,9 +31,9 @@ before_script:
# a service account bearer token for auth and triggers https://github.com/docker/buildx/issues/267
# where buildx can't use a bearer token from a kube config and falls back to anonymous instead
# of using the system's service account.
- KUBECONFIG=/dev/null docker buildx create --use --name toilbuilder --platform=linux/amd64,linux/arm64 --node=buildkit-amd64 --driver=kubernetes --driver-opt="nodeselector=kubernetes.io/arch=amd64"
# Dump the builder info, and make sure it exists.
- docker buildx inspect --bootstrap || (echo "Docker builder deployment can't be found in our Kubernetes namespace! Are we on the right Gitlab runner?" && exit 1)
- if [[ "${CI_BUILDKIT_DRIVER}" == "kubernetes" ]] ; then KUBECONFIG=/dev/null docker buildx create --use --name=buildkit --platform=linux/amd64,linux/arm64 --node=buildkit-amd64 --driver=kubernetes --driver-opt="nodeselector=kubernetes.io/arch=amd64" ; else docker buildx create --use --name=container-builder --driver=docker-container ; fi
# Report on the builders, and make sure they exist.
- docker buildx inspect --bootstrap || (echo "Docker builder deployment can't be found! Are we on the right Gitlab runner?" && exit 1)
# This will hang if we can't talk to the builder
- (echo "y" | docker buildx prune --keep-storage 80G) || true

55 changes: 42 additions & 13 deletions Makefile
@@ -170,24 +170,28 @@ pre_pull_docker:
for i in $$(seq 1 11); do if [[ $$i == "11" ]] ; then exit 1 ; fi ; docker pull sscaling/mtail && break || sleep 60; done

toil_docker: pre_pull_docker docker/Dockerfile
mkdir -p .docker_cache
@set -ex \
; cd docker \
; docker buildx build --platform=$(arch) --tag=$(docker_image):$(TOIL_DOCKER_TAG) -f Dockerfile .
; docker buildx build --platform=$(arch) --tag=$(docker_image):$(TOIL_DOCKER_TAG) --cache-from type=registry,ref=$(docker_image):$(TOIL_DOCKER_MAIN_CACHE_TAG) --cache-from type=registry,ref=$(docker_image):$(TOIL_DOCKER_CACHE_TAG) --cache-from type=local,src=../.docker-cache/toil --cache-to type=local,dest=../.docker-cache/toil -f Dockerfile .

prometheus_docker: pre_pull_docker
mkdir -p .docker_cache
@set -ex \
; cd dashboard/prometheus \
; docker buildx build --platform=$(arch) --tag=$(prometheus_image):$(TOIL_DOCKER_TAG) -f Dockerfile .
; docker buildx build --platform=$(arch) --tag=$(prometheus_image):$(TOIL_DOCKER_TAG) --cache-from type=registry,ref=$(prometheus_image):$(TOIL_DOCKER_MAIN_CACHE_TAG) --cache-from type=registry,ref=$(prometheus_image):$(TOIL_DOCKER_CACHE_TAG) --cache-from type=local,src=../../.docker-cache/prometheus --cache-to type=local,dest=../../.docker-cache/prometheus -f Dockerfile .

grafana_docker: pre_pull_docker
mkdir -p .docker_cache
@set -ex \
; cd dashboard/grafana \
; docker buildx build --platform=$(arch) --tag=$(grafana_image):$(TOIL_DOCKER_TAG) -f Dockerfile .
; docker buildx build --platform=$(arch) --tag=$(grafana_image):$(TOIL_DOCKER_TAG) --cache-from type=registry,ref=$(grafana_image):$(TOIL_DOCKER_MAIN_CACHE_TAG) --cache-from type=registry,ref=$(grafana_image):$(TOIL_DOCKER_CACHE_TAG) --cache-from type=local,src=../../.docker-cache/grafana --cache-to type=local,dest=../../.docker-cache/grafana -f Dockerfile .

mtail_docker: pre_pull_docker
mkdir -p .docker_cache
@set -ex \
; cd dashboard/mtail \
; docker buildx build --platform=$(arch) --tag=$(mtail_image):$(TOIL_DOCKER_TAG) -f Dockerfile .
; docker buildx build --platform=$(arch) --tag=$(mtail_image):$(TOIL_DOCKER_TAG) --cache-from type=registry,ref=$(mtail_image):$(TOIL_DOCKER_MAIN_CACHE_TAG) --cache-from type=registry,ref=$(mtail_image):$(TOIL_DOCKER_CACHE_TAG) --cache-from type=local,src=../../.docker-cache/mtail --cache-to type=local,dest=../../.docker-cache/mtail -f Dockerfile .

docker/$(sdist_name): dist/$(sdist_name)
cp $< $@
@@ -200,17 +204,42 @@ clean_docker:
-docker rmi $(docker_image):$(TOIL_DOCKER_TAG)

push_docker: docker
# Weird if logic is so we fail if all the pushes fail
cd docker ; for i in $$(seq 1 6); do if [[ $$i == "6" ]] ; then exit 1 ; fi ; docker buildx build --platform $(arch) --push --tag=$(docker_image):$(TOIL_DOCKER_TAG) -f Dockerfile . && break || sleep 60; done
cd dashboard/prometheus ; for i in $$(seq 1 6); do if [[ $$i == "6" ]] ; then exit 1 ; fi ; docker buildx build --platform $(arch) --push --tag=$(prometheus_image):$(TOIL_DOCKER_TAG) -f Dockerfile . && break || sleep 60; done
cd dashboard/grafana ; for i in $$(seq 1 6); do if [[ $$i == "6" ]] ; then exit 1 ; fi ; docker buildx build --platform $(arch) --push --tag=$(grafana_image):$(TOIL_DOCKER_TAG) -f Dockerfile . && break || sleep 60; done
cd dashboard/mtail ; for i in $$(seq 1 6); do if [[ $$i == "6" ]] ; then exit 1 ; fi ; docker buildx build --platform $(arch) --push --tag=$(mtail_image):$(TOIL_DOCKER_TAG) -f Dockerfile . && break || sleep 60; done
# The weird if logic is there so that we fail if all the pushes fail.
# We need to build from the local cache to the cache tag and again from the local cache to the real tag.
cd docker ; \
for i in $$(seq 1 6); do \
if [[ $$i == "6" ]] ; then exit 1 ; fi ; \
docker buildx build --platform $(arch) --push --tag=$(docker_image):$(TOIL_DOCKER_CACHE_TAG) --cache-from type=local,src=../.docker-cache/toil --cache-to type=inline -f Dockerfile . && \
docker buildx build --platform $(arch) --push --tag=$(docker_image):$(TOIL_DOCKER_TAG) --cache-from type=local,src=../.docker-cache/toil -f Dockerfile . && \
break || sleep 60; \
done
cd dashboard/prometheus ; \
for i in $$(seq 1 6); do \
if [[ $$i == "6" ]] ; then exit 1 ; fi ; \
docker buildx build --platform $(arch) --push --tag=$(prometheus_image):$(TOIL_DOCKER_CACHE_TAG) --cache-from type=local,src=../../.docker-cache/prometheus --cache-to type=inline -f Dockerfile . && \
docker buildx build --platform $(arch) --push --tag=$(prometheus_image):$(TOIL_DOCKER_TAG) --cache-from type=local,src=../../.docker-cache/prometheus -f Dockerfile . && \
break || sleep 60; \
done
cd dashboard/grafana ; \
for i in $$(seq 1 6); do \
if [[ $$i == "6" ]] ; then exit 1 ; fi ; \
docker buildx build --platform $(arch) --push --tag=$(grafana_image):$(TOIL_DOCKER_CACHE_TAG) --cache-from type=local,src=../../.docker-cache/grafana --cache-to type=inline -f Dockerfile . && \
docker buildx build --platform $(arch) --push --tag=$(grafana_image):$(TOIL_DOCKER_TAG) --cache-from type=local,src=../../.docker-cache/grafana -f Dockerfile . && \
break || sleep 60; \
done
cd dashboard/mtail ; \
for i in $$(seq 1 6); do \
if [[ $$i == "6" ]] ; then exit 1 ; fi ; \
docker buildx build --platform $(arch) --push --tag=$(mtail_image):$(TOIL_DOCKER_CACHE_TAG) --cache-from type=local,src=../../.docker-cache/mtail --cache-to type=inline -f Dockerfile . && \
docker buildx build --platform $(arch) --push --tag=$(mtail_image):$(TOIL_DOCKER_TAG) --cache-from type=local,src=../../.docker-cache/mtail -f Dockerfile . && \
break || sleep 60; \
done

load_docker: docker
cd docker ; docker buildx build --platform $(arch) --load --tag=$(docker_image):$(TOIL_DOCKER_TAG) -f Dockerfile .
cd dashboard/prometheus ; docker buildx build --platform $(arch) --load --tag=$(prometheus_image):$(TOIL_DOCKER_TAG) -f Dockerfile .
cd dashboard/grafana ; docker buildx build --platform $(arch) --load --tag=$(grafana_image):$(TOIL_DOCKER_TAG) -f Dockerfile .
cd dashboard/mtail ; docker buildx build --platform $(arch) --load --tag=$(mtail_image):$(TOIL_DOCKER_TAG) -f Dockerfile .
cd docker ; docker buildx build --platform $(arch) --load --tag=$(docker_image):$(TOIL_DOCKER_TAG) --cache-from type=local,src=../.docker-cache/toil -f Dockerfile .
cd dashboard/prometheus ; docker buildx build --platform $(arch) --load --tag=$(prometheus_image):$(TOIL_DOCKER_TAG) --cache-from type=local,src=../../.docker-cache/prometheus -f Dockerfile .
cd dashboard/grafana ; docker buildx build --platform $(arch) --load --tag=$(grafana_image):$(TOIL_DOCKER_TAG) --cache-from type=local,src=../../.docker-cache/grafana -f Dockerfile .
cd dashboard/mtail ; docker buildx build --platform $(arch) --load --tag=$(mtail_image):$(TOIL_DOCKER_TAG) --cache-from type=local,src=../../.docker-cache/mtail -f Dockerfile .

else

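The push recipes retry each buildx invocation up to five times with a 60-second pause between attempts; the `if [[ $$i == "6" ]]` guard turns the sixth loop iteration into a hard failure, so a target only fails if every push fails. A rough Python sketch of the same retry-then-fail control flow (a hypothetical helper, not code from this commit):

```python
import subprocess
import time

def run_with_retries(cmd: list, attempts: int = 5, delay: int = 60) -> None:
    """Run a command, retrying on failure, like the Makefile's seq loop."""
    for attempt in range(1, attempts + 1):
        # A zero exit status means the push succeeded; stop retrying.
        if subprocess.run(cmd).returncode == 0:
            return  # the shell loop's `&& break`
        if attempt < attempts:
            time.sleep(delay)  # the shell loop's `|| sleep 60`
    # Every attempt failed: mirror the `exit 1` on the final iteration.
    raise RuntimeError(f"command failed after {attempts} attempts: {cmd}")
```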
2 changes: 2 additions & 0 deletions common.mk
@@ -15,6 +15,8 @@ SHELL=bash
export TOIL_DOCKER_REGISTRY?=quay.io/ucsc_cgl
export TOIL_DOCKER_NAME?=toil
export TOIL_DOCKER_TAG?=$(shell python version_template.py dockerTag)
export TOIL_DOCKER_CACHE_TAG?=$(shell python version_template.py cacheTag)
export TOIL_DOCKER_MAIN_CACHE_TAG?=$(shell python version_template.py mainCacheTag)
export TOIL_APPLIANCE_SELF?=$(TOIL_DOCKER_REGISTRY)/$(TOIL_DOCKER_NAME):$(TOIL_DOCKER_TAG)

# TOIL_CHECK_ENV='' # Determines whether toil refers to the same virtualenv paths it spawned from (across machines)
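The two new variables feed the registry cache lookups in the Makefile: `TOIL_DOCKER_CACHE_TAG` names the current branch's cache image, and `TOIL_DOCKER_MAIN_CACHE_TAG` the shared cache built from the main branch. The actual derivation lives in `version_template.py`, which this diff does not show; a hypothetical sketch of the idea, assuming branch-derived tag names:

```python
import re

def cache_tag(branch: str) -> str:
    # Hypothetical: sanitize the branch name into a valid Docker tag,
    # namespaced so cache pushes never collide with release tags.
    safe = re.sub(r"[^A-Za-z0-9_.-]", "-", branch)
    return f"cache-{safe.lower()}"

def main_cache_tag() -> str:
    # Hypothetical: the fallback cache shared by all branches.
    return cache_tag("master")
```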
5 changes: 3 additions & 2 deletions requirements-aws.txt
@@ -1,3 +1,4 @@
boto>=2.48.0, <3
boto3-stubs[s3,sdb,iam,sts,boto3]>=1.28.3, <2
moto>=4.1.11, <5
boto3-stubs[s3,sdb,iam,sts,boto3]>=1.28.3.post2, <2
mypy-boto3-iam>=1.28.3.post2, <2 # Need to force .post1 to be replaced
moto>=4.1.11, <5
1 change: 1 addition & 0 deletions requirements-google.txt
@@ -1,2 +1,3 @@
apache-libcloud>=2.2.1,<3
google-cloud-storage>=2,<=2.8.0
google-auth>=2.18.1,<3
3 changes: 2 additions & 1 deletion setup.cfg
@@ -18,7 +18,8 @@ markers =
docker_cuda
encryption
fetchable_appliance
google
google-project
google-storage
gridengine
htcondor
integrative
40 changes: 34 additions & 6 deletions src/toil/common.py
@@ -1266,27 +1266,55 @@ def importFile(self,
def import_file(self,
src_uri: str,
shared_file_name: str,
symlink: bool = True) -> None: ...
symlink: bool = True,
check_existence: bool = True) -> None: ...

@overload
def import_file(self,
src_uri: str,
shared_file_name: None = None,
symlink: bool = True) -> FileID: ...
symlink: bool = True,
check_existence: bool = True) -> FileID: ...

def import_file(self,
src_uri: str,
shared_file_name: Optional[str] = None,
symlink: bool = True) -> Optional[FileID]:
symlink: bool = True,
check_existence: bool = True) -> Optional[FileID]:
"""
Import the file at the given URL into the job store.
By default, raises FileNotFoundError if the file does not exist.
:param check_existence: If true, raise FileNotFoundError if the file
does not exist. If false, return None when the file does not
exist.
See :func:`toil.jobStores.abstractJobStore.AbstractJobStore.importFile` for a
full description.
"""
self._assertContextManagerUsed()
src_uri = self.normalize_uri(src_uri, check_existence=True)
return self._jobStore.import_file(src_uri, shared_file_name=shared_file_name, symlink=symlink)
full_uri = self.normalize_uri(src_uri, check_existence=check_existence)
try:
imported = self._jobStore.import_file(full_uri, shared_file_name=shared_file_name, symlink=symlink)
except FileNotFoundError:
# TODO: I thought we refactored the different job store import
# methods to return None instead of raising, but that does not
# currently appear to be the case.
if check_existence:
raise
else:
# So translate the raise-based API if needed.
# TODO: If check_existence is false but a shared file name is
# specified, we have no way to report the lack of file
# existence, since we also return None on success!
return None
if imported is None and shared_file_name is None and check_existence:
# We need to protect the caller from missing files.
# We think a file was missing, and we got None because of it.
# We didn't get None just because a shared file name was used.
raise FileNotFoundError(f'Could not find file {src_uri}')
return imported

@deprecated(new_function_name='export_file')
def exportFile(self, jobStoreFileID: FileID, dstUrl: str) -> None:
@@ -1308,7 +1336,7 @@ def normalize_uri(uri: str, check_existence: bool = False) -> str:
"""
Given a URI, if it has no scheme, prepend "file:".
:param check_existence: If set, raise an error if a URI points to
:param check_existence: If set, raise FileNotFoundError if a URI points to
a local file that does not exist.
"""
if urlparse(uri).scheme == 'file':
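With the new `check_existence` parameter, `Toil.import_file` can either keep the old raise-on-missing behavior or report a missing file by returning `None`. A usage sketch under assumptions (an open `Toil` context and illustrative input paths, neither taken from this diff):

```python
from toil.common import Toil
from toil.job import Job

options = Job.Runner.getDefaultOptions("file:my-jobstore")
with Toil(options) as toil:
    # Default: a missing file raises FileNotFoundError.
    try:
        required_id = toil.import_file("file:///data/required-input.txt")
    except FileNotFoundError:
        raise SystemExit("required input is missing")

    # check_existence=False: a missing file comes back as None instead.
    optional_id = toil.import_file("file:///data/optional-input.txt",
                                   check_existence=False)
    if optional_id is None:
        print("optional input not present; continuing without it")
```

As the TODO in the diff notes, this reporting only works without a `shared_file_name`, since the shared-file code path returns `None` on success as well.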
51 changes: 34 additions & 17 deletions src/toil/jobStores/abstractJobStore.py
@@ -43,11 +43,10 @@
from typing_extensions import Literal

from urllib.parse import ParseResult, urlparse
from urllib.error import HTTPError
from urllib.request import urlopen
from uuid import uuid4

from requests.exceptions import HTTPError

from toil.common import Config, getNodeID, safeUnpickleFromStream
from toil.fileStores import FileID
from toil.job import (CheckpointJobDescription,
@@ -420,6 +419,8 @@ def import_file(self,
- 'gs'
e.g. gs://bucket/file
Raises FileNotFoundError if the file does not exist.
:param str src_uri: URL that points to a file or object in the storage mechanism of a
supported URL scheme e.g. a blob in an AWS s3 bucket. It must be a file, not a
directory or prefix.
@@ -453,6 +454,8 @@ def _import_file(self,
asks the other job store class for a stream and writes that stream as either a regular or
a shared file.
Raises FileNotFoundError if the file does not exist.
:param AbstractJobStore otherCls: The concrete subclass of AbstractJobStore that supports
reading from the given URL and getting the file size from the URL.
@@ -587,6 +590,8 @@ def read_from_url(cls, src_uri: str, writable: IO[bytes]) -> Tuple[int, bool]:
"""
Read the given URL and write its content into the given writable stream.
Raises FileNotFoundError if the URL doesn't exist.
:return: The size of the file in bytes and whether the executable permission bit is set
:rtype: Tuple[int, bool]
"""
@@ -618,8 +623,12 @@ def _read_from_url(cls, url: ParseResult, writable: IO[bytes]) -> Tuple[int, bool]:
Reads the contents of the object at the specified location and writes it to the given
writable stream.
Raises FileNotFoundError if the URL doesn't exist.
Refer to :func:`~AbstractJobStore.importFile` documentation for currently supported URL schemes.
:param ParseResult url: URL that points to a file or object in the storage
mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
@@ -635,7 +644,7 @@ def _write_to_url(cls, readable: Union[IO[bytes], IO[str]], url: ParseResult, executable: bool = False) -> None:
def _write_to_url(cls, readable: Union[IO[bytes], IO[str]], url: ParseResult, executable: bool = False) -> None:
"""
Reads the contents of the given readable stream and writes it to the object at the
specified location.
specified location. Raises FileNotFoundError if the URL doesn't exist.
Refer to AbstractJobStore.importFile documentation for currently supported URL schemes.
@@ -1707,20 +1716,28 @@ def _read_from_url(
# We can only retry on errors that happen as responses to the request.
# If we start getting file data, and the connection drops, we fail.
# So we don't have to worry about writing the start of the file twice.
with closing(urlopen(url.geturl())) as readable:
# Make something to count the bytes we get
# We need to put the actual count in a container so our
# nested function can modify it without creating its own
# local with the same name.
size = [0]
def count(l: int) -> None:
size[0] += l
counter = WriteWatchingStream(writable)
counter.onWrite(count)

# Do the download
shutil.copyfileobj(readable, counter)
return size[0], False
try:
with closing(urlopen(url.geturl())) as readable:
# Make something to count the bytes we get
# We need to put the actual count in a container so our
# nested function can modify it without creating its own
# local with the same name.
size = [0]
def count(l: int) -> None:
size[0] += l
counter = WriteWatchingStream(writable)
counter.onWrite(count)

# Do the download
shutil.copyfileobj(readable, counter)
return size[0], False
except HTTPError as e:
if e.code == 404:
# Translate into a FileNotFoundError for detecting
# un-importable files
raise FileNotFoundError(str(url)) from e
else:
raise

@classmethod
def _get_is_directory(cls, url: ParseResult) -> bool:
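The rewritten `_read_from_url` wraps the whole download in a `try` so that an HTTP 404 from `urlopen` surfaces as `FileNotFoundError`, the same signal the job stores now use for missing local files. The translation pattern in isolation (a simplified sketch, not the job-store code itself):

```python
from urllib.error import HTTPError
from urllib.request import urlopen

def fetch(url: str) -> bytes:
    """Fetch a URL, translating HTTP 404 into FileNotFoundError."""
    try:
        with urlopen(url) as readable:
            return readable.read()
    except HTTPError as e:
        if e.code == 404:
            # Let callers treat a missing URL like a missing local file.
            raise FileNotFoundError(url) from e
        raise  # other HTTP errors (403, 500, ...) propagate unchanged
```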