Add s3 upload storage method
Signed-off-by: Mike Perez <[email protected]>
Thingee committed Sep 26, 2024
1 parent 3ed5cdf commit eb11441
Showing 5 changed files with 66 additions and 29 deletions.
12 changes: 12 additions & 0 deletions README.rst
@@ -41,6 +41,18 @@ the service as follows::
api_key = 'secret'


storage_method
^^^^^^^^^^^^^^
``storage_method`` is a required configuration item; it defines where the
binaries are stored. The two available values are ``local`` and ``s3``.

bucket
^^^^^^
``bucket`` is required if ``storage_method`` is set to ``s3``. It defines
which S3 bucket the binaries are uploaded to.
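
For example, to store binaries in S3 (the bucket name below is
illustrative)::

    storage_method = 's3'
    bucket = 'chacra-binaries'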


Self-discovery
--------------
The API provides informational JSON at every step of the URL about what is
50 changes: 47 additions & 3 deletions chacra/controllers/binaries/archs.py
@@ -1,5 +1,8 @@
import base64
import hashlib
import logging
import os
import boto3
from botocore.exceptions import ClientError
import pecan
from pecan import response
from pecan.secure import secure
@@ -26,6 +29,7 @@ def __init__(self, arch):
self.distro_version = request.context['distro_version']
self.ref = request.context['ref']
self.sha1 = request.context['sha1']
self.checksum = None
request.context['arch'] = self.arch

@expose(generic=True, template='json')
@@ -89,7 +93,7 @@ def index_post(self):
if request.POST.get('force', False) is False:
error('/errors/invalid', 'resource already exists and "force" key was not used')

full_path = self.save_file(file_obj)
full_path, size = self.save_file(file_obj)

if self.binary is None:
path = full_path
@@ -102,14 +106,21 @@
self.binary = Binary(
self.binary_name, self.project, arch=arch,
distro=distro, distro_version=distro_version,
ref=ref, sha1=sha1, path=path, size=os.path.getsize(path)
ref=ref, sha1=sha1, path=path, size=size,
checksum=self.checksum
)
else:
self.binary.path = full_path
self.binary.checksum = self.checksum

# check if this binary is interesting for other configured projects,
# and if so, then mark those other repos so that they can be re-built
self.mark_related_repos()

# Remove the local file after S3 upload
if pecan.conf.storage_method == 's3':
os.remove(full_path)

return dict()

def mark_related_repos(self):
@@ -175,8 +186,41 @@ def save_file(self, file_obj):
for chunk in file_iterable:
f.write(chunk)

self.checksum = self.generate_checksum(destination)

if pecan.conf.storage_method == 's3':
bucket = pecan.conf.bucket
object_destination = os.path.relpath(destination, pecan.conf.binary_root)

s3_client = boto3.client('s3')
try:
with open(destination, 'rb') as f:
# S3 expects the base64-encoded raw digest here, not the hex
# digest that generate_checksum() returns
checksum_b64 = base64.b64encode(bytes.fromhex(self.checksum)).decode()
s3_client.put_object(Body=f,
Bucket=bucket,
Key=object_destination,
ChecksumAlgorithm='SHA256',
ChecksumSHA256=checksum_b64
)
except ClientError as e:
error('/errors/error/', 'file object upload to S3 failed with error %s' % e)

size = os.path.getsize(destination)

# return the full path to the saved object and its size:
return destination
return destination, size

def generate_checksum(self, binary):
# S3 integrity checks here use SHA256; local storage keeps SHA512
if pecan.conf.storage_method == 's3':
chsum = hashlib.sha256()
else:
chsum = hashlib.sha512()

with open(binary, 'rb') as f:
for chunk in iter(lambda: f.read(4096), b''):
chsum.update(chunk)
return chsum.hexdigest()

@expose()
def _lookup(self, name, *remainder):
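
A note on the two checksum encodings involved above: ``generate_checksum``
returns a hex digest, which is what gets stored on the ``Binary`` model, while
S3's ``ChecksumSHA256`` parameter expects the base64 encoding of the raw
digest. A minimal standalone sketch of the difference (the payload is
illustrative):

    import base64
    import hashlib

    # One SHA-256 digest, two encodings: hex for the database column,
    # base64 for boto3's ChecksumSHA256 parameter.
    digest = hashlib.sha256(b'example payload').digest()
    print(digest.hex())                       # hex, as stored on Binary.checksum
    print(base64.b64encode(digest).decode())  # base64, as sent to put_object
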
26 changes: 0 additions & 26 deletions chacra/models/binaries.py
@@ -168,27 +168,6 @@ def __json__(self):

# Listeners


def generate_checksum(mapper, connection, target):
try:
target.path
except AttributeError:
target.checksum = None
return

# FIXME
# sometimes we can accept binaries without a path and that is probably something
# that should not happen. The core purpose of this binary is that it works with
# paths and files, this should be required.
if not target.path:
return
chsum = hashlib.sha512()
with open(target.path, 'rb') as f:
for chunk in iter(lambda: f.read(4096), b''):
chsum.update(chunk)
target.checksum = chsum.hexdigest()


def update_repo(mapper, connection, target):
try:
if target.repo.is_generic:
@@ -206,11 +185,6 @@ def update_repo(mapper, connection, target):
# triggered it because there is nothing we need to do
pass

# listen for checksum changes
listen(Binary, 'before_insert', generate_checksum)
listen(Binary, 'before_update', generate_checksum)


def add_timestamp_listeners():
# listen for timestamp modifications
listen(Binary, 'before_insert', update_timestamp)
6 changes: 6 additions & 0 deletions config/dev.py
@@ -63,11 +63,17 @@
'encoding': 'utf-8'
}

# Where to store the data. Options are 's3' or 'local'
storage_method = 'local'

# location for storing uploaded binaries
binary_root = '%(confdir)s/public'
repos_root = '%(confdir)s/repos'
distributions_root = '%(confdir)s/distributions'

# If storage_method is 's3', provide the name of the S3 bucket to upload
# binaries to
bucket = ''
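
# For example, to exercise the S3 path in development (the bucket name is
# illustrative; boto3 reads AWS credentials from its usual environment and
# config file chain):
#   storage_method = 's3'
#   bucket = 'chacra-dev-binaries'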

# When True it will set the headers so that Nginx can serve the download
# instead of Pecan.
delegate_downloads = False
1 change: 1 addition & 0 deletions requirements.txt
@@ -8,4 +8,5 @@ alembic
ipython
python-statsd
requests
boto3
importlib_metadata<=3.6; python_version<'3.8'
