Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add s3 upload storage method #316

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,18 @@ the service as follows::
api_key = 'secret'


storage_method
^^^^^^^^^^^^^^
The ``storage_method`` is a required configuration item; it defines where the
binaries should be stored. The two available values are ``local`` and
``s3``.

bucket
^^^^^^
The ``bucket`` setting is required if the ``storage_method`` configuration is
set to ``s3``; it defines the S3 bucket in which the binaries are stored.
(This matches the ``bucket`` key in the configuration file, which the service
reads as ``pecan.conf.bucket``.)


Self-discovery
--------------
The API provides informational JSON at every step of the URL about what is
Expand Down
38 changes: 34 additions & 4 deletions chacra/controllers/binaries/archs.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import logging
import os
import boto3
from botocore.exceptions import ClientError
import pecan
from pecan import response
from pecan.secure import secure
from pecan import expose, abort, request
from webob.static import FileIter
from chacra.models import Binary
from chacra.models.binaries import Binary, generate_checksum
from chacra import models, util
from chacra.controllers import error
from chacra.controllers.util import repository_is_automatic
Expand All @@ -26,6 +28,7 @@ def __init__(self, arch):
self.distro_version = request.context['distro_version']
self.ref = request.context['ref']
self.sha1 = request.context['sha1']
self.checksum = None
request.context['arch'] = self.arch

@expose(generic=True, template='json')
Expand Down Expand Up @@ -89,7 +92,7 @@ def index_post(self):
if request.POST.get('force', False) is False:
error('/errors/invalid', 'resource already exists and "force" key was not used')

full_path = self.save_file(file_obj)
full_path, size = self.save_file(file_obj)

if self.binary is None:
path = full_path
Expand All @@ -102,14 +105,17 @@ def index_post(self):
self.binary = Binary(
self.binary_name, self.project, arch=arch,
distro=distro, distro_version=distro_version,
ref=ref, sha1=sha1, path=path, size=os.path.getsize(path)
ref=ref, sha1=sha1, path=path, size=size,
checksum=self.checksum
)
else:
self.binary.path = full_path
self.binary.checksum = self.checksum

# check if this binary is interesting for other configured projects,
# and if so, then mark those other repos so that they can be re-built
self.mark_related_repos()

return dict()

def mark_related_repos(self):
Expand Down Expand Up @@ -175,8 +181,32 @@ def save_file(self, file_obj):
for chunk in file_iterable:
f.write(chunk)

size = os.path.getsize(destination)
self.checksum = generate_checksum(destination)

if pecan.conf.storage_method == 's3':
bucket = pecan.conf.bucket
object_destination = os.path.relpath(destination, pecan.conf.binary_root)

s3_client = boto3.client('s3')
try:
with open(destination, 'rb') as f:
s3_client.put_object(Body=f,
Bucket=bucket,
Key=object_destination,
ChecksumAlgorithm='sha256',
ChecksumSHA256=self.checksum
)
except ClientError as e:
error('/errors/error/', 'file object upload to S3 failed with error %s' % e)

# Remove the local file after S3 upload
os.remove(destination)

destination = 's3://' + object_destination[1:]

# return the full path to the saved object:
return destination
return destination, size

@expose()
def _lookup(self, name, *remainder):
Expand Down
30 changes: 10 additions & 20 deletions chacra/models/binaries.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import hashlib
import datetime
import pecan
from sqlalchemy import Column, Integer, String, ForeignKey, Boolean, DateTime, BigInteger
from sqlalchemy.orm import relationship, backref
from sqlalchemy.event import listen
Expand Down Expand Up @@ -169,24 +170,18 @@ def __json__(self):
# Listeners


def generate_checksum(mapper, connection, target):
try:
target.path
except AttributeError:
target.checksum = None
return
def generate_checksum(self, binary):
# S3 requires SHA256
chsum = None
if pecan.conf.storage_method == 's3':
chsum = hashlib.sha256()
else:
chsum = hashlib.sha512()

# FIXME
# sometimes we can accept binaries without a path and that is probably something
# that should not happen. The core purpose of this binary is that it works with
# paths and files, this should be required.
if not target.path:
return
chsum = hashlib.sha512()
with open(target.path, 'rb') as f:
with open(binary, 'rb') as f:
for chunk in iter(lambda: f.read(4096), b''):
chsum.update(chunk)
target.checksum = chsum.hexdigest()
return chsum.hexdigest()


def update_repo(mapper, connection, target):
Expand All @@ -206,11 +201,6 @@ def update_repo(mapper, connection, target):
# triggered it because there is nothing we need to do
pass

# listen for checksum changes
listen(Binary, 'before_insert', generate_checksum)
listen(Binary, 'before_update', generate_checksum)


def add_timestamp_listeners():
# listen for timestamp modifications
listen(Binary, 'before_insert', update_timestamp)
Expand Down
6 changes: 6 additions & 0 deletions config/dev.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,17 @@
'encoding': 'utf-8'
}

# Where to store the data. Options are 's3' or 'local'
storage_method = 'local'

# location for storing uploaded binaries
binary_root = '%(confdir)s/public'
repos_root = '%(confdir)s/repos'
distributions_root = '%(confdir)s/distributions'

# If storage method is s3, provide a bucket name
bucket = ''

# When True it will set the headers so that Nginx can serve the download
# instead of Pecan.
delegate_downloads = False
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ alembic
ipython
python-statsd
requests
boto3
importlib_metadata<=3.6; python_version<'3.8'