diff --git a/ingest/bin/upload-to-s3 b/ingest/bin/upload-to-s3 deleted file mode 100755 index 7ea4e6f..0000000 --- a/ingest/bin/upload-to-s3 +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/bash -# Originally copied from nextstrain/ncov-ingest repo -set -euo pipefail - -bin="$(dirname "$0")" - -main() { - local src="${1:?A source file is required as the first argument.}" - local dst="${2:?A destination s3:// URL is required as the second argument.}" - local cloudfront_domain="${3:-}" - - local s3path="${dst#s3://}" - local bucket="${s3path%%/*}" - local key="${s3path#*/}" - - local src_hash dst_hash no_hash=0000000000000000000000000000000000000000000000000000000000000000 - src_hash="$("$bin/sha256sum" < "$src")" - dst_hash="$(aws s3api head-object --bucket "$bucket" --key "$key" --query Metadata.sha256sum --output text 2>/dev/null || echo "$no_hash")" - - if [[ $src_hash != "$dst_hash" ]]; then - # The record count may have changed - src_record_count="$(wc -l < "$src")" - - echo "Uploading $src → $dst" - if [[ "$dst" == *.gz ]]; then - gzip -c "$src" - elif [[ "$dst" == *.xz ]]; then - xz -2 -T0 -c "$src" - else - cat "$src" - fi | aws s3 cp --no-progress - "$dst" --metadata sha256sum="$src_hash",recordcount="$src_record_count" "$(content-type "$dst")" - - if [[ -n $cloudfront_domain ]]; then - echo "Creating CloudFront invalidation for $cloudfront_domain/$key" - if ! "$bin"/cloudfront-invalidate "$cloudfront_domain" "/$key"; then - echo "CloudFront invalidation failed, but exiting with success anyway." - fi - fi - - else - echo "Uploading $src → $dst: files are identical, skipping upload" - fi -} - -content-type() { - case "$1" in - *.tsv) echo --content-type=text/tab-separated-values;; - *.csv) echo --content-type=text/comma-separated-values;; - *.ndjson) echo --content-type=application/x-ndjson;; - *.gz) echo --content-type=application/gzip;; - *.xz) echo --content-type=application/x-xz;; - *) echo --content-type=text/plain;; - esac -} - -main diff --git a/ingest/workflow/snakemake_rules/upload.smk b/ingest/workflow/snakemake_rules/upload.smk index 36fddf9..b1bdcaf 100644 --- a/ingest/workflow/snakemake_rules/upload.smk +++ b/ingest/workflow/snakemake_rules/upload.smk @@ -21,5 +21,5 @@ rule upload_to_s3: cloudfront_domain = config["upload"].get("s3", {}).get("cloudfront_domain", "") shell: """ - ./bin/upload-to-s3 {input:q} {params.s3_dst:q}/{wildcards.remote_file_name:q} {params.cloudfront_domain} + ./vendored/upload-to-s3 --quiet {input:q} {params.s3_dst:q}/{wildcards.remote_file_name:q} {params.cloudfront_domain} """