-
Notifications
You must be signed in to change notification settings - Fork 34
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Kyl Wellman
authored and
Kyl Wellman
committed
May 25, 2020
1 parent
ab448ed
commit 77a1168
Showing
2 changed files
with
95 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
# Build stage: downloads the Bowtie2 and SRA Toolkit release archives and
# stages the selected binaries under /opt so that a later stage can copy them
# out with COPY --from=build_base.
FROM serratus-base:latest AS build_base

# Versions
ENV SRATOOLKITVERSION='2.10.4'
ENV BOWTIEVERSION='2.4.1'

# Bowtie2 - download and install
# Only `bowtie2` and `bowtie2-align-s` are kept from the release archive.
# The two paths are spelled out instead of using `{a,b}` brace expansion:
# brace expansion is a bash extension and is not guaranteed to work under the
# default `/bin/sh -c` shell used by RUN.
RUN wget -O bowtie2.zip --quiet "https://downloads.sourceforge.net/project/bowtie-bio/bowtie2/${BOWTIEVERSION}/bowtie2-${BOWTIEVERSION}-linux-x86_64.zip" \
    && unzip bowtie2.zip \
    && rm bowtie2.zip \
    && mkdir /opt/bowtie2-align \
    && mv bowtie2*/bowtie2 bowtie2*/bowtie2-align-s /opt/bowtie2-align \
    && rm -rf bowtie2*

## SRAToolkit
# The sratools binaries are kept grouped together in /opt/sratools, so it is
# easy to copy them all out into the runtime stage. They are also copied into
# /usr/local/bin so they are on PATH for the rest of this build stage.
# Globs are listed one per tool (no brace expansion) for /bin/sh portability.
RUN wget --quiet "https://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/${SRATOOLKITVERSION}/sratoolkit.${SRATOOLKITVERSION}-centos_linux64.tar.gz" \
    && tar xzf "sratoolkit.${SRATOOLKITVERSION}-centos_linux64.tar.gz" \
    && rm -f "sratoolkit.${SRATOOLKITVERSION}-centos_linux64.tar.gz" \
    && mkdir -p /opt/sratools \
    && sra_bin="./sratoolkit.${SRATOOLKITVERSION}-centos_linux64/bin" \
    && mv \
        "${sra_bin}"/vdb-config* \
        "${sra_bin}"/prefetch* \
        "${sra_bin}"/fastq-dump* \
        "${sra_bin}"/fasterq-dump* \
        "${sra_bin}"/sratools* \
        /opt/sratools \
    && cp -r /opt/sratools/* /usr/local/bin \
    && mkdir /etc/ncbi

# Copy in config for sra tools
COPY serratus-dl/VDB_user-settings.mkfg /root/.ncbi/user-settings.mkfg
# Run after the COPY so the setting is applied on top of the copied user
# settings under /root/.ncbi, which the runtime stage copies out.
RUN vdb-config --report-cloud-identity yes
|
||
# Runtime stage: a fresh amazonlinux image with python3/pip, the AWS CLI, and
# the binaries staged by the build_base stage (bowtie2, samtools, sratools).
FROM amazonlinux:2 AS runtime

LABEL container.description="serratus: end-to-end batch container"
LABEL software.license="GPLv3"
LABEL tags="aws-cli, samtools, bowtie2, sratoolkit"

# aws cli, plus dependencies
# -merge has its own python dependency, so do a full python/pip install
#
# Notes on this step:
#  * No `alias python=python3`: aliases are not expanded by the non-interactive
#    shell that executes RUN and do not persist into later layers, so it had
#    no effect.
#  * curl uses -f so an HTTP error fails the build instead of saving an error
#    page as get-pip.py.
#  * --no-cache-dir keeps pip's download cache out of the image layer.
#  * NOTE(review): the unversioned get-pip.py tracks the newest pip release and
#    may not support the python3 that yum installs here - confirm, and switch
#    to a versioned bootstrap URL or the distro pip package if it does not.
RUN yum -y install python3 perl \
    && curl -fsSLO https://bootstrap.pypa.io/get-pip.py \
    && python3 get-pip.py --no-cache-dir \
    && rm get-pip.py \
    && pip install --no-cache-dir boto3 awscli \
    && yum clean all \
    # aws configuration
    && aws configure set default.s3.multipart_threshold 4GB \
    && aws configure set default.s3.multipart_chunksize 4GB

# bowtie2
COPY --from=build_base /opt/bowtie2-align/* /usr/local/bin/

# samtools
COPY --from=build_base /usr/local/bin/samtools /usr/local/bin/

# sratools
# The config directory is copied along with the binaries so the settings
# written in the build stage are present for the root user at runtime.
COPY --from=build_base /root/.ncbi /root/.ncbi
COPY --from=build_base /opt/sratools/ /usr/local/bin/

# run script
# The summarizer is placed in WORKDIR because the run script invokes it by the
# relative path `summarizer.py`.
WORKDIR /home/serratus
COPY ./serratus-batch/run /usr/local/bin/
COPY ./serratus-merge/serratus_summarizer.py ./summarizer.py

# With no arguments the run script prints its usage and exits non-zero; pass
# `run <SRA> <Genome>` to `docker run` to do real work.
CMD ["run"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
#!/usr/bin/env bash
# Align the reads of one SRA accession against a reference genome and upload
# the resulting BAM file to S3.
#
# Usage: run <SRA> <Genome>
#   SRA     accession passed to prefetch / fastq-dump
#   Genome  name of the bowtie2 index; its files are fetched from
#           s3://serratus-public/seq/<Genome>/ into the current directory
#
# Environment:
#   FQMAX   value passed to `fastq-dump -X` (default 100000000)
#
# Exits 1 with a usage message unless exactly two arguments are given. Because
# of `set -euo pipefail`, any failing command or pipeline stage aborts the run.
set -euo pipefail

if [[ $# -ne 2 ]]; then
  echo "Usage: $0 <SRA> <Genome>"
  echo 'Run the container with docker run <img> run <SRA> <Genome>'
  exit 1
fi

SRA=$1
GENOME=$2

# May be overridden via environment
FQMAX=${FQMAX:-100000000}

# Fetch the reference files for this genome into the working directory.
aws s3 cp --recursive "s3://serratus-public/seq/${GENOME}/" .

# Prefetch the data before processing
# This should be VERY fast, and will cause fastq-dump to have
# smoother CPU usage in the end.
prefetch "$SRA"

# Stream reads through the whole pipeline over stdin/stdout (no intermediate
# files): fastq-dump -> bowtie2 -> summarizer -> samtools, ending in out.bam.
# FQMAX is passed to -X here; previously a hard-coded negative number was used
# and the FQMAX override had no effect.
fastq-dump -X "$FQMAX" -Z "$SRA" \
  | bowtie2 -x "$GENOME" --very-sensitive-local --no-unal -U /dev/stdin \
  | python3 summarizer.py /dev/stdin "$GENOME.sumzer.tsv" "$SRA.summary" /dev/stdout \
  | samtools view -b > out.bam

# Upload the BAM to S3 under a timestamped name.
# `aws s3 cp` takes <source> <destination>: the local file must come first,
# otherwise the command tries to download over the freshly written out.bam.
S3_OUT="serratus-batch-$(date +%s).bam"
aws s3 cp ./out.bam "s3://public-data/testing-batch-outs/${S3_OUT}"