Skip to content

Commit

Permalink
Merge pull request #31 from poeli/master
Browse files Browse the repository at this point in the history
add long-read support and update docker image to accommodate long-reads
  • Loading branch information
vlilanl authored Jan 15, 2025
2 parents 149a6aa + a75fee2 commit 38d8f2f
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 46 deletions.
27 changes: 12 additions & 15 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ FROM continuumio/miniconda3:latest

LABEL developer="Po-E Li"
LABEL email="[email protected]"
LABEL version="1.0.5"
LABEL version="1.0.8"
LABEL software="nmdc_taxa_profilers"
LABEL tags="metagenome, bioinformatics, NMDC, taxonomy"

ENV container docker
ENV container=docker

# system updates
RUN apt-get update --allow-releaseinfo-change \
Expand All @@ -18,32 +18,29 @@ RUN conda config --add channels conda-forge \
&& conda config --add channels bioconda

# install singlem
RUN wget https://github.com/wwood/singlem/archive/refs/tags/v0.15.0.tar.gz \
&& tar -xzf v0.15.0.tar.gz
RUN conda env create -n singlem -f singlem-0.15.0/singlem.yml \
&& ln -s ${PWD}/singlem-0.15.0/bin/* /opt/conda/envs/singlem/bin/
RUN rm -f v0.15.0.tar.gz
RUN conda create -n singlem singlem \
&& conda clean --all -y

# install gottcha2
RUN wget https://github.com/poeli/GOTTCHA2/archive/refs/tags/2.1.8.5.tar.gz \
&& tar -xzf 2.1.8.5.tar.gz
RUN conda env create -n gottcha2 -f GOTTCHA2-2.1.8.5/environment.yml \
&& cp GOTTCHA2-2.1.8.5/gottcha/scripts/*.py /usr/local/bin
RUN rm -rf GOTTCHA2-2.1.8.5/ 2.1.8.5.tar.gz
RUN conda create -n gottcha2 gottcha2=2.1.8.8 \
&& conda clean --all -y

# install kraken2
RUN conda create -n kraken2 kraken2=2.1.2
RUN conda create -n kraken2 kraken2=2.1.2 \
&& conda clean --all -y

# install centrifuge
RUN conda create -n centrifuge centrifuge=1.0.4_beta
RUN conda create -n centrifuge centrifuge=1.0.4_beta \
&& conda clean --all -y

# install krona
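# curl is installed together with krona; presumably ktUpdateTaxonomy.sh needs it to download the taxonomy files (TODO confirm)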
RUN conda install curl krona \
&& conda clean --all -y \
&& ktUpdateTaxonomy.sh

# install additional libs
RUN conda install pandas click
RUN conda install pandas click && conda clean --all -y
ADD *.py /opt/conda/bin/

CMD ["/bin/bash"]
4 changes: 3 additions & 1 deletion ReadbasedAnalysis.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ workflow ReadbasedAnalysis {
String proj
String prefix = sub(proj, ":", "_")
Boolean? paired = false
Boolean? long_read = false
String bbtools_container = "microbiomedata/bbtools:38.96"
String docker = "microbiomedata/nmdc_taxa_profilers:1.0.5"
String docker = "microbiomedata/nmdc_taxa_profilers:1.0.8"
}

call stage {
Expand All @@ -30,6 +31,7 @@ workflow ReadbasedAnalysis {
input: READS = stage.reads,
DB = db_gottcha2,
PREFIX = prefix,
LONG_READ = long_read,
CPU = cpu,
DOCKER = docker
}
Expand Down
60 changes: 31 additions & 29 deletions ReadbasedAnalysisTasks.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ task profilerGottcha2 {
String DB
String PREFIX
String? RELABD_COL = "ROLLUP_DOC"
Boolean? LONG_READ = false
String DOCKER
Int? CPU = 4
}
command <<<

set -euo pipefail
. /opt/conda/etc/profile.d/conda.sh
conda activate gottcha2
Expand All @@ -20,11 +20,13 @@ task profilerGottcha2 {
-t ~{CPU} \
-o . \
-p ~{PREFIX} \
--database ~{DB}

grep "^species" ~{PREFIX}.tsv | ktImportTaxonomy -t 3 -m 9 -o ~{PREFIX}.krona.html - || true
--database ~{DB} \
~{true="-np" false="" LONG_READ}

grep "^species" ~{PREFIX}.tsv | ktImportTaxonomy -t 3 -m 9 -o ~{PREFIX}.krona.html - || true

gottcha2.py --version > ~{PREFIX}.info
touch ~{PREFIX}.full.tsv
>>>
output {
File report_tsv = "~{PREFIX}.tsv"
Expand Down Expand Up @@ -138,31 +140,31 @@ task profilerKraken2 {
}
}

# Task: generateSummaryJson
# Runs outputTsv2json.py on the metadata passed in TSV_META_JSON and stores
# the script's standard output in "<PREFIX>.json".
task generateSummaryJson {
input {
# Metadata entries (each an optional String->String map); serialized to a
# JSON file with write_json() and handed to the script through --meta.
Array[Map[String, String]?] TSV_META_JSON
# Basename of the JSON file this task produces.
String PREFIX
# Container image used in the runtime section below.
String DOCKER
}

command {
outputTsv2json.py --meta ~{write_json(TSV_META_JSON)} > ~{PREFIX}.json
}
output {
# The file written by the shell redirection in the command section.
File summary_json = "~{PREFIX}.json"
}
runtime {
docker: DOCKER
# NOTE(review): node / nwpn look like backend-specific scheduling hints;
# their exact meaning is not visible here - confirm against the backend config.
node: 1
nwpn: 1
memory: "45G"
time: "04:00:00"
}
meta {
author: "Po-E Li, B10, LANL"
email: "[email protected]"
}
}
# task generateSummaryJson {
# input {
# Array[Map[String, String]?] TSV_META_JSON
# String PREFIX
# String DOCKER
# }
# command {
# outputTsv2json.py --meta ~{write_json(TSV_META_JSON)} > ~{PREFIX}.json
# }
# output {
# File summary_json = "~{PREFIX}.json"
# }
# runtime {
# docker: DOCKER
# node: 1
# nwpn: 1
# memory: "45G"
# time: "04:00:00"
# }
# meta {
# author: "Po-E Li, B10, LANL"
# email: "[email protected]"
# }
# }
task stage {
input {
Expand Down
1 change: 1 addition & 0 deletions ReadbasedAnalysis_inputs.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"ReadbasedAnalysis.input_file": "https://nmdc-edge.org/projects/KUYAOFKQW2mZJFBc/output/ReadsQC/SRR7877884-int-0.1/SRR7877884-int-0.1.anqdpht.fastq.gz",
"ReadbasedAnalysis.paired": false,
"ReadbasedAnalysis.long_read": false,
"ReadbasedAnalysis.prefix": "TEST",
"ReadbasedAnalysis.cpu": 8,
"ReadbasedAnalysis.proj": "TEST"
Expand Down
2 changes: 1 addition & 1 deletion options.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"default_runtime_attributes": {
"docker": "microbiomedata/nmdc_taxa_profilers:1.0.5"
"docker": "microbiomedata/nmdc_taxa_profilers:1.0.8"
}
}

0 comments on commit 38d8f2f

Please sign in to comment.