Skip to content

Commit

Permalink
Update dockerfile, dependencies, and tests
Browse files (browse the repository at this point in the history)
  • Loading branch information
ckandoth committed Dec 7, 2020
1 parent 7586cfa commit ba6bef5
Show file tree
Hide file tree
Showing 14 changed files with 150 additions and 58 deletions.
6 changes: 4 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@ install:
- docker image prune -f
- wget -P tests https://data.cyri.ac/Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz
- gzip -d tests/Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz
- wget -P tests https://data.cyri.ac/homo_sapiens_vep_101_GRCh38_chr21.tar.gz
- tar -zxf tests/homo_sapiens_vep_101_GRCh38_chr21.tar.gz -C tests
- wget -P tests https://data.cyri.ac/homo_sapiens_vep_102_GRCh38_chr21.tar.gz
- tar -zxf tests/homo_sapiens_vep_102_GRCh38_chr21.tar.gz -C tests

script:
- perl tests/vcf2maf.t
- perl tests/vcf2vcf.t
- perl tests/maf2vcf.t
28 changes: 15 additions & 13 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,32 +1,34 @@
FROM clearlinux:latest AS builder

# Install a minimal versioned OS into /install_root, and bundled tools if any
ENV CLEAR_VERSION=33910
ENV CLEAR_VERSION=33980
RUN swupd os-install --no-progress --no-boot-update --no-scripts \
--version ${CLEAR_VERSION} \
--path /install_root \
--statedir /swupd-state \
--bundles os-core-update,which

# Download and install conda into /usr/bin
ENV MINICONDA_VERSION=py37_4.8.3
ENV MINICONDA_VERSION=py37_4.9.2
RUN swupd bundle-add --no-progress curl && \
curl -sL https://repo.anaconda.com/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh -o /tmp/miniconda.sh && \
sh /tmp/miniconda.sh -bfp /usr

# Use conda to install remaining tools/dependencies into /usr/local
ENV VEP_VERSION=101.0 \
HTSLIB_VERSION=1.9 \
BCFTOOLS_VERSION=1.9 \
SAMTOOLS_VERSION=1.9
ENV VEP_VERSION=102.0 \
HTSLIB_VERSION=1.10.2 \
BCFTOOLS_VERSION=1.10.2 \
SAMTOOLS_VERSION=1.10 \
LIFTOVER_VERSION=377
RUN conda create -qy -p /usr/local \
-c conda-forge \
-c bioconda \
-c defaults \
ensembl-vep==${VEP_VERSION} \
htslib==${HTSLIB_VERSION} \
bcftools==${BCFTOOLS_VERSION} \
samtools==${SAMTOOLS_VERSION}
-c conda-forge \
-c bioconda \
-c defaults \
ensembl-vep==${VEP_VERSION} \
htslib==${HTSLIB_VERSION} \
bcftools==${BCFTOOLS_VERSION} \
samtools==${SAMTOOLS_VERSION} \
ucsc-liftover==${LIFTOVER_VERSION}

# Deploy the minimal OS and tools into a clean target layer
FROM scratch
Expand Down
4 changes: 2 additions & 2 deletions maf2maf.pl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
my ( $tum_depth_col, $tum_rad_col, $tum_vad_col ) = qw( t_depth t_ref_count t_alt_count );
my ( $nrm_depth_col, $nrm_rad_col, $nrm_vad_col ) = qw( n_depth n_ref_count n_alt_count );
my ( $vep_path, $vep_data, $vep_forks, $buffer_size, $any_allele ) = ( "$ENV{HOME}/miniconda3/bin", "$ENV{HOME}/.vep", 4, 5000, 0 );
my ( $ref_fasta, $filter_vcf ) = ( "$ENV{HOME}/.vep/homo_sapiens/101_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz", "" );
my ( $ref_fasta, $filter_vcf ) = ( "$ENV{HOME}/.vep/homo_sapiens/102_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz", "" );
my ( $species, $ncbi_build, $cache_version, $maf_center, $max_filter_ac ) = ( "homo_sapiens", "GRCh37", "", ".", 10 );
my $perl_bin = $Config{perlpath};

Expand Down Expand Up @@ -385,7 +385,7 @@ =head1 OPTIONS
--species Ensembl-friendly name of species (e.g. mus_musculus for mouse) [homo_sapiens]
--ncbi-build NCBI reference assembly of variants in MAF (e.g. GRCm38 for mouse) [GRCh37]
--cache-version Version of offline cache to use with VEP (e.g. 75, 84, 91) [Default: Installed version]
--ref-fasta Reference FASTA file [~/.vep/homo_sapiens/101_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz]
--ref-fasta Reference FASTA file [~/.vep/homo_sapiens/102_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz]
--help Print a brief help message and quit
--man Print the detailed manual
Expand Down
4 changes: 2 additions & 2 deletions maf2vcf.pl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
use Pod::Usage qw( pod2usage );

# Set any default paths and constants
my $ref_fasta = "$ENV{HOME}/.vep/homo_sapiens/91_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa.gz";
my $ref_fasta = "$ENV{HOME}/.vep/homo_sapiens/102_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz";
my ( $tum_depth_col, $tum_rad_col, $tum_vad_col ) = qw( t_depth t_ref_count t_alt_count );
my ( $nrm_depth_col, $nrm_rad_col, $nrm_vad_col ) = qw( n_depth n_ref_count n_alt_count );

Expand Down Expand Up @@ -352,7 +352,7 @@ =head1 OPTIONS
--input-maf Path to input file in MAF format
--output-dir Path to output directory where VCFs will be stored, one per TN-pair
--output-vcf Path to output multi-sample VCF containing all TN-pairs [<output-dir>/<input-maf-name>.vcf]
--ref-fasta Path to reference Fasta file [~/.vep/homo_sapiens/91_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa.gz]
--ref-fasta Path to reference Fasta file [~/.vep/homo_sapiens/102_GRCh37/Homo_sapiens.GRCh37.dna.toplevel.fa.gz]
--per-tn-vcfs Specify this to generate VCFs per-TN pair, in addition to the multi-sample VCF
--tum-depth-col Name of MAF column for read depth in tumor BAM [t_depth]
--tum-rad-col Name of MAF column for reference allele depth in tumor BAM [t_ref_count]
Expand Down
4 changes: 2 additions & 2 deletions tests/maf2maf.t
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ chdir $script_dir;

# Set the number of tests we'll run, and run them
use Test::Simple tests => 8;
ok( system( "perl maf2maf.pl --help > /dev/null" ) == 0 );
ok( system( "perl maf2maf.pl --man > /dev/null" ) == 0 );
ok( system( "docker run --rm vcf2maf:master perl maf2maf.pl --help > /dev/null" ) == 0 );
ok( system( "docker run --rm vcf2maf:master perl maf2maf.pl --man > /dev/null" ) == 0 );

# Test standard operation, diff, and cleanup
ok( system( "perl maf2maf.pl --input-maf tests/test.maf --output-maf tests/test_output.vep_isoforms.new.maf" ) == 0 );
Expand Down
17 changes: 6 additions & 11 deletions tests/maf2vcf.t
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,11 @@ my $script_dir = dirname( $test_dir );
chdir $script_dir;

# Set the number of tests we'll run, and run them
use Test::Simple tests => 6;
ok( system( "perl maf2vcf.pl --help > /dev/null" ) == 0 );
ok( system( "perl maf2vcf.pl --man > /dev/null" ) == 0 );
use Test::Simple tests => 4;
ok( system( "docker run --rm vcf2maf:master perl maf2vcf.pl --help > /dev/null" ) == 0 );
ok( system( "docker run --rm vcf2maf:master perl maf2vcf.pl --man > /dev/null" ) == 0 );

# Test standard operation, diff, and cleanup
ok( system( "perl maf2vcf.pl --input-maf tests/test.maf --output-dir tests --output-vcf tests/test_maf2vcf.new.vcf" ) == 0 );
ok( system( "diff tests/test_maf2vcf.vcf tests/test_maf2vcf.new.vcf" ) == 0 );
system( "rm -f tests/test_maf2vcf.new.vcf tests/test.pairs.tsv" );

# Test standard operation with the TSV file with minimal MAF columns, diff, and cleanup
ok( system( "perl maf2vcf.pl --input-maf tests/test.tsv --output-dir tests --output-vcf tests/test_maf2vcf.new.vcf" ) == 0 );
ok( system( "diff tests/test_maf2vcf.vcf tests/test_maf2vcf.new.vcf" ) == 0 );
system( "rm -f tests/test_maf2vcf.new.vcf tests/test.pairs.tsv" );
ok( system( "docker run --rm -v $test_dir:/opt/tests vcf2maf:master perl maf2vcf.pl --input-maf tests/test_b38_output.maf --output-dir tests --output-vcf tests/test_b38.new.vcf --ref-fasta tests/Homo_sapiens.GRCh38.dna.chromosome.21.fa" ) == 0 );
ok( system( "bash -c 'diff <(cat tests/test_b38.vcf) <(grep -v ^##reference tests/test_b38.new.vcf)'" ) == 0 );
system( "rm -f tests/test_b38.new.vcf tests/test_b38_output.pairs.tsv" );
31 changes: 31 additions & 0 deletions tests/test_b37.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
##fileformat=VCFv4.2
##fileDate=20201207
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic Depths of REF and ALT(s) in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT TUMOR NORMAL
21 36164589 . A C . . GT:AD:DP 0/1:.,21:54 0/0:.,.:0
21 36164775 . C T . . GT:AD:DP 0/1:778,123:901 0/0:482,0:482
21 36252846 . T C . . GT:AD:DP 0/1:450,54:504 0/0:.,.:0
21 36252852 . A ACCTCTT . . GT:AD:DP 0/1:658,225:883 0/0:343,0:343
21 36252892 . C T . . GT:AD:DP 0/1:788,74:862 0/0:522,1:523
21 36252947 . T TG . . GT:AD:DP 0/1:449,162:611 0/0:527,1:528
21 36259208 . G A . . GT:AD:DP 0/1:34,17:51 0/0:.,.:0
21 37416080 . C G . . GT:AD:DP 0/1:1010,130:1140 0/0:975,0:975
21 39755547 . T C . . GT:AD:DP 0/1:329,23:352 0/0:.,.:0
21 39772519 . G A . . GT:AD:DP 0/1:892,307:1199 0/0:566,1:567
21 39772528 . G T . . GT:AD:DP 0/1:190,153:343 0/0:507,0:507
21 39775581 . C T . . GT:AD:DP 0/1:149,138:287 0/0:.,.:0
21 39947608 . G A . . GT:AD:DP 0/1:92,99:191 0/0:.,.:0
21 39947622 . C T . . GT:AD:DP 0/1:223,96:331 0/0:442,0:449
21 42851109 . GT G . . GT:AD:DP 0/1:435,77:512 0/0:695,0:695
21 42851146 . C T . . GT:AD:DP 0/1:782,373:1224 0/0:996,0:1044
21 42860421 . G T . . GT:AD:DP 0/1:350,30:380 0/0:.,.:0
21 42866477 . T C . . GT:AD:DP 0/1:260,96:356 0/0:781,0:781
21 43505435 . C T . . GT:AD:DP 0/1:193,173:366 0/0:.,.:0
21 44513238 . C T . . GT:AD:DP 0/1:425,29:454 0/0:344,0:344
21 44524485 . A G . . GT:AD:DP 0/1:607,60:667 0/0:.,.:0
21 44838997 . C T . . GT:AD:DP 0/1:.,45:113 0/0:.,.:0
21 45655257 . C A . . GT:AD:DP 0/1:88,13:101 0/0:153,0:153
21 45656786 . C T . . GT:AD:DP 0/1:827,123:950 0/0:629,1:630
21 45656994 . C T . . GT:AD:DP 0/1:745,199:944 0/0:596,0:596
32 changes: 27 additions & 5 deletions tests/test_b38.vcf
Original file line number Diff line number Diff line change
@@ -1,9 +1,31 @@
##fileformat=VCFv4.2
##contig=<ID=21,length=46709983>
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=AD,Number=G,Type=Integer,Description="Allelic Depths of REF and ALT(s) in the order listed">
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic Depths of REF and ALT(s) in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT TUMOR NORMAL
21 43094659 . T TCTCATA . . . GT:AD:DP 0/1:10,10:20 0/0:11,0:11
21 43094660 . C CTCATAC . . . GT:AD:DP 0/1:10,10:20 0/0:11,0:11
21 43094667 . T C . . . GT:AD:DP 0/1:10,10:20 0/0:11,0:11
21 43104346 . G T . . . GT:AD:DP 0/1:10,10:20 0/0:11,0:11
21 34792292 . A C . . . GT:AD:DP 0/1:.,21:54 0/0:.,.:0
21 34792478 . C T . . . GT:AD:DP 0/1:778,123:901 0/0:482,0:482
21 34880549 . T C . . . GT:AD:DP 0/1:450,54:504 0/0:.,.:0
21 34880555 . A ACCTCTT . . . GT:AD:DP 0/1:658,225:883 0/0:343,0:343
21 34880595 . C T . . . GT:AD:DP 0/1:788,74:862 0/0:522,1:523
21 34880650 . T TG . . . GT:AD:DP 0/1:449,162:611 0/0:527,1:528
21 34886911 . G A . . . GT:AD:DP 0/1:34,17:51 0/0:.,.:0
21 36043782 . C G . . . GT:AD:DP 0/1:1010,130:1140 0/0:975,0:975
21 38383625 . T C . . . GT:AD:DP 0/1:329,23:352 0/0:.,.:0
21 38400597 . G A . . . GT:AD:DP 0/1:892,307:1199 0/0:566,1:567
21 38400606 . G T . . . GT:AD:DP 0/1:190,153:343 0/0:507,0:507
21 38403659 . C T . . . GT:AD:DP 0/1:149,138:287 0/0:.,.:0
21 38575684 . G A . . . GT:AD:DP 0/1:92,99:191 0/0:.,.:0
21 38575698 . C T . . . GT:AD:DP 0/1:223,96:331 0/0:442,0:449
21 41479182 . GT G . . . GT:AD:DP 0/1:435,77:512 0/0:695,0:695
21 41479219 . C T . . . GT:AD:DP 0/1:782,373:1224 0/0:996,0:1044
21 41488494 . G T . . . GT:AD:DP 0/1:350,30:380 0/0:.,.:0
21 41494550 . T C . . . GT:AD:DP 0/1:260,96:356 0/0:781,0:781
21 42085325 . C T . . . GT:AD:DP 0/1:193,173:366 0/0:.,.:0
21 43093128 . C T . . . GT:AD:DP 0/1:425,29:454 0/0:344,0:344
21 43104375 . A G . . . GT:AD:DP 0/1:607,60:667 0/0:.,.:0
21 43419117 . C T . . . GT:AD:DP 0/1:.,45:113 0/0:.,.:0
21 44235374 . C A . . . GT:AD:DP 0/1:88,13:101 0/0:153,0:153
21 44236903 . C T . . . GT:AD:DP 0/1:827,123:950 0/0:629,1:630
21 44237111 . C T . . . GT:AD:DP 0/1:745,199:944 0/0:596,0:596
Loading

0 comments on commit ba6bef5

Please sign in to comment.