diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3ddc80d9..b7e07eca 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -43,9 +43,9 @@ jobs: - name: Run RAPID pipeline with test data # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} -entry RAPID -profile test,docker --outdir ./results-rapid + nextflow run ${GITHUB_WORKSPACE} -entry RAPID -profile test_github,docker --outdir ./results-rapid - name: Run FULL pipeline with test data # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results-full + nextflow run ${GITHUB_WORKSPACE} -profile test_github,docker --outdir ./results-full diff --git a/CITATIONS.md b/CITATIONS.md index 8cfb197f..d179e501 100755 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -98,6 +98,14 @@ > Wright, C. et al. 2023. Chromosome evolution in Lepidoptera. bioRxiv. 540473. https://doi.org/10.1101/2023.05.12.540473 +- [Java](https://docs.oracle.com/javase/8/docs/api/overview-summary.html) + + > Oracle. 2023. Java Documentation. https://docs.oracle.com/javase/8/docs/index.html. (Accessed on 25th September 2023). + +- [coreutils](https://github.com/coreutils/coreutils) + + > GNU Coreutils. 2023. coreutils [online]. https://github.com/coreutils/coreutils/releases/tag/v9.4. (Accessed on 25th September 2023). 
+ ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/assets/local_testing/nxOscSUBSET.yaml b/assets/local_testing/nxOscSUBSET.yaml index 6284a4ce..7f7fab4c 100755 --- a/assets/local_testing/nxOscSUBSET.yaml +++ b/assets/local_testing/nxOscSUBSET.yaml @@ -12,7 +12,7 @@ assem_reads: hic: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/hic-arima2/subset/ supplementary: path alignment: - data_dir: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_SUBSET/gene_set/ + data_dir: /lustre/scratch123/tol/resources/treeval/gene_alignment_data/ common_name: "" # For future implementation (adding bee, wasp, ant etc) geneset: "Gae_host.Gae" #Path should end up looking like "{data_dir}{classT}/{common_name}/csv_data/{geneset}-data.csv" diff --git a/conf/modules.config b/conf/modules.config index f64c6f75..4a34280d 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -11,7 +11,6 @@ */ process { - withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, diff --git a/conf/test.config b/conf/test.config index 9966e48d..ebe8c6bc 100755 --- a/conf/test.config +++ b/conf/test.config @@ -14,15 +14,10 @@ */ params { - config_profile_name = 'GitHub FULL test' - config_profile_description = 'FULL Test Data for GitHub Actions test' - - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' + config_profile_name = "Test profile" + config_profile_description = "Minimal test dataset to check pipeline function" // Input data - input = "${projectDir}/assets/github_testing/TreeValTinyTest.yaml" + input = "${projectDir}/assets/local_testing/nxOscSUBSET.yaml" outdir = "TinyTest" } diff --git a/conf/test_full.config b/conf/test_full.config index deb9aac9..8356bbbe 100755 --- a/conf/test_full.config +++ 
b/conf/test_full.config @@ -16,9 +16,10 @@ cleanup = true params { - config_profile_name = "FULL local test profile" - config_profile_description = "FULL test dataset to check pipeline function, using a current full local dataset" + config_profile_name = "FULL local test profile" + config_profile_description = "FULL test dataset to check pipeline function, using a current full local dataset" - input = "${projectDir}/assets/local_testing/nxOscDF5033.yaml" - outdir = "SmallTest" + // Input data + input = "${projectDir}/assets/local_testing/nxOscDF5033.yaml" + outdir = "SmallTest" } diff --git a/conf/test_github.config b/conf/test_github.config new file mode 100755 index 00000000..36e4791b --- /dev/null +++ b/conf/test_github.config @@ -0,0 +1,34 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + First download the test databases and edit the yaml file to match the download path. 
+ + Use as follows: + nextflow run sanger-tol/treeval -profile test_github,singularity + + On LSF / tol farm: + bsub -Is -tty -e error -o out -n 2 -q oversubscribed -M4000 -R'select[mem>4000] rusage[mem=4000] span[hosts=1]' 'nextflow run main.nf -profile test_github,singularity,sanger' + +---------------------------------------------------------------------------------------- +*/ + +process { + maxForks = 1 +} + +params { + config_profile_name = "GitHub Test profile" + config_profile_description = "Minimal test dataset to check pipeline function on GitHub" + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = "${projectDir}/assets/github_testing/TreeValTinyTest.yaml" + outdir = "TinyTest" +} diff --git a/docs/usage.md b/docs/usage.md index 42965d4b..5c0e969d 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -16,7 +16,31 @@ The TreeVal pipeline has a few requirements before being able to run: ## Prior to running TreeVal -:warning: Please ensure you read the following sections on Directory Strucutre (`gene_alignment_data`, `synteny`, scripts), HiC data prep and Pacbio data prep. Without these you may not be able to successfully run the TreeVal pipeline. If nothing is clear then leave an issue report. +:warning: Please ensure you read the following sections on Directory Structure (`gene_alignment_data`, `synteny`, scripts), HiC data prep and Pacbio data prep. Without these you may not be able to successfully run the TreeVal pipeline. If anything is unclear, please open an issue. + +### Local testing + +
+ Details + +We provide a complete set of test databases that can be used to test the pipeline locally. + +First, choose a download location `${TREEVAL_TEST_DATA}` and run this command: + +``` +cd ${TREEVAL_TEST_DATA} +curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf - +sed -i "s|/home/runner/work/treeval/treeval|${TREEVAL_TEST_DATA}|" TreeValTinyData/gene_alignment_data/fungi/csv_data/LaetiporusSulphureus.gfLaeSulp1-data.csv +``` + +Then, modify the configuration file to point at that download location and off you go: + +``` +sed -i "s|/home/runner/work/treeval/treeval|${TREEVAL_TEST_DATA}|" assets/github_testing/TreeValTinyTest.yaml +nextflow run . -profile test_github,singularity +``` + +
### Directory Structure diff --git a/nextflow.config b/nextflow.config index 924fcff7..d8756676 100755 --- a/nextflow.config +++ b/nextflow.config @@ -153,7 +153,8 @@ profiles { } test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } + test_github { includeConfig 'conf/test_github.config' } + test_full { includeConfig 'conf/test_full.config' } }