From 271ee00fa4916a4f345fa92c5516d6662f2a7d2d Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 26 Sep 2023 09:16:57 +0000 Subject: [PATCH 1/7] Fixed the data path --- assets/local_testing/nxOscSUBSET.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/local_testing/nxOscSUBSET.yaml b/assets/local_testing/nxOscSUBSET.yaml index 6284a4ce..7f7fab4c 100755 --- a/assets/local_testing/nxOscSUBSET.yaml +++ b/assets/local_testing/nxOscSUBSET.yaml @@ -12,7 +12,7 @@ assem_reads: hic: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/hic-arima2/subset/ supplementary: path alignment: - data_dir: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_SUBSET/gene_set/ + data_dir: /lustre/scratch123/tol/resources/treeval/gene_alignment_data/ common_name: "" # For future implementation (adding bee, wasp, ant etc) geneset: "Gae_host.Gae" #Path should end up looking like "{data_dir}{classT}/{common_name}/csv_data/{geneset}-data.csv" From 27d2e751b038064596dbd04a4dbc0bcabbc0fdf1 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 26 Sep 2023 10:03:02 +0000 Subject: [PATCH 2/7] Reintroduced the github_test profile, and made the test profile work on the farm --- .github/workflows/ci.yml | 4 ++-- conf/test.config | 11 +++-------- conf/test_full.config | 9 +++++---- conf/test_github.config | 30 ++++++++++++++++++++++++++++++ nextflow.config | 3 ++- 5 files changed, 42 insertions(+), 15 deletions(-) create mode 100755 conf/test_github.config diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3ddc80d9..b7e07eca 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -43,9 +43,9 @@ jobs: - name: Run RAPID pipeline with test data # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} -entry RAPID -profile test,docker --outdir ./results-rapid + nextflow run 
${GITHUB_WORKSPACE} -entry RAPID -profile test_github,docker --outdir ./results-rapid - name: Run FULL pipeline with test data # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results-full + nextflow run ${GITHUB_WORKSPACE} -profile test_github,docker --outdir ./results-full diff --git a/conf/test.config b/conf/test.config index 9966e48d..ebe8c6bc 100755 --- a/conf/test.config +++ b/conf/test.config @@ -14,15 +14,10 @@ */ params { - config_profile_name = 'GitHub FULL test' - config_profile_description = 'FULL Test Data for GitHub Actions test' - - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' + config_profile_name = "Test profile" + config_profile_description = "Minimal test dataset to check pipeline function" // Input data - input = "${projectDir}/assets/github_testing/TreeValTinyTest.yaml" + input = "${projectDir}/assets/local_testing/nxOscSUBSET.yaml" outdir = "TinyTest" } diff --git a/conf/test_full.config b/conf/test_full.config index deb9aac9..8356bbbe 100755 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -16,9 +16,10 @@ cleanup = true params { - config_profile_name = "FULL local test profile" - config_profile_description = "FULL test dataset to check pipeline function, using a current full local dataset" + config_profile_name = "FULL local test profile" + config_profile_description = "FULL test dataset to check pipeline function, using a current full local dataset" - input = "${projectDir}/assets/local_testing/nxOscDF5033.yaml" - outdir = "SmallTest" + // Input data + input = "${projectDir}/assets/local_testing/nxOscDF5033.yaml" + outdir = "SmallTest" } diff --git a/conf/test_github.config b/conf/test_github.config new file mode 100755 index 00000000..52f1741b --- /dev/null +++ b/conf/test_github.config @@ -0,0 +1,30 @@ +/* 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + First download the test databases and edit the yaml file to match the download path. + + Use as follows: + nextflow run sanger-tol/treeval -profile test_github,singularity + + On LSF / tol farm: + bsub -Is -tty -e error -o out -n 2 -q oversubscribed -M4000 -R'select[mem>4000] rusage[mem=4000] span[hosts=1]' 'nextflow run main.nf -profile test,singularity,sanger' + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = "GitHub Test profile" + config_profile_description = "Minimal test dataset to check pipeline function on GitHub" + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = "${projectDir}/assets/github_testing/TreeValTinyTest.yaml" + outdir = "TinyTest" +} diff --git a/nextflow.config b/nextflow.config index 924fcff7..d8756676 100755 --- a/nextflow.config +++ b/nextflow.config @@ -153,7 +153,8 @@ profiles { } test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } + test_github { includeConfig 'conf/test_github.config' } + test_full { includeConfig 'conf/test_full.config' } } From ceddbbaf90d48544aeea647ad1c173b291c85e7f Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 26 Sep 2023 10:09:32 +0000 Subject: [PATCH 3/7] typo --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 42965d4b..b85d9b9f 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -16,7 +16,7 @@ The TreeVal pipeline has a few requirements before being able to run: ## Prior to running TreeVal -:warning: 
Please ensure you read the following sections on Directory Strucutre (`gene_alignment_data`, `synteny`, scripts), HiC data prep and Pacbio data prep. Without these you may not be able to successfully run the TreeVal pipeline. If nothing is clear then leave an issue report. +:warning: Please ensure you read the following sections on Directory Structure (`gene_alignment_data`, `synteny`, scripts), HiC data prep and Pacbio data prep. Without these you may not be able to successfully run the TreeVal pipeline. If nothing is clear then leave an issue report. ### Directory Structure From f434f337de76b8888fe5e02f36eeac8ccada0596 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 26 Sep 2023 10:09:45 +0000 Subject: [PATCH 4/7] Added instructions for people to run the test --- docs/usage.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/docs/usage.md b/docs/usage.md index b85d9b9f..60306b84 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -18,6 +18,34 @@ The TreeVal pipeline has a few requirements before being able to run: :warning: Please ensure you read the following sections on Directory Structure (`gene_alignment_data`, `synteny`, scripts), HiC data prep and Pacbio data prep. Without these you may not be able to successfully run the TreeVal pipeline. If nothing is clear then leave an issue report. +### Local testing + +
+<details markdown="1"> +<summary>Details</summary> + +We provide a complete set of test databases that can be used to test the pipeline locally. + +First, choose a download location `${TREEVAL_TEST_DATA}` and run this command: + +``` +cd ${TREEVAL_TEST_DATA} +curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf - +``` + +Then, modify the configuration file to point at that download location: + +``` +sed -i "s|/home/runner/work/treeval/treeval|${TREEVAL_TEST_DATA}|" assets/github_testing/TreeValTinyTest.yaml +``` + +And off you go: + +``` +nextflow run . -profile test_github,singularity +``` + +</details>
+ ### Directory Structure
From 7b9830572a898892a2b3adee8fbf80ee6ca9fc5a Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 26 Sep 2023 10:10:01 +0000 Subject: [PATCH 5/7] Added some missing citations, as realised when reviewing curationpretext --- CITATIONS.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CITATIONS.md b/CITATIONS.md index 8cfb197f..d179e501 100755 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -98,6 +98,14 @@ > Wright, C. et al. 2023. Chromosome evolution in Lepidoptera. bioRxiv. 540473. https://doi.org/10.1101/2023.05.12.540473 +- [Java](https://docs.oracle.com/javase/8/docs/api/overview-summary.html) + + > Oracle. 2023. Java Documentation. https://docs.oracle.com/javase/8/docs/index.html. (Accessed on 25th September 2023). + +- [coreutils](https://github.com/coreutils/coreutils) + + > GNU Coreutils. 2023. coreutils [online]. https://github.com/coreutils/coreutils/releases/tag/v9.4. (Accessed on 25th September 2023). + ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) From 702cf96e8235150a1fb4fbae1c40182eb72c4228 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Wed, 27 Sep 2023 05:45:50 +0000 Subject: [PATCH 6/7] Updated the test instructions, as the GitHub path is hardcoded in the test data too --- docs/usage.md | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 60306b84..5c0e969d 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -30,17 +30,13 @@ First, choose a download location `${TREEVAL_TEST_DATA}` and run this command: ``` cd ${TREEVAL_TEST_DATA} curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf - +sed -i "s|/home/runner/work/treeval/treeval|${TREEVAL_TEST_DATA}|" TreeValTinyData/gene_alignment_data/fungi/csv_data/LaetiporusSulphureus.gfLaeSulp1-data.csv ``` -Then, modify the configuration file to point at that download location: +Then, modify the configuration file to point at that download location and off you 
go: ``` sed -i "s|/home/runner/work/treeval/treeval|${TREEVAL_TEST_DATA}|" assets/github_testing/TreeValTinyTest.yaml -``` - -And off you go: - -``` nextflow run . -profile test_github,singularity ``` From 0092471a3b422a6d9cf1ff13dd603cddd402d604 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 27 Sep 2023 10:37:27 +0100 Subject: [PATCH 7/7] Updating for more control of CI jobs --- conf/modules.config | 1 - conf/test_github.config | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index f64c6f75..4a34280d 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -11,7 +11,6 @@ */ process { - withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, diff --git a/conf/test_github.config b/conf/test_github.config index 52f1741b..36e4791b 100755 --- a/conf/test_github.config +++ b/conf/test_github.config @@ -15,6 +15,10 @@ ---------------------------------------------------------------------------------------- */ +process { + maxForks = 1 +} + params { config_profile_name = "GitHub Test profile" config_profile_description = "Minimal test dataset to check pipeline function on GitHub"