From 0aa31ed911c1db241aa013f40bac7963e778b9c4 Mon Sep 17 00:00:00 2001 From: Ivan Blagoev Topolsky Date: Sun, 8 Oct 2023 15:17:27 +0200 Subject: [PATCH] Split out common installation from tutorials - installation tutorial with more details about the exact paths used (following user feedback) - installation is saved as artifact and re-used in subsequent tutorials --- .github/workflows/tutorials.yaml | 28 ++++++-- docs/tutorial_0_install.md | 106 +++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+), 5 deletions(-) create mode 100644 docs/tutorial_0_install.md diff --git a/.github/workflows/tutorials.yaml b/.github/workflows/tutorials.yaml index 60390453..378f0a9e 100644 --- a/.github/workflows/tutorials.yaml +++ b/.github/workflows/tutorials.yaml @@ -9,7 +9,7 @@ concurrency: cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} jobs: - build_n_push: + installer: runs-on: ubuntu-latest defaults: @@ -20,6 +20,10 @@ jobs: - name: Checkout repository uses: actions/checkout@v3 with: + sparse-checkout: | + docs + tests + sparse-checkout-cone-mode: false lfs: false - name: Install conda environment dependencies @@ -29,7 +33,7 @@ jobs: miniforge-variant: Mambaforge python-version: "3.11" mamba-version: "*" - channels: conda-forge,bioconda #,defaults # see: https://github.com/conda-forge/status/issues/144 + channels: conda-forge,bioconda channel-priority: strict activate-environment: JupyText auto-update-conda: true @@ -39,14 +43,28 @@ jobs: run: | python -m ipykernel install --user - - name: Run notebooks + - name: Run installation notebooks working-directory: ./docs - run: ./convert.sh branch + run: ./convert.sh --branch *_install.md - - uses: actions/upload-artifact@v3 + - name: Create V-pipe installation archive + # HACK this acceleraters upload while preserving Unix-specifics (case sensitivity, file attributes). + run: tar -cvf vpipeinstallation.tar ./docs/vp-analysis + + - name: Keep installation + uses: actions/upload-artifact@v3 + with: + name: VPipeInstallation + path: vpipeinstallation.tar + if-no-files-found: error + + - name: Save notebooks + uses: actions/upload-artifact@v3 with: name: JupyterNotebooks path: ./docs/*.ipynb + + # - name: Publish # uses: peaceiris/actions-gh-pages@v3 # with: diff --git a/docs/tutorial_0_install.md b/docs/tutorial_0_install.md new file mode 100644 index 00000000..eac09eaf --- /dev/null +++ b/docs/tutorial_0_install.md @@ -0,0 +1,106 @@ +--- +jupyter: + jupytext: + cell_metadata_filter: -all + formats: ipynb,md + text_representation: + extension: .md + format_name: markdown + format_version: '1.3' + jupytext_version: 1.14.5 + kernelspec: + display_name: Python 3 + language: python + name: python3 +--- + + +# V-Pipe Installation + +V-pipe is a workflow designed for the analysis of next generation sequencing (NGS) data from viral pathogens. It produces a number of results in a curated format (e.g., consensus sequences, SNV calls, local/global haplotypes). V-pipe is written using the Snakemake workflow management system. + +The present tutorial will show you how to install V-pipe and the dependencies required to start using it - bioconda, conda-froge mamba and snakemake - before continuing with other tutorials and analyse virus data. + +## Requirements + +V-pipe is optimized for Linux or Mac OS systems, and bioconda isn't supported on Windows. Therefore, we recommend users with a Windows system to [install WSL2](https://learn.microsoft.com/en-us/windows/wsl/install) - this is not a full virtual machine but rather a way to run Windows and Linux cooperatively at the same time. + + +## Organizing Software + +We will organise our software in the following tree structure, which will be reused in all subsequent tutorials: + +```text +πŸ“ [HOME] +β””β”€β”€β”€πŸ“vp-analysis + β”œβ”€β”€β”€πŸ“V-pipe # V-pipe checked out from Github + β”œβ”€β”€β”€πŸ“Miniforge3 # bioconda + conda-forge + mamba + Snakemake + β”œβ”€β”€β”€πŸ“work # work directories + β”œβ”€β”€β”€πŸ“work-tests # … + β””β”€β”€β”€πŸ“ … # … +``` + +- `vp-analysis` is the main directory where we will store everything. +- `Miniforge3` is the directory where conda will be installed including the dependencies to start using V-pipe. +- `V-pipe` is the directory where V-pipe's own code will be downloaded from GitHub +- finally, each analysis of virus data will be performed into directory like `work…`, which holds the configuration and the sequencing data for that particular analysis. + + +## Install V-pipe and conda from scratch + +V-pipe uses the [Bioconda](https://bioconda.github.io/) bioinformatics software repository for all its pipeline components. The pipeline itself is implemented using [Snakemake](https://snakemake.readthedocs.io/en/stable/). + +For advanced users: If your are fluent with these tools, see [below](#fluent-users) + +In this present short tutorial you will learn how to setup a workflow for the various examples in the analysis tutorials. + +To deploy V-pipe, you can use the installation script with the following parameters: + +```bash +curl -O 'https://raw.githubusercontent.com/cbg-ethz/V-pipe/master/utils/quick_install.sh' +bash quick_install.sh -p vp-analysis -w work +``` + +> **Note**: +> * using `-p` specifies the subdirectory where to download and install snakemake and V-pipe +> * using `-w` will create a working directory and populate it. It will colloquial the references and the default `config/config.yaml`, and create a handy `vpipe` short-cut script to invoke `snakemake`. +> * an additional option `-b` (not demonstrated above) allows to install a spefic branch or tagged version. If nothing is specified, the master branch will be installed. + +If you get `zsh: permission denied: ./quick_install.sh`, run `chmod +x quick_install.sh` this gives the necessary permissions. + + +**Tip:** To create and populate other new working directories, you can call init_project.sh from within the new directory: + +```bash +cd vp-analysis/ + +mkdir -p working_2 +cd working_2 +../V-pipe/init_project.sh + +# now edit config.yaml, samples.tsv, run your analysis etc. + +cd - +``` + +### Analyse data + +Now that you have setup the software necessary to start using V-pipe, you can follow with one of the tutorials showing you analysis of viral sequencing data: + +- [tutorial_hiv.md](tutorial_hiv.md): uses HIV test data +- [tutorial_sarscov2.md](tutorial_sarscov2.md): uses SARS-CoV-2 data from a publication + + +## Fluent users + +For advanced users: If your are fluent with these tools, you can: + +* directly download and install [bioconda](https://bioconda.github.io/user/install.html) and [snakemake](https://snakemake.readthedocs.io/en/stable/getting_started/installation.html#installation-via-conda), +* specifiy your V-pipe configuration, and start using V-pipe + +Use `--use-conda` to [automatically download and install](https://snakemake.readthedocs.io/en/stable/snakefiles/deployment.html#integrated-package-management) any further pipeline dependencies. Please refer to the documentation for additional instructions. + + +### Reusing an existing conda installation + +[TO BE CONTINUED]