diff --git a/phylogenetic/Snakefile b/phylogenetic/Snakefile index c9feac8..70fda3d 100644 --- a/phylogenetic/Snakefile +++ b/phylogenetic/Snakefile @@ -8,11 +8,14 @@ workdir: workflow.current_basedir # Use default configuration values. Override with Snakemake's --configfile/--config options. configfile: "defaults/config.yaml" -SEGMENTS = ["l", "s"] +segments = ["L", "S"] + +wildcard_constraints: + segment = "|".join(segments) rule all: input: - auspice_tree = expand("auspice/lassa_{segment}.json", segment=SEGMENTS) + auspice_tree = expand("auspice/lassa_{segment}.json", segment=segments) include: "rules/prepare_sequences.smk" include: "rules/construct_phylogeny.smk" diff --git a/phylogenetic/build-configs/ci/copy_example_data.smk b/phylogenetic/build-configs/ci/copy_example_data.smk index 88f517f..0fae62b 100644 --- a/phylogenetic/build-configs/ci/copy_example_data.smk +++ b/phylogenetic/build-configs/ci/copy_example_data.smk @@ -3,8 +3,8 @@ rule copy_example_data: sequences="example_data/sequences_{segment}.fasta", metadata="example_data/metadata_{segment}.tsv", output: - sequences="data/sequences_{segment}.fasta", - metadata="data/metadata_{segment}.tsv", + sequences="data/{segment}/sequences.fasta", + metadata="data/{segment}/metadata.tsv", shell: """ cp -f {input.sequences} {output.sequences} diff --git a/phylogenetic/defaults/lassa_l.gb b/phylogenetic/defaults/lassa_L.gb similarity index 100% rename from phylogenetic/defaults/lassa_l.gb rename to phylogenetic/defaults/lassa_L.gb diff --git a/phylogenetic/defaults/lassa_s.gb b/phylogenetic/defaults/lassa_S.gb similarity index 100% rename from phylogenetic/defaults/lassa_s.gb rename to phylogenetic/defaults/lassa_S.gb diff --git a/phylogenetic/example_data/metadata_l.tsv b/phylogenetic/example_data/metadata_L.tsv similarity index 100% rename from phylogenetic/example_data/metadata_l.tsv rename to phylogenetic/example_data/metadata_L.tsv diff --git a/phylogenetic/example_data/metadata_s.tsv b/phylogenetic/example_data/metadata_S.tsv similarity index 100% rename from phylogenetic/example_data/metadata_s.tsv rename to phylogenetic/example_data/metadata_S.tsv diff --git a/phylogenetic/example_data/sequences_l.fasta b/phylogenetic/example_data/sequences_L.fasta similarity index 100% rename from phylogenetic/example_data/sequences_l.fasta rename to phylogenetic/example_data/sequences_L.fasta diff --git a/phylogenetic/example_data/sequences_s.fasta b/phylogenetic/example_data/sequences_S.fasta similarity index 100% rename from phylogenetic/example_data/sequences_s.fasta rename to phylogenetic/example_data/sequences_S.fasta diff --git a/phylogenetic/rules/annotate_phylogeny.smk b/phylogenetic/rules/annotate_phylogeny.smk index 9951de6..f7e3929 100644 --- a/phylogenetic/rules/annotate_phylogeny.smk +++ b/phylogenetic/rules/annotate_phylogeny.smk @@ -34,10 +34,10 @@ to the ones produced by Augur commands. rule ancestral: """Reconstructing ancestral sequences and mutations""" input: - tree = "results/tree_{segment}.nwk", - alignment = "results/aligned_{segment}.fasta", + tree = "results/{segment}/tree.nwk", + alignment = "results/{segment}/aligned.fasta", output: - node_data = "results/nt_muts_{segment}.json" + node_data = "results/{segment}/nt_muts.json" params: inference = "joint" shell: @@ -52,11 +52,11 @@ rule ancestral: rule translate: """Translating amino acid sequences""" input: - tree = "results/tree_{segment}.nwk", - node_data = "results/nt_muts_{segment}.json", + tree = "results/{segment}/tree.nwk", + node_data = "results/{segment}/nt_muts.json", reference = "defaults/lassa_{segment}.gb" output: - node_data = "results/aa_muts_{segment}.json" + node_data = "results/{segment}/aa_muts.json" shell: """ augur translate \ @@ -69,10 +69,10 @@ rule translate: rule traits: """Inferring ancestral traits for {params.columns!s}""" input: - tree = "results/tree_{segment}.nwk", - metadata = "data/metadata_{segment}.tsv", + tree = "results/{segment}/tree.nwk", + metadata = "data/{segment}/metadata.tsv", output: - node_data = "results/traits_{segment}.json", + node_data = "results/{segment}/traits.json", params: strain_id_field = config["strain_id_field"], columns = config['traits']['columns'] diff --git a/phylogenetic/rules/construct_phylogeny.smk b/phylogenetic/rules/construct_phylogeny.smk index 89fa9e0..010a50c 100644 --- a/phylogenetic/rules/construct_phylogeny.smk +++ b/phylogenetic/rules/construct_phylogeny.smk @@ -22,9 +22,9 @@ See Augur's usage docs for these commands for more details. rule tree: """Building tree""" input: - alignment = "results/aligned_{segment}.fasta" + alignment = "results/{segment}/aligned.fasta" output: - tree = "results/tree_raw_{segment}.nwk" + tree = "results/{segment}/tree_raw.nwk" params: method = "iqtree" shell: @@ -44,12 +44,12 @@ rule refine: - fix clock rate at {params.clock_rate} """ input: - tree = "results/tree_raw_{segment}.nwk", - alignment = "results/aligned_{segment}.fasta", - metadata = "data/metadata_{segment}.tsv", + tree = "results/{segment}/tree_raw.nwk", + alignment = "results/{segment}/aligned.fasta", + metadata = "data/{segment}/metadata.tsv", output: - tree = "results/tree_{segment}.nwk", - node_data = "results/branch_lengths_{segment}.json" + tree = "results/{segment}/tree.nwk", + node_data = "results/{segment}/branch_lengths.json" params: strain_id_field = config["strain_id_field"], coalescent = config['refine']['coalescent'], diff --git a/phylogenetic/rules/export.smk b/phylogenetic/rules/export.smk index a831358..ba5ac9b 100644 --- a/phylogenetic/rules/export.smk +++ b/phylogenetic/rules/export.smk @@ -29,9 +29,9 @@ rule colors: input: color_schemes = "defaults/color_schemes.tsv", color_orderings = "defaults/color_orderings.tsv", - metadata = "data/metadata_{segment}.tsv", + metadata = "data/{segment}/metadata.tsv", output: - colors = "results/colors_{segment}.tsv" + colors = "results/{segment}/colors.tsv" shell: """ python3 scripts/assign-colors.py \ @@ -44,17 +44,17 @@ rule colors: rule export: """Exporting data files for for auspice""" input: - tree = "results/tree_{segment}.nwk", - metadata = "data/metadata_{segment}.tsv", - branch_lengths = "results/branch_lengths_{segment}.json", - traits = "results/traits_{segment}.json", - nt_muts = "results/nt_muts_{segment}.json", - aa_muts = "results/aa_muts_{segment}.json", - colors = "results/colors_{segment}.tsv", + tree = "results/{segment}/tree.nwk", + metadata = "data/{segment}/metadata.tsv", + branch_lengths = "results/{segment}/branch_lengths.json", + traits = "results/{segment}/traits.json", + nt_muts = "results/{segment}/nt_muts.json", + aa_muts = "results/{segment}/aa_muts.json", + colors = "results/{segment}/colors.tsv", description = config['export']['description'], auspice_config = config['export']['auspice_config'], output: - auspice = "results/lassa_{segment}.json", + auspice = "results/{segment}/lassa.json", params: strain_id_field = config["strain_id_field"], shell: @@ -73,8 +73,8 @@ rule export: rule final_strain_name: input: - auspice_json="results/lassa_{segment}.json", - metadata="data/metadata_{segment}.tsv", + auspice_json="results/{segment}/lassa.json", + metadata="data/{segment}/metadata.tsv", output: auspice_json="auspice/lassa_{segment}.json", params: diff --git a/phylogenetic/rules/prepare_sequences.smk b/phylogenetic/rules/prepare_sequences.smk index f5253d9..da85928 100644 --- a/phylogenetic/rules/prepare_sequences.smk +++ b/phylogenetic/rules/prepare_sequences.smk @@ -24,8 +24,8 @@ See Augur's usage docs for these commands for more details. rule download: """Downloading sequences and metadata from data.nextstrain.org""" output: - sequences = "data/sequences_{segment}.fasta.zst", - metadata = "data/metadata_{segment}.tsv.zst" + sequences = "data/{segment}/sequences.fasta.zst", + metadata = "data/{segment}/metadata.tsv.zst" params: sequences_url = config["sequences_url"], metadata_url = config["metadata_url"], @@ -38,11 +38,11 @@ rule download: rule decompress: """Decompressing sequences and metadata""" input: - sequences = "data/sequences_{segment}.fasta.zst", - metadata = "data/metadata_{segment}.tsv.zst" + sequences = "data/{segment}/sequences.fasta.zst", + metadata = "data/{segment}/metadata.tsv.zst" output: - sequences = "data/sequences_{segment}.fasta", - metadata = "data/metadata_{segment}.tsv" + sequences = "data/{segment}/sequences.fasta", + metadata = "data/{segment}/metadata.tsv" shell: """ zstd -d -c {input.sequences} > {output.sequences} @@ -56,11 +56,11 @@ rule filter: - excluding strains in {input.exclude} """ input: - sequences = "data/sequences_{segment}.fasta", - metadata = "data/metadata_{segment}.tsv", + sequences = "data/{segment}/sequences.fasta", + metadata = "data/{segment}/metadata.tsv", exclude = config['filter']['exclude'] output: - sequences = "results/filtered_{segment}.fasta" + sequences = "results/{segment}/filtered.fasta" params: strain_id_field = config["strain_id_field"], group_by = config['filter']['group_by'], @@ -83,10 +83,10 @@ rule align: - filling gaps with N """ input: - sequences = "results/filtered_{segment}.fasta", + sequences = "results/{segment}/filtered.fasta", reference = "defaults/lassa_{segment}.gb" output: - alignment = "results/aligned_{segment}.fasta" + alignment = "results/{segment}/aligned.fasta" shell: """ augur align \