-
Notifications
You must be signed in to change notification settings - Fork 25
/
Copy pathanalysis-pipeline.sh
89 lines (70 loc) · 2.85 KB
/
analysis-pipeline.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/bin/bash
#
# BioBombe Analysis Pipeline
# Gregory Way, 2018
#
# This file runs the downstream portion of the BioBombe analysis pipeline (the
# numbered module directories 4 through 11). Modules 0, 1, 2, and 3 are not performed
# in this script. Module 0 downloads and processes raw gene expression data from GTEx,
# TCGA, and TARGET. Module 1 performs a parameter sweep over the neural network models
# (Denoising Autoencoder and Variational Autoencoder). Module 2 performs the serial
# compression with increasing bottleneck layers. Module 3 builds the real and permuted
# networks used to interpret the compressed features.
#
# Because Module 2 is not rerun here, the first step in this pipeline is to download
# the precomputed data output from Module 2.
# Usage: run from the repository root (the directory that contains the numbered
# module folders), e.g. `bash analysis-pipeline.sh`.

# Directory the numbered module folders are resolved against. Captured once so that
# every step uses an absolute path instead of depending on the previous step's `cd`.
PIPELINE_ROOT=$PWD
readonly PIPELINE_ROOT

#######################################
# Print an error message to stderr and abort the pipeline.
# Arguments: $* - message text
# Outputs:   writes the message to stderr
# Returns:   never returns; exits the script with status 1
#######################################
die() {
  printf 'analysis-pipeline: %s\n' "$*" >&2
  exit 1
}

#######################################
# Enter a module directory and run one command inside it.
# Globals:   PIPELINE_ROOT (read)
# Arguments: $1   - module directory name, relative to PIPELINE_ROOT
#            $2.. - command (and its arguments) to run inside that directory
# Returns:   0 on success; aborts the pipeline via die() if the directory cannot
#            be entered or the command exits non-zero
#######################################
run_module() {
  local module_dir=$1
  shift
  # An unchecked `cd` would let the command run from whatever directory the
  # previous step left behind, so a failure here is fatal.
  cd "${PIPELINE_ROOT}/${module_dir}" \
    || die "cannot enter '${module_dir}' from '${PIPELINE_ROOT}'; run this script from the repository root"
  # Stop at the first failing step instead of silently continuing and reporting
  # only the status of the last command.
  "$@" || die "'$*' failed in ${module_dir} (exit status $?)"
}

# `conda activate` is provided by a shell function that conda's shell hook defines.
# A script started with `bash analysis-pipeline.sh` does not inherit that function,
# so load the hook explicitly when it is missing.
if ! declare -F conda > /dev/null; then
  conda_base=$(conda info --base) \
    || die "conda was not found; install conda or add it to PATH"
  # shellcheck source=/dev/null
  source "${conda_base}/etc/profile.d/conda.sh" \
    || die "could not load ${conda_base}/etc/profile.d/conda.sh"
fi
conda activate biobombe \
  || die "could not activate the 'biobombe' conda environment"

##############################
# Step 1: Get Data (Module 2)
##############################
# Download the precomputed Module 2 output instead of rerunning the compression
run_module 2.sequential-compression python download-biobombe-archive.py

##############################
# Step 2: Analyze Components (Module 4)
##############################
# Visualize sample reconstruction and correlation between input and reconstructed output
run_module 4.analyze-components bash components-analysis.sh

##############################
# Step 3: Analyze Stability (Module 5)
##############################
# Apply SVCCA to weight matrices and compare algorithm stability across dimensions
run_module 5.analyze-stability bash stability_analysis.sh

##############################
# Step 4: Analyze Weights (Module 6)
##############################
# Project the networks onto the compressed weight matrices to derive biological insight
run_module 6.biobombe-projection python interpret-compression.py

##############################
# Step 5: Analyze Coverage of Weight Matrices (Module 7)
##############################
# Run the coverage analysis of the weight matrices (see run_coverage_analysis.sh).
# NOTE(review): the previous comment here duplicated Step 4's description; confirm
# the exact analysis against the module's own documentation.
run_module 7.analyze-coverage bash run_coverage_analysis.sh

##############################
# Step 6: Perform GTEx module interpretation of increased blood correlation
##############################
# Determine the difference between VAE models k = 2 and k = 3 and apply features to
# external datasets
run_module 8.gtex-interpret bash gtex_analysis.sh

##############################
# Step 7: Perform the TCGA classification analysis predicting cancer type and mutations
##############################
# Train several models and visualize results
run_module 9.tcga-classify bash classify_analysis.sh

##############################
# Step 8: Perform signature analysis to detect sex and MYCN amplification
##############################
# Train several models and visualize results
run_module 10.gene-expression-signatures bash signature_analysis.sh

##############################
# Step 9: Perform a simulation experiment to determine if feature number is associated
# with feature importance
##############################
# Train several models and visualize results
run_module 11.simulation-feature-number bash simulation_analysis.sh