Analysis and plotting of demon data
To plot charts of variant allele frequencies and genotype sizes, use `plot_all_charts`. For normal tissue, you should specify a value for `max_allele_count` so that the axes are appropriately scaled.
To plot Muller plots and grids, use `plot_all_images`.
To pull the latest GitHub version into a cluster folder (using a terminal):
# Update the local copy of demonanalysis from GitHub.
cd XXX # replace XXX with the path to the cluster folder
# NOTE(review): `git pull` must be run inside an existing clone of the repository; the R install step uses "XXX/demonanalysis", so the clone may live one level below XXX — confirm the intended directory
git pull https://github.com/robjohnnoble/demonanalysis
To install that version on a cluster profile (using R):
# Install the package from the local copy, then attach it.
library(devtools)
# replace XXX with the path to the cluster folder
install("XXX/demonanalysis")
library(demonanalysis)

# insert name of top-level folder
subfolder_name <- "XXX"

# folder containing results of the batch
input_dir <- file.path("all_results", subfolder_name)
# number of simulation parameters (first columns in data)
num_parameters <- count_parameters(input_dir)
# folder to receive image files
output_dir_plots <- file.path("plots", subfolder_name)
# folder containing data files
output_dir_data <- file.path("data", subfolder_name)

# should be "Exit code 0" (when finished) or "So far no status" (while running)
all_statuses(input_dir, summary = TRUE)
For growing tumours, to plot charts of variant allele frequencies and genotype sizes:
# Charts for a batch of growing-tumour simulations, written to output_dir_plots.
create_plots_batch(
  input_dir,
  output_dir = output_dir_plots,
  type = "chart"
)
For normal tissue, to plot charts of variant allele frequencies and genotype sizes:
# Charts for a batch of normal-tissue simulations, written to output_dir_plots.
# NOTE(review): the accompanying text refers to `max_allele_count`, but this
# call passes `max_size` — confirm which argument name is correct.
create_plots_batch(
  input_dir,
  output_dir = output_dir_plots,
  type = "chart",
  max_size = 50
)
To plot Muller plots and grids:
# Muller plots and grids for the batch, written to output_dir_plots.
create_plots_batch(
  input_dir,
  output_dir = output_dir_plots,
  type = "plot"
)
# combined data for a batch of simulations, excluding diversity columns
data <- all_output(input_dir, include_diversities = FALSE)

# change x_var, y_var and output_filename as appropriate
plot_curves_faceted(
  data,
  num_parameters,
  x_var = "Generation",
  y_var = "MeanBirthRate",
  output_filename = "curves",
  output_dir = output_dir_plots
)
# combined data for a batch of simulations, including diversity columns
data <- all_output(input_dir)
# add columns useful for plotting trajectories
data <- add_relative_time(
  data,
  start_size = 5500,
  num_parameters = num_parameters
)

# NumCells at time of initial measurement for forecasting
start_size_range <- 500 * 2^(0:8)
# gap between time of initial measurement and second measurement
gap_range <- (1:10) / 10
# waiting time is measured until tumour reaches this NumCells value
final_size <- 1e5

# summary data for each simulation; start_size_range and gap_range are vectors
# while final_size is a single value, so rows presumably correspond to each
# combination of start size and gap — TODO confirm against get_summary
summary <- get_summary(
  data,
  start_size_range,
  gap_range,
  final_size,
  num_parameters = num_parameters
)

# summary dataframe of correlations with "outcome", including all cells
cor_summary <- get_cor_summary(
  summary,
  c("DriverDiversity", "DriverEdgeDiversity"),
  num_parameters = num_parameters,
  min_count = 5
)

# summary dataframe of correlations with "waiting_time", including all cells
wait_cor_summary <- get_wait_cor_summary(
  summary,
  c("DriverDiversity", "DriverEdgeDiversity"),
  num_parameters = num_parameters,
  min_count = 5
)

# summary dataframe of correlations with "waiting_time" for different biopsy
# protocols; num_parameters is passed by name to match the calls above
depth_wait_cor_summary <- get_wait_cor_summary(
  summary,
  c(
    paste0("DriverDiversityFrom1SamplesAtDepth", 0:10),
    paste0("DriverDiversityFrom4SamplesAtDepth", 0:10)
  ),
  num_parameters = num_parameters,
  min_count = 5
)