Skip to content

Commit

Permalink
added 'seed' parameter to simulation method
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexander Dietrich committed Nov 21, 2023
1 parent b32f811 commit 5a84693
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 8 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: SimBu
Title: Simulate Bulk RNA-seq Datasets from Single-Cell Datasets
Version: 1.5.2
Version: 1.5.3
Authors@R:
person(given = "Alexander",
family = "Dietrich",
Expand Down
3 changes: 3 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
Changes in version 1.5.2 (2023-11-21)
+ Added 'seed' parameter to simulation

Changes in version 1.5.2 (2023-11-08)
+ Updated SimBu to be compatibel with Seurat v5

Expand Down
23 changes: 17 additions & 6 deletions R/simulator.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ NULL
#' @param remove_bias_in_counts boolean; if TRUE (default) the internal mRNA bias that is present in count data will be *removed* using the number of reads mapped to each cell
#' @param remove_bias_in_counts_method 'read-number' (default) or 'gene-number'; method with which the mRNA bias in counts will be removed
#' @param norm_counts boolean; if TRUE the samples simulated with counts will be normalized to CPMs, default is FALSE
#' @param seed numeric; fix this value if you want the same cells to be sampled
#'
#' @return returns two vectors (one based on counts, one based on tpm; depends on which matrices are present in data) with expression values for all genes in the provided dataset
#'
Expand All @@ -49,7 +50,8 @@ simulate_sample <- function(data,
total_read_counts,
remove_bias_in_counts,
remove_bias_in_counts_method,
norm_counts) {
norm_counts,
seed) {
if (!all(names(simulation_vector) %in% unique(SummarizedExperiment::colData(data)[["cell_type"]]))) {
stop("Some cell-types in the provided simulation vector are not in the annotation.")
}
Expand All @@ -72,6 +74,10 @@ simulate_sample <- function(data,
if (n == 0) {
n <- 1
}
if(!is.na(seed)){
# fix seed for random selection of cells
set.seed(seed)
}
cells <- dplyr::slice_sample(cells_of_type_x, n = n, replace = TRUE)
cells <- cells[["cell_ID"]]
}
Expand Down Expand Up @@ -169,6 +175,7 @@ simulate_sample <- function(data,
#' @param total_read_counts numeric; sets the total read count value for each sample
#' @param whitelist list; give a list of cell-types you want to keep for the simulation; if NULL, all are used
#' @param blacklist list; give a list of cell-types you want to remove for the simulation; if NULL, all are used; is applied after whitelist
#' @param seed numeric; specifiy a seed for RNG. This effects cell sampling; with a fixed seed you will always sample the same cells for each sample (seed value is incrased by 1 for each sample). Default = NA (two simulation runs will sample different cells).
#' @param BPPARAM BiocParallel::bpparam() by default; if specific number of threads x want to be used, insert: BiocParallel::MulticoreParam(workers = x)
#' @param run_parallel boolean, decide if multi-threaded calculation will be run. FALSE by default
#'
Expand Down Expand Up @@ -270,6 +277,7 @@ simulate_bulk <- function(data,
total_read_counts = NULL,
whitelist = NULL,
blacklist = NULL,
seed = NA,
BPPARAM = BiocParallel::bpparam(),
run_parallel = FALSE) {
# switch multi-threading on/off
Expand Down Expand Up @@ -428,19 +436,22 @@ simulate_bulk <- function(data,
##### generate the samples #####

# sample cells and generate pseudo-bulk profiles
all_samples <- BiocParallel::bplapply(simulation_vector_list, function(x) {
samples <- simulate_sample(
all_samples <- BiocParallel::bplapply(seq_along(simulation_vector_list), function(i) {

simulation_vector <- simulation_vector_list[[i]]
sample <- simulate_sample(
data = data,
scaling_vector = scaling_vector,
simulation_vector = x,
simulation_vector = simulation_vector,
total_cells = ncells,
total_read_counts = total_read_counts,
remove_bias_in_counts = remove_bias_in_counts,
remove_bias_in_counts_method = remove_bias_in_counts_method,
norm_counts = norm_counts
norm_counts = norm_counts,
seed = ifelse(is.na(seed), seed, seed + i) # ensure that samples still are different if seed is set
)

return(samples)
return(sample)
}, BPPARAM = BPPARAM)

bulk_counts <- Matrix::Matrix(vapply(
Expand Down
3 changes: 3 additions & 0 deletions man/simulate_bulk.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/simulate_sample.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions tests/testthat/test_simulator.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,25 @@ test_that("can generate different simulation scenarios & check multi threads", {
testthat::expect_s4_class(SimBu::simulate_bulk(data = dataset, scenario = "weighted", weighted_cell_type = "T cells CD4", weighted_amount = .5, scaling_factor = "NONE", nsamples = 10, ncells = 100, run_parallel = FALSE)[["bulk"]], "SummarizedExperiment")

testthat::expect_s4_class(SimBu::simulate_bulk(data = dataset, scenario = "even", scaling_factor = "NONE", nsamples = 10, ncells = 100, BPPARAM = BiocParallel::MulticoreParam(workers = 2), run_parallel = FALSE)[["bulk"]], "SummarizedExperiment")
testthat::expect_s4_class(SimBu::simulate_bulk(data = dataset, scenario = "even", scaling_factor = "NONE", nsamples = 10, ncells = 100, total_read_counts = 100000, BPPARAM = BiocParallel::MulticoreParam(workers = 2), run_parallel = FALSE)[["bulk"]], "SummarizedExperiment")
})

test_that("test RNG", {
# use even scenario with no variance between samples
# with fixed seed, both simulations have exactly the same count values
seed <- 123
sim1 <- SimBu::simulate_bulk(data = dataset, scenario = "even", scaling_factor = "NONE", balance_even_mirror_scenario = 0, nsamples = 10, ncells = 100, run_parallel = FALSE, seed = seed)
sim2 <- SimBu::simulate_bulk(data = dataset, scenario = "even", scaling_factor = "NONE", balance_even_mirror_scenario = 0, nsamples = 10, ncells = 100, run_parallel = FALSE, seed = seed)
x1 <- rowSums(assays(sim1$bulk)[['bulk_counts']])
x2 <- rowSums(assays(sim2$bulk)[['bulk_counts']])
testthat::expect_equal(x1,x2)

# test that samples inside one simulation still are different
sample1 <- sim1$bulk[,1]
sample2 <- sim1$bulk[,2]
testthat::expect_false(all(assays(sample1)[['bulk_counts']] == assays(sample2)[['bulk_counts']]))

})

test_that("test different scaling factor calculations + mRNA bias removal from counts", {
testthat::expect_s4_class(SimBu::simulate_bulk(data = dataset, scenario = "random", scaling_factor = "census", nsamples = 10, ncells = 100, run_parallel = FALSE)[["bulk"]], "SummarizedExperiment")
Expand Down

0 comments on commit 5a84693

Please sign in to comment.