Skip to content

Commit

Permalink
Merge pull request #315 from stemangiola/improve-scaling-robustness
Browse files Browse the repository at this point in the history
use matrix multiplication
  • Loading branch information
stemangiola authored Sep 17, 2024
2 parents 9c9edcc + d27e6a0 commit 407a482
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 24 deletions.
14 changes: 10 additions & 4 deletions R/functions_SE.R
Original file line number Diff line number Diff line change
Expand Up @@ -744,9 +744,9 @@ get_differential_transcript_abundance_bulk_SE <- function(.data,
# Replace `:` with ___ because it creates error with edgeR
if(design |> colnames() |> str_detect(":") |> any()) {
message("tidybulk says: the interaction term `:` has been replaced with `___` in the design matrix, in order to work with edgeR.")
colnames(design) = design |> colnames() |> str_replace(":", "___")
colnames(design) = design |> colnames() |> str_replace(":", "___")
}

# Print the design column names in case I want contrasts
message(
sprintf(
Expand Down Expand Up @@ -1070,6 +1070,7 @@ get_differential_transcript_abundance_bulk_voom_SE <- function(.data,
#' @param .contrasts A character vector. See edgeR makeContrasts specification for the parameter `contrasts`. If contrasts are not present the first covariate is the one the model is tested against (e.g., ~ factor_of_interest)
#' @param method A string character. Either "edgeR_quasi_likelihood" (i.e., QLF), "edgeR_likelihood_ratio" (i.e., LRT)
#' @param scaling_method A character string. The scaling method passed to the backend function (i.e., edgeR::calcNormFactors; "TMM","TMMwsp","RLE","upperquartile")
#' @param .scaling_factor A tidyeval (column name) for the precalculated TMM scaling
#' @param omit_contrast_in_colnames If just one contrast is specified you can choose to omit the contrast label in the colnames.
#' @param ... Additional arguments for glmmSeq
#'
Expand All @@ -1085,13 +1086,15 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,
test_above_log2_fold_change = NULL,

scaling_method = "TMM",
.scaling_factor = NULL,
omit_contrast_in_colnames = FALSE,
prefix = "",
.dispersion = NULL,
...) {

.abundance = enquo(.abundance)
.dispersion = enquo(.dispersion)
.scaling_factor = enquo(.scaling_factor)

# Check if contrasts are of the same form
if(
Expand Down Expand Up @@ -1146,7 +1149,7 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,
data = metadata
)

if(quo_is_symbolic(.dispersion))
if(.dispersion |> quo_is_symbolic())
dispersion = rowData(.data)[,quo_name(.dispersion),drop=FALSE] |> as_tibble(rownames = feature__$name) |> deframe()
else
dispersion = counts |> edgeR::estimateDisp(design = design) %$% tagwise.dispersion |> setNames(rownames(counts))
Expand All @@ -1161,7 +1164,10 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,
dispersion = dispersion[rownames(counts)]

# Scaling
sizeFactors <- counts |> edgeR::calcNormFactors(method = scaling_method)
if(.scaling_factor |> quo_is_symbolic())
sizeFactors = .data |> pivot_sample() |> pull(!!.scaling_factor)
else
sizeFactors <- counts |> edgeR::calcNormFactors(method = scaling_method)


glmmSeq_object =
Expand Down
16 changes: 8 additions & 8 deletions R/methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -594,14 +594,14 @@ setMethod("scale_abundance", "tidybulk", .scale_abundance)
#' @details Tranform the feature abundance across samples so to have the same quantile distribution (using preprocessCore).
#'
#' Underlying method
#'
#'
#' If `limma_normalize_quantiles` is chosen
#'
#'
#' .data |>limma::normalizeQuantiles()
#'
#'
#' If `preprocesscore_normalize_quantiles_use_target` is chosen
#'
#' .data |>
#'
#' .data |>
#' preprocessCore::normalize.quantiles.use.target(
#' target = preprocessCore::normalize.quantiles.determine.target(.data)
#' )
Expand Down Expand Up @@ -638,7 +638,7 @@ setGeneric("quantile_normalise_abundance", function(.data,
.abundance = NULL,
method = "limma_normalize_quantiles",
target_distribution = NULL,

action = "add")
{

Expand Down Expand Up @@ -695,7 +695,7 @@ setGeneric("quantile_normalise_abundance", function(.data,
}

if(is.null(target_distribution)) target_distribution = preprocessCore::normalize.quantiles.determine.target(.data_norm)

.data_norm_quant =
.data_norm |>
preprocessCore::normalize.quantiles.use.target(
Expand Down Expand Up @@ -2604,7 +2604,7 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol)
#' @param significance_threshold DEPRECATED - A real between 0 and 1 (usually 0.05).
#' @param fill_missing_values DEPRECATED - A boolean. Whether to fill missing sample/transcript values with the median of the transcript. This is rarely needed.
#' @param .contrasts DEPRECATED - This parameter takes the format of the contrast parameter of the method of choice. For edgeR and limma-voom is a character vector. For DESeq2 is a list including a character vector of length three. The first covariate is the one the model is tested against (e.g., ~ factor_of_interest)
#' @param ... Further arguments passed to some of the internal functions. Currently, it is needed just for internal debug.
#' @param ... Further arguments passed to some of the internal experimental functions. For example for glmmSeq, it is possible to pass .dispersion, and .scaling_factor column tidyeval to skip the caluclation of dispersion and scaling and use precalculated values. This is helpful is you want to calculate those quantities on many genes and do DE testing on fewer genes. .scaling_factor is the TMM value that can be obtained with tidybulk::scale_abundance.
#'
#'
#' @details This function provides the option to use edgeR \url{https://doi.org/10.1093/bioinformatics/btp616}, limma-voom \url{https://doi.org/10.1186/gb-2014-15-2-r29}, limma_voom_sample_weights \url{https://doi.org/10.1093/nar/gkv412} or DESeq2 \url{https://doi.org/10.1186/s13059-014-0550-8} to perform the testing.
Expand Down
15 changes: 7 additions & 8 deletions R/methods_SE.R
Original file line number Diff line number Diff line change
Expand Up @@ -188,17 +188,16 @@ setMethod("tidybulk", "RangedSummarizedExperiment", .tidybulk_se)

my_counts_scaled =
list(
assays(.data) %>%
as.list() %>%
.[[1]] %>%
multiply_by(
rep(multiplier, rep(nrow(.),length(multiplier)))
)
assay(.data) %*%
diag(multiplier)

) %>%
setNames(value_scaled)
colnames(my_counts_scaled[[1]]) = assay(.data) |> colnames()


# Add the assay
assays(.data) = assays(.data) %>% c(my_counts_scaled)
assays(.data, withDimnames=FALSE) = assays(.data) %>% c(my_counts_scaled)

.data %>%

Expand Down Expand Up @@ -313,7 +312,7 @@ setMethod("scale_abundance",
as.matrix()

if(is.null(target_distribution)) target_distribution = preprocessCore::normalize.quantiles.determine.target(.data_norm)

.data_norm =
.data_norm |>
preprocessCore::normalize.quantiles.use.target(
Expand Down
6 changes: 3 additions & 3 deletions man/quantile_normalise_abundance-methods.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/test_differential_abundance-methods.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 407a482

Please sign in to comment.