From 37d502933344ed5d9bc938f8cff2c5e189ac27d3 Mon Sep 17 00:00:00 2001 From: Johannes Rainer Date: Tue, 15 Oct 2024 12:04:17 +0200 Subject: [PATCH] feat: add new method backendRequiredSpectraVariables - Add new method `backendRequiredSpectraVariables()` that allows a backend to specify which spectra variables are necessary to ensure proper functionality. Subsetting functions (like `selectSpectraVariables()`) should use these. --- DESCRIPTION | 2 +- NAMESPACE | 2 ++ NEWS.md | 5 +++++ R/AllGenerics.R | 2 ++ R/MsBackend.R | 22 +++++++++++++++++-- R/MsBackendDataFrame.R | 17 ++++++++++---- R/MsBackendHdf5Peaks.R | 10 +++++++-- R/MsBackendMemory.R | 9 +++++++- R/MsBackendMzR.R | 10 +++++++-- .../test_MsBackend/test_spectra_subsetting.R | 5 +++-- man/MsBackend.Rd | 8 +++++++ man/hidden_aliases.Rd | 12 ++++++++++ tests/testthat/test_MsBackend.R | 1 + tests/testthat/test_MsBackendDataFrame.R | 7 +++++- tests/testthat/test_MsBackendHdf5Peaks.R | 5 +++++ tests/testthat/test_MsBackendMemory.R | 5 +++++ tests/testthat/test_MsBackendMzR.R | 8 ++++++- vignettes/MsBackend.Rmd | 17 ++++++++++++++ 18 files changed, 131 insertions(+), 16 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1892f972..431584b9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Spectra Title: Spectra Infrastructure for Mass Spectrometry Data -Version: 1.15.11 +Version: 1.15.12 Description: The Spectra package defines an efficient infrastructure for storing and handling mass spectrometry spectra and functionality to subset, process, visualize and compare spectra data. It provides different diff --git a/NAMESPACE b/NAMESPACE index e3e4970c..df65fbe1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -71,6 +71,7 @@ exportMethods(backendBpparam) exportMethods(backendInitialize) exportMethods(backendMerge) exportMethods(backendParallelFactor) +exportMethods(backendRequiredSpectraVariables) exportMethods(bin) exportMethods(c) exportMethods(centroided) @@ -207,6 +208,7 @@ importFrom(methods,.hasSlot) importFrom(methods,.valueClassTest) importFrom(methods,as) importFrom(methods,callNextMethod) +importFrom(methods,existsMethod) importFrom(methods,is) importFrom(methods,new) importFrom(methods,setAs) diff --git a/NEWS.md b/NEWS.md index c3cf888c..d21a1714 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # Spectra 1.15 +## Changes in 1.15.12 + +- Add generic `backendRequiredSpectraVariables()` to allow definition of + mandatory spectra variables for a backend. + ## Changes in 1.15.11 - Add reference to `MsBackendMetaboLights`. diff --git a/R/AllGenerics.R b/R/AllGenerics.R index d02aa13c..856cb69e 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -1,6 +1,8 @@ #' @include hidden_aliases.R NULL +setGeneric("backendRequiredSpectraVariables", function(object, ...) + standardGeneric("backendRequiredSpectraVariables")) #' @rdname hidden_aliases setMethod("bin", "numeric", MsCoreUtils::bin) setGeneric("combinePeaks", function(object, ...) diff --git a/R/MsBackend.R b/R/MsBackend.R index f1721a3e..f2a86d0b 100644 --- a/R/MsBackend.R +++ b/R/MsBackend.R @@ -17,6 +17,8 @@ #' @aliases dataStorageBasePath<-,MsBackendMzR-method #' @aliases extractByIndex #' @aliases msLeveL<-,MsBackend-method +#' @aliases backendRequiredSpectraVariables +#' @aliases backendRequiredSpectraVariables,MsBackend-method #' #' @description #' @@ -280,6 +282,13 @@ #' `MsBackendMzR` on the other hand returns `factor(dataStorage(object))` #' hence suggesting to split the object by data file. #' +#' - `backendRequiredSpectraVariables()`: returns a `character` with spectra +#' variable names that are mandatory for a specific backend. The default +#' returns an empty `character()`. The implementation for `MsBackendMzR` +#' returns `c("dataStorage", "scanIndex")` as these two spectra variables +#' are required to load the MS data on-the-fly. This method needs only to +#' be implemented if a backend requires specific variables to be defined. +#' #' - `dataOrigin()`: gets a `character` of length equal to the number of #' spectra in `object` with the *data origin* of each spectrum. This could #' e.g. be the mzML file from which the data was read. @@ -965,6 +974,12 @@ setMethod("backendParallelFactor", "MsBackend", function(object, ...) { factor() }) +#' @export +setMethod("backendRequiredSpectraVariables", "MsBackend", + function(object, ...) { + character() + }) + #' @rdname MsBackend #' #' @export @@ -1104,7 +1119,8 @@ setReplaceMethod("dataStorage", "MsBackend", function(object, value) { #' @export setMethod("dropNaSpectraVariables", "MsBackend", function(object) { svs <- spectraVariables(object) - svs <- svs[!(svs %in% c("mz", "intensity"))] + req_cols <- c(backendRequiredSpectraVariables(object), c("mz", "intensity")) + svs <- svs[!(svs %in% req_cols)] spd <- spectraData(object, columns = svs) keep <- !vapply1l(spd, function(z) { allna <- all(is.na(z)) @@ -1112,11 +1128,13 @@ setMethod("dropNaSpectraVariables", "MsBackend", function(object) { FALSE else allna }) - selectSpectraVariables(object, c(svs[keep], "mz", "intensity")) + selectSpectraVariables(object, c(svs[keep], req_cols)) }) #' @rdname MsBackend #' +#' @importFrom methods existsMethod +#' #' @export setMethod("extractByIndex", c("MsBackend", "ANY"), function(object, i) { if (existsMethod("[", class(object)[1L])) diff --git a/R/MsBackendDataFrame.R b/R/MsBackendDataFrame.R index c04f2f6f..6959d771 100644 --- a/R/MsBackendDataFrame.R +++ b/R/MsBackendDataFrame.R @@ -22,7 +22,8 @@ setClass("MsBackendDataFrame", version = "0.2")) setValidity("MsBackendDataFrame", function(object) { - msg <- .valid_spectra_data_required_columns(object@spectraData) + msg <- .valid_spectra_data_required_columns( + object@spectraData, backendRequiredSpectraVariables(object)) if (length(msg)) return(msg) msg <- c( @@ -92,6 +93,12 @@ setMethod("backendMerge", "MsBackendDataFrame", function(object, ...) { res }) +#' @rdname hidden_aliases +setMethod("backendRequiredSpectraVariables", "MsBackendDataFrame", + function(object, ...) { + "dataStorage" + }) + ## Data accessors #' @rdname hidden_aliases @@ -413,14 +420,16 @@ setMethod("selectSpectraVariables", "MsBackendDataFrame", paste(spectraVariables[!(spectraVariables %in% spectraVariables(object))], collapse = ", "), " not available") + bv <- backendRequiredSpectraVariables(object) + if (!all(bv %in% spectraVariables)) + stop("Spectra variables ", + paste(bv[!bv %in% spectraVariables], collapse = ","), + " are required by the backend") keep <- spectraVariables[spectraVariables %in% colnames(object@spectraData)] if (length(keep)) object@spectraData <- object@spectraData[, keep, drop = FALSE] - msg <- .valid_spectra_data_required_columns(object@spectraData) - if (length(msg)) - stop(msg) object@peaksVariables <- intersect(object@peaksVariables, spectraVariables) validObject(object) diff --git a/R/MsBackendHdf5Peaks.R b/R/MsBackendHdf5Peaks.R index ebcb8ea2..27f14753 100644 --- a/R/MsBackendHdf5Peaks.R +++ b/R/MsBackendHdf5Peaks.R @@ -26,8 +26,8 @@ setClass("MsBackendHdf5Peaks", prototype = prototype(version = "0.1", readonly = FALSE)) setValidity("MsBackendHdf5Peaks", function(object) { - msg <- .valid_spectra_data_required_columns(object@spectraData, - c("dataStorage", "scanIndex")) + msg <- .valid_spectra_data_required_columns( + object@spectraData, backendRequiredSpectraVariables(object)) fls <- unique(object@spectraData$dataStorage) msg <- c(msg, .valid_ms_backend_mod_count(object@modCount, fls)) msg <- c(msg, .valid_ms_backend_files_exist(fls)) @@ -36,6 +36,12 @@ setValidity("MsBackendHdf5Peaks", function(object) { else msg }) +#' @rdname hidden_aliases +setMethod("backendRequiredSpectraVariables", "MsBackendHdf5Peaks", + function(object, ...) { + c("dataStorage", "scanIndex") + }) + #' @rdname hidden_aliases #' #' @importFrom fs path_sanitize diff --git a/R/MsBackendMemory.R b/R/MsBackendMemory.R index 594fc799..4bde69ac 100644 --- a/R/MsBackendMemory.R +++ b/R/MsBackendMemory.R @@ -122,6 +122,12 @@ setMethod("backendMerge", "MsBackendMemory", function(object, ...) { res }) +#' @rdname hidden_aliases +setMethod("backendRequiredSpectraVariables", "MsBackendMemory", + function(object, ...) { + "dataStorage" + }) + ## Data accessors #' @rdname hidden_aliases @@ -514,7 +520,8 @@ setMethod("selectSpectraVariables", "MsBackendMemory", z[, keep, drop = FALSE]) } } - msg <- .valid_spectra_data_required_columns(object@spectraData) + msg <- .valid_spectra_data_required_columns( + object@spectraData, backendRequiredSpectraVariables(object)) if (length(msg)) stop(msg) validObject(object) diff --git a/R/MsBackendMzR.R b/R/MsBackendMzR.R index 69a04987..a7930e0d 100644 --- a/R/MsBackendMzR.R +++ b/R/MsBackendMzR.R @@ -24,14 +24,20 @@ setClass("MsBackendMzR", prototype = prototype(version = "0.1", readonly = TRUE)) setValidity("MsBackendMzR", function(object) { - msg <- .valid_spectra_data_required_columns(object@spectraData, - c("dataStorage", "scanIndex")) + msg <- .valid_spectra_data_required_columns( + object@spectraData, backendRequiredSpectraVariables(object)) msg <- c(msg, .valid_ms_backend_files_exist( unique(object@spectraData$dataStorage))) if (length(msg)) msg else TRUE }) +#' @rdname hidden_aliases +setMethod("backendRequiredSpectraVariables", "MsBackendMzR", + function(object, ...) { + c("dataStorage", "scanIndex") + }) + #' @rdname hidden_aliases #' #' @importFrom methods callNextMethod diff --git a/inst/test_backends/test_MsBackend/test_spectra_subsetting.R b/inst/test_backends/test_MsBackend/test_spectra_subsetting.R index 1782747c..93adce0d 100644 --- a/inst/test_backends/test_MsBackend/test_spectra_subsetting.R +++ b/inst/test_backends/test_MsBackend/test_spectra_subsetting.R @@ -92,8 +92,9 @@ test_that("selectSpectraVariables", { if (!isReadOnly(be) || inherits(be, "MsBackendCached") || inherits(be, "MsBackendDataFrame")) { tmp <- be - res <- selectSpectraVariables(tmp, c("mz", "intensity", - "dataStorage", "scanIndex")) + res <- selectSpectraVariables( + tmp, union(c("mz", "intensity", "dataStorage", "scanIndex"), + backendRequiredSpectraVariables(be))) expect_true(all(names(coreSpectraVariables()) %in% spectraVariables(res))) expect_true(all(is.na(res$msLevel))) diff --git a/man/MsBackend.Rd b/man/MsBackend.Rd index e4424015..16b5e782 100644 --- a/man/MsBackend.Rd +++ b/man/MsBackend.Rd @@ -23,6 +23,8 @@ \alias{dataStorageBasePath<-,MsBackendMzR-method} \alias{extractByIndex} \alias{msLeveL<-,MsBackend-method} +\alias{backendRequiredSpectraVariables} +\alias{backendRequiredSpectraVariables,MsBackend-method} \alias{backendBpparam,MsBackend-method} \alias{backendInitialize,MsBackend-method} \alias{backendMerge,list-method} @@ -576,6 +578,12 @@ The default implementation returns a factor of length 0 (\code{factor()}) providing thus no default splitting. \code{backendParallelFactor()} for \code{MsBackendMzR} on the other hand returns \code{factor(dataStorage(object))} hence suggesting to split the object by data file. +\item \code{backendRequiredSpectraVariables()}: returns a \code{character} with spectra +variable names that are mandatory for a specific backend. The default +returns an empty \code{character()}. The implementation for \code{MsBackendMzR} +returns \code{c("dataStorage", "scanIndex")} as these two spectra variables +are required to load the MS data on-the-fly. This method needs only to +be implemented if a backend requires specific variables to be defined. \item \code{dataOrigin()}: gets a \code{character} of length equal to the number of spectra in \code{object} with the \emph{data origin} of each spectrum. This could e.g. be the mzML file from which the data was read. diff --git a/man/hidden_aliases.Rd b/man/hidden_aliases.Rd index 3e70d26c..ce4e63e9 100644 --- a/man/hidden_aliases.Rd +++ b/man/hidden_aliases.Rd @@ -10,6 +10,7 @@ \alias{bin,numeric-method} \alias{show,MsBackendDataFrame-method} \alias{backendMerge,MsBackendDataFrame-method} +\alias{backendRequiredSpectraVariables,MsBackendDataFrame-method} \alias{acquisitionNum,MsBackendDataFrame-method} \alias{peaksData,MsBackendDataFrame-method} \alias{centroided,MsBackendDataFrame-method} @@ -60,6 +61,7 @@ \alias{$<-,MsBackendDataFrame-method} \alias{split,MsBackendDataFrame,ANY-method} \alias{filterAcquisitionNum,MsBackendDataFrame-method} +\alias{backendRequiredSpectraVariables,MsBackendHdf5Peaks-method} \alias{backendInitialize,MsBackendHdf5Peaks-method} \alias{show,MsBackendHdf5Peaks-method} \alias{peaksData,MsBackendHdf5Peaks-method} @@ -80,6 +82,7 @@ \alias{backendMerge,MsBackendHdf5Peaks-method} \alias{show,MsBackendMemory-method} \alias{backendMerge,MsBackendMemory-method} +\alias{backendRequiredSpectraVariables,MsBackendMemory-method} \alias{acquisitionNum,MsBackendMemory-method} \alias{centroided,MsBackendMemory-method} \alias{centroided<-,MsBackendMemory-method} @@ -132,6 +135,7 @@ \alias{[,MsBackendMemory-method} \alias{split,MsBackendMemory,ANY-method} \alias{filterAcquisitionNum,MsBackendMemory-method} +\alias{backendRequiredSpectraVariables,MsBackendMzR-method} \alias{backendInitialize,MsBackendMzR-method} \alias{show,MsBackendMzR-method} \alias{peaksData,MsBackendMzR-method} @@ -168,6 +172,8 @@ \S4method{backendMerge}{MsBackendDataFrame}(object, ...) +\S4method{backendRequiredSpectraVariables}{MsBackendDataFrame}(object, ...) + \S4method{acquisitionNum}{MsBackendDataFrame}(object) \S4method{peaksData}{MsBackendDataFrame}(object, columns = c("mz", "intensity")) @@ -275,6 +281,8 @@ dataOrigin = character() ) +\S4method{backendRequiredSpectraVariables}{MsBackendHdf5Peaks}(object, ...) + \S4method{backendInitialize}{MsBackendHdf5Peaks}( object, files = character(), @@ -322,6 +330,8 @@ \S4method{backendMerge}{MsBackendMemory}(object, ...) +\S4method{backendRequiredSpectraVariables}{MsBackendMemory}(object, ...) + \S4method{acquisitionNum}{MsBackendMemory}(object) \S4method{centroided}{MsBackendMemory}(object) @@ -431,6 +441,8 @@ dataOrigin = character() ) +\S4method{backendRequiredSpectraVariables}{MsBackendMzR}(object, ...) + \S4method{backendInitialize}{MsBackendMzR}(object, files, ..., BPPARAM = bpparam()) \S4method{show}{MsBackendMzR}(object) diff --git a/tests/testthat/test_MsBackend.R b/tests/testthat/test_MsBackend.R index cea6af27..01fa65c2 100644 --- a/tests/testthat/test_MsBackend.R +++ b/tests/testthat/test_MsBackend.R @@ -57,6 +57,7 @@ test_that("MsBackend methods throw errors", { expect_error(dm$a, "implemented for") expect_error(dm$a <- "a", "implemented for") expect_error(extractByIndex(dm, 1), "implemented for") + expect_equal(backendRequiredSpectraVariables(dm), character()) }) test_that("extractByIndex not implemented fallback", { diff --git a/tests/testthat/test_MsBackendDataFrame.R b/tests/testthat/test_MsBackendDataFrame.R index e5de3662..238e945a 100644 --- a/tests/testthat/test_MsBackendDataFrame.R +++ b/tests/testthat/test_MsBackendDataFrame.R @@ -653,7 +653,7 @@ test_that("selectSpectraVariables,MsBackendDataFrame works", { expect_equal(colnames(res@spectraData), c("dataStorage", "rtime")) expect_equal(res@peaksVariables, be@peaksVariables) - expect_error(selectSpectraVariables(be, "rtime"), "dataStorage is/are missing") + expect_error(selectSpectraVariables(be, "rtime"), "are required") expect_error(selectSpectraVariables(be, "something"), "something not available") @@ -1024,3 +1024,8 @@ test_that("[[,[[<-,MsBackendDataFrame works", { test_that("supportsSetBackend,MsBackendDataFrame", { expect_true(supportsSetBackend(MsBackendDataFrame())) }) + +test_that("backendRequiredSpectraVariables,MsBackendDataFrame works", { + expect_equal(backendRequiredSpectraVariables(MsBackendDataFrame()), + "dataStorage") +}) diff --git a/tests/testthat/test_MsBackendHdf5Peaks.R b/tests/testthat/test_MsBackendHdf5Peaks.R index 3604d895..17495169 100644 --- a/tests/testthat/test_MsBackendHdf5Peaks.R +++ b/tests/testthat/test_MsBackendHdf5Peaks.R @@ -413,3 +413,8 @@ test_that("backendParallelFactor,MsBackendHdf5Peaks", { factor(dataStorage(sciex_hd5), levels = unique(dataStorage(sciex_hd5)))) }) + +test_that("backendRequiredSpectraVariables,MsBackendHdf5Peaks works", { + expect_equal(backendRequiredSpectraVariables(MsBackendHdf5Peaks()), + c("dataStorage", "scanIndex")) +}) diff --git a/tests/testthat/test_MsBackendMemory.R b/tests/testthat/test_MsBackendMemory.R index bb3c9973..119e2c56 100644 --- a/tests/testthat/test_MsBackendMemory.R +++ b/tests/testthat/test_MsBackendMemory.R @@ -944,3 +944,8 @@ test_that("tic,MsBackendMemory works", { test_that("supportsSetBackend,MsBackendMemory", { expect_true(supportsSetBackend(MsBackendMemory())) }) + +test_that("backendRequiredSpectraVariables,MsBackendMemory works", { + expect_equal(backendRequiredSpectraVariables(MsBackendMemory()), + "dataStorage") +}) diff --git a/tests/testthat/test_MsBackendMzR.R b/tests/testthat/test_MsBackendMzR.R index 36de14c4..d8a83227 100644 --- a/tests/testthat/test_MsBackendMzR.R +++ b/tests/testthat/test_MsBackendMzR.R @@ -504,7 +504,7 @@ test_that("selectSpectraVariables,MsBackendMzR works", { expect_equal(res@peaksVariables, c("mz", "intensity")) expect_error(selectSpectraVariables(be, c("dataStorage", "msLevel")), - "scanIndex is/are missing") + "required") }) test_that("$,$<-,MsBackendMzR works", { @@ -597,3 +597,9 @@ test_that("dataStorageBasePath,dataStorageBasePath<-,MsBackendMzR works", { #' errors expect_error(dataStorageBasePath(tmp) <- "some path", "Provided path") }) + +test_that("backendRequiredSpectraVariables,MsBackendMzR works", { + tmp <- MsBackendMzR() + expect_equal(backendRequiredSpectraVariables(tmp), + c("dataStorage", "scanIndex")) +}) diff --git a/vignettes/MsBackend.Rmd b/vignettes/MsBackend.Rmd index 9b5191ed..5192084e 100644 --- a/vignettes/MsBackend.Rmd +++ b/vignettes/MsBackend.Rmd @@ -1590,6 +1590,23 @@ setMethod("backendParallelFactor", "MsBackend", function(object, ...) { ``` +### `backendRequiredSpectraVariables()` + +The `backendRequiredSpectraVariables()` method can be implemented if a backend +needs specific spectra variables to work. The default implementation is: + +```{r} +setMethod("backendRequiredSpectraVariables", "MsBackend", + function(object, ...) { + character() + }) +``` + +The implementation for `MsBackendMzR` returns `c("dataStorage", "scanIndex")` as +the backend needs these two spectra variables to load the MS data on-the-fly +from the original data files. + + ### `dropNaSpectraVariables()` The `dropNaSpectraVariables()` is supposed to allow removing all spectra