Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

105 refactor patternmarkers #111

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions R/class-CogapsResult.R
Original file line number Diff line number Diff line change
Expand Up @@ -395,19 +395,22 @@ Pw=rep(1,ncol(object@featureLoadings)), PwNull=FALSE)
#' @param threshold the type of threshold to be used. The default "all" will
#' distribute features into patterns with the highest ranking as ranked by the
#' increasing Euclidian distance between feature loadings and \code{lp}. The
#' alternative "cut" will do the same, but will only keep the features that are
#' ranked higher than the first feature having greater intra-pattern rank than
#' inter-pattern rank. This is useful to limit the number of markers ranked
#' similarly everywhere.
#' alternative "cut" will only keep the features that are ranked higher than
#' the first feature having greater intra-pattern compared to inter-pattern
#' rank. This is useful to limit the number of markers ranked similarly
#' everywhere. Features may be present in multiple patterns for "cut".
#' @param lp list of vectors of weights for each pattern to be used for finding
#' markers. If NULL, list of synthetic one-hot markers for each column of the
#' featureLoadings matrix will be generated and matched against.
#' markers. If NULL, list of synthetic one-hot markers for each pattern will be
#' generated and matched against.
#' @param axis controls the matrix to use for ranking. 1 for featureLoadings,
#' 2 for sampleFactors.
#' @return List of: list of marker features for each pattern (best rank first),
#' and a matrix of ranks of each feature in each pattern.
#' a matrix of ranks of each feature in each pattern, a matrix of scores for
#' each feature in each pattern.
#' @examples
#' data(GIST)
#' pm <- patternMarkers(GIST.result)
setGeneric("patternMarkers", function(object, threshold="all", lp=NULL) standardGeneric("patternMarkers"))
setGeneric("patternMarkers", function(object, threshold="all", lp=NULL, axis=1) standardGeneric("patternMarkers"))

#' MANOVA statistical test for patterns between sample groups
#' @export
Expand Down
63 changes: 46 additions & 17 deletions R/methods-CogapsResult.R
Original file line number Diff line number Diff line change
Expand Up @@ -395,9 +395,19 @@ function(patterngeneset, whichpattern=1, padj_threshold = 0.05)
#' @rdname patternMarkers-methods
#' @aliases patternMarkers
setMethod("patternMarkers", signature(object="CogapsResult"),
function(object, threshold, lp){
Amatrix <- object@featureLoadings
Pmatrix <- t(object@sampleFactors)
function(object, threshold, lp, axis){
#look for features-markers of patterns with axis=1
#or samples-markers of patterns with axis=2
if(axis == 1){
Amatrix <- object@featureLoadings
Pmatrix <- t(object@sampleFactors)
} else if(axis == 2){
Amatrix <- object@sampleFactors
Pmatrix <- t(object@featureLoadings)
} else {
stop("axis must be 1 or 2")
}


# determine norm for A if Ps were rescaled to have max 1
pscale <- apply(Pmatrix,1,max)
Expand Down Expand Up @@ -432,37 +442,56 @@ function(object, threshold, lp){
ssranks<-matrix(NA, nrow=nrow(Amatrix), ncol=ncol(Amatrix),dimnames=dimnames(Amatrix))
}

#container for feature scores
ssscores<-ssranks

#for each lp, calculate the L2 distance from each row of A to lp[i], rank
for (i in seq_along(lp)){
sstat <- apply(Arowmax, 1, function(x) sqrt(t(x-lp[[i]])%*%(x-lp[[i]])))
ssranks[,i] <- rank(sstat, ties.method="first")
ssscores[,i] <- sstat
}

if(threshold=="all"){
ssgenes.th <- .patternMarkers_all(ssranks)
} else if(threshold=="cut"){
ssgenes.th <- .patternMarkers_cut(ssranks)
}

return(list("PatternMarkers"=ssgenes.th,
"PatternRanks"=ssranks,
"PatternScores"=ssscores))

})

#' @noRd
.patternMarkers_all <- function(ssranks) {
pIndx<-apply(ssranks,1,which.min)
pNames<-setNames(seq_along(lp), names(lp))
pNames<-setNames(seq_along(colnames(ssranks)), colnames(ssranks))
ssgenes.th <- lapply(pNames,function(x) names(pIndx[pIndx==x]))

#sort genes by rank for output
for (i in seq_along(ssgenes.th)){
order <- names(sort(ssranks[,i]))
ssgenes.th[[i]] <- intersect(order, ssgenes.th[[i]])
}
} else if(threshold=="cut"){
ssgenes.th <- list()
for (i in seq_along(lp)){
sortSim <- names(sort(ssranks[,i], decreasing = FALSE))
#first intra-pattern rank that is worse than inter-pattern rank
geneThresh <- min(which(ssranks[sortSim, i] > apply(ssranks[sortSim,], 1, min)))
markerGenes <- sortSim[1:geneThresh-1]
ssgenes.th[[i]] <- markerGenes
}
names(ssgenes.th) <- names(lp)
}
return(ssgenes.th)
}

return(list("PatternMarkers"=ssgenes.th,"PatternRanks"=ssranks))
#' @noRd
.patternMarkers_cut <- function(ssranks) {
ssgenes.th <- list()
for (i in seq_along(colnames(ssranks))){
sortSim <- names(sort(ssranks[,i], decreasing = FALSE))
#first intra-pattern rank that is worse than inter-pattern rank
geneThresh <- min(which(ssranks[sortSim, i] > apply(ssranks[sortSim,], 1, min)))
markerGenes <- sortSim[1:geneThresh-1]
ssgenes.th[[i]] <- markerGenes
}
names(ssgenes.th) <- colnames(ssranks)

})
return(ssgenes.th)
}

#' @rdname calcCoGAPSStat-methods
#' @aliases calcCoGAPSStat
Expand Down
22 changes: 13 additions & 9 deletions man/patternMarkers-methods.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions tests/testthat/test_patternMarkers.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,32 @@ test_that("test outputs generic in threshold = all", {
nrow(res@featureLoadings))
})

test_that("all outputs are present", {
data(GIST)
res <- CoGAPS(GIST.data_frame, nIterations=100,
seed=1, messages=FALSE)
test <- patternMarkers(res)
expect_true(all(c("PatternMarkers", "PatternRanks", "PatternScores") %in%
names(test)))
})

test_that("all outputs are present with non-default axis", {
data(GIST)
res <- CoGAPS(GIST.data_frame, nIterations=100,
seed=1, messages=FALSE)
test <- patternMarkers(res, axis = 2)
expect_true(all(c("PatternMarkers", "PatternRanks", "PatternScores") %in%
names(test)))
})

test_that("all samples present with non-default axis and threshold='all' ", {
data(GIST)
res <- CoGAPS(GIST.data_frame, nIterations=100,
seed=1, messages=FALSE)
test <- patternMarkers(res, axis = 2, threshold = "all")
expect_true(all(unique(unlist(test$PatternMarkers))
%in% rownames(res@sampleFactors)))
})

############################## functional tests ###############################
gapsMock <- function(mock){
Expand Down
Loading
Loading