diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R
index 284d860c..1544cf26 100644
--- a/R/EnsembleFSResult.R
+++ b/R/EnsembleFSResult.R
@@ -126,11 +126,17 @@ EnsembleFSResult = R6Class("EnsembleFSResult",
     #' The weights used are equal to the performance scores of each voter/model (or the inverse scores if the measure is minimized).
     #' The un-weighted methods use same weights for all voters (equal to 1).
     #'
+    #' Note that some methods output a feature ranking instead of a score per feature.
+    #' Therefore we also calculate **Borda's score**:
+    #' \eqn{s_{borda} = (p-i)/(p-1)}, where \eqn{p} is the total number of features, and \eqn{i} is the feature ranking.
+    #' So the best feature gets a Borda score of \eqn{1} and the worst-ranked feature a Borda score of \eqn{0}.
+    #' This score is method-agnostic, i.e. it can be used to compare feature rankings across different methods.
+    #'
     #' The following methods are currently supported:
     #'
     #' - `"av"|"av_weighted"` (approval voting) selects the candidates that have the highest approval score, i.e. the features that appear the most often.
     #' This is the default feature ranking method.
-    #' - `"sav"|"sav_weighted"` (satisfaction approval voting) selects the candidates that have a higher satisfaction score,in proportion to the size of the voters approval sets.
+    #' - `"sav"|"sav_weighted"` (satisfaction approval voting) selects the candidates that have a higher satisfaction score, in proportion to the size of the voters' approval sets.
     #' Voters who approve more candidates contribute a lesser score to the individual approved candidates.
     #'
     #' @param method (`character(1)`)\cr
     #'
@@ -138,6 +144,8 @@ EnsembleFSResult = R6Class("EnsembleFSResult",
     #'
     #' @return A [data.table::data.table] listing all the features, ordered by decreasing scores (depends on the `"method"`).
     #' An extra column `"norm_score"` is produced for methods for which the original scores (i.e. approval counts in the case of approval voting) can be normalized and interpreted as **selection probabilities**, see Meinshausen et al. (2010).
+    #' The `"borda_score"` column is always included, to accommodate feature ranking methods that output only a ranking and no per-feature scores.
+    #'
     feature_ranking = function(method = "av") {
       assert_choice(method, choices = c("av", "av_weighted", "sav", "sav_weighted"))
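To make the new Borda score concrete, here is a minimal toy sketch (not part of the diff; the feature names and scores are made up) that mirrors the `data.table` idiom used in `R/voting_methods.R` below:

```r
library(data.table)

# Toy data: p = 4 features with made-up scores; s_borda = (p - i) / (p - 1),
# where i is the rank of a feature after sorting by decreasing score
res = data.table(features = c("x1", "x2", "x3", "x4"), score = c(10, 7, 5, 2))
setorderv(res, cols = "score", order = -1)

# .I is the row number, i.e. the rank i: the best feature gets 1, the worst 0
res[, borda_score := (nrow(res) - .I) / (nrow(res) - 1)]
res$borda_score  # 1.0000000 0.6666667 0.3333333 0.0000000
```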
diff --git a/R/voting_methods.R b/R/voting_methods.R
index 2f69a1c8..a39ff547 100644
--- a/R/voting_methods.R
+++ b/R/voting_methods.R
@@ -1,9 +1,10 @@
 # some collection of voting methods for feature ranking
-# Parameters:
 # @param voters list of feature vectors (best features, each a subset of "candidates")
 # @param candidates vector with ALL features
 # @param weights vector of weights, 1-1 correspondence with voters
+# @return data.table with 4 columns: features (mandatory), score, norm_score, borda_score (mandatory).
+# Features are always ordered by decreasing `score` (or by decreasing `borda_score` if a method returns only a ranking).

 approval_voting = function(voters, candidates, weights) {
   # faster R version in case of equal weights
@@ -31,10 +32,11 @@ approval_voting = function(voters, candidates, weights) {
     setorderv(res, cols = "score", order = -1)
   }

-  res
+  res[, borda_score := (nrow(res) - .I) / (nrow(res) - 1)]
 }

 satisfaction_approval_voting = function(voters, candidates, weights) {
   res = as.data.table(SAV_rcpp(voters, candidates, weights))
   setorderv(res, cols = "score", order = -1)
+  res[, borda_score := (nrow(res) - .I) / (nrow(res) - 1)]
 }
diff --git a/man/AutoFSelector.Rd b/man/AutoFSelector.Rd
index a7d46be4..9c59c626 100644
--- a/man/AutoFSelector.Rd
+++ b/man/AutoFSelector.Rd
@@ -149,7 +149,6 @@ Hash (unique identifier) for this partial object, excluding some components whic
 \if{html}{\out{
Inherited methods
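Finally, a hypothetical usage sketch of the documented return value; `efsr` stands in for an already constructed `EnsembleFSResult`, and the column selection assumes the 4-column contract described in `R/voting_methods.R`:

```r
# Hypothetical: `efsr` is an existing EnsembleFSResult object
ranking = efsr$feature_ranking(method = "sav")

# `borda_score` always lies in [0, 1], so rankings produced by "av" and "sav"
# can be compared directly even though their raw scores are on different scales
head(ranking[, c("features", "borda_score")])
```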