Skip to content

Commit

Permalink
Merge pull request #477 from oooo26/master
Browse files Browse the repository at this point in the history
Support AUC on CV for classification problem
  • Loading branch information
oooo26 authored Jan 31, 2023
2 parents 994fe73 + c903187 commit 7d2bd24
Show file tree
Hide file tree
Showing 18 changed files with 352 additions and 135 deletions.
2 changes: 1 addition & 1 deletion R-package/R/abesspca.R
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ abesspca <- function(x,
exchange_num = c_max,
path_type = path_type,
is_warm_start = warm.start,
ic_type = 1,
ic_type = ic_type,
ic_coef = ic_scale,
Kfold = nfolds,
sequence = s_list_bool,
Expand Down
2 changes: 1 addition & 1 deletion R-package/R/utility.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ map_tunetype2numeric <- function(tune.type) {
"bic" = 2,
"gic" = 3,
"ebic" = 4,
"cv" = 1
"cv" = 0
)
ic_type
}
Expand Down
67 changes: 49 additions & 18 deletions python/abess/bess_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class bess_base(BaseEstimator):
- If alpha = 0, it indicates ordinary least square.
ic_type : {'aic', 'bic', 'gic', 'ebic'}, optional, default='ebic'
ic_type : {'aic', 'bic', 'gic', 'ebic', 'loss'}, optional, default='ebic'
The type of criterion for choosing the support size if `cv=1`.
ic_coef : float, optional, default=1.0
Constant that controls the regularization strength
Expand All @@ -52,6 +52,14 @@ class bess_base(BaseEstimator):
- If cv>1, support size will be chosen by the CV score on test data
(see `cv_score`; test loss by default), instead of IC.
cv_score : {'test_loss', ...}, optional, default='test_loss'
The score used on test data for CV.
- All methods support {'test_loss'}.
- LogisticRegression also supports {'roc_auc'}.
- MultinomialRegression also supports {'roc_auc_ovo', 'roc_auc_ovr'},
which indicate "One vs One/Rest" algorithm, respectively.
thread : int, optional, default=1
Max number of multithreads.
Expand Down Expand Up @@ -131,6 +139,7 @@ def __init__(
ic_type="ebic",
ic_coef=1.0,
cv=1,
cv_score="test_loss",
thread=1,
A_init=None,
always_select=None,
Expand Down Expand Up @@ -170,6 +179,7 @@ def __init__(
self.ic_type = ic_type
self.ic_coef = ic_coef
self.cv = cv
self.cv_score = cv_score
self.screening_size = screening_size
self.always_select = always_select
self.primary_model_fit_max_iter = primary_model_fit_max_iter
Expand Down Expand Up @@ -323,28 +333,49 @@ def fit(self,
else:
raise ValueError("path_type should be \'seq\' or \'gs\'")

# Ic_type: aic, bic, gic, ebic
if self.ic_type == "aic":
ic_type_int = 1
elif self.ic_type == "bic":
ic_type_int = 2
elif self.ic_type == "gic":
ic_type_int = 3
elif self.ic_type == "ebic":
ic_type_int = 4
elif self.ic_type == "hic":
ic_type_int = 5
else:
raise ValueError(
"ic_type should be \"aic\", \"bic\", \"ebic\","
" \"gic\" or \"hic\".")

# cv
if (not isinstance(self.cv, int) or self.cv <= 0):
raise ValueError("cv should be an positive integer.")
if self.cv > n:
raise ValueError("cv should be smaller than n.")

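# ic_type (used when cv == 1): loss, aic, bic, gic, ebic, hic
# cv_score (used when cv > 1): test_loss, roc_auc (Logistic only),
# roc_auc_ovo / roc_auc_ovr (Multinomial only)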
if self.cv == 1:
if self.ic_type == "loss":
eval_type_int = 0
elif self.ic_type == "aic":
eval_type_int = 1
elif self.ic_type == "bic":
eval_type_int = 2
elif self.ic_type == "gic":
eval_type_int = 3
elif self.ic_type == "ebic":
eval_type_int = 4
elif self.ic_type == "hic":
eval_type_int = 5
else:
raise ValueError(
"ic_type should be \"aic\", \"bic\", \"ebic\","
" \"gic\" or \"hic\".")
else:
if self.cv_score == "test_loss":
eval_type_int = 0
elif self.cv_score == "roc_auc" and self.model_type == "Logistic":
eval_type_int = 1
elif (self.cv_score == "roc_auc_ovo" and
self.model_type == "Multinomial"):
eval_type_int = 2
elif (self.cv_score == "roc_auc_ovr" and
self.model_type == "Multinomial"):
eval_type_int = 3
else:
raise ValueError(
"cv_score should be \"test_loss\", "
"\"roc_auc\"(for logistic), "
"\"roc_auc_ovo\"(for multinomial), or "
"\"roc_auc_ovr\"(for multinomial).")

# cv_fold_id
if cv_fold_id is None:
cv_fold_id = np.array([], dtype="int32")
Expand Down Expand Up @@ -561,7 +592,7 @@ def fit(self,
X, y, sample_weight, n, p, normalize, algorithm_type_int,
model_type_int,
self.max_iter, self.exchange_num, path_type_int,
self.is_warm_start, ic_type_int, self.ic_coef, self.cv,
self.is_warm_start, eval_type_int, self.ic_coef, self.cv,
g_index,
support_sizes, alphas, cv_fold_id, new_s_min, new_s_max,
new_lambda_min, new_lambda_max, n_lambda, self.screening_size,
Expand Down
65 changes: 39 additions & 26 deletions python/abess/decomposition.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ class SparsePCA(bess_base):
- If cv>1, support size will be chosen by CV's test loss,
instead of IC.
cv_score : {'test_loss'}, optional, default='test_loss'
The score used on test data for CV.
Only 'test_loss' is supported for PCA now.
thread : int, optional, default=1
Max number of multithreads.
Expand Down Expand Up @@ -125,8 +129,9 @@ class SparsePCA(bess_base):
"""

def __init__(self, support_size=None, group=None,
ic_type="loss", ic_coef=1.0, cv=1, thread=1,
A_init=None, always_select=None,
ic_type="loss", ic_coef=1.0,
cv=1, cv_score="test_loss",
thread=1, A_init=None, always_select=None,
max_iter=20, exchange_num=5, is_warm_start=True,
splicing_type=1,
screening_size=-1,
Expand All @@ -137,7 +142,7 @@ def __init__(self, support_size=None, group=None,
max_iter=max_iter, exchange_num=exchange_num,
is_warm_start=is_warm_start, support_size=support_size,
# s_min=s_min, s_max=s_max,
ic_type=ic_type, ic_coef=ic_coef, cv=cv,
ic_type=ic_type, ic_coef=ic_coef, cv=cv, cv_score=cv_score,
screening_size=screening_size,
always_select=always_select,
thread=thread,
Expand Down Expand Up @@ -264,23 +269,31 @@ def fit(self, X=None, y=None, is_normal=False,
# model_type_int = 7
path_type_int = 1

# Ic_type
if self.ic_type == "loss":
ic_type_int = 0
elif self.ic_type == "aic":
ic_type_int = 1
elif self.ic_type == "bic":
ic_type_int = 2
elif self.ic_type == "gic":
ic_type_int = 3
elif self.ic_type == "ebic":
ic_type_int = 4
elif self.ic_type == "hic":
ic_type_int = 5
# ic_type (used when cv == 1): loss, aic, bic, gic, ebic, hic
# cv_score (used when cv > 1): only test_loss is supported here
if self.cv == 1:
if self.ic_type == "loss":
eval_type_int = 0
elif self.ic_type == "aic":
eval_type_int = 1
elif self.ic_type == "bic":
eval_type_int = 2
elif self.ic_type == "gic":
eval_type_int = 3
elif self.ic_type == "ebic":
eval_type_int = 4
elif self.ic_type == "hic":
eval_type_int = 5
else:
raise ValueError(
"ic_type should be \"aic\", \"bic\", \"ebic\","
" \"gic\" or \"hic\".")
else:
raise ValueError(
"ic_type should be \"loss\", \"aic\", \"bic\","
" \"ebic\", \"gic\" or \"hic\".")
if self.cv_score == "test_loss":
eval_type_int = 0
else:
raise ValueError(
"cv_score should be \"test_loss\".")

# cv
if (not isinstance(self.cv, int) or self.cv <= 0):
Expand Down Expand Up @@ -425,7 +438,7 @@ def fit(self, X=None, y=None, is_normal=False,
n, p, normalize, Sigma,
self.max_iter, self.exchange_num,
path_type_int, self.is_warm_start,
ic_type_int, self.ic_coef, self.cv,
eval_type_int, self.ic_coef, self.cv,
g_index,
support_sizes,
cv_fold_id,
Expand Down Expand Up @@ -633,15 +646,15 @@ def fit(self, X, y=None, r=None, sparse_matrix=False):

# Ic_type
if self.ic_type == "aic":
ic_type_int = 1
eval_type_int = 1
elif self.ic_type == "bic":
ic_type_int = 2
eval_type_int = 2
elif self.ic_type == "gic":
ic_type_int = 3
eval_type_int = 3
elif self.ic_type == "ebic":
ic_type_int = 4
eval_type_int = 4
elif self.ic_type == "hic":
ic_type_int = 5
eval_type_int = 5
else:
raise ValueError(
"ic_type should be \"aic\", \"bic\", \"ebic\", \"gic\", "
Expand Down Expand Up @@ -769,7 +782,7 @@ def fit(self, X, y=None, r=None, sparse_matrix=False):
X, n, p, normalize,
self.max_iter, self.exchange_num,
path_type_int, self.is_warm_start,
ic_type_int, self.ic_coef,
eval_type_int, self.ic_coef,
g_index,
support_sizes,
alphas,
Expand Down
Loading

0 comments on commit 7d2bd24

Please sign in to comment.