Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support AUC on CV for classification problem #477

Merged
merged 12 commits into from
Jan 31, 2023
2 changes: 1 addition & 1 deletion R-package/R/abesspca.R
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ abesspca <- function(x,
exchange_num = c_max,
path_type = path_type,
is_warm_start = warm.start,
ic_type = 1,
ic_type = ic_type,
ic_coef = ic_scale,
Kfold = nfolds,
sequence = s_list_bool,
Expand Down
2 changes: 1 addition & 1 deletion R-package/R/utility.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ map_tunetype2numeric <- function(tune.type) {
"bic" = 2,
"gic" = 3,
"ebic" = 4,
"cv" = 1
"cv" = 0
)
ic_type
}
Expand Down
67 changes: 49 additions & 18 deletions python/abess/bess_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class bess_base(BaseEstimator):

- If alpha = 0, it indicates ordinary least square.

ic_type : {'aic', 'bic', 'gic', 'ebic'}, optional, default='ebic'
ic_type : {'aic', 'bic', 'gic', 'ebic', 'loss'}, optional, default='ebic'
The type of criterion for choosing the support size if `cv=1`.
ic_coef : float, optional, default=1.0
Constant that controls the regularization strength
Expand All @@ -52,6 +52,14 @@ class bess_base(BaseEstimator):
- If cv>1, support size will be chosen by CV's test loss,
instead of IC.

cv_score : {'test_loss', ...}, optional, default='test_loss'
The score used on test data for CV.

- All methods support {'test_loss'}.
- LogisticRegression also supports {'roc_auc'}.
- MultinomialRegression also supports {'roc_auc_ovo', 'roc_auc_ovr'},
which indicate the "One vs One" and "One vs Rest" algorithms, respectively.

thread : int, optional, default=1
Max number of multithreads.

Expand Down Expand Up @@ -131,6 +139,7 @@ def __init__(
ic_type="ebic",
ic_coef=1.0,
cv=1,
cv_score="test_loss",
thread=1,
A_init=None,
always_select=None,
Expand Down Expand Up @@ -170,6 +179,7 @@ def __init__(
self.ic_type = ic_type
self.ic_coef = ic_coef
self.cv = cv
self.cv_score = cv_score
self.screening_size = screening_size
self.always_select = always_select
self.primary_model_fit_max_iter = primary_model_fit_max_iter
Expand Down Expand Up @@ -323,28 +333,49 @@ def fit(self,
else:
raise ValueError("path_type should be \'seq\' or \'gs\'")

# Ic_type: aic, bic, gic, ebic
if self.ic_type == "aic":
ic_type_int = 1
elif self.ic_type == "bic":
ic_type_int = 2
elif self.ic_type == "gic":
ic_type_int = 3
elif self.ic_type == "ebic":
ic_type_int = 4
elif self.ic_type == "hic":
ic_type_int = 5
else:
raise ValueError(
"ic_type should be \"aic\", \"bic\", \"ebic\","
" \"gic\" or \"hic\".")

# cv
if (not isinstance(self.cv, int) or self.cv <= 0):
raise ValueError("cv should be an positive integer.")
if self.cv > n:
raise ValueError("cv should be smaller than n.")

# Ic_type: aic, bic, gic, ebic
# cv_score: test_loss, roc_auc
if self.cv == 1:
if self.ic_type == "loss":
eval_type_int = 0
elif self.ic_type == "aic":
eval_type_int = 1
elif self.ic_type == "bic":
eval_type_int = 2
elif self.ic_type == "gic":
eval_type_int = 3
elif self.ic_type == "ebic":
eval_type_int = 4
elif self.ic_type == "hic":
eval_type_int = 5
else:
raise ValueError(
"ic_type should be \"aic\", \"bic\", \"ebic\","
" \"gic\" or \"hic\".")
else:
if self.cv_score == "test_loss":
eval_type_int = 0
elif self.cv_score == "roc_auc" and self.model_type == "Logistic":
eval_type_int = 1
elif (self.cv_score == "roc_auc_ovo" and
self.model_type == "Multinomial"):
eval_type_int = 2
elif (self.cv_score == "roc_auc_ovr" and
self.model_type == "Multinomial"):
eval_type_int = 3
else:
raise ValueError(
"cv_score should be \"test_loss\", "
"\"roc_auc\"(for logistic), "
"\"roc_auc_ovo\"(for multinomial), or "
"\"roc_auc_ovr\"(for multinomial).")

# cv_fold_id
if cv_fold_id is None:
cv_fold_id = np.array([], dtype="int32")
Expand Down Expand Up @@ -561,7 +592,7 @@ def fit(self,
X, y, sample_weight, n, p, normalize, algorithm_type_int,
model_type_int,
self.max_iter, self.exchange_num, path_type_int,
self.is_warm_start, ic_type_int, self.ic_coef, self.cv,
self.is_warm_start, eval_type_int, self.ic_coef, self.cv,
g_index,
support_sizes, alphas, cv_fold_id, new_s_min, new_s_max,
new_lambda_min, new_lambda_max, n_lambda, self.screening_size,
Expand Down
65 changes: 39 additions & 26 deletions python/abess/decomposition.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ class SparsePCA(bess_base):
- If cv>1, support size will be chosen by CV's test loss,
instead of IC.

cv_score : {'test_loss'}, optional, default='test_loss'
The score used on test data for CV.
Only 'test_loss' is supported for PCA now.

thread : int, optional, default=1
Max number of multithreads.

Expand Down Expand Up @@ -125,8 +129,9 @@ class SparsePCA(bess_base):
"""

def __init__(self, support_size=None, group=None,
ic_type="loss", ic_coef=1.0, cv=1, thread=1,
A_init=None, always_select=None,
ic_type="loss", ic_coef=1.0,
cv=1, cv_score="test_loss",
thread=1, A_init=None, always_select=None,
max_iter=20, exchange_num=5, is_warm_start=True,
splicing_type=1,
screening_size=-1,
Expand All @@ -137,7 +142,7 @@ def __init__(self, support_size=None, group=None,
max_iter=max_iter, exchange_num=exchange_num,
is_warm_start=is_warm_start, support_size=support_size,
# s_min=s_min, s_max=s_max,
ic_type=ic_type, ic_coef=ic_coef, cv=cv,
ic_type=ic_type, ic_coef=ic_coef, cv=cv, cv_score=cv_score,
screening_size=screening_size,
always_select=always_select,
thread=thread,
Expand Down Expand Up @@ -264,23 +269,31 @@ def fit(self, X=None, y=None, is_normal=False,
# model_type_int = 7
path_type_int = 1

# Ic_type
if self.ic_type == "loss":
ic_type_int = 0
elif self.ic_type == "aic":
ic_type_int = 1
elif self.ic_type == "bic":
ic_type_int = 2
elif self.ic_type == "gic":
ic_type_int = 3
elif self.ic_type == "ebic":
ic_type_int = 4
elif self.ic_type == "hic":
ic_type_int = 5
# Ic_type: aic, bic, gic, ebic
# cv_score: test_loss, roc_auc
if self.cv == 1:
if self.ic_type == "loss":
eval_type_int = 0
elif self.ic_type == "aic":
eval_type_int = 1
elif self.ic_type == "bic":
eval_type_int = 2
elif self.ic_type == "gic":
eval_type_int = 3
elif self.ic_type == "ebic":
eval_type_int = 4
elif self.ic_type == "hic":
eval_type_int = 5
else:
raise ValueError(
"ic_type should be \"aic\", \"bic\", \"ebic\","
" \"gic\" or \"hic\".")
else:
raise ValueError(
"ic_type should be \"loss\", \"aic\", \"bic\","
" \"ebic\", \"gic\" or \"hic\".")
if self.cv_score == "test_loss":
eval_type_int = 0
else:
raise ValueError(
"cv_score should be \"test_loss\".")

# cv
if (not isinstance(self.cv, int) or self.cv <= 0):
Expand Down Expand Up @@ -425,7 +438,7 @@ def fit(self, X=None, y=None, is_normal=False,
n, p, normalize, Sigma,
self.max_iter, self.exchange_num,
path_type_int, self.is_warm_start,
ic_type_int, self.ic_coef, self.cv,
eval_type_int, self.ic_coef, self.cv,
g_index,
support_sizes,
cv_fold_id,
Expand Down Expand Up @@ -633,15 +646,15 @@ def fit(self, X, y=None, r=None, sparse_matrix=False):

# Ic_type
if self.ic_type == "aic":
ic_type_int = 1
eval_type_int = 1
elif self.ic_type == "bic":
ic_type_int = 2
eval_type_int = 2
elif self.ic_type == "gic":
ic_type_int = 3
eval_type_int = 3
elif self.ic_type == "ebic":
ic_type_int = 4
eval_type_int = 4
elif self.ic_type == "hic":
ic_type_int = 5
eval_type_int = 5
else:
raise ValueError(
"ic_type should be \"aic\", \"bic\", \"ebic\", \"gic\", "
Expand Down Expand Up @@ -769,7 +782,7 @@ def fit(self, X, y=None, r=None, sparse_matrix=False):
X, n, p, normalize,
self.max_iter, self.exchange_num,
path_type_int, self.is_warm_start,
ic_type_int, self.ic_coef,
eval_type_int, self.ic_coef,
g_index,
support_sizes,
alphas,
Expand Down
Loading