diff --git a/.gitignore b/.gitignore index aa0113dd0..ffe64042f 100644 --- a/.gitignore +++ b/.gitignore @@ -19,7 +19,7 @@ doc/datasets/generated/ doc/generated/ # Distribution / packaging - +mapieenv .Python env/ build/ diff --git a/AUTHORS.rst b/AUTHORS.rst index 8fc0ca2d5..c23e89db9 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -24,4 +24,5 @@ Contributors * Louis Lacombe * Arnaud Capitaine * Tarik Tazi +* Matthias Adamsen To be continued ... diff --git a/Makefile b/Makefile index 5cadb8574..a3561eef2 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ .PHONY: tests doc build lint: - flake8 . --exclude=doc + flake8 . --exclude=doc,mapieenv type-check: mypy mapie diff --git a/mapie/classification.py b/mapie/classification.py index 815f112d7..a14344fcb 100644 --- a/mapie/classification.py +++ b/mapie/classification.py @@ -167,7 +167,13 @@ class MapieClassifier(BaseEstimator, ClassifierMixin): [False False True]] """ - valid_methods_ = ["naive", "score", "cumulated_score", "top_k", "raps"] + valid_methods_ = ["naive", + "score", + "cumulated_score", + "top_k", + "raps", + "mondrian" + ] fit_attributes = [ "single_estimator_", "estimators_", @@ -1003,7 +1009,11 @@ def fit( X, y = indexable(X, y) y = _check_y(y) - assert type_of_target(y) == "multiclass" + if type_of_target(y) != "multiclass" and self.method != "mondrian": + raise ValueError( + "Invalid method. " + "Binary classification problems require the mondrian method. " + ) sample_weight, X, y = check_null_weight(sample_weight, X, y) y = cast(NDArray, y) n_samples = _num_samples(y) @@ -1077,6 +1087,9 @@ def fit( self.conformity_scores_ = np.take_along_axis( 1 - y_pred_proba, y.reshape(-1, 1), axis=1 ) + elif self.method == "mondrian": + self.conformity_scores_ = 1-y_pred_proba + elif self.method in ["cumulated_score", "raps"]: self.conformity_scores_, self.cutoff = ( self._get_true_label_cumsum_proba( @@ -1255,11 +1268,20 @@ def predict( self.conformity_scores_regularized, alpha_np ) + elif self.method == "mondrian": + + self.quantiles_ = compute_quantiles( + self.conformity_scores_, + alpha_np, + mondrian=True + ) else: + self.quantiles_ = compute_quantiles( self.conformity_scores_, alpha_np ) + else: self.quantiles_ = (n + 1) * (1 - alpha_np) @@ -1352,6 +1374,14 @@ def predict( - y_pred_proba_last, -EPSILON ) + elif self.method == "mondrian": + self.quantiles_ = np.transpose(self.quantiles_, [1, 0]) + + prediction_sets = np.greater_equal( + y_pred_proba - (1 - self.quantiles_), + -EPSILON + ) + else: raise ValueError( "Invalid method. " diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py index 510f11010..96faf502f 100644 --- a/mapie/tests/test_classification.py +++ b/mapie/tests/test_classification.py @@ -22,7 +22,7 @@ from mapie.metrics import classification_coverage_score from mapie.utils import check_alpha -METHODS = ["score", "cumulated_score", "raps"] +METHODS = ["score", "cumulated_score", "raps", "mondrian"] WRONG_METHODS = ["scores", "cumulated", "test", "", 1, 2.5, (1, 2)] WRONG_INCLUDE_LABELS = ["randomised", "True", "False", "other", 1, 2.5, (1, 2)] Y_PRED_PROBA_WRONG = [ @@ -263,6 +263,17 @@ agg_scores="mean" ) ), + "mondrian": ( + Params( + method="mondrian", + cv="prefit", + random_state=None + ), + ParamsPredict( + include_last_label="randomized", + agg_scores="mean" + ) + ), } COVERAGES = { @@ -281,7 +292,8 @@ "naive": 5 / 9, "top_k": 1, "raps": 1, - "raps_randomized": 8/9 + "raps_randomized": 8/9, + "mondrian": 1 } X_toy = np.arange(9).reshape(-1, 1) @@ -464,6 +476,17 @@ [False, True, True], [False, False, True], ], + "mondrian": [ + [True, False, False], + [True, False, False], + [True, True, False], + [True, True, True], + [True, True, True], + [True, True, True], + [False, True, True], + [False, True, True], + [False, False, True], + ], } REGULARIZATION_PARAMETERS = [ @@ -865,6 +888,7 @@ def test_toy_dataset_predictions(strategy: str) -> None: include_last_label=args_predict["include_last_label"], agg_scores=args_predict["agg_scores"] ) + np.testing.assert_allclose(y_ps[:, :, 0], y_toy_mapie[strategy]) np.testing.assert_allclose( classification_coverage_score(y_toy, y_ps[:, :, 0]), diff --git a/mapie/utils.py b/mapie/utils.py index a19be25c5..62549ccbe 100644 --- a/mapie/utils.py +++ b/mapie/utils.py @@ -616,7 +616,9 @@ def check_alpha_and_last_axis(vector: NDArray, alpha_np: NDArray): return vector, alpha_np -def compute_quantiles(vector: NDArray, alpha: NDArray) -> NDArray: +def compute_quantiles(vector: NDArray, + alpha: NDArray, + mondrian=False) -> NDArray: """Compute the desired quantiles of a vector. Parameters @@ -634,15 +636,23 @@ def compute_quantiles(vector: NDArray, alpha: NDArray) -> NDArray: Quantiles of the vector. """ n = len(vector) - if len(vector.shape) <= 2: + if len(vector.shape) <= 2 and not mondrian: + quantiles_ = np.stack([ + np_quantile( + vector, + ((n + 1) * (1 - _alpha)) / n, + method="higher" + ) for _alpha in alpha + ]) + elif len(vector.shape) <= 2 and mondrian: quantiles_ = np.stack([ np_quantile( vector, ((n + 1) * (1 - _alpha)) / n, method="higher", + axis=0 ) for _alpha in alpha ]) - else: check_alpha_and_last_axis(vector, alpha) quantiles_ = np.stack(