From 51fb5e0a19e784ed22bd1867db923de845c76e08 Mon Sep 17 00:00:00 2001 From: mata Date: Wed, 26 Oct 2022 14:52:10 +0200 Subject: [PATCH 01/10] Progress on adding the mondrian conformal --- .gitignore | 2 +- mapie/classification.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index aa0113dd0..ffe64042f 100644 --- a/.gitignore +++ b/.gitignore @@ -19,7 +19,7 @@ doc/datasets/generated/ doc/generated/ # Distribution / packaging - +mapieenv .Python env/ build/ diff --git a/mapie/classification.py b/mapie/classification.py index 815f112d7..ccadf41a5 100644 --- a/mapie/classification.py +++ b/mapie/classification.py @@ -1003,7 +1003,11 @@ def fit( X, y = indexable(X, y) y = _check_y(y) - assert type_of_target(y) == "multiclass" + if type_of_target(y) != "multiclass" and self.method != "mondrian": + raise ValueError( + "Invalid method. " + "Binary classification problems require the mondrian method. " + ) sample_weight, X, y = check_null_weight(sample_weight, X, y) y = cast(NDArray, y) n_samples = _num_samples(y) @@ -1352,6 +1356,7 @@ def predict( - y_pred_proba_last, -EPSILON ) + elif self. else: raise ValueError( "Invalid method. " From cb84ddf42f89d4231b158af8c5eb901c0a7deb7f Mon Sep 17 00:00:00 2001 From: mata Date: Wed, 26 Oct 2022 15:26:49 +0200 Subject: [PATCH 02/10] v 0.1.0 of mondrian finished --- mapie/classification.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/mapie/classification.py b/mapie/classification.py index ccadf41a5..1ee629998 100644 --- a/mapie/classification.py +++ b/mapie/classification.py @@ -167,7 +167,7 @@ class MapieClassifier(BaseEstimator, ClassifierMixin): [False False True]] """ - valid_methods_ = ["naive", "score", "cumulated_score", "top_k", "raps"] + valid_methods_ = ["naive", "score", "cumulated_score", "top_k", "raps","mondrian"] fit_attributes = [ "single_estimator_", "estimators_", @@ -1081,6 +1081,9 @@ def fit( self.conformity_scores_ = np.take_along_axis( 1 - y_pred_proba, y.reshape(-1, 1), axis=1 ) + elif self.method == "mondrian": + self.conformity_scores_ = 1-y_pred_proba + elif self.method in ["cumulated_score", "raps"]: self.conformity_scores_, self.cutoff = ( self._get_true_label_cumsum_proba( @@ -1227,7 +1230,7 @@ def predict( check_alpha_and_n_samples(alpha_np, n) if self.method == "naive": - self.quantiles_ = 1 - alpha_np + self.quantiles_ = 1 - alpha_np else: if (cv == "prefit") or (agg_scores in ["mean"]): if self.method == "raps": @@ -1356,7 +1359,12 @@ def predict( - y_pred_proba_last, -EPSILON ) - elif self. + elif self.method == "mondrian": #TODO: + prediction_sets = np.greater_equal( + y_pred_proba,self.quantiles_ + ) + + else: raise ValueError( "Invalid method. " From ae8b009eaf54ffb383d7c5f48cad5c43c9405a41 Mon Sep 17 00:00:00 2001 From: mata Date: Wed, 2 Nov 2022 09:37:01 +0100 Subject: [PATCH 03/10] Added mondrian --- mapie/classification.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mapie/classification.py b/mapie/classification.py index 1ee629998..022239454 100644 --- a/mapie/classification.py +++ b/mapie/classification.py @@ -1263,10 +1263,12 @@ def predict( alpha_np ) else: + print("1") self.quantiles_ = compute_quantiles( self.conformity_scores_, alpha_np ) + print("2") else: self.quantiles_ = (n + 1) * (1 - alpha_np) From ae8f4b00d433cbbf9152a514403f192b595692f7 Mon Sep 17 00:00:00 2001 From: mata Date: Wed, 2 Nov 2022 10:18:16 +0100 Subject: [PATCH 04/10] Finished implementing the mondrian conformal --- mapie/classification.py | 12 ++++++++++-- mapie/utils.py | 14 +++++++++++--- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/mapie/classification.py b/mapie/classification.py index 022239454..cec225c97 100644 --- a/mapie/classification.py +++ b/mapie/classification.py @@ -1262,13 +1262,19 @@ def predict( self.conformity_scores_regularized, alpha_np ) + elif self.method == "mondrian": + self.quantiles_ = compute_quantiles( + self.conformity_scores_, + alpha_np, + mondrian = True + ) else: - print("1") + self.quantiles_ = compute_quantiles( self.conformity_scores_, alpha_np ) - print("2") + else: self.quantiles_ = (n + 1) * (1 - alpha_np) @@ -1362,6 +1368,8 @@ def predict( -EPSILON ) elif self.method == "mondrian": #TODO: + print(y_pred_proba) + print(y_pred_proba.shape) prediction_sets = np.greater_equal( y_pred_proba,self.quantiles_ ) diff --git a/mapie/utils.py b/mapie/utils.py index a19be25c5..e6ed5a8af 100644 --- a/mapie/utils.py +++ b/mapie/utils.py @@ -616,7 +616,7 @@ def check_alpha_and_last_axis(vector: NDArray, alpha_np: NDArray): return vector, alpha_np -def compute_quantiles(vector: NDArray, alpha: NDArray) -> NDArray: +def compute_quantiles(vector: NDArray, alpha: NDArray,mondrian = False) -> NDArray: """Compute the desired quantiles of a vector. Parameters @@ -634,15 +634,23 @@ def compute_quantiles(vector: NDArray, alpha: NDArray) -> NDArray: Quantiles of the vector. """ n = len(vector) - if len(vector.shape) <= 2: + if len(vector.shape) <= 2 and not mondrian: + quantiles_ = np.stack([ + np_quantile( + vector, + ((n + 1) * (1 - _alpha)) / n, + method="higher" + ) for _alpha in alpha + ]) + elif len(vector.shape) <= 2 and mondrian: quantiles_ = np.stack([ np_quantile( vector, ((n + 1) * (1 - _alpha)) / n, method="higher", + axis=1 ) for _alpha in alpha ]) - else: check_alpha_and_last_axis(vector, alpha) quantiles_ = np.stack( From 48f132770e0fd29b94087f2d52a0ce3070a67841 Mon Sep 17 00:00:00 2001 From: mata Date: Wed, 2 Nov 2022 11:04:41 +0100 Subject: [PATCH 05/10] 'Pride cometh before the fall' The actual implementation of the mondrian conformal now complete --- mapie/classification.py | 15 +++++++++++---- mapie/utils.py | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/mapie/classification.py b/mapie/classification.py index cec225c97..c535c9be4 100644 --- a/mapie/classification.py +++ b/mapie/classification.py @@ -1263,6 +1263,7 @@ def predict( alpha_np ) elif self.method == "mondrian": + self.quantiles_ = compute_quantiles( self.conformity_scores_, alpha_np, @@ -1281,6 +1282,10 @@ def predict( # Build prediction sets if self.method == "score": if (cv == "prefit") or (agg_scores == "mean"): + print("y proba shape") + print(y_pred_proba.shape) + print("quantiles shape") + print(self.quantiles_.shape) prediction_sets = np.greater_equal( y_pred_proba - (1 - self.quantiles_), -EPSILON ) @@ -1368,11 +1373,12 @@ def predict( -EPSILON ) elif self.method == "mondrian": #TODO: - print(y_pred_proba) - print(y_pred_proba.shape) - prediction_sets = np.greater_equal( + self.quantiles_ = np.transpose(self.quantiles_,[1,0]) + + prediction_sets = (np.greater_equal( y_pred_proba,self.quantiles_ - ) + )) + else: @@ -1380,4 +1386,5 @@ def predict( "Invalid method. " "Allowed values are 'score' or 'cumulated_score'." ) + print(prediction_sets) return y_pred, prediction_sets diff --git a/mapie/utils.py b/mapie/utils.py index e6ed5a8af..849b8d52c 100644 --- a/mapie/utils.py +++ b/mapie/utils.py @@ -648,7 +648,7 @@ def compute_quantiles(vector: NDArray, alpha: NDArray,mondrian = False) -> NDArr vector, ((n + 1) * (1 - _alpha)) / n, method="higher", - axis=1 + axis=0 ) for _alpha in alpha ]) else: From 917c06becdf85ff8b0586577b6db41c3a080d7c1 Mon Sep 17 00:00:00 2001 From: mata Date: Wed, 2 Nov 2022 11:09:44 +0100 Subject: [PATCH 06/10] removed spurious prints --- mapie/classification.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/mapie/classification.py b/mapie/classification.py index c535c9be4..f2f75ae88 100644 --- a/mapie/classification.py +++ b/mapie/classification.py @@ -1282,10 +1282,6 @@ def predict( # Build prediction sets if self.method == "score": if (cv == "prefit") or (agg_scores == "mean"): - print("y proba shape") - print(y_pred_proba.shape) - print("quantiles shape") - print(self.quantiles_.shape) prediction_sets = np.greater_equal( y_pred_proba - (1 - self.quantiles_), -EPSILON ) @@ -1375,9 +1371,9 @@ def predict( elif self.method == "mondrian": #TODO: self.quantiles_ = np.transpose(self.quantiles_,[1,0]) - prediction_sets = (np.greater_equal( - y_pred_proba,self.quantiles_ - )) + prediction_sets = np.greater_equal( + y_pred_proba - (1 - self.quantiles_), -EPSILON + ) @@ -1386,5 +1382,4 @@ def predict( "Invalid method. " "Allowed values are 'score' or 'cumulated_score'." ) - print(prediction_sets) return y_pred, prediction_sets From 53ec51d4d53db5030bcb70366f6deed1eebdb77f Mon Sep 17 00:00:00 2001 From: mata Date: Wed, 16 Nov 2022 09:40:36 +0100 Subject: [PATCH 07/10] Brought changes in line with flake8 formatting --- Makefile | 2 +- mapie/classification.py | 27 ++++++++++++++++----------- mapie/tests/test_classification.py | 13 ++++++++++++- 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index 5cadb8574..4a58860e6 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ .PHONY: tests doc build lint: - flake8 . --exclude=doc + flake8 . --exclude=doc mapieenv type-check: mypy mapie diff --git a/mapie/classification.py b/mapie/classification.py index f2f75ae88..a14344fcb 100644 --- a/mapie/classification.py +++ b/mapie/classification.py @@ -167,7 +167,13 @@ class MapieClassifier(BaseEstimator, ClassifierMixin): [False False True]] """ - valid_methods_ = ["naive", "score", "cumulated_score", "top_k", "raps","mondrian"] + valid_methods_ = ["naive", + "score", + "cumulated_score", + "top_k", + "raps", + "mondrian" + ] fit_attributes = [ "single_estimator_", "estimators_", @@ -1230,7 +1236,7 @@ def predict( check_alpha_and_n_samples(alpha_np, n) if self.method == "naive": - self.quantiles_ = 1 - alpha_np + self.quantiles_ = 1 - alpha_np else: if (cv == "prefit") or (agg_scores in ["mean"]): if self.method == "raps": @@ -1267,15 +1273,15 @@ def predict( self.quantiles_ = compute_quantiles( self.conformity_scores_, alpha_np, - mondrian = True + mondrian=True ) else: - + self.quantiles_ = compute_quantiles( self.conformity_scores_, alpha_np ) - + else: self.quantiles_ = (n + 1) * (1 - alpha_np) @@ -1368,15 +1374,14 @@ def predict( - y_pred_proba_last, -EPSILON ) - elif self.method == "mondrian": #TODO: - self.quantiles_ = np.transpose(self.quantiles_,[1,0]) + elif self.method == "mondrian": + self.quantiles_ = np.transpose(self.quantiles_, [1, 0]) prediction_sets = np.greater_equal( - y_pred_proba - (1 - self.quantiles_), -EPSILON + y_pred_proba - (1 - self.quantiles_), + -EPSILON ) - - - + else: raise ValueError( "Invalid method. " diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py index 510f11010..4cc52fe79 100644 --- a/mapie/tests/test_classification.py +++ b/mapie/tests/test_classification.py @@ -22,7 +22,7 @@ from mapie.metrics import classification_coverage_score from mapie.utils import check_alpha -METHODS = ["score", "cumulated_score", "raps"] +METHODS = ["score", "cumulated_score", "raps","mondrian"] WRONG_METHODS = ["scores", "cumulated", "test", "", 1, 2.5, (1, 2)] WRONG_INCLUDE_LABELS = ["randomised", "True", "False", "other", 1, 2.5, (1, 2)] Y_PRED_PROBA_WRONG = [ @@ -263,6 +263,17 @@ agg_scores="mean" ) ), + "mondrian": ( + Params( + method="mondrian", + cv="prefit", + random_state=None + ), + ParamsPredict( + include_last_label="randomized", + agg_scores="mean" + ) + ), } COVERAGES = { From 1b0e00deb64062aa60dcd4640883f55f100b6adc Mon Sep 17 00:00:00 2001 From: mata Date: Wed, 16 Nov 2022 11:23:13 +0100 Subject: [PATCH 08/10] adding tests for the mondrian method. --- Makefile | 2 +- mapie/tests/test_classification.py | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 4a58860e6..a3561eef2 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ .PHONY: tests doc build lint: - flake8 . --exclude=doc mapieenv + flake8 . --exclude=doc,mapieenv type-check: mypy mapie diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py index 4cc52fe79..ff45ace03 100644 --- a/mapie/tests/test_classification.py +++ b/mapie/tests/test_classification.py @@ -292,7 +292,8 @@ "naive": 5 / 9, "top_k": 1, "raps": 1, - "raps_randomized": 8/9 + "raps_randomized": 8/9, + "mondrian": 1 } X_toy = np.arange(9).reshape(-1, 1) @@ -475,6 +476,17 @@ [False, True, True], [False, False, True], ], + "mondrian": [ + [True, False, False], + [True, False, False], + [True, True, False], + [True, True, True], + [True, True, True], + [True, True, True], + [False, True, True], + [False, True, True], + [False, False, True], + ], } REGULARIZATION_PARAMETERS = [ @@ -875,7 +887,8 @@ def test_toy_dataset_predictions(strategy: str) -> None: alpha=0.5, include_last_label=args_predict["include_last_label"], agg_scores=args_predict["agg_scores"] - ) + ) + np.testing.assert_allclose(y_ps[:, :, 0], y_toy_mapie[strategy]) np.testing.assert_allclose( classification_coverage_score(y_toy, y_ps[:, :, 0]), From caa9de6d2785499d9557e76bd58c67f280081f6f Mon Sep 17 00:00:00 2001 From: mata Date: Wed, 16 Nov 2022 12:28:26 +0100 Subject: [PATCH 09/10] Now the linting tests pass.. --- mapie/tests/test_classification.py | 4 ++-- mapie/utils.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py index ff45ace03..96faf502f 100644 --- a/mapie/tests/test_classification.py +++ b/mapie/tests/test_classification.py @@ -22,7 +22,7 @@ from mapie.metrics import classification_coverage_score from mapie.utils import check_alpha -METHODS = ["score", "cumulated_score", "raps","mondrian"] +METHODS = ["score", "cumulated_score", "raps", "mondrian"] WRONG_METHODS = ["scores", "cumulated", "test", "", 1, 2.5, (1, 2)] WRONG_INCLUDE_LABELS = ["randomised", "True", "False", "other", 1, 2.5, (1, 2)] Y_PRED_PROBA_WRONG = [ @@ -887,7 +887,7 @@ def test_toy_dataset_predictions(strategy: str) -> None: alpha=0.5, include_last_label=args_predict["include_last_label"], agg_scores=args_predict["agg_scores"] - ) + ) np.testing.assert_allclose(y_ps[:, :, 0], y_toy_mapie[strategy]) np.testing.assert_allclose( diff --git a/mapie/utils.py b/mapie/utils.py index 849b8d52c..62549ccbe 100644 --- a/mapie/utils.py +++ b/mapie/utils.py @@ -616,7 +616,9 @@ def check_alpha_and_last_axis(vector: NDArray, alpha_np: NDArray): return vector, alpha_np -def compute_quantiles(vector: NDArray, alpha: NDArray,mondrian = False) -> NDArray: +def compute_quantiles(vector: NDArray, + alpha: NDArray, + mondrian=False) -> NDArray: """Compute the desired quantiles of a vector. Parameters From a569c399f5b4a62398ac6d35927655ebaf81f616 Mon Sep 17 00:00:00 2001 From: mata Date: Wed, 25 Jan 2023 10:46:29 +0100 Subject: [PATCH 10/10] Updated the Authors.rst file --- AUTHORS.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index 8fc0ca2d5..c23e89db9 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -24,4 +24,5 @@ Contributors * Louis Lacombe * Arnaud Capitaine * Tarik Tazi +* Matthias Adamsen To be continued ...