diff --git a/.github/workflows/pytest_pipeline.yml b/.github/workflows/pytest_pipeline.yml
new file mode 100644
index 0000000..7aa071f
--- /dev/null
+++ b/.github/workflows/pytest_pipeline.yml
@@ -0,0 +1,35 @@
+name: Pytest - ImputeGAP - Pipeline - 12
+
+on:
+  push:
+  pull_request:
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          lfs: true
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.12'
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install libmlpack-dev
+          sudo apt-get install libopenblas-dev
+          sudo apt-get install python3-dev build-essential
+          pip install --upgrade google protobuf
+          pip install -r requirements.txt
+          pip install mypy
+          pip install pytest
+
+
+      - name: Run pytest
+        run: |
+          python -m pytest ./tests/test_pipeline.py
\ No newline at end of file
diff --git a/.github/workflows/pytest_pipeline_10.yml b/.github/workflows/pytest_pipeline_10.yml
new file mode 100644
index 0000000..baf03da
--- /dev/null
+++ b/.github/workflows/pytest_pipeline_10.yml
@@ -0,0 +1,35 @@
+name: Pytest - ImputeGAP - Pipeline - 10
+
+on:
+  push:
+  pull_request:
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          lfs: true
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install libmlpack-dev
+          sudo apt-get install libopenblas-dev
+          sudo apt-get install python3-dev build-essential
+          pip install --upgrade google protobuf
+          pip install -r requirements.txt
+          pip install mypy
+          pip install pytest
+
+
+      - name: Run pytest
+        run: |
+          python -m pytest ./tests/test_pipeline.py
\ No newline at end of file
diff --git a/.github/workflows/pytest_pipeline_12_6.yml b/.github/workflows/pytest_pipeline_12_6.yml
new file mode 100644
index 0000000..159f552
--- /dev/null
+++ b/.github/workflows/pytest_pipeline_12_6.yml
@@ -0,0 +1,35 @@
+name: Pytest - ImputeGAP - Pipeline - 12.6
+
+on:
+  push:
+  pull_request:
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          lfs: true
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.12.6'
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install libmlpack-dev
+          sudo apt-get install libopenblas-dev
+          sudo apt-get install python3-dev build-essential
+          pip install --upgrade google protobuf
+          pip install -r requirements.txt
+          pip install mypy
+          pip install pytest
+
+
+      - name: Run pytest
+        run: |
+          python -m pytest ./tests/test_pipeline.py
\ No newline at end of file
diff --git a/.github/workflows/pytest_pipeline_12_8.yml b/.github/workflows/pytest_pipeline_12_8.yml
new file mode 100644
index 0000000..3559e20
--- /dev/null
+++ b/.github/workflows/pytest_pipeline_12_8.yml
@@ -0,0 +1,35 @@
+name: Pytest - ImputeGAP - Pipeline - 12.8
+
+on:
+  push:
+  pull_request:
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          lfs: true
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.12.8'
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install libmlpack-dev
+          sudo apt-get install libopenblas-dev
+          sudo apt-get install python3-dev build-essential
+          pip install --upgrade google protobuf
+          pip install -r requirements.txt
+          pip install mypy
+          pip install pytest
+
+
+      - name: Run pytest
+        run: |
+          python -m pytest ./tests/test_pipeline.py
\ No newline at end of file
diff --git a/.github/workflows/pytest_pipeline_13.yml b/.github/workflows/pytest_pipeline_13.yml
new file mode 100644
index 0000000..b90479e
--- /dev/null
+++ b/.github/workflows/pytest_pipeline_13.yml
@@ -0,0 +1,35 @@
+name: Pytest - ImputeGAP - Pipeline - 13
+
+on:
+  push:
+  pull_request:
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          lfs: true
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.13'
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install libmlpack-dev
+          sudo apt-get install libopenblas-dev
+          sudo apt-get install python3-dev build-essential
+          pip install --upgrade google protobuf
+          pip install -r requirements.txt
+          pip install mypy
+          pip install pytest
+
+
+      - name: Run pytest
+        run: |
+          python -m pytest ./tests/test_pipeline.py
\ No newline at end of file
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index 946795b..bc4e1f2 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
diff --git a/README.md b/README.md
index cffa72b..d40113c 100644
--- a/README.md
+++ b/README.md
@@ -89,8 +89,8 @@ ts_1.load_timeseries(utils.search_path("eeg-alcohol"), max_series=5, max_values=
 ts_1.normalize(normalizer="z_score")
 
 # [OPTIONAL] you can plot your raw data / print the information
-ts_1.plot(raw_data=ts_1.data, title="raw data", max_series=10, max_values=100, save_path="./imputegap/assets")
-ts_1.print(limit=10)
+ts_1.plot(input_data=ts_1.data, title="raw data", max_series=10, max_values=100, save_path="./imputegap/assets")
+ts_1.print(limit_series=10)
 ```
@@ -116,10 +116,10 @@ ts_1.load_timeseries(utils.search_path("eeg-alcohol"))
 ts_1.normalize(normalizer="min_max")
 
 # 3. contamination of the data with MCAR scenario
-infected_data = ts_1.Contaminate.mcar(ts_1.data, series_impacted=0.4, missing_rate=0.2, use_seed=True)
+infected_data = ts_1.Contamination.mcar(ts_1.data, series_rate=0.4, missing_rate=0.2, use_seed=True)
 
 # [OPTIONAL] you can plot your raw data / print the contamination
-ts_1.print(limit=10)
+ts_1.print(limit_series=10)
 ts_1.plot(ts_1.data, infected_data, title="contamination", max_series=1, save_path="./imputegap/assets")
 ```
@@ -146,7 +146,7 @@ ts_1.load_timeseries(utils.search_path("eeg-alcohol"))
 ts_1.normalize(normalizer="min_max")
 
 # 3. contamination of the data
-infected_data = ts_1.Contaminate.mcar(ts_1.data)
+infected_data = ts_1.Contamination.mcar(ts_1.data)
 
 # 4. imputation of the contaminated data
 # choice of the algorithm, and their parameters (default, automl, or defined by the user)
@@ -190,20 +190,20 @@ ts_1.load_timeseries(utils.search_path("eeg-alcohol"))
 ts_1.normalize(normalizer="min_max")
 
 # 3. contamination of the data
-infected_data = ts_1.Contaminate.mcar(ts_1.data)
+infected_data = ts_1.Contamination.mcar(ts_1.data)
 
 # 4.
imputation of the contaminated data # imputation with AutoML which will discover the optimal hyperparameters for your dataset and your algorithm -cdrec = Imputation.MatrixCompletion.CDRec(infected_data).impute(user_defined=False, params={"ground_truth": ts_1.data, - "optimizer": "bayesian", - "options": {"n_calls": 5}}) +cdrec = Imputation.MatrixCompletion.CDRec(infected_data).impute(user_def=False, params={"ground_truth": ts_1.data, + "optimizer": "bayesian", + "options": {"n_calls": 5}}) # 5. score the imputation with the raw_data -cdrec.score(ts_1.data, cdrec.imputed_matrix) +cdrec.score(ts_1.data, cdrec.recov_data) # 6. [OPTIONAL] display the results ts_1.print_results(cdrec.metrics) -ts_1.plot(raw_data=ts_1.data, infected_data=infected_data, imputed_data=cdrec.imputed_matrix, title="imputation", +ts_1.plot(input_data=ts_1.data, incomp_data=infected_data, imputed_data=cdrec.recov_data, title="imputation", max_series=1, save_path="./imputegap/assets", display=True) # 7. [OPTIONAL] save hyperparameters @@ -234,7 +234,7 @@ ts_1 = TimeSeries() ts_1.load_timeseries(utils.search_path("eeg-alcohol")) # 3. call the explanation of your dataset with a specific algorithm to gain insight on the Imputation results -shap_values, shap_details = Explainer.shap_explainer(raw_data=ts_1.data, file_name="eeg-alcohol", algorithm="cdrec") +shap_values, shap_details = Explainer.shap_explainer(input_data=ts_1.data, file_name="eeg-alcohol", algorithm="cdrec") # [OPTIONAL] print the results with the impact of each feature. Explainer.print(shap_values, shap_details) diff --git a/build/lib/imputegap/algorithms/cdrec.py b/build/lib/imputegap/algorithms/cdrec.py index 93f6deb..86e92fd 100644 --- a/build/lib/imputegap/algorithms/cdrec.py +++ b/build/lib/imputegap/algorithms/cdrec.py @@ -101,13 +101,13 @@ def native_cdrec(__py_matrix, __py_rank, __py_epsilon, __py_iterations): return __py_imputed_matrix; -def cdrec(contamination, truncation_rank, iterations, epsilon, logs=True, lib_path=None): +def cdrec(incomp_data, truncation_rank, iterations, epsilon, logs=True, lib_path=None): """ CDRec algorithm for matrix imputation of missing values using Centroid Decomposition. Parameters ---------- - contamination : numpy.ndarray + incomp_data : numpy.ndarray The input matrix with contamination (missing values represented as NaNs). truncation_rank : int The truncation rank for matrix decomposition (must be greater than 1 and smaller than the number of series). 
@@ -127,18 +127,18 @@ def cdrec(contamination, truncation_rank, iterations, epsilon, logs=True, lib_pa Example ------- - >>> imputed_data = cdrec(contamination=contamination_matrix, truncation_rank=1, iterations=100, epsilon=0.000001, logs=True) - >>> print(imputed_data) + >>> recov_data = cdrec(incomp_data=incomp_data, truncation_rank=1, iterations=100, epsilon=0.000001, logs=True) + >>> print(recov_data) """ start_time = time.time() # Record start time # Call the C++ function to perform recovery - imputed_matrix = native_cdrec(contamination, truncation_rank, epsilon, iterations) + recov_data = native_cdrec(incomp_data, truncation_rank, epsilon, iterations) end_time = time.time() if logs: print(f"\n\t\t> logs, imputation cdrec - Execution Time: {(end_time - start_time):.4f} seconds\n") - return imputed_matrix + return recov_data diff --git a/build/lib/imputegap/algorithms/cpp_integration.py b/build/lib/imputegap/algorithms/cpp_integration.py index b9e1f7c..ec302ee 100644 --- a/build/lib/imputegap/algorithms/cpp_integration.py +++ b/build/lib/imputegap/algorithms/cpp_integration.py @@ -119,11 +119,11 @@ def your_algo(contamination, param, logs=True): start_time = time.time() # Record start time # Call the C++ function to perform recovery - imputed_matrix = native_algo(contamination, param) + recov_data = native_algo(contamination, param) end_time = time.time() if logs: print(f"\n\t\t> logs, imputation algo - Execution Time: {(end_time - start_time):.4f} seconds\n") - return imputed_matrix + return recov_data diff --git a/build/lib/imputegap/algorithms/iim.py b/build/lib/imputegap/algorithms/iim.py index 485b04b..2ac16dc 100644 --- a/build/lib/imputegap/algorithms/iim.py +++ b/build/lib/imputegap/algorithms/iim.py @@ -2,13 +2,13 @@ from imputegap.wrapper.AlgoPython.IIM.testerIIM import impute_with_algorithm -def iim(contamination, number_neighbor, algo_code, logs=True): +def iim(incomp_data, number_neighbor, algo_code, logs=True): """ Perform imputation using the Iterative Imputation Method (IIM) algorithm. Parameters ---------- - contamination : numpy.ndarray + incomp_data : numpy.ndarray The input matrix with contamination (missing values represented as NaNs). number_neighbor : int The number of neighbors to use for the K-Nearest Neighbors (KNN) classifier (default is 10). 
@@ -31,8 +31,8 @@ def iim(contamination, number_neighbor, algo_code, logs=True): Example ------- - >>> imputed_data = iim(contamination_matrix, number_neighbor=10, algo_code="iim 2") - >>> print(imputed_data) + >>> recov_data = iim(incomp_data, number_neighbor=10, algo_code="iim 2") + >>> print(recov_data) References ---------- @@ -41,10 +41,10 @@ def iim(contamination, number_neighbor, algo_code, logs=True): """ start_time = time.time() # Record start time - imputed_matrix = impute_with_algorithm(algo_code, contamination.copy(), number_neighbor) + recov_data = impute_with_algorithm(algo_code, incomp_data.copy(), number_neighbor) end_time = time.time() if logs: print(f"\n\t\t> logs, imputation iim - Execution Time: {(end_time - start_time):.4f} seconds\n") - return imputed_matrix + return recov_data diff --git a/build/lib/imputegap/algorithms/mean_impute.py b/build/lib/imputegap/algorithms/mean_impute.py index c5d8b3b..58fbe66 100644 --- a/build/lib/imputegap/algorithms/mean_impute.py +++ b/build/lib/imputegap/algorithms/mean_impute.py @@ -1,13 +1,13 @@ import numpy as np -def mean_impute(contamination, params=None): +def mean_impute(incomp_data, params=None): """ Impute NaN values with the mean value of the time series. Parameters ---------- - contamination : numpy.ndarray + incomp_data : numpy.ndarray The input time series with contamination (missing values represented as NaNs). params : dict, optional Optional parameters for the algorithm. If None, the minimum value from the contamination is used (default is None). @@ -24,18 +24,18 @@ def mean_impute(contamination, params=None): Example ------- - >>> contamination = np.array([[5, 2, np.nan], [3, np.nan, 6]]) - >>> imputed_matrix = mean_impute(contamination) - >>> print(imputed_matrix) + >>> incomp_data = np.array([[5, 2, np.nan], [3, np.nan, 6]]) + >>> recov_data = mean_impute(incomp_data) + >>> print(recov_data) array([[5., 2., 4.], [3., 4., 6.]]) """ # logic - mean_value = np.nanmean(contamination) + mean_value = np.nanmean(incomp_data) # Imputation - imputed_matrix = np.nan_to_num(contamination, nan=mean_value) + recov_data = np.nan_to_num(incomp_data, nan=mean_value) - return imputed_matrix + return recov_data diff --git a/build/lib/imputegap/algorithms/min_impute.py b/build/lib/imputegap/algorithms/min_impute.py index adf782c..c6b56d6 100644 --- a/build/lib/imputegap/algorithms/min_impute.py +++ b/build/lib/imputegap/algorithms/min_impute.py @@ -1,13 +1,13 @@ import numpy as np -def min_impute(contamination, params=None): +def min_impute(incomp_data, params=None): """ Impute NaN values with the minimum value of the time series. Parameters ---------- - contamination : numpy.ndarray + incomp_data : numpy.ndarray The input time series with contamination (missing values represented as NaNs). params : dict, optional Optional parameters for the algorithm. If None, the minimum value from the contamination is used (default is None). 
@@ -24,18 +24,18 @@ def min_impute(contamination, params=None): Example ------- - >>> contamination = np.array([[1, 2, np.nan], [4, np.nan, 6]]) - >>> imputed_matrix = min_impute(contamination) - >>> print(imputed_matrix) + >>> incomp_data = np.array([[1, 2, np.nan], [4, np.nan, 6]]) + >>> recov_data = min_impute(incomp_data) + >>> print(recov_data) array([[1., 2., 1.], [4., 1., 6.]]) """ # logic - min_value = np.nanmin(contamination) + min_value = np.nanmin(incomp_data) # Imputation - imputed_matrix = np.nan_to_num(contamination, nan=min_value) + recov_data = np.nan_to_num(incomp_data, nan=min_value) - return imputed_matrix + return recov_data diff --git a/build/lib/imputegap/algorithms/mrnn.py b/build/lib/imputegap/algorithms/mrnn.py index ed4eb32..3b43b58 100644 --- a/build/lib/imputegap/algorithms/mrnn.py +++ b/build/lib/imputegap/algorithms/mrnn.py @@ -2,13 +2,13 @@ from imputegap.wrapper.AlgoPython.MRNN.testerMRNN import mrnn_recov -def mrnn(contamination, hidden_dim, learning_rate, iterations, sequence_length, logs=True): +def mrnn(incomp_data, hidden_dim, learning_rate, iterations, sequence_length, logs=True): """ Perform imputation using the Multivariate Recurrent Neural Network (MRNN) algorithm. Parameters ---------- - contamination : numpy.ndarray + incomp_data : numpy.ndarray The input matrix with contamination (missing values represented as NaNs). hidden_dim : int The number of hidden dimensions in the MRNN model. @@ -34,8 +34,8 @@ def mrnn(contamination, hidden_dim, learning_rate, iterations, sequence_length, Example ------- - >>> imputed_data = mrnn(contamination_matrix, hidden_dim=64, learning_rate=0.001, iterations=1000, sequence_length=7) - >>> print(imputed_data) + >>> recov_data = mrnn(incomp_data, hidden_dim=64, learning_rate=0.001, iterations=1000, sequence_length=7) + >>> print(recov_data) References ---------- @@ -43,11 +43,11 @@ def mrnn(contamination, hidden_dim, learning_rate, iterations, sequence_length, """ start_time = time.time() # Record start time - imputed_matrix = mrnn_recov(matrix_in=contamination, hidden_dim=hidden_dim, learning_rate=learning_rate, - iterations=iterations, seq_length=sequence_length) + recov_data = mrnn_recov(matrix_in=incomp_data, hidden_dim=hidden_dim, learning_rate=learning_rate, + iterations=iterations, seq_length=sequence_length) end_time = time.time() if logs: print(f"\n\t\t> logs, imputation mrnn - Execution Time: {(end_time - start_time):.4f} seconds\n") - return imputed_matrix + return recov_data diff --git a/build/lib/imputegap/algorithms/stmvl.py b/build/lib/imputegap/algorithms/stmvl.py index c97272b..93bf409 100644 --- a/build/lib/imputegap/algorithms/stmvl.py +++ b/build/lib/imputegap/algorithms/stmvl.py @@ -76,8 +76,8 @@ def native_stmvl(__py_matrix, __py_window, __py_gamma, __py_alpha): Example ------- - >>> imputed_data = stmvl(contamination=contamination_matrix, window_size=2, gamma=0.85, alpha=7) - >>> print(imputed_data) + >>> recov_data = stmvl(incomp_data=incomp_data, window_size=2, gamma=0.85, alpha=7) + >>> print(recov_data) References ---------- @@ -120,28 +120,28 @@ def native_stmvl(__py_matrix, __py_window, __py_gamma, __py_alpha): return __py_recovered; -def stmvl(contamination, window_size, gamma, alpha, logs=True): +def stmvl(incomp_data, window_size, gamma, alpha, logs=True): """ CDREC algorithm for imputation of missing data :author: Quentin Nater - :param contamination: time series with contamination + :param incomp_data: time series with contamination :param window_size: window size for temporal 
component :param gamma: smoothing parameter for temporal weight :param alpha: power for spatial weight :param logs: print logs of time execution - :return: imputed_matrix, metrics : all time series with imputation data and their metrics + :return: recov_data, metrics : all time series with imputation data and their metrics """ start_time = time.time() # Record start time # Call the C++ function to perform recovery - imputed_matrix = native_stmvl(contamination, window_size, gamma, alpha) + recov_data = native_stmvl(incomp_data, window_size, gamma, alpha) end_time = time.time() if logs: print(f"\n\t\t> logs, imputation stvml - Execution Time: {(end_time - start_time):.4f} seconds\n") - return imputed_matrix + return recov_data diff --git a/build/lib/imputegap/algorithms/zero_impute.py b/build/lib/imputegap/algorithms/zero_impute.py index e6a64d3..da456e4 100644 --- a/build/lib/imputegap/algorithms/zero_impute.py +++ b/build/lib/imputegap/algorithms/zero_impute.py @@ -1,13 +1,13 @@ import numpy as np -def zero_impute(contamination, params=None): +def zero_impute(incomp_data, params=None): """ Impute missing values (NaNs) with zeros in the time series. Parameters ---------- - contamination : numpy.ndarray + incomp_data : numpy.ndarray The input time series matrix with missing values represented as NaNs. params : dict, optional Optional parameters for the algorithm. This is not used in the current implementation but can be passed for future extensions (default is None). @@ -23,14 +23,14 @@ def zero_impute(contamination, params=None): Example ------- - >>> contamination = np.array([[1, 2, np.nan], [4, np.nan, 6]]) - >>> imputed_matrix = zero_impute(contamination) - >>> print(imputed_matrix) + >>> incomp_data = np.array([[1, 2, np.nan], [4, np.nan, 6]]) + >>> recov_data = zero_impute(incomp_data) + >>> print(recov_data) array([[1., 2., 0.], [4., 0., 6.]]) :author: Quentin Nater """ - imputed_matrix = np.nan_to_num(contamination, nan=0) + recov_data = np.nan_to_num(incomp_data, nan=0) - return imputed_matrix + return recov_data diff --git a/build/lib/imputegap/assets/25_01_06_14_32_52_plot.jpg b/build/lib/imputegap/assets/25_01_06_14_32_52_plot.jpg new file mode 100644 index 0000000..bd7b4b8 Binary files /dev/null and b/build/lib/imputegap/assets/25_01_06_14_32_52_plot.jpg differ diff --git a/build/lib/imputegap/assets/logo_imputegab.png b/build/lib/imputegap/assets/logo_imputegab.png new file mode 100644 index 0000000..7a9473b Binary files /dev/null and b/build/lib/imputegap/assets/logo_imputegab.png differ diff --git a/build/lib/imputegap/assets/shap/chlorine_cdrec_DTL_Beeswarm.png b/build/lib/imputegap/assets/shap/chlorine_cdrec_DTL_Beeswarm.png index 1b94847..fc1bed5 100644 Binary files a/build/lib/imputegap/assets/shap/chlorine_cdrec_DTL_Beeswarm.png and b/build/lib/imputegap/assets/shap/chlorine_cdrec_DTL_Beeswarm.png differ diff --git a/build/lib/imputegap/assets/shap/chlorine_cdrec_DTL_Waterfall.png b/build/lib/imputegap/assets/shap/chlorine_cdrec_DTL_Waterfall.png index 826dda3..ab7d8fc 100644 Binary files a/build/lib/imputegap/assets/shap/chlorine_cdrec_DTL_Waterfall.png and b/build/lib/imputegap/assets/shap/chlorine_cdrec_DTL_Waterfall.png differ diff --git a/build/lib/imputegap/assets/shap/chlorine_cdrec_results.txt b/build/lib/imputegap/assets/shap/chlorine_cdrec_results.txt index 2265b48..015b837 100644 --- a/build/lib/imputegap/assets/shap/chlorine_cdrec_results.txt +++ b/build/lib/imputegap/assets/shap/chlorine_cdrec_results.txt @@ -1,22 +1,22 @@ -Feature : 1 cdrec with a score of 
90.54 Geometry 10-bin histogram mode DN_HistogramMode_10 -Feature : 12 cdrec with a score of 3.99 Correlation Change in autocorrelation timescale after incremental differencing FC_LocalSimple_mean1_tauresrat -Feature : 5 cdrec with a score of 3.83 Correlation Time reversibility CO_trev_1_num -Feature : 18 cdrec with a score of 0.57 Geometry Rescaled range fluctuation analysis (low-scale scaling) SC_FluctAnal_2_rsrangefit_50_1_logi_prop_r1 -Feature : 13 cdrec with a score of 0.37 Geometry Positive outlier timing DN_OutlierInclude_p_001_mdrmd -Feature : 3 cdrec with a score of 0.33 Correlation First minimum of the ACF CO_FirstMin_ac -Feature : 14 cdrec with a score of 0.29 Geometry Negative outlier timing DN_OutlierInclude_n_001_mdrmd -Feature : 6 cdrec with a score of 0.09 Geometry Proportion of high incremental changes in the series MD_hrv_classic_pnn40 -Feature : 0 cdrec with a score of 0.0 Geometry 5-bin histogram mode DN_HistogramMode_5 -Feature : 2 cdrec with a score of 0.0 Correlation First 1/e crossing of the ACF CO_f1ecac -Feature : 4 cdrec with a score of 0.0 Correlation Histogram-based automutual information (lag 2, 5 bins) CO_HistogramAMI_even_2_5 +Feature : 6 cdrec with a score of 58.09 Geometry Proportion of high incremental changes in the series MD_hrv_classic_pnn40 +Feature : 5 cdrec with a score of 9.13 Correlation Time reversibility CO_trev_1_num +Feature : 2 cdrec with a score of 6.2 Correlation First 1/e crossing of the ACF CO_f1ecac +Feature : 15 cdrec with a score of 5.34 Transformation Power in the lowest 20% of frequencies SP_Summaries_welch_rect_area_5_1 +Feature : 10 cdrec with a score of 4.95 Geometry Goodness of exponential fit to embedding distance distribution CO_Embed2_Dist_tau_d_expfit_meandiff +Feature : 1 cdrec with a score of 3.76 Geometry 10-bin histogram mode DN_HistogramMode_10 +Feature : 12 cdrec with a score of 3.39 Correlation Change in autocorrelation timescale after incremental differencing FC_LocalSimple_mean1_tauresrat +Feature : 0 cdrec with a score of 2.41 Geometry 5-bin histogram mode DN_HistogramMode_5 +Feature : 17 cdrec with a score of 2.32 Trend Entropy of successive pairs in symbolized series SB_MotifThree_quantile_hh +Feature : 21 cdrec with a score of 2.24 Trend Error of 3-point rolling mean forecast FC_LocalSimple_mean3_stderr +Feature : 4 cdrec with a score of 1.42 Correlation Histogram-based automutual information (lag 2, 5 bins) CO_HistogramAMI_even_2_5 +Feature : 8 cdrec with a score of 0.38 Geometry Transition matrix column variance SB_TransitionMatrix_3ac_sumdiagcov +Feature : 13 cdrec with a score of 0.36 Geometry Positive outlier timing DN_OutlierInclude_p_001_mdrmd +Feature : 14 cdrec with a score of 0.01 Geometry Negative outlier timing DN_OutlierInclude_n_001_mdrmd +Feature : 3 cdrec with a score of 0.0 Correlation First minimum of the ACF CO_FirstMin_ac Feature : 7 cdrec with a score of 0.0 Geometry Longest stretch of above-mean values SB_BinaryStats_mean_longstretch1 -Feature : 8 cdrec with a score of 0.0 Geometry Transition matrix column variance SB_TransitionMatrix_3ac_sumdiagcov Feature : 9 cdrec with a score of 0.0 Trend Wangs periodicity metric PD_PeriodicityWang_th0_01 -Feature : 10 cdrec with a score of 0.0 Geometry Goodness of exponential fit to embedding distance distribution CO_Embed2_Dist_tau_d_expfit_meandiff Feature : 11 cdrec with a score of 0.0 Correlation First minimum of the AMI function IN_AutoMutualInfoStats_40_gaussian_fmmi -Feature : 15 cdrec with a score of 0.0 Transformation Power in the lowest 20% of 
frequencies SP_Summaries_welch_rect_area_5_1 Feature : 16 cdrec with a score of 0.0 Geometry Longest stretch of decreasing values SB_BinaryStats_diff_longstretch0 -Feature : 17 cdrec with a score of 0.0 Trend Entropy of successive pairs in symbolized series SB_MotifThree_quantile_hh +Feature : 18 cdrec with a score of 0.0 Geometry Rescaled range fluctuation analysis (low-scale scaling) SC_FluctAnal_2_rsrangefit_50_1_logi_prop_r1 Feature : 19 cdrec with a score of 0.0 Geometry Detrended fluctuation analysis (low-scale scaling) SC_FluctAnal_2_dfa_50_1_2_logi_prop_r1 Feature : 20 cdrec with a score of 0.0 Transformation Centroid frequency SP_Summaries_welch_rect_centroid -Feature : 21 cdrec with a score of 0.0 Trend Error of 3-point rolling mean forecast FC_LocalSimple_mean3_stderr diff --git a/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_aggregate_plot.png b/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_aggregate_plot.png index 136ae00..d127afd 100644 Binary files a/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_aggregate_plot.png and b/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_aggregate_plot.png differ diff --git a/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_aggregate_reverse_plot.png b/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_aggregate_reverse_plot.png index 842ad10..89e7fbf 100644 Binary files a/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_aggregate_reverse_plot.png and b/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_aggregate_reverse_plot.png differ diff --git a/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_correlation_plot.png b/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_correlation_plot.png index e83e663..b5f67ad 100644 Binary files a/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_correlation_plot.png and b/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_correlation_plot.png differ diff --git a/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_geometry_plot.png b/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_geometry_plot.png index f0eca2d..4b01c30 100644 Binary files a/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_geometry_plot.png and b/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_geometry_plot.png differ diff --git a/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_plot.png b/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_plot.png index b64bb11..c05c662 100644 Binary files a/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_plot.png and b/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_plot.png differ diff --git a/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_reverse_plot.png b/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_reverse_plot.png index 30ad3b5..1ee24f1 100644 Binary files a/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_reverse_plot.png and b/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_reverse_plot.png differ diff --git a/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_transformation_plot.png b/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_transformation_plot.png index 9e1c31a..e273f90 100644 Binary files a/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_transformation_plot.png and b/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_transformation_plot.png differ diff --git a/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_trend_plot.png b/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_trend_plot.png index 75a03f7..1ed3924 100644 Binary files 
a/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_trend_plot.png and b/build/lib/imputegap/assets/shap/chlorine_cdrec_shap_trend_plot.png differ diff --git a/build/lib/imputegap/dataset/README.md b/build/lib/imputegap/dataset/README.md index 18e5abb..149f35d 100644 --- a/build/lib/imputegap/dataset/README.md +++ b/build/lib/imputegap/dataset/README.md @@ -577,3 +577,5 @@ Finally, BAFU - normalized 20x400 demonstrates the impact of "MIN-MAX" normaliza | Granularity | 30 minutes | | Observations | spans years 1974 to 2015 | | Dataset dimensions | N=50000 M=10 | + + diff --git a/build/lib/imputegap/env/default_values.toml b/build/lib/imputegap/env/default_values.toml index 7135392..1fe62f3 100644 --- a/build/lib/imputegap/env/default_values.toml +++ b/build/lib/imputegap/env/default_values.toml @@ -21,13 +21,13 @@ sequence_length = 7 [greedy] n_calls = 250 -selected_metrics='RMSE' +metrics='RMSE' [bayesian] n_calls = 2 n_random_starts = 50 acq_func = 'gp_hedge' -selected_metrics='RMSE' +metrics='RMSE' [pso] n_particles = 50 @@ -36,13 +36,13 @@ c2 = 0.3 w = 0.9 iterations=10 n_processes=1 -selected_metrics='RMSE' +metrics='RMSE' [sh] num_configs = 10 num_iterations = 2 reduction_factor = 10 -selected_metrics="RMSE" +metrics="RMSE" [explainer] diff --git a/build/lib/imputegap/params/optimal_parameters_t_eeg_cdrec.toml b/build/lib/imputegap/params/optimal_parameters_t_eeg_cdrec.toml new file mode 100644 index 0000000..e47a8de --- /dev/null +++ b/build/lib/imputegap/params/optimal_parameters_t_eeg_cdrec.toml @@ -0,0 +1,4 @@ +[cdrec] +rank = 6 +epsilon = 7.662399122383144e-5 +iteration = 329 diff --git a/build/lib/imputegap/recovery/README.md b/build/lib/imputegap/recovery/README.md index 8325be0..96570b2 100644 --- a/build/lib/imputegap/recovery/README.md +++ b/build/lib/imputegap/recovery/README.md @@ -2,7 +2,7 @@

 # CONTAMINATION
 
-## Scenarios
+## Patterns
 
@@ -14,7 +14,7 @@
@@ -29,7 +29,7 @@
 ### MCAR
 
 MCAR selects random series and remove block at random positions until a total of W of all points of time series are missing.
-This scenario uses random number generator with fixed seed and will produce the same blocks every run.
+This pattern uses random number generator with fixed seed and will produce the same blocks every run.
MNumber of time seriesPStarting position (protection)
RMissing rate of the scenarioRMissing rate of the pattern
Spercentage of series selected
Definition @@ -82,7 +82,7 @@ This scenario uses random number generator with fixed seed and will produce the ### BLACKOUT -The **BLACKOUT** scenario selects all time series to introduce missing values. It removes a set percentage of data points from all series, creating gaps for further analysis. +The **BLACKOUT** pattern selects all time series to introduce missing values. It removes a set percentage of data points from all series, creating gaps for further analysis.
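
For illustration, here is a minimal, self-contained sketch of the MCAR idea described above: random series are selected, then fixed-size blocks are blanked out until the target missing rate is reached, reproducibly thanks to a fixed seed. This is not ImputeGAP's implementation; the helper name `mcar_sketch` and its defaults are hypothetical and only loosely mirror the `series_rate`, `missing_rate`, `block_size`, and `seed` parameters that appear elsewhere in this changeset.

```python
import numpy as np

def mcar_sketch(data, series_rate=0.4, missing_rate=0.2, block_size=10, seed=42):
    """Blank out random blocks in randomly chosen series (illustrative only)."""
    rng = np.random.default_rng(seed)
    contaminated = np.asarray(data, dtype=float).copy()
    n_series, n_values = contaminated.shape
    # choose which series to contaminate
    n_picked = max(1, int(series_rate * n_series))
    picked = rng.choice(n_series, size=n_picked, replace=False)
    for s in picked:
        target = int(missing_rate * n_values)  # points to remove in this series
        removed = 0
        while removed < target:
            # drop a block at a random position, counting only values not yet NaN
            start = rng.integers(0, n_values - block_size + 1)
            stop = start + min(block_size, target - removed)
            removed += int(np.isfinite(contaminated[s, start:stop]).sum())
            contaminated[s, start:stop] = np.nan
    return contaminated

if __name__ == "__main__":
    ts = np.arange(5 * 100, dtype=float).reshape(5, 100)
    incomp = mcar_sketch(ts)
    print(int(np.isnan(incomp).sum()), "values removed")
```

Because the generator is seeded, repeated runs blank out the same positions, which matches the reproducibility property claimed for the MCAR pattern.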
diff --git a/imputegap/recovery/benchmarking.py b/build/lib/imputegap/recovery/benchmark.py similarity index 57% rename from imputegap/recovery/benchmarking.py rename to build/lib/imputegap/recovery/benchmark.py index a4034ca..29a5d74 100644 --- a/imputegap/recovery/benchmarking.py +++ b/build/lib/imputegap/recovery/benchmark.py @@ -1,21 +1,26 @@ +import datetime +import importlib import os +import math import time import numpy as np import matplotlib.pyplot as plt +import xlsxwriter + from imputegap.tools import utils from imputegap.recovery.imputation import Imputation from imputegap.recovery.manager import TimeSeries -class Benchmarking: +class Benchmark: """ - A class to evaluate the performance of imputation algorithms through benchmarking across datasets and scenarios. + A class to evaluate the performance of imputation algorithms through benchmarking across datasets and patterns. Methods ------- _config_optimization(): - Configure and execute optimization for a selected imputation algorithm and contamination scenario. + Configure and execute optimization for a selected imputation algorithm and contamination pattern. avg_results(): Calculate average metrics (e.g., RMSE) across multiple datasets and algorithm runs. generate_matrix(): @@ -23,7 +28,7 @@ class Benchmarking: generate_reports(): Create detailed text-based reports summarizing metrics and timing results for all evaluations. generate_plots(): - Visualize metrics (e.g., RMSE, MAE) and timing (e.g., imputation, optimization) across scenarios and datasets. + Visualize metrics (e.g., RMSE, MAE) and timing (e.g., imputation, optimization) across patterns and datasets. comprehensive_evaluation(): Perform a complete benchmarking pipeline, including contamination, imputation, evaluation, and reporting. 
@@ -32,9 +37,9 @@ class Benchmarking: output : {'drift': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9234927128429051, 'MAE': 0.7219362152785619, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0010309219360351562, 'optimization': 0, 'imputation': 0.0005755424499511719}}, '0.1': {'scores': {'RMSE': 0.9699990038879407, 'MAE': 0.7774057495176013, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0020699501037597656, 'optimization': 0, 'imputation': 0.00048422813415527344}}, '0.2': {'scores': {'RMSE': 0.9914069853975623, 'MAE': 0.8134840739732964, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.007096290588378906, 'optimization': 0, 'imputation': 0.000461578369140625}}, '0.4': {'scores': {'RMSE': 1.0552448338389784, 'MAE': 0.7426695186604741, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.043192148208618164, 'optimization': 0, 'imputation': 0.0005095005035400391}}, '0.6': {'scores': {'RMSE': 1.0143105930114702, 'MAE': 0.7610548321723654, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.17184901237487793, 'optimization': 0, 'imputation': 0.0005536079406738281}}, '0.8': {'scores': {'RMSE': 1.010712060535523, 'MAE': 0.7641520748788702, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.6064670085906982, 'optimization': 0, 'imputation': 0.0005743503570556641}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.23303624184873978, 'MAE': 0.13619797235197734, 'MI': 1.2739817718416822, 'CORRELATION': 0.968435455112644}, 'times': {'contamination': 0.0009615421295166016, 'optimization': 0, 'imputation': 0.09218788146972656}}, '0.1': {'scores': {'RMSE': 0.18152059329152104, 'MAE': 0.09925566629402761, 'MI': 1.1516089897042538, 'CORRELATION': 0.9829398352220718}, 'times': {'contamination': 0.00482487678527832, 'optimization': 0, 'imputation': 0.09549617767333984}}, '0.2': {'scores': {'RMSE': 0.13894771223733138, 'MAE': 0.08459032692102293, 'MI': 1.186191167936035, 'CORRELATION': 0.9901338133811375}, 'times': {'contamination': 0.01713728904724121, 'optimization': 0, 'imputation': 0.1129295825958252}}, '0.4': {'scores': {'RMSE': 0.7544523683503829, 'MAE': 0.11218049973594252, 'MI': 0.021165172206064526, 'CORRELATION': 0.814120507570725}, 'times': {'contamination': 0.10881781578063965, 'optimization': 0, 'imputation': 1.9378046989440918}}, '0.6': {'scores': {'RMSE': 0.4355197572001326, 'MAE': 0.1380846624733049, 'MI': 0.10781252370591506, 'CORRELATION': 0.9166777087122915}, 'times': {'contamination': 0.2380077838897705, 'optimization': 0, 'imputation': 1.8785057067871094}}, '0.8': {'scores': {'RMSE': 0.7672558930795506, 'MAE': 0.32988968428439397, 'MI': 0.013509125598802707, 'CORRELATION': 0.7312998041323675}, 'times': {'contamination': 0.6805167198181152, 'optimization': 0, 'imputation': 1.9562773704528809}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.5434405584289141, 'MAE': 0.346560495723809, 'MI': 0.7328867182584357, 'CORRELATION': 0.8519431955571422}, 'times': {'contamination': 0.0022056102752685547, 'optimization': 0, 'imputation': 52.07010293006897}}, '0.1': {'scores': {'RMSE': 0.39007056542870916, 'MAE': 0.2753022759369617, 'MI': 0.8280959876205578, 'CORRELATION': 0.9180937736429735}, 'times': {'contamination': 0.002231597900390625, 'optimization': 0, 'imputation': 52.543020248413086}}, '0.2': {'scores': {'RMSE': 0.37254427425455994, 'MAE': 0.2730547993858495, 'MI': 0.7425412593844177, 'CORRELATION': 0.9293322959355041}, 'times': {'contamination': 0.0072672367095947266, 'optimization': 0, 'imputation': 
52.88247036933899}}, '0.4': {'scores': {'RMSE': 0.6027573766269363, 'MAE': 0.34494332493982044, 'MI': 0.11876685901414151, 'CORRELATION': 0.8390532279447225}, 'times': {'contamination': 0.04321551322937012, 'optimization': 0, 'imputation': 54.10793352127075}}, '0.6': {'scores': {'RMSE': 0.9004526656857551, 'MAE': 0.4924048353228427, 'MI': 0.011590260996247858, 'CORRELATION': 0.5650541301828254}, 'times': {'contamination': 0.1728806495666504, 'optimization': 0, 'imputation': 40.53373336791992}}, '0.8': {'scores': {'RMSE': 1.0112488396023014, 'MAE': 0.7646823531588104, 'MI': 0.00040669209664367576, 'CORRELATION': 0.0183962968474991}, 'times': {'contamination': 0.6077785491943359, 'optimization': 0, 'imputation': 35.151907444000244}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.4445625930776235, 'MAE': 0.2696133927362288, 'MI': 1.1167751522591498, 'CORRELATION': 0.8944975075266335}, 'times': {'contamination': 0.0010058879852294922, 'optimization': 0, 'imputation': 0.7380530834197998}}, '0.1': {'scores': {'RMSE': 0.2939506418814281, 'MAE': 0.16953644212278182, 'MI': 1.0160968166750064, 'CORRELATION': 0.9531900627237018}, 'times': {'contamination': 0.0019745826721191406, 'optimization': 0, 'imputation': 4.7826457023620605}}, '0.2': {'scores': {'RMSE': 0.2366529609250008, 'MAE': 0.14709529129218185, 'MI': 1.064299483512458, 'CORRELATION': 0.9711348247027318}, 'times': {'contamination': 0.00801849365234375, 'optimization': 0, 'imputation': 33.94813060760498}}, '0.4': {'scores': {'RMSE': 0.4155649406397416, 'MAE': 0.22056702659999994, 'MI': 0.06616526470761779, 'CORRELATION': 0.919934494058292}, 'times': {'contamination': 0.04391813278198242, 'optimization': 0, 'imputation': 255.31524085998535}}, '0.6': {'scores': {'RMSE': 0.38695094864012947, 'MAE': 0.24340565131372927, 'MI': 0.06361822797740405, 'CORRELATION': 0.9249744935121553}, 'times': {'contamination': 0.17044353485107422, 'optimization': 0, 'imputation': 840.7470128536224}}, '0.8': {'scores': {'RMSE': 0.5862696375344495, 'MAE': 0.3968159514130716, 'MI': 0.13422239939628303, 'CORRELATION': 0.8178796825899766}, 'times': {'contamination': 0.5999574661254883, 'optimization': 0, 'imputation': 1974.6101157665253}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9458508648057621, 'MAE': 0.7019459696903068, 'MI': 0.11924522547609226, 'CORRELATION': 0.02915935932568557}, 'times': {'contamination': 0.001056671142578125, 'optimization': 0, 'imputation': 49.42237901687622}}, '0.1': {'scores': {'RMSE': 1.0125309431502871, 'MAE': 0.761136543268339, 'MI': 0.12567590499764303, 'CORRELATION': -0.037161060882302754}, 'times': {'contamination': 0.003415822982788086, 'optimization': 0, 'imputation': 49.04829454421997}}, '0.2': {'scores': {'RMSE': 1.0317754516097355, 'MAE': 0.7952869439926, 'MI': 0.10908095436833125, 'CORRELATION': -0.04155403791391449}, 'times': {'contamination': 0.007429599761962891, 'optimization': 0, 'imputation': 49.42568325996399}}, '0.4': {'scores': {'RMSE': 1.0807965786089415, 'MAE': 0.7326965517264863, 'MI': 0.006171770470542263, 'CORRELATION': -0.020630168509677818}, 'times': {'contamination': 0.042899370193481445, 'optimization': 0, 'imputation': 49.479795694351196}}, '0.6': {'scores': {'RMSE': 1.0441472017887297, 'MAE': 0.7599852461729673, 'MI': 0.01121013333181846, 'CORRELATION': -0.007513931343350665}, 'times': {'contamination': 0.17329692840576172, 'optimization': 0, 'imputation': 50.439927101135254}}, '0.8': {'scores': {'RMSE': 1.0379347892718205, 'MAE': 0.757440007226372, 'MI': 0.0035880775657246428, 
'CORRELATION': -0.0014975078469404196}, 'times': {'contamination': 0.6166613101959229, 'optimization': 0, 'imputation': 50.66455388069153}}}}}}} """ - def _config_optimization(self, opti_mean, ts_test, scenario, algorithm, block_size_mcar): + def _config_optimization(self, opti_mean, ts_test, pattern, algorithm, block_size_mcar): """ - Configure and execute optimization for selected imputation algorithm and scenario. + Configure and execute optimization for selected imputation algorithm and pattern. Parameters ---------- @@ -42,8 +47,8 @@ def _config_optimization(self, opti_mean, ts_test, scenario, algorithm, block_si Mean parameter for contamination. ts_test : TimeSeries TimeSeries object containing dataset. - scenario : str - Type of contamination scenario (e.g., "mcar", "mp", "blackout"). + pattern : str + Type of contamination pattern (e.g., "mcar", "mp", "blackout"). algorithm : str Imputation algorithm to use. block_size_mcar : int @@ -55,27 +60,27 @@ def _config_optimization(self, opti_mean, ts_test, scenario, algorithm, block_si Configured imputer instance with optimal parameters. """ - if scenario == "mcar": - infected_matrix_opti = ts_test.Contaminate.mcar(ts=ts_test.data, series_impacted=opti_mean, - missing_rate=opti_mean, block_size=block_size_mcar, - use_seed=True, seed=42) - elif scenario == "mp": - infected_matrix_opti = ts_test.Contaminate.missing_percentage(ts=ts_test.data, series_impacted=opti_mean, - missing_rate=opti_mean) + if pattern == "mcar": + incomp_data_opti = ts_test.Contamination.mcar(input_data=ts_test.data, series_rate=opti_mean, + missing_rate=opti_mean, block_size=block_size_mcar, + seed=True) + elif pattern == "mp": + incomp_data_opti = ts_test.Contamination.missing_percentage(input_data=ts_test.data, series_rate=opti_mean, + missing_rate=opti_mean) else: - infected_matrix_opti = ts_test.Contaminate.blackout(ts=ts_test.data, missing_rate=opti_mean) + incomp_data_opti = ts_test.Contamination.blackout(input_data=ts_test.data, missing_rate=opti_mean) i_opti = None if algorithm == "cdrec": - i_opti = Imputation.MatrixCompletion.CDRec(infected_matrix_opti) + i_opti = Imputation.MatrixCompletion.CDRec(incomp_data_opti) elif algorithm == "stmvl": - i_opti = Imputation.PatternSearch.STMVL(infected_matrix_opti) + i_opti = Imputation.PatternSearch.STMVL(incomp_data_opti) elif algorithm == "iim": - i_opti = Imputation.Statistics.IIM(infected_matrix_opti) + i_opti = Imputation.Statistics.IIM(incomp_data_opti) elif algorithm == "mrnn": - i_opti = Imputation.DeepLearning.MRNN(infected_matrix_opti) + i_opti = Imputation.DeepLearning.MRNN(incomp_data_opti) elif algorithm == "mean": - i_opti = Imputation.Statistics.MeanImpute(infected_matrix_opti) + i_opti = Imputation.Statistics.MeanImpute(incomp_data_opti) return i_opti @@ -90,8 +95,8 @@ def avg_results(self, *datasets): Returns ------- - dict - Dictionary with averaged scores and times for all levels. 
+ List + Matrix with averaged scores and times for all levels, list of algorithms, list of datasets """ # Step 1: Compute average RMSE across runs for each dataset and algorithm @@ -102,8 +107,8 @@ def avg_results(self, *datasets): if dataset not in aggregated_data: aggregated_data[dataset] = {} - for scenario, scenario_items in dataset_items.items(): - for algo, algo_data in scenario_items.items(): + for pattern, pattern_items in dataset_items.items(): + for algo, algo_data in pattern_items.items(): if algo not in aggregated_data[dataset]: aggregated_data[dataset][algo] = [] @@ -139,7 +144,7 @@ def avg_results(self, *datasets): return comprehensive_matrix, algorithms_list, datasets_list - def generate_matrix(self, scores_list, algos, sets, save_dir="./reports", display=True): + def generate_heatmap(self, scores_list, algos, sets, save_dir="./reports", display=True): """ Generate and save RMSE matrix in HD quality. @@ -205,18 +210,20 @@ def generate_matrix(self, scores_list, algos, sets, save_dir="./reports", displa return True - def generate_reports(self, runs_plots_scores, save_dir="./reports", dataset=""): + def generate_reports_txt(self, runs_plots_scores, save_dir="./reports", dataset="", run=-1): """ - Generate and save a text reports of metrics and timing for each dataset, algorithm, and scenario. + Generate and save a text report of metrics and timing for each dataset, algorithm, and pattern. Parameters ---------- runs_plots_scores : dict - Dictionary containing scores and timing information for each dataset, scenario, and algorithm. + Dictionary containing scores and timing information for each dataset, pattern, and algorithm. save_dir : str, optional Directory to save the reports file (default is "./reports"). dataset : str, optional - Name of the data for the reports name. + Name of the data for the report name. + run : int, optional + Number of the run. Returns ------- @@ -224,54 +231,180 @@ def generate_reports(self, runs_plots_scores, save_dir="./reports", dataset=""): Notes ----- - The reports is saved in a "reports.txt" file in `save_dir`, organized in tabular format. + The report is saved in a "report.txt" file in `save_dir`, organized in sections with headers and results. """ - + print("run", run) os.makedirs(save_dir, exist_ok=True) - save_path = os.path.join(save_dir, "report_" + str(dataset) + ".txt") + save_path = os.path.join(save_dir, f"report_{dataset}.txt") + current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + with open(save_path, "w") as file: - file.write("dictionary of results : " + str(runs_plots_scores) + "\n\n") + # Write an overall header for the report + file.write(f"Report for Dataset: {dataset}\n") + file.write(f"Generated on: {current_time}\n") + if run >= 0: + file.write(f"Run number: {run}\n") + file.write("=" * 120 + "\n\n") + + metrics = { + "RMSE": "Root Mean Square Error - Measures the average magnitude of error.", + "MAE": "Mean Absolute Error - Measures the average absolute error.", + "MI": "Mutual Information - Indicates dependency between variables.", + "CORRELATION": "Correlation Coefficient - Indicates linear relationship between variables." 
+ } + + for metric, description in metrics.items(): + # Write the metric description + file.write(f"{metric}: {description}\n\n") + + column_widths = [15, 15, 15, 15, 12, 25] + + # Create a table header + headers = ["Dataset", "Algorithm", "Optimizer", "Pattern", "X Value", metric] + header_row = "|".join(f" {header:^{width}} " for header, width in zip(headers, column_widths)) + separator_row = "+" + "+".join(f"{'-' * (width + 2)}" for width in column_widths) + "+" + file.write(f"{separator_row}\n") + file.write(f"|{header_row}|\n") + file.write(f"{separator_row}\n") + + # Extract and write results for the current metric + for dataset, algo_items in runs_plots_scores.items(): + for algorithm, optimizer_items in algo_items.items(): + for optimizer, pattern_data in optimizer_items.items(): + for pattern, x_data_items in pattern_data.items(): + for x, values in x_data_items.items(): + value = values.get("scores", {}).get(metric, None) + if value is not None: + value = f"{value:.10f}" # Limit to 10 decimal places + row_values = [dataset, algorithm, optimizer, pattern, str(x), value] + row = "|".join(f" {value:^{width}} " for value, width in zip(row_values, column_widths)) + file.write(f"|{row}|\n") + file.write(f"{separator_row}\n\n") + + file.write("Dictionary of Results:\n") + file.write(str(runs_plots_scores) + "\n") + + print(f"\nReport recorded in {save_path}") + + def generate_reports_excel(self, runs_plots_scores, save_dir="./reports", dataset="", run=-1): + """ + Generate and save an Excel-like text report of metrics and timing for each dataset, algorithm, and pattern. - # Define header with time columns included - header = "| dataset_value | algorithm_value | optimizer_value | scenario_value | x_value | RMSE | MAE | MI | CORRELATION | time_contamination | time_optimization | time_imputation |\n" - file.write(header) + Parameters + ---------- + runs_plots_scores : dict + Dictionary containing scores and timing information for each dataset, pattern, and algorithm. + save_dir : str, optional + Directory to save the Excel-like file (default is "./reports"). + dataset : str, optional + Name of the data for the Excel-like file name. 
+ run : int, optional + Number of the run + Returns + ------- + None + """ + os.makedirs(save_dir, exist_ok=True) + save_path = os.path.join(save_dir, f"report_{dataset}.xlsx") + + # Create an Excel workbook + workbook = xlsxwriter.Workbook(save_path) + + # Add a summary sheet with the header, creation date, dictionary content, and links to other sheets + summary_sheet = workbook.add_worksheet("Summary") + current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + summary_sheet.set_column(0, 1, 50) + + # Add the logo using importlib.resources + logo_path = importlib.resources.files("imputegap.assets").joinpath("logo_imputegap.png") + summary_sheet.insert_image("A1", str(logo_path), {"x_scale": 0.5, "y_scale": 0.5}) + + # Title and header + summary_sheet.write(5, 0, "IMPUTEGAP") # Title below the logo + summary_sheet.write(7, 0, "Report for Dataset") + summary_sheet.write(7, 1, dataset) + summary_sheet.write(8, 0, "Generated on") + summary_sheet.write(8, 1, current_time) + if run >= 0: + summary_sheet.write(9, 0, "Run Number") + summary_sheet.write(9, 1, run) + + # Add links to metric sheets + row = 12 + summary_sheet.write(row, 0, "Metric Sheets:") + row += 1 + metrics = { + "RMSE": "Root Mean Square Error - Measures the average magnitude of error.", + "MAE": "Mean Absolute Error - Measures the average absolute error.", + "MI": "Mutual Information - Indicates dependency between variables.", + "CORRELATION": "Correlation Coefficient - Indicates linear relationship between variables." + } + for metric in metrics.keys(): + summary_sheet.write_url(row, 0, f"internal:'{metric}'!A1", string=f"Go to {metric} Sheet") + row += 1 + + # Write the dictionary content + summary_sheet.write(row + 1, 0, "Dictionary of Results") + row += 2 + + for key, value in runs_plots_scores.items(): + summary_sheet.write(row, 0, str(key)) + summary_sheet.write(row, 1, str(value)) + row += 1 + + for metric, description in metrics.items(): + # Create a worksheet for each metric + worksheet = workbook.add_worksheet(metric) + + # Add the logo to each metric sheet + worksheet.insert_image("A1", str(logo_path), {"x_scale": 0.5, "y_scale": 0.5}) + + # Write the metric description at the top and add IMPUTEGAP header + worksheet.write(5, 0, "IMPUTEGAP") + worksheet.write(7, 0, f"{metric}: {description}") + + # Define consistent column headers and widths + headers = ["Dataset", "Algorithm", "Optimizer", "Pattern", "X Value", metric] + column_widths = [15, 15, 15, 15, 12, 20] # Adjust widths for Excel + + # Write the headers + for col, (header, width) in enumerate(zip(headers, column_widths)): + worksheet.set_column(col, col, width) + worksheet.write(8, col, header) + + # Populate the data + row = 9 for dataset, algo_items in runs_plots_scores.items(): for algorithm, optimizer_items in algo_items.items(): - for optimizer, scenario_data in optimizer_items.items(): - for scenario, x_data_items in scenario_data.items(): + for optimizer, pattern_data in optimizer_items.items(): + for pattern, x_data_items in pattern_data.items(): for x, values in x_data_items.items(): - metrics = values["scores"] - times = values["times"] - - # Retrieve each timing value, defaulting to None if absent - contamination_time = times.get("contamination", None) - optimization_time = times.get("optimization", None) - imputation_time = times.get("imputation", None) - - # Create a reports line with timing details - line = ( - f"| {dataset} | {algorithm} | {optimizer} | {scenario} | {x} " - f"| {metrics.get('RMSE')} | {metrics.get('MAE')} | 
{metrics.get('MI')} " - f"| {metrics.get('CORRELATION')} | {contamination_time} sec | {optimization_time} sec" - f"| {imputation_time} sec |\n" - ) - file.write(line) - - print("\nReport recorded in", save_path) - - def generate_plots(self, runs_plots_scores, s="M", v="N", save_dir="./reports"): + value = values.get("scores", {}).get(metric, None) + if value is not None: + value = f"{value:.10f}" + data = [dataset, algorithm, optimizer, pattern, str(x), value] + for col, cell_value in enumerate(data): + worksheet.write(row, col, cell_value) + row += 1 + + # Close the workbook + workbook.close() + + print(f"\nExcel report recorded in {save_path}") + + def generate_plots(self, runs_plots_scores, ticks, subplot=False, save_dir="./reports"): """ - Generate and save plots for each metric and scenario based on provided scores. + Generate and save plots for each metric and pattern based on provided scores. Parameters ---------- runs_plots_scores : dict - Dictionary containing scores and timing information for each dataset, scenario, and algorithm. - s : str - display the number of series in graphs - v : sts - display the number of values in graphs + Dictionary containing scores and timing information for each dataset, pattern, and algorithm. + ticks : list of float + List of missing rates for contamination. + subplot : bool, optional + If True, generates a single figure with subplots for all metrics (default is False). save_dir : str, optional Directory to save generated plots (default is "./reports"). @@ -281,16 +414,30 @@ def generate_plots(self, runs_plots_scores, s="M", v="N", save_dir="./reports"): Notes ----- - Saves generated plots in `save_dir`, categorized by dataset, scenario, and metric. + Saves generated plots in `save_dir`, categorized by dataset, pattern, and metric. 
""" os.makedirs(save_dir, exist_ok=True) + metrics = ["RMSE", "MAE", "MI", "CORRELATION", "imputation_time", "log_imputation"] + + for dataset, pattern_items in runs_plots_scores.items(): + for pattern, algo_items in pattern_items.items(): + + if subplot: + fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(10, 12)) # Adjusted figsize + axes = axes.ravel() # Flatten the 2D array of axes to a 1D array - for dataset, scenario_items in runs_plots_scores.items(): - for scenario, algo_items in scenario_items.items(): # Iterate over each metric, generating separate plots, including new timing metrics - for metric in ["RMSE", "MAE", "MI", "CORRELATION", "imputation_time", "optimization_time", - "contamination_time"]: - plt.figure(figsize=(10, 4)) # Fixed height set by second parameter + for i, metric in enumerate(metrics): + + if subplot: + if i < len(axes): + ax = axes[i] + else: + break # Prevent index out of bounds if metrics exceed subplot slots + else: + plt.figure(figsize=(10, 4)) + ax = plt.gca() + has_data = False # Flag to check if any data is added to the plot # Iterate over each algorithm and plot them in the same figure @@ -303,12 +450,9 @@ def generate_plots(self, runs_plots_scores, s="M", v="N", save_dir="./reports"): if metric == "imputation_time" and "imputation" in values["times"]: x_vals.append(float(x)) y_vals.append(values["times"]["imputation"]) - elif metric == "optimization_time" and "optimization" in values["times"]: + elif metric == "log_imputation" and "log_imputation" in values["times"]: x_vals.append(float(x)) - y_vals.append(values["times"]["optimization"]) - elif metric == "contamination_time" and "contamination" in values["times"]: - x_vals.append(float(x)) - y_vals.append(values["times"]["contamination"]) + y_vals.append(values["times"]["log_imputation"]) elif metric in values["scores"]: x_vals.append(float(x)) y_vals.append(values["scores"][metric]) @@ -320,87 +464,82 @@ def generate_plots(self, runs_plots_scores, s="M", v="N", save_dir="./reports"): x_vals, y_vals = zip(*sorted_pairs) # Plot each algorithm as a line with scattered points - plt.plot(x_vals, y_vals, label=f"{algorithm}") - plt.scatter(x_vals, y_vals) + ax.plot(x_vals, y_vals, label=f"{algorithm}") + ax.scatter(x_vals, y_vals) has_data = True # Save plot only if there is data to display if has_data: - # Set plot titles and labels based on metric - title_metric = { - "imputation_time": "Imputation Time", - "optimization_time": "Optimization Time", - "contamination_time": "Contamination Time" - }.get(metric, metric) ylabel_metric = { - "imputation_time": "Imputation Time (seconds)", - "optimization_time": "Optimization Time (seconds)", - "contamination_time": "Contamination Time (seconds)" + "imputation_time": "Imputation Time (sec)", + "log_imputation": "Imputation Time (log)", }.get(metric, metric) - plt.title(f"{dataset} | {scenario} | {title_metric} | ({s}x{v})") - plt.xlabel(f"{scenario} rate of missing values and missing series") - plt.ylabel(ylabel_metric) - plt.xlim(0.0, 0.85) + ax.set_title(metric) + ax.set_xlabel("Rates") + ax.set_ylabel(ylabel_metric) + ax.set_xlim(0.0, 0.85) # Set y-axis limits with padding below 0 for visibility if metric == "imputation_time": - plt.ylim(-10, 90) - elif metric == "contamination_time": - plt.ylim(-0.01, 0.59) + ax.set_ylim(-10, 90) + elif metric == "log_imputation": + ax.set_ylim(-10, 90) elif metric == "MAE": - plt.ylim(-0.1, 2.4) + ax.set_ylim(-0.1, 2.4) elif metric == "MI": - plt.ylim(-0.1, 1.85) + ax.set_ylim(-0.1, 1.85) elif metric == "RMSE": - 
plt.ylim(-0.1, 2.6) + ax.set_ylim(-0.1, 2.6) elif metric == "CORRELATION": - plt.ylim(-0.75, 1.1) + ax.set_ylim(-0.75, 1.1) # Customize x-axis ticks - x_points = [0.0, 0.05, 0.1, 0.2, 0.4, 0.6, 0.8] - plt.xticks(x_points, [f"{int(tick * 100)}%" for tick in x_points]) - plt.grid(True, zorder=0) - plt.legend(loc='upper left', bbox_to_anchor=(1, 1)) + ax.set_xticks(ticks) + ax.set_xticklabels([f"{int(tick * 100)}%" for tick in ticks]) + ax.grid(True, zorder=0) + ax.legend(loc='upper left', bbox_to_anchor=(1, 1)) - # Define a unique filename - filename = f"{dataset}_{scenario}_{metric}.jpg" + if not subplot: + filename = f"{dataset}_{pattern}_{metric}.jpg" filepath = os.path.join(save_dir, filename) - - # Save the figure plt.savefig(filepath) - plt.close() # Close to avoid memory issues + plt.close() + + if subplot: + plt.tight_layout() + filename = f"{dataset}_{pattern}_metrics_subplot.jpg" + filepath = os.path.join(save_dir, filename) + plt.savefig(filepath) + plt.close() print("\nAll plots recorded in", save_dir) - def comprehensive_evaluation(self, datasets=[], optimizers=[], algorithms=[], scenarios=[], - x_axis=[0.05, 0.1, 0.2, 0.4, 0.6, 0.8], save_dir="./reports", already_optimized=False, - reports=1): + def eval(self, algorithms=["cdrec"], datasets=["eeg-alcohol"], patterns=["mcar"], x_axis=[0.05, 0.1, 0.2, 0.4, 0.6, 0.8], optimizers=["user_def"], save_dir="./reports", runs=1): """ - Execute a comprehensive evaluation of imputation algorithms over multiple datasets and scenarios. + Execute a comprehensive evaluation of imputation algorithms over multiple datasets and patterns. Parameters ---------- - datasets : list of str - List of dataset names to evaluate. - optimizers : list of dict - List of optimizers with their configurations. algorithms : list of str List of imputation algorithms to test. - scenarios : list of str - List of contamination scenarios to apply. + datasets : list of str + List of dataset names to evaluate. + patterns : list of str + List of contamination patterns to apply. x_axis : list of float List of missing rates for contamination. + optimizers : list of dict + List of optimizers with their configurations. save_dir : str, optional Directory to save reports and plots (default is "./reports"). - already_optimized : bool, optional - If True, skip parameter optimization (default is False). - reports : int, optional + runs : int, optional Number of executions with a view to averaging them Returns ------- - None + List + List of all runs results, matrix with averaged scores and times for all levels Notes ----- @@ -408,8 +547,10 @@ def comprehensive_evaluation(self, datasets=[], optimizers=[], algorithms=[], sc """ print("initialization of the comprehensive evaluation. It can take time...\n") + run_storage = [] + scores_list, algos, sets = None, None, None - for runs in range(0, abs(reports)): + for i_run in range(0, abs(runs)): for dataset in datasets: runs_plots_scores = {} limitation_series, limitation_values = 100, 1000 @@ -429,7 +570,7 @@ def comprehensive_evaluation(self, datasets=[], optimizers=[], algorithms=[], sc elif dataset == "fmri-stoptask": limitation_series = 360 - if reports == -1: + if runs == -1: limitation_series = 10 limitation_values = 110 print("TEST LOADED...") @@ -446,8 +587,8 @@ def comprehensive_evaluation(self, datasets=[], optimizers=[], algorithms=[], sc print("1. normalization of ", dataset, "\n") ts_test.normalize() - for scenario in scenarios: - print("\t2. 
contamination of", dataset, "with scenario", scenario, "\n") + for pattern in patterns: + print("\t2. contamination of", dataset, "with pattern", pattern, "\n") for algorithm in algorithms: has_been_optimized = False @@ -457,71 +598,80 @@ def comprehensive_evaluation(self, datasets=[], optimizers=[], algorithms=[], sc print("\t\t4. missing values (series&values) set to", x, "for x_axis\n") start_time_contamination = time.time() # Record start time - if scenario == "mcar": - infected_matrix = ts_test.Contaminate.mcar(ts=ts_test.data, series_impacted=x, - missing_rate=x, block_size=block_size_mcar, - use_seed=True, seed=42) - elif scenario == "mp": - infected_matrix = ts_test.Contaminate.missing_percentage(ts=ts_test.data, - series_impacted=x, - missing_rate=x) + if pattern == "mcar": + incomp_data = ts_test.Contamination.mcar(input_data=ts_test.data, series_rate=x, + missing_rate=x, block_size=block_size_mcar, + seed=True) + elif pattern == "mp": + incomp_data = ts_test.Contamination.missing_percentage(input_data=ts_test.data, + series_rate=x, + missing_rate=x) else: - infected_matrix = ts_test.Contaminate.blackout(ts=ts_test.data, missing_rate=x) + incomp_data = ts_test.Contamination.blackout(input_data=ts_test.data, missing_rate=x) end_time_contamination = time.time() for optimizer in optimizers: algo = None - optimizer_gt = {"ground_truth": ts_test.data, **optimizer} if algorithm == "cdrec": - algo = Imputation.MatrixCompletion.CDRec(infected_matrix) + algo = Imputation.MatrixCompletion.CDRec(incomp_data) elif algorithm == "stmvl": - algo = Imputation.PatternSearch.STMVL(infected_matrix) + algo = Imputation.PatternSearch.STMVL(incomp_data) elif algorithm == "iim": - algo = Imputation.Statistics.IIM(infected_matrix) + algo = Imputation.Statistics.IIM(incomp_data) elif algorithm == "mrnn": - algo = Imputation.DeepLearning.MRNN(infected_matrix) + algo = Imputation.DeepLearning.MRNN(incomp_data) elif algorithm == "mean": - algo = Imputation.Statistics.MeanImpute(infected_matrix) - - if not has_been_optimized and not already_optimized and algorithm != "mean": - print("\t\t5. AutoML to set the parameters", optimizer, "\n") - start_time_opti = time.time() # Record start time - i_opti = self._config_optimization(0.25, ts_test, scenario, algorithm, - block_size_mcar) - i_opti.impute(user_defined=False, params=optimizer_gt) - utils.save_optimization(optimal_params=i_opti.parameters, algorithm=algorithm, - dataset=dataset, optimizer="e") - has_been_optimized = True - end_time_opti = time.time() - - if algorithm != "mean": - opti_params = utils.load_parameters(query="optimal", algorithm=algorithm, - dataset=dataset, optimizer="e") - print("\t\t6. imputation", algorithm, "with optimal parameters", *opti_params) - + algo = Imputation.Statistics.MeanImpute(incomp_data) + + if isinstance(optimizer, dict): + optimizer_gt = {"input_data": ts_test.data, **optimizer} + optimizer_value = optimizer.get('optimizer') # or optimizer['optimizer'] + + if not has_been_optimized and algorithm != "mean": + print("\t\t5. AutoML to set the parameters", optimizer, "\n") + start_time_opti = time.time() # Record start time + i_opti = self._config_optimization(0.25, ts_test, pattern, algorithm, + block_size_mcar) + i_opti.impute(user_def=False, params=optimizer_gt) + utils.save_optimization(optimal_params=i_opti.parameters, algorithm=algorithm, + dataset=dataset, optimizer="e") + + has_been_optimized = True + end_time_opti = time.time() + else: + print("\t\t5. 
AutoML already optimized : ", optimizer, "\n") + + if algorithm != "mean": + opti_params = utils.load_parameters(query="optimal", algorithm=algorithm, + dataset=dataset, optimizer="e") + print("\t\t6. imputation", algorithm, "with optimal parameters", *opti_params) + + else: + print("\t\t5. MeanImpute launches without optimal params", optimizer, "for", algorithm, "\n") + opti_params = None else: + print("\t\t5. Default parameters have been set the parameters", optimizer, "for", algorithm, "\n") + optimizer_value = optimizer opti_params = None start_time_imputation = time.time() algo.impute(params=opti_params) end_time_imputation = time.time() - algo.score(raw_matrix=ts_test.data, imputed_matrix=algo.imputed_matrix) + algo.score(input_data=ts_test.data, recov_data=algo.recov_data) time_contamination = end_time_contamination - start_time_contamination time_opti = end_time_opti - start_time_opti time_imputation = end_time_imputation - start_time_imputation + log_time_imputation = math.log(time_imputation) if time_imputation > 0 else None - dic_timing = {"contamination": time_contamination, "optimization": time_opti, - "imputation": time_imputation} + dic_timing = {"contamination": time_contamination, "optimization": time_opti, "imputation": time_imputation, "log_imputation": log_time_imputation} dataset_s = dataset if "-" in dataset: dataset_s = dataset.replace("-", "") - optimizer_value = optimizer.get('optimizer') # or optimizer['optimizer'] - - runs_plots_scores.setdefault(str(dataset_s), {}).setdefault(str(scenario), + runs_plots_scores.setdefault(str(dataset_s), {}).setdefault(str(pattern), {}).setdefault( str(algorithm), {}).setdefault(str(optimizer_value), {})[str(x)] = { "scores": algo.metrics, @@ -531,12 +681,17 @@ def comprehensive_evaluation(self, datasets=[], optimizers=[], algorithms=[], sc print("\t\truns_plots_scores", runs_plots_scores) print("\truns_plots_scores : ", runs_plots_scores) - save_dir_runs = save_dir + "/report_" + str(runs) + save_dir_runs = save_dir + "/run_" + str(i_run) print("\truns saved in : ", save_dir_runs) - self.generate_plots(runs_plots_scores=runs_plots_scores, s=str(M), v=str(N), save_dir=save_dir_runs) - self.generate_reports(runs_plots_scores, save_dir_runs, dataset) + self.generate_plots(runs_plots_scores=runs_plots_scores, ticks=x_axis, subplot=True, save_dir=save_dir_runs) + self.generate_plots(runs_plots_scores=runs_plots_scores, ticks=x_axis, subplot=False, save_dir=save_dir_runs) + self.generate_reports_txt(runs_plots_scores, save_dir_runs, dataset, i_run) + self.generate_reports_excel(runs_plots_scores, save_dir_runs, dataset, i_run) + run_storage.append(runs_plots_scores) + + print("============================================================================\n\n\n\n\n\n") - print( - "======================================================================================\n\n\n\n\n\n") + scores_list, algos, sets = self.avg_results(*run_storage) + _ = Benchmark().generate_heatmap(scores_list, algos, sets, save_dir=save_dir) - return runs_plots_scores + return run_storage, scores_list diff --git a/build/lib/imputegap/recovery/benchmarking.py b/build/lib/imputegap/recovery/benchmarking.py index a4034ca..b1329e1 100644 --- a/build/lib/imputegap/recovery/benchmarking.py +++ b/build/lib/imputegap/recovery/benchmarking.py @@ -56,14 +56,14 @@ def _config_optimization(self, opti_mean, ts_test, scenario, algorithm, block_si """ if scenario == "mcar": - infected_matrix_opti = ts_test.Contaminate.mcar(ts=ts_test.data, series_impacted=opti_mean, - 
missing_rate=opti_mean, block_size=block_size_mcar, - use_seed=True, seed=42) + infected_matrix_opti = ts_test.Contamination.mcar(input_data=ts_test.data, series_rate=opti_mean, + missing_rate=opti_mean, block_size=block_size_mcar, + use_seed=True, seed=42) elif scenario == "mp": - infected_matrix_opti = ts_test.Contaminate.missing_percentage(ts=ts_test.data, series_impacted=opti_mean, - missing_rate=opti_mean) + infected_matrix_opti = ts_test.Contamination.missing_percentage(input_data=ts_test.data, series_rate=opti_mean, + missing_rate=opti_mean) else: - infected_matrix_opti = ts_test.Contaminate.blackout(ts=ts_test.data, missing_rate=opti_mean) + infected_matrix_opti = ts_test.Contamination.blackout(input_data=ts_test.data, missing_rate=opti_mean) i_opti = None if algorithm == "cdrec": @@ -458,15 +458,15 @@ def comprehensive_evaluation(self, datasets=[], optimizers=[], algorithms=[], sc start_time_contamination = time.time() # Record start time if scenario == "mcar": - infected_matrix = ts_test.Contaminate.mcar(ts=ts_test.data, series_impacted=x, - missing_rate=x, block_size=block_size_mcar, - use_seed=True, seed=42) + infected_matrix = ts_test.Contamination.mcar(input_data=ts_test.data, series_rate=x, + missing_rate=x, block_size=block_size_mcar, + use_seed=True, seed=42) elif scenario == "mp": - infected_matrix = ts_test.Contaminate.missing_percentage(ts=ts_test.data, - series_impacted=x, - missing_rate=x) + infected_matrix = ts_test.Contamination.missing_percentage(input_data=ts_test.data, + series_rate=x, + missing_rate=x) else: - infected_matrix = ts_test.Contaminate.blackout(ts=ts_test.data, missing_rate=x) + infected_matrix = ts_test.Contamination.blackout(input_data=ts_test.data, missing_rate=x) end_time_contamination = time.time() for optimizer in optimizers: @@ -488,7 +488,7 @@ def comprehensive_evaluation(self, datasets=[], optimizers=[], algorithms=[], sc start_time_opti = time.time() # Record start time i_opti = self._config_optimization(0.25, ts_test, scenario, algorithm, block_size_mcar) - i_opti.impute(user_defined=False, params=optimizer_gt) + i_opti.impute(user_def=False, params=optimizer_gt) utils.save_optimization(optimal_params=i_opti.parameters, algorithm=algorithm, dataset=dataset, optimizer="e") has_been_optimized = True @@ -506,7 +506,7 @@ def comprehensive_evaluation(self, datasets=[], optimizers=[], algorithms=[], sc algo.impute(params=opti_params) end_time_imputation = time.time() - algo.score(raw_matrix=ts_test.data, imputed_matrix=algo.imputed_matrix) + algo.score(input_data=ts_test.data, recov_data=algo.imputed_matrix) time_contamination = end_time_contamination - start_time_contamination time_opti = end_time_opti - start_time_opti diff --git a/build/lib/imputegap/recovery/evaluation.py b/build/lib/imputegap/recovery/evaluation.py index 3f42194..4a44a71 100644 --- a/build/lib/imputegap/recovery/evaluation.py +++ b/build/lib/imputegap/recovery/evaluation.py @@ -22,26 +22,26 @@ class Evaluation: """ - def __init__(self, ground_truth, imputation, contamination): + def __init__(self, input_data, recov_data, incomp_data): """ - Initialize the Evaluation class with ground truth, imputation, and contamination time series. + Initialize the Evaluation class with ground truth, imputation, and incomp_data time series. Parameters ---------- - ground_truth : numpy.ndarray + input_data : numpy.ndarray The original time series without contamination. - imputation : numpy.ndarray + recov_data : numpy.ndarray The imputed time series. 
- contamination : numpy.ndarray + incomp_data : numpy.ndarray The time series with contamination (NaN values). Returns ------- None """ - self.ground_truth = ground_truth - self.imputation = imputation - self.contamination = contamination + self.input_data = input_data + self.recov_data = recov_data + self.incomp_data = incomp_data def metrics_computation(self): """ @@ -79,9 +79,9 @@ def compute_rmse(self): float The RMSE value for NaN positions in the contamination dataset. """ - nan_locations = np.isnan(self.contamination) + nan_locations = np.isnan(self.incomp_data) - mse = np.mean((self.ground_truth[nan_locations] - self.imputation[nan_locations]) ** 2) + mse = np.mean((self.input_data[nan_locations] - self.recov_data[nan_locations]) ** 2) rmse = np.sqrt(mse) return float(rmse) @@ -97,9 +97,9 @@ def compute_mae(self): float The MAE value for NaN positions in the contamination dataset. """ - nan_locations = np.isnan(self.contamination) + nan_locations = np.isnan(self.incomp_data) - absolute_error = np.abs(self.ground_truth[nan_locations] - self.imputation[nan_locations]) + absolute_error = np.abs(self.input_data[nan_locations] - self.recov_data[nan_locations]) mean_absolute_error = np.mean(absolute_error) return mean_absolute_error @@ -116,16 +116,16 @@ def compute_mi(self): float The mutual information (MI) score for NaN positions in the contamination dataset. """ - nan_locations = np.isnan(self.contamination) + nan_locations = np.isnan(self.incomp_data) # Discretize the continuous data into bins - ground_truth_binned = np.digitize(self.ground_truth[nan_locations], - bins=np.histogram_bin_edges(self.ground_truth[nan_locations], bins=10)) - imputation_binned = np.digitize(self.imputation[nan_locations], - bins=np.histogram_bin_edges(self.imputation[nan_locations], bins=10)) + input_data_binned = np.digitize(self.input_data[nan_locations], + bins=np.histogram_bin_edges(self.input_data[nan_locations], bins=10)) + imputation_binned = np.digitize(self.recov_data[nan_locations], + bins=np.histogram_bin_edges(self.recov_data[nan_locations], bins=10)) - mi_discrete = mutual_info_score(ground_truth_binned, imputation_binned) - # mi_continuous = mutual_info_score(self.ground_truth[nan_locations], self.ground_truth[nan_locations]) + mi_discrete = mutual_info_score(input_data_binned, imputation_binned) + # mi_continuous = mutual_info_score(self.input_data[nan_locations], self.input_data[nan_locations]) return mi_discrete @@ -141,11 +141,11 @@ def compute_correlation(self): float The Pearson correlation coefficient for NaN positions in the contamination dataset. 
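RMSE, MAE, MI above, and the Pearson correlation that follows, all share the same masking idea: they compare `input_data` and `recov_data` only at positions that are NaN in `incomp_data`. A small numpy/scipy illustration of that masked computation on toy arrays (not library code, just the formulae used by these methods):

```python
# Toy illustration of the masked metrics: errors are evaluated only where
# the contaminated matrix contains NaN.
import numpy as np
from scipy.stats import pearsonr

input_data = np.array([[1.0, 2.0, 3.0, 4.0, 5.0]])           # ground truth
incomp_data = np.array([[1.0, np.nan, np.nan, np.nan, 5.0]])  # contaminated
recov_data = np.array([[1.0, 2.1, 2.8, 4.3, 5.0]])            # imputed

nan_locations = np.isnan(incomp_data)
rmse = np.sqrt(np.mean((input_data[nan_locations] - recov_data[nan_locations]) ** 2))
mae = np.mean(np.abs(input_data[nan_locations] - recov_data[nan_locations]))
correlation, _ = pearsonr(input_data[nan_locations], recov_data[nan_locations])
print(rmse, mae, correlation)
```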
""" - nan_locations = np.isnan(self.contamination) - ground_truth_values = self.ground_truth[nan_locations] - imputed_values = self.imputation[nan_locations] + nan_locations = np.isnan(self.incomp_data) + input_data_values = self.input_data[nan_locations] + imputed_values = self.recov_data[nan_locations] - correlation, _ = pearsonr(ground_truth_values, imputed_values) + correlation, _ = pearsonr(input_data_values, imputed_values) if np.isnan(correlation): correlation = 0 diff --git a/build/lib/imputegap/recovery/explainer.py b/build/lib/imputegap/recovery/explainer.py index e39ec29..3befc86 100644 --- a/build/lib/imputegap/recovery/explainer.py +++ b/build/lib/imputegap/recovery/explainer.py @@ -547,10 +547,10 @@ def shap_explainer(raw_data, algorithm="cdrec", params=None, contamination="mcar print("\tContamination ", current_series, "...") if contamination == "mcar": - obfuscated_matrix = TimeSeries().Contaminate.mcar(ts=raw_data, series_impacted=current_series, - missing_rate=missing_rate, block_size=block_size, - protection=protection, use_seed=use_seed, seed=seed, - explainer=True) + obfuscated_matrix = TimeSeries().Contamination.mcar(input_data=raw_data, series_rate=current_series, + missing_rate=missing_rate, block_size=block_size, + offset=protection, use_seed=use_seed, seed=seed, + explainer=True) else: print("Contamination proposed not found : ", contamination, " >> BREAK") return None @@ -575,7 +575,7 @@ def shap_explainer(raw_data, algorithm="cdrec", params=None, contamination="mcar algo = Imputation.DeepLearning.MRNN(obfuscated_matrix) algo.logs = False - algo.impute(user_defined=True, params=params) + algo.impute(user_def=True, params=params) algo.score(raw_data) imputation_results = algo.metrics diff --git a/build/lib/imputegap/recovery/imputation.py b/build/lib/imputegap/recovery/imputation.py index c7187d2..0d9ef79 100644 --- a/build/lib/imputegap/recovery/imputation.py +++ b/build/lib/imputegap/recovery/imputation.py @@ -22,9 +22,9 @@ class BaseImputer: ------- impute(params=None): Abstract method to perform the imputation. - score(raw_matrix, imputed_matrix=None): + score(input_data, recov_data=None): Compute metrics for the imputed time series. - _check_params(user_defined, params): + _check_params(user_def, params): Check and format parameters for imputation. _optimize(parameters={}): Optimize hyperparameters for the imputation algorithm. @@ -32,17 +32,17 @@ class BaseImputer: algorithm = "" # Class variable to hold the algorithm name logs = True - def __init__(self, infected_matrix): + def __init__(self, incomp_data): """ Initialize the BaseImputer with an infected time series matrix. Parameters ---------- - infected_matrix : numpy.ndarray + incomp_data : numpy.ndarray Matrix used during the imputation of the time series. """ - self.infected_matrix = infected_matrix - self.imputed_matrix = None + self.incomp_data = incomp_data + self.recov_data = None self.metrics = None self.parameters = None @@ -62,33 +62,33 @@ def impute(self, params=None): """ raise NotImplementedError("This method should be overridden by subclasses") - def score(self, raw_matrix, imputed_matrix=None): + def score(self, input_data, recov_data=None): """ Compute evaluation metrics for the imputed time series. Parameters ---------- - raw_matrix : numpy.ndarray + input_data : numpy.ndarray The original time series without contamination. - imputed_matrix : numpy.ndarray, optional + recov_data : numpy.ndarray, optional The imputed time series (default is None). 
Returns ------- None """ - if self.imputed_matrix is None: - self.imputed_matrix = imputed_matrix + if self.recov_data is None: + self.recov_data = recov_data - self.metrics = Evaluation(raw_matrix, self.imputed_matrix, self.infected_matrix).metrics_computation() + self.metrics = Evaluation(input_data, self.recov_data, self.incomp_data).compute_all_metrics() - def _check_params(self, user_defined, params): + def _check_params(self, user_def, params): """ Format the parameters for optimization or imputation. Parameters ---------- - user_defined : bool + user_def : bool Whether the parameters are user-defined or not. params : dict or list List or dictionary of parameters. @@ -100,7 +100,7 @@ def _check_params(self, user_defined, params): """ if params is not None: - if not user_defined: + if not user_def: self._optimize(params) if isinstance(self.parameters, dict): @@ -133,7 +133,7 @@ def _optimize(self, parameters={}): Parameters ---------- parameters : dict - Dictionary containing optimization configurations such as ground_truth, optimizer, and options. + Dictionary containing optimization configurations such as input_data, optimizer, and options. Returns ------- @@ -141,9 +141,9 @@ def _optimize(self, parameters={}): """ from imputegap.recovery.optimization import Optimization - raw_data = parameters.get('ground_truth') - if raw_data is None: - raise ValueError(f"Need ground_truth to be able to adapt the hyper-parameters: {raw_data}") + input_data = parameters.get('input_data') + if input_data is None: + raise ValueError(f"Need input_data to be able to adapt the hyper-parameters: {input_data}") optimizer = parameters.get('optimizer', "bayesian") defaults = utils.load_parameters(query="default", algorithm=optimizer) @@ -157,13 +157,13 @@ def _optimize(self, parameters={}): n_calls = options.get('n_calls', n_calls_d) random_starts = options.get('n_random_starts', n_random_starts_d) func = options.get('acq_func', acq_func_d) - metrics = options.get('selected_metrics', selected_metrics_d) + metrics = options.get('metrics', selected_metrics_d) bo_optimizer = Optimization.Bayesian() - optimal_params, _ = bo_optimizer.optimize(ground_truth=raw_data, - contamination=self.infected_matrix, - selected_metrics=metrics, + optimal_params, _ = bo_optimizer.optimize(input_data=input_data, + incomp_data=self.incomp_data, + metrics=metrics, algorithm=self.algorithm, n_calls=n_calls, n_random_starts=random_starts, @@ -179,13 +179,13 @@ def _optimize(self, parameters={}): w = options.get('w', w_d) iterations = options.get('iterations', iterations_d) n_processes = options.get('n_processes', n_processes_d) - metrics = options.get('selected_metrics', selected_metrics_d) + metrics = options.get('metrics', selected_metrics_d) swarm_optimizer = Optimization.ParticleSwarm() - optimal_params, _ = swarm_optimizer.optimize(ground_truth=raw_data, - contamination=self.infected_matrix, - selected_metrics=metrics, algorithm=self.algorithm, + optimal_params, _ = swarm_optimizer.optimize(input_data=input_data, + incomp_data=self.incomp_data, + metrics=metrics, algorithm=self.algorithm, n_particles=n_particles, c1=c1, c2=c2, w=w, iterations=iterations, n_processes=n_processes) @@ -197,13 +197,13 @@ def _optimize(self, parameters={}): num_configs = options.get('num_configs', num_configs_d) num_iterations = options.get('num_iterations', num_iterations_d) reduction_factor = options.get('reduction_factor', reduction_factor_d) - metrics = options.get('selected_metrics', selected_metrics_d) + metrics = options.get('metrics', 
selected_metrics_d) sh_optimizer = Optimization.SuccessiveHalving() - optimal_params, _ = sh_optimizer.optimize(ground_truth=raw_data, - contamination=self.infected_matrix, - selected_metrics=metrics, algorithm=self.algorithm, + optimal_params, _ = sh_optimizer.optimize(input_data=input_data, + incomp_data=self.incomp_data, + metrics=metrics, algorithm=self.algorithm, num_configs=num_configs, num_iterations=num_iterations, reduction_factor=reduction_factor) @@ -212,13 +212,13 @@ def _optimize(self, parameters={}): options = parameters.get('options', {}) n_calls = options.get('n_calls', n_calls_d) - metrics = options.get('selected_metrics', selected_metrics_d) + metrics = options.get('metrics', selected_metrics_d) go_optimizer = Optimization.Greedy() - optimal_params, _ = go_optimizer.optimize(ground_truth=raw_data, - contamination=self.infected_matrix, - selected_metrics=metrics, algorithm=self.algorithm, + optimal_params, _ = go_optimizer.optimize(input_data=input_data, + incomp_data=self.incomp_data, + metrics=metrics, algorithm=self.algorithm, n_calls=n_calls) self.parameters = optimal_params @@ -230,19 +230,19 @@ class Imputation: Methods ------- - evaluate_params(ground_truth, contamination, configuration, algorithm="cdrec"): + evaluate_params(input_data, incomp_data, configuration, algorithm="cdrec"): Evaluate imputation performance using given parameters and algorithm. """ - def evaluate_params(ground_truth, contamination, configuration, algorithm="cdrec"): + def evaluate_params(input_data, incomp_data, configuration, algorithm="cdrec"): """ Evaluate various metrics for given parameters and imputation algorithm. Parameters ---------- - ground_truth : numpy.ndarray + input_data : numpy.ndarray The original time series without contamination. - contamination : numpy.ndarray + incomp_data : numpy.ndarray The time series with contamination. configuration : tuple Tuple of the configuration of the algorithm. 
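For reference, a hedged sketch of calling this entry point directly, the way the optimizers do. The module paths and the random toy matrix are assumptions inferred from the file layout; the keyword names and the `(rank, epsilon, iterations)` tuple match the `"cdrec"` branch shown in the next hunk:

```python
# Hedged sketch (module paths and toy data are assumptions): rating one
# CDRec configuration with Imputation.evaluate_params.
import numpy as np
from imputegap.recovery.manager import TimeSeries
from imputegap.recovery.imputation import Imputation

input_data = np.random.rand(10, 200)                     # toy ground-truth matrix
incomp_data = TimeSeries().Contamination.mcar(input_data=input_data, series_rate=0.4,
                                              missing_rate=0.2, block_size=10, seed=True)

# (rank, epsilon, iterations) is the configuration tuple of the "cdrec" branch
errors = Imputation.evaluate_params(input_data, incomp_data, (2, 0.01, 100), algorithm="cdrec")
print(errors)                                            # dict of RMSE, MAE, MI, CORRELATION
```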
@@ -260,9 +260,9 @@ def evaluate_params(ground_truth, contamination, configuration, algorithm="cdrec if algorithm == 'cdrec': rank, epsilon, iterations = configuration - algo = Imputation.MatrixCompletion.CDRec(contamination) + algo = Imputation.MatrixCompletion.CDRec(incomp_data) algo.logs = False - algo.impute(user_defined=True, params={"rank": rank, "epsilon": epsilon, "iterations": iterations}) + algo.impute(user_def=True, params={"rank": rank, "epsilon": epsilon, "iterations": iterations}) elif algorithm == 'iim': if not isinstance(configuration, list): @@ -270,30 +270,30 @@ def evaluate_params(ground_truth, contamination, configuration, algorithm="cdrec learning_neighbours = configuration[0] alg_code = "iim " + re.sub(r'[\W_]', '', str(learning_neighbours)) - algo = Imputation.Statistics.IIM(contamination) + algo = Imputation.Statistics.IIM(incomp_data) algo.logs = False - algo.impute(user_defined=True, params={"learning_neighbours": learning_neighbours, "alg_code": alg_code}) + algo.impute(user_def=True, params={"learning_neighbours": learning_neighbours, "alg_code": alg_code}) elif algorithm == 'mrnn': hidden_dim, learning_rate, iterations = configuration - algo = Imputation.DeepLearning.MRNN(contamination) + algo = Imputation.DeepLearning.MRNN(incomp_data) algo.logs = False - algo.impute(user_defined=True, + algo.impute(user_def=True, params={"hidden_dim": hidden_dim, "learning_rate": learning_rate, "iterations": iterations, "seq_length": 7}) elif algorithm == 'stmvl': window_size, gamma, alpha = configuration - algo = Imputation.PatternSearch.STMVL(contamination) + algo = Imputation.PatternSearch.STMVL(incomp_data) algo.logs = False - algo.impute(user_defined=True, params={"window_size": window_size, "gamma": gamma, "alpha": alpha}) + algo.impute(user_def=True, params={"window_size": window_size, "gamma": gamma, "alpha": alpha}) else: raise ValueError(f"Invalid algorithm: {algorithm}") - algo.score(ground_truth) + algo.score(input_data) error_measures = algo.metrics return error_measures @@ -334,9 +334,9 @@ def impute(self, params=None): Returns ------- self : ZeroImpute - The object with `imputed_matrix` set. + The object with `recov_data` set. """ - self.imputed_matrix = zero_impute(self.infected_matrix, params) + self.recov_data = zero_impute(self.incomp_data, params) return self @@ -364,9 +364,9 @@ def impute(self, params=None): Returns ------- self : MinImpute - The object with `imputed_matrix` set. + The object with `recov_data` set. """ - self.imputed_matrix = min_impute(self.infected_matrix, params) + self.recov_data = min_impute(self.incomp_data, params) return self @@ -394,9 +394,9 @@ def impute(self, params=None): Returns ------- self : MinImpute - The object with `imputed_matrix` set. + The object with `recov_data` set. """ - self.imputed_matrix = mean_impute(self.infected_matrix, params) + self.recov_data = mean_impute(self.incomp_data, params) return self @@ -406,18 +406,18 @@ class IIM(BaseImputer): Methods ------- - impute(self, user_defined=True, params=None): + impute(self, user_def=True, params=None): Perform imputation using the IIM algorithm. """ algorithm = "iim" - def impute(self, user_defined=True, params=None): + def impute(self, user_def=True, params=None): """ Perform imputation using the IIM algorithm. Parameters ---------- - user_defined : bool, optional + user_def : bool, optional Whether to use user-defined or default parameters (default is True). params : dict, optional Parameters of the IIM algorithm, if None, default ones are loaded. 
@@ -430,15 +430,15 @@ def impute(self, user_defined=True, params=None): Returns ------- self : IIM - The object with `imputed_matrix` set. + The object with `recov_data` set. Example ------- - >>> iim_imputer = Imputation.Statistics.IIM(infected_matrix) + >>> iim_imputer = Imputation.Statistics.IIM(incomp_data) >>> iim_imputer.impute() # default parameters for imputation > or - >>> iim_imputer.impute(user_defined=True, params={'learning_neighbors': 10}) # user-defined > or - >>> iim_imputer.impute(user_defined=False, params={"ground_truth": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 2}}) # auto-ml with bayesian - >>> imputed_data = iim_imputer.imputed_matrix + >>> iim_imputer.impute(user_def=True, params={'learning_neighbors': 10}) # user-defined > or + >>> iim_imputer.impute(user_def=False, params={"input_data": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 2}}) # auto-ml with bayesian + >>> recov_data = iim_imputer.recov_data References ---------- @@ -446,12 +446,12 @@ def impute(self, user_defined=True, params=None): keywords: {Data models;Adaptation models;Computational modeling;Predictive models;Numerical models;Aggregates;Regression tree analysis;Missing values;Data imputation} """ if params is not None: - learning_neighbours, algo_code = self._check_params(user_defined, params) + learning_neighbours, algo_code = self._check_params(user_def, params) else: learning_neighbours, algo_code = utils.load_parameters(query="default", algorithm=self.algorithm) - self.imputed_matrix = iim(contamination=self.infected_matrix, number_neighbor=learning_neighbours, - algo_code=algo_code, logs=self.logs) + self.recov_data = iim(incomp_data=self.incomp_data, number_neighbor=learning_neighbours, + algo_code=algo_code, logs=self.logs) return self @@ -471,19 +471,19 @@ class CDRec(BaseImputer): Methods ------- - impute(self, user_defined=True, params=None): + impute(self, user_def=True, params=None): Perform imputation using the CDRec algorithm. """ algorithm = "cdrec" - def impute(self, user_defined=True, params=None): + def impute(self, user_def=True, params=None): """ Perform imputation using the CDRec algorithm. Parameters ---------- - user_defined : bool, optional + user_def : bool, optional Whether to use user-defined or default parameters (default is True). params : dict, optional Parameters of the CDRec algorithm or Auto-ML configuration, if None, default ones are loaded. @@ -499,7 +499,7 @@ def impute(self, user_defined=True, params=None): **Auto-ML parameters:** - - ground_truth : numpy.ndarray + - input_data : numpy.ndarray The original time series dataset without contamination. - optimizer : str The optimizer to use for parameter optimization. Valid values are "bayesian", "greedy", "pso", or "sh". @@ -510,7 +510,7 @@ def impute(self, user_defined=True, params=None): - n_calls : int, optional Number of calls to the objective function. Default is 3. - - selected_metrics : list, optional + - metrics : list, optional List of selected metrics to consider for optimization. Default is ["RMSE"]. - n_random_starts : int, optional Number of initial calls to the objective function, from random points. Default is 50. @@ -521,7 +521,7 @@ def impute(self, user_defined=True, params=None): - n_calls : int, optional Number of calls to the objective function. Default is 3. - - selected_metrics : list, optional + - metrics : list, optional List of selected metrics to consider for optimization. Default is ["RMSE"]. 
**PSO:** @@ -551,15 +551,15 @@ def impute(self, user_defined=True, params=None): Returns ------- self : CDRec - CDRec object with `imputed_matrix` set. + CDRec object with `recov_data` set. Example ------- - >>> cdrec_imputer = Imputation.MatrixCompletion.CDRec(infected_matrix) + >>> cdrec_imputer = Imputation.MatrixCompletion.CDRec(incomp_data) >>> cdrec_imputer.impute() # default parameters for imputation > or - >>> cdrec_imputer.impute(user_defined=True, params={'rank': 5, 'epsilon': 0.01, 'iterations': 100}) # user-defined > or - >>> cdrec_imputer.impute(user_defined=False, params={"ground_truth": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 2}}) # auto-ml with bayesian - >>> imputed_data = cdrec_imputer.imputed_matrix + >>> cdrec_imputer.impute(user_def=True, params={'rank': 5, 'epsilon': 0.01, 'iterations': 100}) # user-defined > or + >>> cdrec_imputer.impute(user_def=False, params={"input_data": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 2}}) # auto-ml with bayesian + >>> recov_data = cdrec_imputer.recov_data References ---------- @@ -567,12 +567,12 @@ def impute(self, user_defined=True, params=None): """ if params is not None: - rank, epsilon, iterations = self._check_params(user_defined, params) + rank, epsilon, iterations = self._check_params(user_def, params) else: rank, epsilon, iterations = utils.load_parameters(query="default", algorithm=self.algorithm) - self.imputed_matrix = cdrec(contamination=self.infected_matrix, truncation_rank=rank, - iterations=iterations, epsilon=epsilon, logs=self.logs) + self.recov_data = cdrec(incomp_data=self.incomp_data, truncation_rank=rank, + iterations=iterations, epsilon=epsilon, logs=self.logs) return self @@ -593,18 +593,18 @@ class MRNN(BaseImputer): Methods ------- - impute(self, user_defined=True, params=None): + impute(self, user_def=True, params=None): Perform imputation using the MRNN algorithm. """ algorithm = "mrnn" - def impute(self, user_defined=True, params=None): + def impute(self, user_def=True, params=None): """ Perform imputation using the MRNN algorithm. Parameters ---------- - user_defined : bool, optional + user_def : bool, optional Whether to use user-defined or default parameters (default is True). params : dict, optional Parameters of the MRNN algorithm, if None, default ones are loaded. @@ -621,29 +621,29 @@ def impute(self, user_defined=True, params=None): Returns ------- self : MRNN - The object with `imputed_matrix` set. + The object with `recov_data` set. Example ------- - >>> mrnn_imputer = Imputation.DeepLearning.MRNN(infected_matrix) + >>> mrnn_imputer = Imputation.DeepLearning.MRNN(incomp_data) >>> mrnn_imputer.impute() # default parameters for imputation > or - >>> mrnn_imputer.impute(user_defined=True, params={'hidden_dim': 10, 'learning_rate':0.01, 'iterations':50, 'sequence_length': 7}) # user-defined > or - >>> mrnn_imputer.impute(user_defined=False, params={"ground_truth": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 2}}) # auto-ml with bayesian - >>> imputed_data = mrnn_imputer.imputed_matrix + >>> mrnn_imputer.impute(user_def=True, params={'hidden_dim': 10, 'learning_rate':0.01, 'iterations':50, 'sequence_length': 7}) # user-defined > or + >>> mrnn_imputer.impute(user_def=False, params={"input_data": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 2}}) # auto-ml with bayesian + >>> recov_data = mrnn_imputer.recov_data References ---------- J. Yoon, W. R. Zame and M. 
van der Schaar, "Estimating Missing Data in Temporal Data Streams Using Multi-Directional Recurrent Neural Networks," in IEEE Transactions on Biomedical Engineering, vol. 66, no. 5, pp. 1477-1490, May 2019, doi: 10.1109/TBME.2018.2874712. keywords: {Time measurement;Interpolation;Estimation;Medical diagnostic imaging;Correlation;Recurrent neural networks;Biomedical measurement;Missing data;temporal data streams;imputation;recurrent neural nets} """ if params is not None: - hidden_dim, learning_rate, iterations, sequence_length = self._check_params(user_defined, params) + hidden_dim, learning_rate, iterations, sequence_length = self._check_params(user_def, params) else: hidden_dim, learning_rate, iterations, sequence_length = utils.load_parameters(query="default", algorithm="mrnn") - self.imputed_matrix = mrnn(contamination=self.infected_matrix, hidden_dim=hidden_dim, - learning_rate=learning_rate, iterations=iterations, - sequence_length=sequence_length, logs=self.logs) + self.recov_data = mrnn(incomp_data=self.incomp_data, hidden_dim=hidden_dim, + learning_rate=learning_rate, iterations=iterations, + sequence_length=sequence_length, logs=self.logs) return self @@ -663,18 +663,18 @@ class STMVL(BaseImputer): Methods ------- - impute(self, user_defined=True, params=None): + impute(self, user_def=True, params=None): Perform imputation using the STMVL algorithm. """ algorithm = "stmvl" - def impute(self, user_defined=True, params=None): + def impute(self, user_def=True, params=None): """ Perform imputation using the STMVL algorithm. Parameters ---------- - user_defined : bool, optional + user_def : bool, optional Whether to use user-defined or default parameters (default is True). params : dict, optional Parameters of the STMVL algorithm, if None, default ones are loaded. @@ -689,15 +689,15 @@ def impute(self, user_defined=True, params=None): Returns ------- self : STMVL - The object with `imputed_matrix` set. + The object with `recov_data` set. Example ------- - >>> stmvl_imputer = Imputation.PatternSearch.STMVL(infected_matrix) + >>> stmvl_imputer = Imputation.PatternSearch.STMVL(incomp_data) >>> stmvl_imputer.impute() # default parameters for imputation > or - >>> stmvl_imputer.impute(user_defined=True, params={'window_size': 7, 'learning_rate':0.01, 'gamma':0.85, 'alpha': 7}) # user-defined > or - >>> stmvl_imputer.impute(user_defined=False, params={"ground_truth": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 2}}) # auto-ml with bayesian - >>> imputed_data = stmvl_imputer.imputed_matrix + >>> stmvl_imputer.impute(user_def=True, params={'window_size': 7, 'learning_rate':0.01, 'gamma':0.85, 'alpha': 7}) # user-defined > or + >>> stmvl_imputer.impute(user_def=False, params={"input_data": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 2}}) # auto-ml with bayesian + >>> recov_data = stmvl_imputer.recov_data References ---------- @@ -705,12 +705,12 @@ def impute(self, user_defined=True, params=None): School of Information Science and Technology, Southwest Jiaotong University; Microsoft Research; Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences. 
""" if params is not None: - window_size, gamma, alpha = self._check_params(user_defined, params) + window_size, gamma, alpha = self._check_params(user_def, params) else: window_size, gamma, alpha = utils.load_parameters(query="default", algorithm="stmvl") - self.imputed_matrix = stmvl(contamination=self.infected_matrix, window_size=window_size, gamma=gamma, - alpha=alpha, logs=self.logs) + self.recov_data = stmvl(incomp_data=self.incomp_data, window_size=window_size, gamma=gamma, + alpha=alpha, logs=self.logs) return self diff --git a/build/lib/imputegap/recovery/manager.py b/build/lib/imputegap/recovery/manager.py index 92437b2..0334334 100644 --- a/build/lib/imputegap/recovery/manager.py +++ b/build/lib/imputegap/recovery/manager.py @@ -1,3 +1,4 @@ +import datetime import os import time import numpy as np @@ -50,12 +51,11 @@ class TimeSeries: normalize(normalizer="z_score") : Normalizes the time series dataset. - plot(raw_data, infected_data=None, imputed_data=None, title="Time Series Data", - max_series=None, max_values=None, size=(16, 8), save_path="", display=True) : + plot(input_data, incomp_data=None, recov_data=None, max_series=None, max_values=None, size=(16, 8), save_path="", display=True) : Plots the time series data, including raw, contaminated, or imputed data. - Contaminate : - Class containing methods to contaminate time series data with missing values based on different scenarios. + Contamination : + Class containing methods to contaminate time series data with missing values based on different patterns. """ @@ -286,26 +286,23 @@ def normalize(self, normalizer="z_score"): print(f"\n\t\t> logs, normalization {normalizer} - Execution Time: {(end_time - start_time):.4f} seconds\n") - def plot(self, raw_data, infected_data=None, imputed_data=None, title="Time Series Data", max_series=None, - max_values=None, series_x=None, size=(16, 8), save_path="", display=True): + def plot(self, input_data, incomp_data=None, recov_data=None, max_series=None, max_values=None, series_range=None, size=(16, 8), save_path="", display=True): """ Plot the time series data, including raw, contaminated, or imputed data. Parameters ---------- - raw_data : numpy.ndarray + input_data : numpy.ndarray The original time series data without contamination. - infected_data : numpy.ndarray, optional + incomp_data : numpy.ndarray, optional The contaminated time series data. - imputed_data : numpy.ndarray, optional + recov_data : numpy.ndarray, optional The imputed time series data. - title : str, optional - Title of the plot. Default is "Time Series Data". max_series : int, optional The maximum number of series to plot. max_values : int, optional The maximum number of values per series to plot. - series_x : int, optional + series_range : int, optional The index of a specific series to plot. If set, only this series will be plotted. size : tuple, optional Size of the plot in inches. Default is (16, 8). 
@@ -324,42 +321,42 @@ def plot(self, raw_data, infected_data=None, imputed_data=None, title="Time Seri plt.grid(True, linestyle='--', color='#d3d3d3', linewidth=0.6) if max_series is None: - max_series, _ = raw_data.shape + max_series, _ = input_data.shape if max_values is None: - _, max_values = raw_data.shape + _, max_values = input_data.shape - if raw_data is not None: + if input_data is not None: colors = utils.load_parameters("default", algorithm="colors") # Determine range of series to plot - series_indices = [series_x] if series_x is not None else range(raw_data.shape[0]) + series_indices = [series_range] if series_range is not None else range(input_data.shape[0]) for i in series_indices: color = colors[i % len(colors)] - if infected_data is None and imputed_data is None: # plot only raw matrix - plt.plot(np.arange(min(raw_data.shape[1], max_values)), raw_data[i, :max_values], linewidth=2.5, + if incomp_data is None and recov_data is None: # plot only raw matrix + plt.plot(np.arange(min(input_data.shape[1], max_values)), input_data[i, :max_values], linewidth=2.5, color=color, linestyle='-', label=f'TS {i + 1}') - if infected_data is not None and imputed_data is None: # plot infected matrix - if np.isnan(infected_data[i, :]).any(): - plt.plot(np.arange(min(raw_data.shape[1], max_values)), raw_data[i, :max_values], linewidth=1.5, + if incomp_data is not None and recov_data is None: # plot infected matrix + if np.isnan(incomp_data[i, :]).any(): + plt.plot(np.arange(min(input_data.shape[1], max_values)), input_data[i, :max_values], linewidth=1.5, color='r', linestyle='--', label=f'TS-MB {i + 1}') - plt.plot(np.arange(min(infected_data.shape[1], max_values)), infected_data[i, :max_values], + plt.plot(np.arange(min(incomp_data.shape[1], max_values)), incomp_data[i, :max_values], color=color, linewidth=2.5, linestyle='-', label=f'TS-RAW {i + 1}') - if imputed_data is not None: # plot imputed matrix - if np.isnan(infected_data[i, :]).any(): - plt.plot(np.arange(min(imputed_data.shape[1], max_values)), imputed_data[i, :max_values], + if recov_data is not None: # plot imputed matrix + if np.isnan(incomp_data[i, :]).any(): + plt.plot(np.arange(min(recov_data.shape[1], max_values)), recov_data[i, :max_values], linestyle='-', color="r", label=f'TS-IMP {i + 1}') - if np.isnan(infected_data[i, :]).any(): - plt.plot(np.arange(min(raw_data.shape[1], max_values)), raw_data[i, :max_values], linewidth=1.5, + if np.isnan(incomp_data[i, :]).any(): + plt.plot(np.arange(min(input_data.shape[1], max_values)), input_data[i, :max_values], linewidth=1.5, linestyle='--', color=color, label=f'TS-MB {i + 1}') - plt.plot(np.arange(min(infected_data.shape[1], max_values)), infected_data[i, :max_values], + plt.plot(np.arange(min(incomp_data.shape[1], max_values)), incomp_data[i, :max_values], color=color, linewidth=2.5, linestyle='-', label=f'TS-RAW {i + 1}') number_of_series += 1 @@ -368,7 +365,6 @@ def plot(self, raw_data, infected_data=None, imputed_data=None, title="Time Seri plt.xlabel('Timestamp') plt.ylabel('Values') - plt.title(title) plt.legend( loc='upper left', fontsize=12, @@ -382,54 +378,53 @@ def plot(self, raw_data, infected_data=None, imputed_data=None, title="Time Seri file_path = None if save_path: os.makedirs(save_path, exist_ok=True) - file_path = os.path.join(save_path + "/" + title.replace(" ", "") + "_plot.jpg") + + now = datetime.datetime.now() + current_time = now.strftime("%y_%m_%d_%H_%M_%S") + + file_path = os.path.join(save_path + "/" + current_time + "_plot.jpg") plt.savefig(file_path, 
bbox_inches='tight') print("plots saved in ", file_path) if display: plt.show() - # plt.close() - return file_path - class Contaminate: + class Contamination: """ - Inner class to apply contamination scenarios to the time series data. + Inner class to apply contamination patterns to the time series data. Methods ------- - mcar(ts, series_impacted=0.2, missing_rate=0.2, block_size=10, protection=0.1, use_seed=True, seed=42, explainer=False) : + mcar(ts, series_rate=0.2, missing_rate=0.2, block_size=10, offset=0.1, seed=True, explainer=False) : Apply Missing Completely at Random (MCAR) contamination to the time series data. - missing_percentage(ts, series_impacted=0.2, missing_rate=0.2, protection=0.1) : + missing_percentage(ts, series_rate=0.2, missing_rate=0.2, offset=0.1) : Apply missing percentage contamination to the time series data. - blackout(ts, missing_rate=0.2, protection=0.1) : + blackout(ts, missing_rate=0.2, offset=0.1) : Apply blackout contamination to the time series data. """ - def mcar(ts, series_impacted=0.2, missing_rate=0.2, block_size=10, protection=0.1, use_seed=True, seed=42, - explainer=False): + def mcar(input_data, series_rate=0.2, missing_rate=0.2, block_size=10, offset=0.1, seed=True, explainer=False): """ Apply Missing Completely at Random (MCAR) contamination to the time series data. Parameters ---------- - ts : numpy.ndarray + input_data : numpy.ndarray The time series dataset to contaminate. - series_impacted : float, optional + series_rate : float, optional Percentage of series to contaminate (default is 0.2). missing_rate : float, optional Percentage of missing values per series (default is 0.2). block_size : int, optional Size of the block of missing data (default is 10). - protection : float, optional + offset : float, optional Size of the uncontaminated section at the beginning of the series (default is 0.1). - use_seed : bool, optional + seed : bool, optional Whether to use a seed for reproducibility (default is True). - seed : int, optional - The value of the seed (default is 42). explainer : bool, optional Whether to apply MCAR to specific series for explanation purposes (default is False). @@ -439,37 +434,38 @@ def mcar(ts, series_impacted=0.2, missing_rate=0.2, block_size=10, protection=0. The contaminated time series data. 
""" - if use_seed: - np.random.seed(seed) + if seed: + seed_value = 42 + np.random.seed(seed_value) - ts_contaminated = ts.copy() + ts_contaminated = input_data.copy() M, _ = ts_contaminated.shape if not explainer: # use random series missing_rate = utils.verification_limitation(missing_rate) - series_impacted = utils.verification_limitation(series_impacted) - protection = utils.verification_limitation(protection) + series_rate = utils.verification_limitation(series_rate) + offset = utils.verification_limitation(offset) - nbr_series_impacted = int(np.ceil(M * series_impacted)) + nbr_series_impacted = int(np.ceil(M * series_rate)) series_selected = [str(idx) for idx in np.random.choice(M, nbr_series_impacted, replace=False)] else: # use fix series - series_selected = [str(series_impacted)] + series_selected = [str(series_rate)] if not explainer: print("\n\nMCAR contamination has been called with :" - "\n\ta number of series impacted ", series_impacted * 100, "%", + "\n\ta number of series impacted ", series_rate * 100, "%", "\n\ta missing rate of ", missing_rate * 100, "%", - "\n\ta starting position at ", protection, + "\n\ta starting position at ", offset, "\n\ta block size of ", block_size, - "\n\twith a seed option set to ", use_seed, + "\n\twith a seed option set to ", seed, "\n\tshape of the set ", ts_contaminated.shape, "\n\tthis selection of series", *series_selected, "\n\n") for series in series_selected: S = int(series) N = len(ts_contaminated[S]) # number of values in the series - P = int(N * protection) # values to protect in the beginning of the series + P = int(N * offset) # values to protect in the beginning of the series W = int((N - P) * missing_rate) # number of data to remove B = int(W / block_size) # number of block to remove @@ -498,19 +494,19 @@ def mcar(ts, series_impacted=0.2, missing_rate=0.2, block_size=10, protection=0. return ts_contaminated - def missing_percentage(ts, series_impacted=0.2, missing_rate=0.2, protection=0.1): + def missing_percentage(input_data, series_rate=0.2, missing_rate=0.2, offset=0.1): """ Apply missing percentage contamination to the time series data. Parameters ---------- - ts : numpy.ndarray + input_data : numpy.ndarray The time series dataset to contaminate. - series_impacted : float, optional + series_rate : float, optional Percentage of series to contaminate (default is 0.2). missing_rate : float, optional Percentage of missing values per series (default is 0.2). - protection : float, optional + offset : float, optional Size of the uncontaminated section at the beginning of the series (default is 0.1). Returns @@ -519,26 +515,26 @@ def missing_percentage(ts, series_impacted=0.2, missing_rate=0.2, protection=0.1 The contaminated time series data. 
""" - ts_contaminated = ts.copy() + ts_contaminated = input_data.copy() M, _ = ts_contaminated.shape missing_rate = utils.verification_limitation(missing_rate) - series_impacted = utils.verification_limitation(series_impacted) - protection = utils.verification_limitation(protection) + series_rate = utils.verification_limitation(series_rate) + offset = utils.verification_limitation(offset) - nbr_series_impacted = int(np.ceil(M * series_impacted)) + nbr_series_impacted = int(np.ceil(M * series_rate)) print("\n\nMISSING PERCENTAGE contamination has been called with :" - "\n\ta number of series impacted ", series_impacted * 100, "%", + "\n\ta number of series impacted ", series_rate * 100, "%", "\n\ta missing rate of ", missing_rate * 100, "%", - "\n\ta starting position at ", protection, + "\n\ta starting position at ", offset, "\n\tshape of the set ", ts_contaminated.shape, "\n\tthis selection of series 0 to ", nbr_series_impacted, "\n\n") for series in range(0, nbr_series_impacted): S = int(series) N = len(ts_contaminated[S]) # number of values in the series - P = int(N * protection) # values to protect in the beginning of the series + P = int(N * offset) # values to protect in the beginning of the series W = int((N - P) * missing_rate) # number of data to remove for to_remove in range(0, W): @@ -547,17 +543,17 @@ def missing_percentage(ts, series_impacted=0.2, missing_rate=0.2, protection=0.1 return ts_contaminated - def blackout(ts, missing_rate=0.2, protection=0.1): + def blackout(input_data, missing_rate=0.2, offset=0.1): """ Apply blackout contamination to the time series data. Parameters ---------- - ts : numpy.ndarray + input_data : numpy.ndarray The time series dataset to contaminate. missing_rate : float, optional Percentage of missing values per series (default is 0.2). - protection : float, optional + offset : float, optional Size of the uncontaminated section at the beginning of the series (default is 0.1). Returns @@ -565,5 +561,5 @@ def blackout(ts, missing_rate=0.2, protection=0.1): numpy.ndarray The contaminated time series data. """ - return TimeSeries.Contaminate.missing_percentage(ts, series_impacted=1, missing_rate=missing_rate, - protection=protection) + return TimeSeries.Contamination.missing_percentage(input_data, series_rate=1, missing_rate=missing_rate, + offset=offset) diff --git a/build/lib/imputegap/recovery/optimization.py b/build/lib/imputegap/recovery/optimization.py index 9bf549d..bb212c5 100644 --- a/build/lib/imputegap/recovery/optimization.py +++ b/build/lib/imputegap/recovery/optimization.py @@ -28,7 +28,7 @@ class BaseOptimizer: _objective(**kwargs): Abstract method to evaluate the imputation algorithm with the provided parameters. Must be implemented by subclasses. - optimize(ground_truth, contamination, selected_metrics, algorithm, **kwargs): + optimize(input_data, incomp_data, metrics, algorithm, **kwargs): Abstract method for the main optimization process. Must be implemented by subclasses. """ @@ -50,13 +50,13 @@ def _objective(self, **kwargs): ---------- **kwargs : dict Parameters needed to evaluate the imputation algorithm, such as: - - ground_truth : numpy.ndarray + - input_data : numpy.ndarray The ground truth time series dataset. - contamination : numpy.ndarray The contaminated time series dataset to impute. - algorithm : str The imputation algorithm name. - - selected_metrics : list of str + - metrics : list of str List of selected metrics for optimization. - params : dict or list Parameter values for the optimization. 
@@ -68,7 +68,7 @@ def _objective(self, **kwargs): """ raise NotImplementedError("Subclasses must implement the _objective method") - def optimize(self, ground_truth, contamination, selected_metrics, algorithm, **kwargs): + def optimize(self, input_data, incomp_data, metrics, algorithm, **kwargs): """ Abstract method for optimization. Must be implemented in subclasses. @@ -78,11 +78,11 @@ def optimize(self, ground_truth, contamination, selected_metrics, algorithm, **k Parameters ---------- - ground_truth : numpy.ndarray + input_data : numpy.ndarray The ground truth time series dataset. - contamination : numpy.ndarray + incomp_data : numpy.ndarray The contaminated time series dataset to impute. - selected_metrics : list of str + metrics : list of str List of selected metrics for optimization. algorithm : str The imputation algorithm to optimize. @@ -106,16 +106,16 @@ class Optimization: Methods ------- - Greedy.optimize(ground_truth, contamination, selected_metrics=["RMSE"], algorithm="cdrec", n_calls=250): + Greedy.optimize(input_data, incomp_data, metrics=["RMSE"], algorithm="cdrec", n_calls=250): Perform greedy optimization for hyperparameters. - Bayesian.optimize(ground_truth, contamination, selected_metrics=["RMSE"], algorithm="cdrec", n_calls=100, n_random_starts=50, acq_func='gp_hedge'): + Bayesian.optimize(input_data, incomp_data, metrics=["RMSE"], algorithm="cdrec", n_calls=100, n_random_starts=50, acq_func='gp_hedge'): Perform Bayesian optimization for hyperparameters. - ParticleSwarm.optimize(ground_truth, contamination, selected_metrics, algorithm, n_particles, c1, c2, w, iterations, n_processes): + ParticleSwarm.optimize(input_data, incomp_data, metrics, algorithm, n_particles, c1, c2, w, iterations, n_processes): Perform Particle Swarm Optimization (PSO) for hyperparameters. - SuccessiveHalving.optimize(ground_truth, contamination, selected_metrics, algorithm, num_configs, num_iterations, reduction_factor): + SuccessiveHalving.optimize(input_data, incomp_data, metrics, algorithm, num_configs, num_iterations, reduction_factor): Perform Successive Halving optimization for hyperparameters. """ @@ -124,19 +124,19 @@ class Greedy(BaseOptimizer): Greedy optimization strategy for hyperparameters. """ - def _objective(self, ground_truth, contamination, algorithm, selected_metrics, params): + def _objective(self, input_data, incomp_data, algorithm, metrics, params): """ Objective function for Greedy optimization. Parameters ---------- - ground_truth : numpy.ndarray + input_data : numpy.ndarray The ground truth time series dataset. - contamination : numpy.ndarray + incomp_data : numpy.ndarray The contaminated time series dataset to impute. algorithm : str The imputation algorithm name. - selected_metrics : list of str + metrics : list of str List of selected metrics for optimization. params : dict The parameters for the imputation algorithm. @@ -146,24 +146,24 @@ def _objective(self, ground_truth, contamination, algorithm, selected_metrics, p float Mean error for the selected metrics. 
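The greedy objective below simply averages the selected metrics returned by `Imputation.evaluate_params`; a short illustration with made-up error values:

```python
# Made-up error values, only to illustrate the mean-of-selected-metrics objective.
import numpy as np

errors = {"RMSE": 0.42, "MAE": 0.30, "MI": 0.80, "CORRELATION": 0.90}
metrics = ["RMSE", "MAE"]
score = float(np.mean([errors[m] for m in metrics]))   # (0.42 + 0.30) / 2 = 0.36
print(score)
```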
""" - errors = Imputation.evaluate_params(ground_truth, contamination, params, algorithm) + errors = Imputation.evaluate_params(input_data, incomp_data, params, algorithm) - if not isinstance(selected_metrics, list): - selected_metrics = [selected_metrics] + if not isinstance(metrics, list): + metrics = [metrics] - return np.mean([errors[metric] for metric in selected_metrics]) + return np.mean([errors[metric] for metric in metrics]) - def optimize(self, ground_truth, contamination, selected_metrics=["RMSE"], algorithm="cdrec", n_calls=250): + def optimize(self, input_data, incomp_data, metrics=["RMSE"], algorithm="cdrec", n_calls=250): """ Perform greedy optimization for hyperparameters. Parameters ---------- - ground_truth : numpy.ndarray + input_data : numpy.ndarray The ground truth time series dataset. - contamination : numpy.ndarray + incomp_data : numpy.ndarray The contaminated time series dataset to impute. - selected_metrics : list of str, optional + metrics : list of str, optional List of selected metrics for optimization (default is ["RMSE"]). algorithm : str, optional The imputation algorithm to optimize (default is 'cdrec'). @@ -202,7 +202,7 @@ def optimize(self, ground_truth, contamination, selected_metrics=["RMSE"], algor params_dict = {name: value for name, value in zip(param_names, params)} # Calculate the score for the current set of parameters - score = self._objective(ground_truth, contamination, algorithm, selected_metrics, params_dict) + score = self._objective(input_data, incomp_data, algorithm, metrics, params_dict) # Update the best parameters if the current score is better if score < best_score: @@ -222,19 +222,19 @@ class Bayesian(BaseOptimizer): Bayesian optimization strategy for hyperparameters. """ - def _objective(self, ground_truth, contamination, algorithm, selected_metrics, params): + def _objective(self, input_data, incomp_data, algorithm, metrics, params): """ Objective function for Bayesian optimization. Parameters ---------- - ground_truth : numpy.ndarray + input_data : numpy.ndarray The ground truth time series dataset. - contamination : numpy.ndarray + incomp_data : numpy.ndarray The contaminated time series dataset to impute. algorithm : str The imputation algorithm name. - selected_metrics : list of str + metrics : list of str List of selected metrics for optimization. params : dict Parameter values for the optimization. @@ -250,24 +250,24 @@ def _objective(self, ground_truth, contamination, algorithm, selected_metrics, p else: param_values = tuple(params) - if not isinstance(selected_metrics, list): - selected_metrics = [selected_metrics] + if not isinstance(metrics, list): + metrics = [metrics] - errors = Imputation.evaluate_params(ground_truth, contamination, param_values, algorithm) - return np.mean([errors[metric] for metric in selected_metrics]) + errors = Imputation.evaluate_params(input_data, incomp_data, param_values, algorithm) + return np.mean([errors[metric] for metric in metrics]) - def optimize(self, ground_truth, contamination, selected_metrics=["RMSE"], algorithm="cdrec", n_calls=100, + def optimize(self, input_data, incomp_data, metrics=["RMSE"], algorithm="cdrec", n_calls=100, n_random_starts=50, acq_func='gp_hedge'): """ Perform Bayesian optimization for hyperparameters. Parameters ---------- - ground_truth : numpy.ndarray + input_data : numpy.ndarray The ground truth time series dataset. - contamination : numpy.ndarray + incomp_data : numpy.ndarray The contaminated time series dataset to impute. 
- selected_metrics : list of str, optional + metrics : list of str, optional List of selected metrics for optimization (default is ["RMSE"]). algorithm : str, optional The imputation algorithm to optimize (default is 'cdrec'). @@ -287,9 +287,9 @@ def optimize(self, ground_truth, contamination, selected_metrics=["RMSE"], algor search_spaces = SEARCH_SPACES - # Adjust the search space for 'cdrec' based on obfuscated_matrix + # Adjust the search space for 'cdrec' based on incomp_data if algorithm == 'cdrec': - max_rank = contamination.shape[1] - 1 + max_rank = incomp_data.shape[1] - 1 SEARCH_SPACES['cdrec'][0] = Integer(0, min(9, max_rank), name='rank') # Update the rank range # Define the search space @@ -299,7 +299,7 @@ def optimize(self, ground_truth, contamination, selected_metrics=["RMSE"], algor optimizer = skopt.Optimizer(dimensions=space, n_initial_points=n_random_starts, acq_func=acq_func) for i in range(n_calls): suggested_params = optimizer.ask() - score = self._objective(ground_truth, contamination, algorithm, selected_metrics, suggested_params) + score = self._objective(input_data, incomp_data, algorithm, metrics, suggested_params) optimizer.tell(suggested_params, score) # Optimal parameters @@ -343,19 +343,19 @@ def _format_params(self, particle_params, algorithm): return particle_params - def _objective(self, ground_truth, contamination, algorithm, selected_metrics, params): + def _objective(self, input_data, incomp_data, algorithm, metrics, params): """ Objective function for Particle Swarm Optimization. Parameters ---------- - ground_truth : numpy.ndarray + input_data : numpy.ndarray The ground truth time series dataset. - contamination : numpy.ndarray + incomp_data : numpy.ndarray The contaminated time series dataset to impute. algorithm : str The imputation algorithm name. - selected_metrics : list of str + metrics : list of str List of selected metrics for optimization. params : numpy.ndarray Parameter values for the optimization. @@ -373,22 +373,22 @@ def _objective(self, ground_truth, contamination, algorithm, selected_metrics, p for i in range(n_particles): # Iterate over each particle particle_params = self._format_params(params[i], algorithm) # Get the parameters for this particle - errors = Imputation.evaluate_params(ground_truth, contamination, tuple(particle_params), algorithm) - errors_for_all_particles[i] = np.mean([errors[metric] for metric in selected_metrics]) + errors = Imputation.evaluate_params(input_data, incomp_data, tuple(particle_params), algorithm) + errors_for_all_particles[i] = np.mean([errors[metric] for metric in metrics]) return errors_for_all_particles - def optimize(self, ground_truth, contamination, selected_metrics, algorithm, n_particles, c1, c2, w, iterations, + def optimize(self, input_data, incomp_data, metrics, algorithm, n_particles, c1, c2, w, iterations, n_processes): """ Perform Particle Swarm Optimization for hyperparameters. Parameters ---------- - ground_truth : numpy.ndarray + input_data : numpy.ndarray The ground truth time series dataset. - contamination : numpy.ndarray + incomp_data : numpy.ndarray The contaminated time series dataset to impute. - selected_metrics : list of str, optional + metrics : list of str, optional List of selected metrics for optimization (default is ["RMSE"]). algorithm : str, optional The imputation algorithm to optimize (default is 'cdrec'). 
@@ -412,14 +412,14 @@ def optimize(self, ground_truth, contamination, selected_metrics, algorithm, n_p """ start_time = time.time() # Record start time - if not isinstance(selected_metrics, list): - selected_metrics = [selected_metrics] + if not isinstance(metrics, list): + metrics = [metrics] # Define the search space search_space = SEARCH_SPACES_PSO if algorithm == 'cdrec': - max_rank = contamination.shape[1] - 1 + max_rank = incomp_data.shape[1] - 1 search_space['cdrec'][0] = (search_space['cdrec'][0][0], min(search_space['cdrec'][0][1], max_rank)) # Select the correct search space based on the algorithm @@ -434,7 +434,7 @@ def optimize(self, ground_truth, contamination, selected_metrics, algorithm, n_p options={'c1': c1, 'c2': c2, 'w': w}, bounds=bounds) # Perform optimization - objective_with_args = partial(self._objective, ground_truth, contamination, algorithm, selected_metrics) + objective_with_args = partial(self._objective, input_data, incomp_data, algorithm, metrics) cost, pos = optimizer.optimize(objective_with_args, iters=iterations, n_processes=n_processes) param_names = PARAM_NAMES @@ -450,7 +450,7 @@ def optimize(self, ground_truth, contamination, selected_metrics, algorithm, n_p class SuccessiveHalving(BaseOptimizer): - def _objective(self, errors_dict, selected_metrics): + def _objective(self, errors_dict, metrics): """ Objective function for Successive Halving optimization. @@ -458,7 +458,7 @@ def _objective(self, errors_dict, selected_metrics): ---------- errors_dict : dict Dictionary containing error metrics. - selected_metrics : list of str + metrics : list of str List of selected metrics for optimization. Returns @@ -466,21 +466,21 @@ def _objective(self, errors_dict, selected_metrics): float Mean error for the selected metrics. """ - selected_errors = [errors_dict[metric] for metric in selected_metrics] + selected_errors = [errors_dict[metric] for metric in metrics] return np.mean(selected_errors) - def optimize(self, ground_truth, contamination, selected_metrics, algorithm, num_configs, num_iterations, + def optimize(self, input_data, incomp_data, metrics, algorithm, num_configs, num_iterations, reduction_factor): """ Perform Successive Halving optimization for hyperparameters. Parameters ---------- - ground_truth : numpy.ndarray + input_data : numpy.ndarray The ground truth time series dataset. - contamination : numpy.ndarray + incomp_data : numpy.ndarray The contaminated time series dataset to impute. - selected_metrics : list of str, optional + metrics : list of str, optional List of selected metrics for optimization (default is ["RMSE"]). algorithm : str, optional The imputation algorithm to optimize (default is 'cdrec'). 
@@ -498,18 +498,18 @@ def optimize(self, ground_truth, contamination, selected_metrics, algorithm, num """ start_time = time.time() # Record start time - if not isinstance(selected_metrics, list): - selected_metrics = [selected_metrics] + if not isinstance(metrics, list): + metrics = [metrics] # Define the parameter names for each algorithm param_names = PARAM_NAMES - data_length = len(ground_truth) + data_length = len(input_data) chunk_size = data_length // num_iterations # prepare configurations for each algorithm separately if algorithm == 'cdrec': - max_rank = contamination.shape[1] - 1 + max_rank = incomp_data.shape[1] - 1 temp_rank_range = [i for i in sh_params.CDREC_RANK_RANGE if i < max_rank] if not temp_rank_range: @@ -535,12 +535,12 @@ def optimize(self, ground_truth, contamination, selected_metrics, algorithm, num for i in range(num_iterations): # Calculate how much data to use in this iteration end_idx = (i + 1) * chunk_size - partial_ground_truth = ground_truth[:end_idx] - partial_obfuscated = contamination[:end_idx] + partial_input_data = input_data[:end_idx] + partial_obfuscated = incomp_data[:end_idx] scores = [self._objective( - Imputation.evaluate_params(partial_ground_truth, partial_obfuscated, config, algorithm), - selected_metrics) for config in configs] + Imputation.evaluate_params(partial_input_data, partial_obfuscated, config, algorithm), + metrics) for config in configs] top_configs_idx = np.argsort(scores)[:max(1, len(configs) // reduction_factor)] configs = [configs[i] for i in top_configs_idx] @@ -552,14 +552,14 @@ def optimize(self, ground_truth, contamination, selected_metrics, algorithm, num if algorithm == 'iim': best_config = min(configs, key=lambda single_config: self._objective( - Imputation.evaluate_params(ground_truth, contamination, [single_config], algorithm), - selected_metrics)) + Imputation.evaluate_params(input_data, incomp_data, [single_config], algorithm), + metrics)) else: best_config = min(configs, key=lambda config: self._objective( - Imputation.evaluate_params(ground_truth, contamination, config, algorithm), selected_metrics)) + Imputation.evaluate_params(input_data, incomp_data, config, algorithm), metrics)) best_score = self._objective( - Imputation.evaluate_params(ground_truth, contamination, best_config, algorithm), selected_metrics) + Imputation.evaluate_params(input_data, incomp_data, best_config, algorithm), metrics) # Check the size of param_names[algorithm] if len(param_names[algorithm]) == 1: diff --git a/build/lib/imputegap/report.log b/build/lib/imputegap/report.log index c845a08..142fb64 100644 --- a/build/lib/imputegap/report.log +++ b/build/lib/imputegap/report.log @@ -1,2 +1,14 @@ 2024-11-05 13:35:23,270 - pyswarms.single.global_best - INFO - Optimize for 2 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9} 2024-11-05 13:35:25,844 - pyswarms.single.global_best - INFO - Optimization finished | best cost: 0.30811485946437683, best pos: [8.02117616e+00 4.16577767e-02 4.57882822e+02] +2025-01-06 15:35:47,715 - matplotlib.legend - WARNING - No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. +2025-01-06 15:35:48,117 - matplotlib.legend - WARNING - No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. +2025-01-06 15:35:48,272 - matplotlib.legend - WARNING - No artists with labels found to put in legend. 
Note that artists whose label start with an underscore are ignored when legend() is called with no argument. +2025-01-06 15:35:48,417 - matplotlib.legend - WARNING - No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. +2025-01-06 15:36:29,357 - matplotlib.legend - WARNING - No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. +2025-01-06 15:36:30,118 - matplotlib.legend - WARNING - No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. +2025-01-06 15:36:30,554 - matplotlib.legend - WARNING - No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. +2025-01-06 15:36:31,014 - matplotlib.legend - WARNING - No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. +2025-01-06 15:38:01,844 - matplotlib.legend - WARNING - No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. +2025-01-06 15:38:02,387 - matplotlib.legend - WARNING - No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. +2025-01-06 15:38:02,642 - matplotlib.legend - WARNING - No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. +2025-01-06 15:38:02,826 - matplotlib.legend - WARNING - No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. 
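
The optimization.py hunks above rename the optimizer entry points consistently: `ground_truth` becomes `input_data`, `contamination` becomes `incomp_data`, and `selected_metrics` becomes `metrics`, across Greedy, Bayesian, ParticleSwarm, and SuccessiveHalving. A minimal sketch of how calling code would look after this rename is shown below. Only the keyword names and the `Bayesian.optimize` signature come from the diff itself; the import path (mirroring `build/lib/imputegap/recovery/optimization.py`), the synthetic NumPy arrays, and the captured return value are assumptions for illustration, not the library's documented usage.

```python
# Sketch only: assumes the installed package exposes the same module path
# as build/lib/imputegap/recovery/optimization.py shown in the diff.
import numpy as np
from imputegap.recovery.optimization import Optimization

# Hypothetical data: a complete matrix (new name: input_data) and a copy
# with NaNs standing in for the contaminated values (new name: incomp_data).
input_data = np.random.rand(10, 200)
incomp_data = input_data.copy()
incomp_data[2, 50:80] = np.nan

# After the rename, `metrics` replaces `selected_metrics` everywhere.
# Keyword names and defaults follow the Bayesian.optimize signature in the hunks;
# the exact return value is not visible in this diff, so it is captured generically.
result = Optimization.Bayesian().optimize(
    input_data=input_data,
    incomp_data=incomp_data,
    metrics=["RMSE"],
    algorithm="cdrec",
    n_calls=15,
    n_random_starts=50,
    acq_func="gp_hedge",
)
```
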
diff --git a/build/lib/imputegap/reports/benchmarking_rmse.jpg b/build/lib/imputegap/reports/benchmarking_rmse.jpg index 0a14d94..75b3ed7 100644 Binary files a/build/lib/imputegap/reports/benchmarking_rmse.jpg and b/build/lib/imputegap/reports/benchmarking_rmse.jpg differ diff --git a/imputegap/runner_benchmarking.py b/build/lib/imputegap/runner_benchmark.py similarity index 98% rename from imputegap/runner_benchmarking.py rename to build/lib/imputegap/runner_benchmark.py index 53aaaf1..3ac76aa 100644 --- a/imputegap/runner_benchmarking.py +++ b/build/lib/imputegap/runner_benchmark.py @@ -1,39 +1,45 @@ -from imputegap.recovery.benchmarking import Benchmarking +from imputegap.recovery.benchmark import Benchmark -reconstruction = True -matrix = True +reconstruction = False +matrix = False datasets_full = ["eeg-alcohol", "eeg-reading", "fmri-objectviewing", "fmri-stoptask", "chlorine", "drift"] +dataset_test = ["eeg-alcohol"] -opti_bayesian = {"optimizer": "bayesian", "options": {"n_calls": 15, "n_random_starts": 50, "acq_func": "gp_hedge", "selected_metrics": "RMSE"}} -opti_greedy = {"optimizer": "greedy", "options": {"n_calls": 250, "selected_metrics": "RMSE"}} -opti_pso = {"optimizer": "pso", "options": {"n_particles": 50, "iterations": 10, "selected_metrics": "RMSE"}} -opti_sh = {"optimizer": "sh", "options": {"num_configs": 10, "num_iterations": 5, "selected_metrics": "RMSE"}} +opti_bayesian = {"optimizer": "bayesian", "options": {"n_calls": 15, "n_random_starts": 50, "acq_func": "gp_hedge", "metrics": "RMSE"}} +opti_greedy = {"optimizer": "greedy", "options": {"n_calls": 250, "metrics": "RMSE"}} +opti_pso = {"optimizer": "pso", "options": {"n_particles": 50, "iterations": 10, "metrics": "RMSE"}} +opti_sh = {"optimizer": "sh", "options": {"num_configs": 10, "num_iterations": 5, "metrics": "RMSE"}} optimizers = [opti_bayesian] algorithms_full = ["mean", "cdrec", "stmvl", "iim", "mrnn"] +algorithms_test = ["mean", "cdrec", "stmvl"] -scenarios_small = ["mcar"] -scenarios_full = ["mcar", "missing_percentage"] +patterns_small = ["mcar"] +patterns_full = ["mcar", "missing_percentage"] x_axis = [0.05, 0.1, 0.2, 0.4, 0.6, 0.8] if not reconstruction: - results = Benchmarking().comprehensive_evaluation(datasets=datasets_full, optimizers=optimizers, algorithms=algorithms_full, scenarios=scenarios_small, x_axis=x_axis, already_optimized=False, reports=3) - print("\n\n\nresults:", results) + runs_results, avg_scores_list = Benchmark().eval(algorithms=algorithms_test, datasets=dataset_test, patterns=patterns_small, x_axis=x_axis, optimizers=optimizers, save_dir="test_naterq", runs=3) + print("\n\n\nresults:", runs_results) + elif reconstruction and not matrix: test_plots = {'chlorine': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9256738243031312, 'MAE': 0.8788758766429177, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.001201629638671875, 'optimization': 0, 'imputation': 0.0005724430084228516}}, '0.1': {'scores': {'RMSE': 0.8239629739455251, 'MAE': 0.7297827051195541, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.001814126968383789, 'optimization': 0, 'imputation': 0.0004563331604003906}}, '0.2': {'scores': {'RMSE': 0.8317409760747367, 'MAE': 0.7138664942301458, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.005623817443847656, 'optimization': 0, 'imputation': 0.0004363059997558594}}, '0.4': {'scores': {'RMSE': 0.866178542847881, 'MAE': 0.744937943856253, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.03413963317871094, 'optimization': 
0, 'imputation': 0.0005552768707275391}}, '0.6': {'scores': {'RMSE': 0.8906205973878023, 'MAE': 0.7677632103385671, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.13074183464050293, 'optimization': 0, 'imputation': 0.0005936622619628906}}, '0.8': {'scores': {'RMSE': 0.9231926867636093, 'MAE': 0.7897697041316387, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.4494190216064453, 'optimization': 0, 'imputation': 0.0005834102630615234}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.19555801767314038, 'MAE': 0.14379634965165344, 'MI': 1.3195962394272744, 'CORRELATION': 0.9770377315860114}, 'times': {'contamination': 0.0010943412780761719, 'optimization': 1.6249148845672607, 'imputation': 0.09233546257019043}}, '0.1': {'scores': {'RMSE': 0.22211329096601584, 'MAE': 0.13682609208383795, 'MI': 1.225240202380491, 'CORRELATION': 0.9627754587047338}, 'times': {'contamination': 0.005602359771728516, 'optimization': 1.6249148845672607, 'imputation': 0.1246938705444336}}, '0.2': {'scores': {'RMSE': 0.26890140517000855, 'MAE': 0.16983555417798818, 'MI': 1.0650037012869458, 'CORRELATION': 0.945331872005451}, 'times': {'contamination': 0.017725229263305664, 'optimization': 1.6249148845672607, 'imputation': 0.1363234519958496}}, '0.4': {'scores': {'RMSE': 0.3143181342292365, 'MAE': 0.2041263696093189, 'MI': 0.9133456774887369, 'CORRELATION': 0.9309636417166443}, 'times': {'contamination': 0.1031486988067627, 'optimization': 1.6249148845672607, 'imputation': 0.2686195373535156}}, '0.6': {'scores': {'RMSE': 0.37514780116434926, 'MAE': 0.22156474038385332, 'MI': 0.7775541845220788, 'CORRELATION': 0.9078517283026865}, 'times': {'contamination': 0.20231366157531738, 'optimization': 1.6249148845672607, 'imputation': 0.8690693378448486}}, '0.8': {'scores': {'RMSE': 0.9117409046445515, 'MAE': 0.4801132374733116, 'MI': 0.2576488533530952, 'CORRELATION': 0.6589813814462316}, 'times': {'contamination': 0.5354366302490234, 'optimization': 1.6249148845672607, 'imputation': 2.865450143814087}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.3033328648259709, 'MAE': 0.2644983508914945, 'MI': 1.2263963519649825, 'CORRELATION': 0.9611641055318173}, 'times': {'contamination': 0.0029397010803222656, 'optimization': 500.0222601890564, 'imputation': 23.88236165046692}}, '0.1': {'scores': {'RMSE': 0.27434099749552526, 'MAE': 0.22744969879475732, 'MI': 1.0873378350271077, 'CORRELATION': 0.9481608575454046}, 'times': {'contamination': 0.001943349838256836, 'optimization': 500.0222601890564, 'imputation': 24.082878351211548}}, '0.2': {'scores': {'RMSE': 0.3354154243946063, 'MAE': 0.2667902544729111, 'MI': 0.9040935528948765, 'CORRELATION': 0.9224394175345223}, 'times': {'contamination': 0.007236480712890625, 'optimization': 500.0222601890564, 'imputation': 27.05676031112671}}, '0.4': {'scores': {'RMSE': 0.3663147584695216, 'MAE': 0.2683992893683706, 'MI': 0.7945562213511235, 'CORRELATION': 0.9086873163095024}, 'times': {'contamination': 0.03319692611694336, 'optimization': 500.0222601890564, 'imputation': 24.969536066055298}}, '0.6': {'scores': {'RMSE': 0.49178356901493514, 'MAE': 0.3590429489696727, 'MI': 0.568068131156551, 'CORRELATION': 0.8240735290572155}, 'times': {'contamination': 0.13401484489440918, 'optimization': 500.0222601890564, 'imputation': 17.722254991531372}}, '0.8': {'scores': {'RMSE': 5.286373452119497, 'MAE': 3.0120315981628085, 'MI': 0.0877803352414065, 'CORRELATION': 0.4417418016734377}, 'times': {'contamination': 0.46097803115844727, 'optimization': 500.0222601890564, 
'imputation': 17.994383335113525}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.2246776140243064, 'MAE': 0.16265112492381306, 'MI': 1.0875116207955637, 'CORRELATION': 0.9694504836799154}, 'times': {'contamination': 0.0009558200836181641, 'optimization': 4871.80725812912, 'imputation': 1.680412769317627}}, '0.1': {'scores': {'RMSE': 0.3034580006710775, 'MAE': 0.20388299260278156, 'MI': 1.0526306210784155, 'CORRELATION': 0.9337303655141744}, 'times': {'contamination': 0.0018503665924072266, 'optimization': 4871.80725812912, 'imputation': 10.345388412475586}}, '0.2': {'scores': {'RMSE': 0.4104578379330223, 'MAE': 0.2785159738696005, 'MI': 0.7986686024303655, 'CORRELATION': 0.8658822456465257}, 'times': {'contamination': 0.0055084228515625, 'optimization': 4871.80725812912, 'imputation': 65.17643117904663}}, '0.4': {'scores': {'RMSE': 0.4911437971846393, 'MAE': 0.32455728476996504, 'MI': 0.6429014104572732, 'CORRELATION': 0.8180219110130202}, 'times': {'contamination': 0.032411813735961914, 'optimization': 4871.80725812912, 'imputation': 474.7696805000305}}, '0.6': {'scores': {'RMSE': 0.579715388344659, 'MAE': 0.4144431747763777, 'MI': 0.45413696197432313, 'CORRELATION': 0.7431519134806602}, 'times': {'contamination': 0.1278684139251709, 'optimization': 4871.80725812912, 'imputation': 1531.380850315094}}, '0.8': {'scores': {'RMSE': 0.8100585330320411, 'MAE': 0.6124983237048439, 'MI': 0.1600984202902365, 'CORRELATION': 0.48808679305097513}, 'times': {'contamination': 0.4592604637145996, 'optimization': 4871.80725812912, 'imputation': 3588.4590351581573}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0889986961845628, 'MAE': 0.8825193440526788, 'MI': 0.569311657025473, 'CORRELATION': 0.006110871130276294}, 'times': {'contamination': 0.0009238719940185547, 'optimization': 474.33066391944885, 'imputation': 37.89777088165283}}, '0.1': {'scores': {'RMSE': 0.8750845974360951, 'MAE': 0.7897191908914645, 'MI': 0.36542131337202255, 'CORRELATION': 0.1776164808833599}, 'times': {'contamination': 0.0020151138305664062, 'optimization': 474.33066391944885, 'imputation': 36.68788194656372}}, '0.2': {'scores': {'RMSE': 1.3935692458593014, 'MAE': 1.1278169009994172, 'MI': 0.23278876704617288, 'CORRELATION': -0.0043224216288866475}, 'times': {'contamination': 0.006083011627197266, 'optimization': 474.33066391944885, 'imputation': 34.238656997680664}}, '0.4': {'scores': {'RMSE': 1.2198343626008104, 'MAE': 1.004323747843723, 'MI': 0.11694146418635429, 'CORRELATION': -2.8855554502904036e-05}, 'times': {'contamination': 0.03404045104980469, 'optimization': 474.33066391944885, 'imputation': 37.132654428482056}}, '0.6': {'scores': {'RMSE': 1.1924360263528335, 'MAE': 0.9838535398356899, 'MI': 0.0794767096848362, 'CORRELATION': -0.06570944989748748}, 'times': {'contamination': 0.1405935287475586, 'optimization': 474.33066391944885, 'imputation': 37.741902351379395}}, '0.8': {'scores': {'RMSE': 1.3728850685938416, 'MAE': 1.1227443270722774, 'MI': 0.08611037233596197, 'CORRELATION': -0.012424819834313067}, 'times': {'contamination': 0.47881627082824707, 'optimization': 474.33066391944885, 'imputation': 37.675835847854614}}}}}}} - Benchmarking().generate_plots(runs_plots_scores=test_plots, s="50", v="1000") + Benchmark().generate_plots(runs_plots_scores=test_plots, ticks=x_axis, subplot=True, save_dir="./test_naterq") + Benchmark().generate_reports_txt(runs_plots_scores=test_plots, save_dir="./test_naterq", dataset="chlorine", run=0) + Benchmark().generate_reports_excel(runs_plots_scores=test_plots, 
save_dir="./test_naterq", dataset="chlorine", run=0) + """ test_plots = {'eeg_reading': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.6937095315379215, 'MAE': 0.5871322524124026, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0013728141784667969, 'optimization': 0, 'imputation': 0.0005629062652587891}}, '0.1': {'scores': {'RMSE': 0.8825047928812179, 'MAE': 0.7058469910884912, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0016565322875976562, 'optimization': 0, 'imputation': 0.00047278404235839844}}, '0.2': {'scores': {'RMSE': 1.0076040625030085, 'MAE': 0.8133998806656898, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.00404810905456543, 'optimization': 0, 'imputation': 0.00045371055603027344}}, '0.4': {'scores': {'RMSE': 1.014301846668858, 'MAE': 0.8219008090987252, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.03703451156616211, 'optimization': 0, 'imputation': 0.0006351470947265625}}, '0.6': {'scores': {'RMSE': 1.0158383459630567, 'MAE': 0.8210620770500036, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.11827206611633301, 'optimization': 0, 'imputation': 0.000629425048828125}}, '0.8': {'scores': {'RMSE': 1.01877327240803, 'MAE': 0.8157442592731639, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.39914441108703613, 'optimization': 0, 'imputation': 0.0005762577056884766}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.6092168096877171, 'MAE': 0.43725473329243575, 'MI': 0.8113862111415893, 'CORRELATION': 0.6669628813645995}, 'times': {'contamination': 0.0009872913360595703, 'optimization': -0.284501314163208, 'imputation': 0.19295310974121094}}, '0.1': {'scores': {'RMSE': 0.7694804794390454, 'MAE': 0.46934521855095135, 'MI': 0.6303931303314629, 'CORRELATION': 0.6338704662419556}, 'times': {'contamination': 0.004288911819458008, 'optimization': -0.284501314163208, 'imputation': 0.23847365379333496}}, '0.2': {'scores': {'RMSE': 0.54163559631001, 'MAE': 0.3838909357504076, 'MI': 0.6804417798137956, 'CORRELATION': 0.8550799708158655}, 'times': {'contamination': 0.01486515998840332, 'optimization': -0.284501314163208, 'imputation': 0.4856741428375244}}, '0.4': {'scores': {'RMSE': 0.6150678993354384, 'MAE': 0.3994113839683473, 'MI': 0.5964930437182837, 'CORRELATION': 0.8282842809048951}, 'times': {'contamination': 0.10318613052368164, 'optimization': -0.284501314163208, 'imputation': 0.5878500938415527}}, '0.6': {'scores': {'RMSE': 0.8559878849846194, 'MAE': 0.4875679606049892, 'MI': 0.4352238530939769, 'CORRELATION': 0.7114520144242487}, 'times': {'contamination': 0.1801285743713379, 'optimization': -0.284501314163208, 'imputation': 2.260394811630249}}, '0.8': {'scores': {'RMSE': 1.0028418021086185, 'MAE': 0.6478458585388304, 'MI': 0.26800404550676565, 'CORRELATION': 0.6191696179492259}, 'times': {'contamination': 0.45122456550598145, 'optimization': -0.284501314163208, 'imputation': 2.1127378940582275}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.38913260498789515, 'MAE': 0.28887981808629887, 'MI': 0.9003693740232723, 'CORRELATION': 0.8305251080007574}, 'times': {'contamination': 0.004096508026123047, 'optimization': 474.1963918209076, 'imputation': 28.061330318450928}}, '0.1': {'scores': {'RMSE': 0.42262809349896036, 'MAE': 0.31228067649415225, 'MI': 0.8330304029808546, 'CORRELATION': 0.8802327685495391}, 'times': {'contamination': 0.0014801025390625, 'optimization': 474.1963918209076, 'imputation': 28.245431184768677}}, '0.2': {'scores': {'RMSE': 0.4299898931605415, 'MAE': 
0.2914674774962624, 'MI': 0.8303895697315763, 'CORRELATION': 0.9049819009058613}, 'times': {'contamination': 0.0040132999420166016, 'optimization': 474.1963918209076, 'imputation': 30.743361473083496}}, '0.4': {'scores': {'RMSE': 0.4658583297277367, 'MAE': 0.32456738916683475, 'MI': 0.7105191885562022, 'CORRELATION': 0.8900070341144635}, 'times': {'contamination': 0.02961254119873047, 'optimization': 474.1963918209076, 'imputation': 29.556389808654785}}, '0.6': {'scores': {'RMSE': 0.5970596677005412, 'MAE': 0.40317626348969443, 'MI': 0.5057637077329502, 'CORRELATION': 0.8092444114848254}, 'times': {'contamination': 0.10307097434997559, 'optimization': 474.1963918209076, 'imputation': 20.913992404937744}}, '0.8': {'scores': {'RMSE': 4.099584545523784, 'MAE': 1.4360755142687804, 'MI': 0.03924813725195477, 'CORRELATION': 0.21658071586750138}, 'times': {'contamination': 0.38839101791381836, 'optimization': 474.1963918209076, 'imputation': 18.921329736709595}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.738070963229811, 'MAE': 0.5586987523761138, 'MI': 0.9549505679325584, 'CORRELATION': 0.4719896373208298}, 'times': {'contamination': 0.00078582763671875, 'optimization': 3513.147577047348, 'imputation': 1.0417201519012451}}, '0.1': {'scores': {'RMSE': 0.6776044115374216, 'MAE': 0.4883939650690726, 'MI': 0.6051652352756725, 'CORRELATION': 0.7008457537827716}, 'times': {'contamination': 0.001641988754272461, 'optimization': 3513.147577047348, 'imputation': 6.6430745124816895}}, '0.2': {'scores': {'RMSE': 0.642538776211307, 'MAE': 0.45068800736093795, 'MI': 0.5847752699836343, 'CORRELATION': 0.7870826275047371}, 'times': {'contamination': 0.003993034362792969, 'optimization': 3513.147577047348, 'imputation': 43.09042835235596}}, '0.4': {'scores': {'RMSE': 0.595402838774376, 'MAE': 0.4200046319465559, 'MI': 0.5780737300771779, 'CORRELATION': 0.8157719741321808}, 'times': {'contamination': 0.037882328033447266, 'optimization': 3513.147577047348, 'imputation': 325.3523244857788}}, '0.6': {'scores': {'RMSE': 0.6457758226280373, 'MAE': 0.465851861042097, 'MI': 0.4940897071221384, 'CORRELATION': 0.7797841684978442}, 'times': {'contamination': 0.1108400821685791, 'optimization': 3513.147577047348, 'imputation': 1001.1619775295258}}, '0.8': {'scores': {'RMSE': 0.7031022809975706, 'MAE': 0.5292159877681492, 'MI': 0.3802525627714059, 'CORRELATION': 0.7224487387493247}, 'times': {'contamination': 0.3698101043701172, 'optimization': 3513.147577047348, 'imputation': 2408.869615316391}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.347580032956689, 'MAE': 1.1601095334550815, 'MI': 0.1586603634624117, 'CORRELATION': -0.18399931939875533}, 'times': {'contamination': 0.0010280609130859375, 'optimization': 294.0501501560211, 'imputation': 24.799844980239868}}, '0.1': {'scores': {'RMSE': 1.4429520609208166, 'MAE': 1.1748671084301718, 'MI': 0.21482702111483185, 'CORRELATION': -0.3608701962716392}, 'times': {'contamination': 0.0016400814056396484, 'optimization': 294.0501501560211, 'imputation': 27.284573793411255}}, '0.2': {'scores': {'RMSE': 1.2876145426625936, 'MAE': 1.0269096650749077, 'MI': 0.07484247431469719, 'CORRELATION': -0.007533643863897717}, 'times': {'contamination': 0.004055500030517578, 'optimization': 294.0501501560211, 'imputation': 26.624001264572144}}, '0.4': {'scores': {'RMSE': 1.4608458720939472, 'MAE': 1.1594757078481346, 'MI': 0.03787981276790102, 'CORRELATION': 0.011173417358467087}, 'times': {'contamination': 0.03002309799194336, 'optimization': 294.0501501560211, 
'imputation': 27.17277193069458}}, '0.6': {'scores': {'RMSE': 1.634424595829425, 'MAE': 1.3356121929070988, 'MI': 0.02225643037919471, 'CORRELATION': -0.02299352560191792}, 'times': {'contamination': 0.10050559043884277, 'optimization': 294.0501501560211, 'imputation': 27.999096632003784}}, '0.8': {'scores': {'RMSE': 1.3047427885621508, 'MAE': 1.05104242568594, 'MI': 0.01007462604941533, 'CORRELATION': -0.004969975534923902}, 'times': {'contamination': 0.3871951103210449, 'optimization': 294.0501501560211, 'imputation': 27.538389205932617}}}}}}} - Benchmarking().generate_plots(runs_plots_scores=test_plots, s="33", v="1201") + Benchmark().generate_plots(runs_plots_scores=test_plots, ticks=x_axis, subplot=True, save_dir="./test_naterq") test_plots = {'eeg_alcohol': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 1.107394798606378, 'MAE': 0.9036474830477748, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.008088350296020508, 'optimization': 0, 'imputation': 0.0003597736358642578}}, '0.1': {'scores': {'RMSE': 0.8569349076796438, 'MAE': 0.6416542359734557, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0013017654418945312, 'optimization': 0, 'imputation': 0.00038313865661621094}}, '0.2': {'scores': {'RMSE': 0.9609255264919324, 'MAE': 0.756013835497571, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0017611980438232422, 'optimization': 0, 'imputation': 0.00021719932556152344}}, '0.4': {'scores': {'RMSE': 1.0184989120725458, 'MAE': 0.8150966718352457, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.012012958526611328, 'optimization': 0, 'imputation': 0.0003046989440917969}}, '0.6': {'scores': {'RMSE': 0.9997401940199045, 'MAE': 0.7985721718600829, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.04199981689453125, 'optimization': 0, 'imputation': 0.000354766845703125}}, '0.8': {'scores': {'RMSE': 0.9895691678332014, 'MAE': 0.7901674118013952, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.11134958267211914, 'optimization': 0, 'imputation': 0.00029206275939941406}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.27658600512073456, 'MAE': 0.20204444801773774, 'MI': 1.6287285825717355, 'CORRELATION': 0.9837210171556283}, 'times': {'contamination': 0.0006604194641113281, 'optimization': 1.5429341793060303, 'imputation': 0.054087162017822266}}, '0.1': {'scores': {'RMSE': 0.2322153312143858, 'MAE': 0.1729082341483471, 'MI': 1.1990748751673153, 'CORRELATION': 0.9640732993793864}, 'times': {'contamination': 0.0025806427001953125, 'optimization': 1.5429341793060303, 'imputation': 0.07224416732788086}}, '0.2': {'scores': {'RMSE': 0.21796283300762773, 'MAE': 0.16255811567403466, 'MI': 1.184724280002774, 'CORRELATION': 0.9737521039022545}, 'times': {'contamination': 0.0056035518646240234, 'optimization': 1.5429341793060303, 'imputation': 0.039177656173706055}}, '0.4': {'scores': {'RMSE': 0.2852656711446442, 'MAE': 0.19577380664036, 'MI': 1.014828207927502, 'CORRELATION': 0.959485242427464}, 'times': {'contamination': 0.03652334213256836, 'optimization': 1.5429341793060303, 'imputation': 0.0999898910522461}}, '0.6': {'scores': {'RMSE': 0.3360171448119046, 'MAE': 0.23184686418998596, 'MI': 0.8789374924043876, 'CORRELATION': 0.9418882413737133}, 'times': {'contamination': 0.10041642189025879, 'optimization': 1.5429341793060303, 'imputation': 0.1369919776916504}}, '0.8': {'scores': {'RMSE': 0.5558362531202891, 'MAE': 0.37446346030237454, 'MI': 0.5772409317426037, 'CORRELATION': 0.8478935496183876}, 'times': {'contamination': 
0.17512726783752441, 'optimization': 1.5429341793060303, 'imputation': 0.38109540939331055}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.7434750032306926, 'MAE': 0.5711687107703531, 'MI': 1.0614546580642759, 'CORRELATION': 0.7570103181096193}, 'times': {'contamination': 0.001224517822265625, 'optimization': 25.973577737808228, 'imputation': 1.827949047088623}}, '0.1': {'scores': {'RMSE': 0.6079049353979786, 'MAE': 0.4565071330548986, 'MI': 0.5897845472515851, 'CORRELATION': 0.7033347467102922}, 'times': {'contamination': 0.0011165142059326172, 'optimization': 25.973577737808228, 'imputation': 1.8397388458251953}}, '0.2': {'scores': {'RMSE': 0.5938200686690087, 'MAE': 0.4583475323523134, 'MI': 0.5238356117195857, 'CORRELATION': 0.789556744168648}, 'times': {'contamination': 0.0017132759094238281, 'optimization': 25.973577737808228, 'imputation': 1.8568992614746094}}, '0.4': {'scores': {'RMSE': 0.6922622994445695, 'MAE': 0.5327565871766037, 'MI': 0.3842117779328253, 'CORRELATION': 0.738304743934084}, 'times': {'contamination': 0.009068012237548828, 'optimization': 25.973577737808228, 'imputation': 2.0719306468963623}}, '0.6': {'scores': {'RMSE': 0.7719376402414535, 'MAE': 0.5756544384278333, 'MI': 0.268745121385816, 'CORRELATION': 0.6398387148302656}, 'times': {'contamination': 0.02822709083557129, 'optimization': 25.973577737808228, 'imputation': 1.5673530101776123}}, '0.8': {'scores': {'RMSE': 1.0218833589128922, 'MAE': 0.8012134667654269, 'MI': 0.0051679642909252645, 'CORRELATION': 0.06083718960882358}, 'times': {'contamination': 0.09586524963378906, 'optimization': 25.973577737808228, 'imputation': 1.7056498527526855}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.26665906759668434, 'MAE': 0.21589657916392105, 'MI': 1.4930024107375521, 'CORRELATION': 0.9704001503125854}, 'times': {'contamination': 0.0005829334259033203, 'optimization': 563.972785949707, 'imputation': 0.05102872848510742}}, '0.1': {'scores': {'RMSE': 0.28425094570125403, 'MAE': 0.22787684897303442, 'MI': 1.0594854362146846, 'CORRELATION': 0.9444192673990515}, 'times': {'contamination': 0.0008502006530761719, 'optimization': 563.972785949707, 'imputation': 0.2425684928894043}}, '0.2': {'scores': {'RMSE': 0.334887339804727, 'MAE': 0.25851830743811066, 'MI': 0.9711245925356778, 'CORRELATION': 0.9390073163681255}, 'times': {'contamination': 0.001627206802368164, 'optimization': 563.972785949707, 'imputation': 1.4222276210784912}}, '0.4': {'scores': {'RMSE': 0.4719169787140248, 'MAE': 0.35026878431372477, 'MI': 0.7196112128770917, 'CORRELATION': 0.8858920655062363}, 'times': {'contamination': 0.008496761322021484, 'optimization': 563.972785949707, 'imputation': 12.960479974746704}}, '0.6': {'scores': {'RMSE': 0.47736733503847095, 'MAE': 0.35628454418236766, 'MI': 0.6157654491357567, 'CORRELATION': 0.8790867703136753}, 'times': {'contamination': 0.026967287063598633, 'optimization': 563.972785949707, 'imputation': 35.622944831848145}}, '0.8': {'scores': {'RMSE': 0.5747595088880484, 'MAE': 0.4242587159311907, 'MI': 0.4843046739917606, 'CORRELATION': 0.8188927905931169}, 'times': {'contamination': 0.08214735984802246, 'optimization': 563.972785949707, 'imputation': 87.41280603408813}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 2.5423293855369917, 'MAE': 2.2141103663578803, 'MI': 0.6072901854577394, 'CORRELATION': -0.6360397852133122}, 'times': {'contamination': 0.0005042552947998047, 'optimization': 59585.917899131775, 'imputation': 33.87153220176697}}, '0.1': {'scores': {'RMSE': 1.6013078224502717, 
'MAE': 1.2201563721098412, 'MI': 0.2626533928770599, 'CORRELATION': -0.17448314526813025}, 'times': {'contamination': 0.0009407997131347656, 'optimization': 59585.917899131775, 'imputation': 33.780593156814575}}, '0.2': {'scores': {'RMSE': 1.1980243320030763, 'MAE': 0.9205437185390488, 'MI': 0.12523228756285484, 'CORRELATION': 0.053025850949979476}, 'times': {'contamination': 0.0020706653594970703, 'optimization': 59585.917899131775, 'imputation': 33.35025191307068}}, '0.4': {'scores': {'RMSE': 1.3154357320206076, 'MAE': 1.0563143800308983, 'MI': 0.04771994618237419, 'CORRELATION': 0.04644513674411651}, 'times': {'contamination': 0.009225606918334961, 'optimization': 59585.917899131775, 'imputation': 32.76318073272705}}, '0.6': {'scores': {'RMSE': 1.409745596231954, 'MAE': 1.1320098137715748, 'MI': 0.013200537946505414, 'CORRELATION': -0.051391074582830536}, 'times': {'contamination': 0.028786659240722656, 'optimization': 59585.917899131775, 'imputation': 32.95982527732849}}, '0.8': {'scores': {'RMSE': 1.3165198817323216, 'MAE': 1.0603105471734755, 'MI': 0.006831327215000855, 'CORRELATION': -0.010253125321586447}, 'times': {'contamination': 0.11717653274536133, 'optimization': 59585.917899131775, 'imputation': 31.418609857559204}}}}}}} - Benchmarking().generate_plots(runs_plots_scores=test_plots, s="64", v="256") + Benchmark().generate_plots(runs_plots_scores=test_plots, ticks=x_axis, subplot=True, save_dir="./test_naterq") test_plots = {'drift': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9234927128429051, 'MAE': 0.7219362152785619, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.008000850677490234, 'optimization': 0, 'imputation': 0.0005795955657958984}}, '0.1': {'scores': {'RMSE': 0.9699990038879407, 'MAE': 0.7774057495176013, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0019245147705078125, 'optimization': 0, 'imputation': 0.0005664825439453125}}, '0.2': {'scores': {'RMSE': 0.9914069853975623, 'MAE': 0.8134840739732964, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.009830474853515625, 'optimization': 0, 'imputation': 0.0005776882171630859}}, '0.4': {'scores': {'RMSE': 1.0552448338389784, 'MAE': 0.7426695186604741, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.04627418518066406, 'optimization': 0, 'imputation': 0.0005333423614501953}}, '0.6': {'scores': {'RMSE': 1.0143105930114702, 'MAE': 0.7610548321723654, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.16058969497680664, 'optimization': 0, 'imputation': 0.0005693435668945312}}, '0.8': {'scores': {'RMSE': 1.010712060535523, 'MAE': 0.7641520748788702, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.49263739585876465, 'optimization': 0, 'imputation': 0.0005679130554199219}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.23303624184873972, 'MAE': 0.1361979723519773, 'MI': 1.2739817718416822, 'CORRELATION': 0.968435455112644}, 'times': {'contamination': 0.0011115074157714844, 'optimization': 2.84889817237854, 'imputation': 0.1434638500213623}}, '0.1': {'scores': {'RMSE': 0.18152059329152107, 'MAE': 0.09925566629402763, 'MI': 1.1516089897042538, 'CORRELATION': 0.982939835222072}, 'times': {'contamination': 0.004365444183349609, 'optimization': 2.84889817237854, 'imputation': 0.14118409156799316}}, '0.2': {'scores': {'RMSE': 0.13894771223733138, 'MAE': 0.0845903269210229, 'MI': 1.186191167936035, 'CORRELATION': 0.9901338133811375}, 'times': {'contamination': 0.01644587516784668, 'optimization': 2.84889817237854, 'imputation': 
0.16940855979919434}}, '0.4': {'scores': {'RMSE': 0.7544523683503815, 'MAE': 0.1121804997359425, 'MI': 0.021165172206064526, 'CORRELATION': 0.8141205075707254}, 'times': {'contamination': 0.10604023933410645, 'optimization': 2.84889817237854, 'imputation': 2.0186331272125244}}, '0.6': {'scores': {'RMSE': 0.4355197572001314, 'MAE': 0.13808466247330484, 'MI': 0.10781252370591506, 'CORRELATION': 0.9166777087122918}, 'times': {'contamination': 0.2030637264251709, 'optimization': 2.84889817237854, 'imputation': 2.0608761310577393}}, '0.8': {'scores': {'RMSE': 0.7672558930795491, 'MAE': 0.3298896842843935, 'MI': 0.013509125598802707, 'CORRELATION': 0.7312998041323682}, 'times': {'contamination': 0.5499897003173828, 'optimization': 2.84889817237854, 'imputation': -0.47277092933654785}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.5434405584289141, 'MAE': 0.346560495723809, 'MI': 0.7328867182584357, 'CORRELATION': 0.8519431955571422}, 'times': {'contamination': 0.0021185874938964844, 'optimization': 514.5863847732544, 'imputation': 34.6202232837677}}, '0.1': {'scores': {'RMSE': 0.39007056542870916, 'MAE': 0.2753022759369617, 'MI': 0.8280959876205578, 'CORRELATION': 0.9180937736429735}, 'times': {'contamination': 0.0018591880798339844, 'optimization': 514.5863847732544, 'imputation': 35.190133810043335}}, '0.2': {'scores': {'RMSE': 0.37254427425455994, 'MAE': 0.2730547993858495, 'MI': 0.7425412593844177, 'CORRELATION': 0.9293322959355041}, 'times': {'contamination': 0.005822181701660156, 'optimization': 514.5863847732544, 'imputation': 35.46649789810181}}, '0.4': {'scores': {'RMSE': 0.6027573766269363, 'MAE': 0.34494332493982044, 'MI': 0.11876685901414151, 'CORRELATION': 0.8390532279447225}, 'times': {'contamination': 0.03864097595214844, 'optimization': 514.5863847732544, 'imputation': 34.30042386054993}}, '0.6': {'scores': {'RMSE': 0.9004526656857551, 'MAE': 0.4924048353228427, 'MI': 0.011590260996247858, 'CORRELATION': 0.5650541301828254}, 'times': {'contamination': 0.14191699028015137, 'optimization': 514.5863847732544, 'imputation': 29.5026593208313}}, '0.8': {'scores': {'RMSE': 1.0112488396023014, 'MAE': 0.7646823531588104, 'MI': 0.00040669209664367576, 'CORRELATION': 0.0183962968474991}, 'times': {'contamination': 0.46815061569213867, 'optimization': 514.5863847732544, 'imputation': 22.864952564239502}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.4445625930776235, 'MAE': 0.2696133927362288, 'MI': 1.1167751522591498, 'CORRELATION': 0.8944975075266335}, 'times': {'contamination': 0.0008444786071777344, 'optimization': 5050.300735235214, 'imputation': 0.6499700546264648}}, '0.1': {'scores': {'RMSE': 0.2939506418814281, 'MAE': 0.16953644212278182, 'MI': 1.0160968166750064, 'CORRELATION': 0.9531900627237018}, 'times': {'contamination': 0.0019328594207763672, 'optimization': 5050.300735235214, 'imputation': 4.424615383148193}}, '0.2': {'scores': {'RMSE': 0.2366529609250008, 'MAE': 0.14709529129218185, 'MI': 1.064299483512458, 'CORRELATION': 0.9711348247027318}, 'times': {'contamination': 0.005669116973876953, 'optimization': 5050.300735235214, 'imputation': 28.64192819595337}}, '0.4': {'scores': {'RMSE': 0.4155649406397416, 'MAE': 0.22056702659999994, 'MI': 0.06616526470761779, 'CORRELATION': 0.919934494058292}, 'times': {'contamination': 0.03133583068847656, 'optimization': 5050.300735235214, 'imputation': 215.96445870399475}}, '0.6': {'scores': {'RMSE': 0.38695094864012947, 'MAE': 0.24340565131372927, 'MI': 0.06361822797740405, 'CORRELATION': 0.9249744935121553}, 'times': 
{'contamination': 0.1293776035308838, 'optimization': 5050.300735235214, 'imputation': 711.7917039394379}}, '0.8': {'scores': {'RMSE': 0.5862696375344495, 'MAE': 0.3968159514130716, 'MI': 0.13422239939628303, 'CORRELATION': 0.8178796825899766}, 'times': {'contamination': 0.45540356636047363, 'optimization': 5050.300735235214, 'imputation': 1666.3830137252808}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9458168886934889, 'MAE': 0.7087024488997395, 'MI': 0.11924522547609226, 'CORRELATION': -0.04225238590482719}, 'times': {'contamination': 0.0010085105895996094, 'optimization': 478.6599726676941, 'imputation': 41.931705474853516}}, '0.1': {'scores': {'RMSE': 1.012708832814332, 'MAE': 0.7612398956786116, 'MI': 0.125135259797581, 'CORRELATION': -0.037524204443007164}, 'times': {'contamination': 0.0019328594207763672, 'optimization': 478.6599726676941, 'imputation': 37.289856910705566}}, '0.2': {'scores': {'RMSE': 1.0293662762879399, 'MAE': 0.79543999581101, 'MI': 0.10908095436833125, 'CORRELATION': -0.03892162998680425}, 'times': {'contamination': 0.005481719970703125, 'optimization': 478.6599726676941, 'imputation': 39.732287645339966}}, '0.4': {'scores': {'RMSE': 1.08276653737942, 'MAE': 0.7324224949731254, 'MI': 0.008689250019683584, 'CORRELATION': -0.020719639766949276}, 'times': {'contamination': 0.032985687255859375, 'optimization': 478.6599726676941, 'imputation': 40.06472086906433}}, '0.6': {'scores': {'RMSE': 1.0436806660629465, 'MAE': 0.7612577768282424, 'MI': 0.011650658060022669, 'CORRELATION': -0.0069952780339244845}, 'times': {'contamination': 0.13504815101623535, 'optimization': 478.6599726676941, 'imputation': 41.86172533035278}}, '0.8': {'scores': {'RMSE': 1.0386764847922278, 'MAE': 0.7580243538074385, 'MI': 0.0035404637707733143, 'CORRELATION': -0.0010165957084160128}, 'times': {'contamination': 0.4962472915649414, 'optimization': 478.6599726676941, 'imputation': 44.58724093437195}}}}}}} - Benchmarking().generate_plots(runs_plots_scores=test_plots, s="50", v="1000") - + Benchmark().generate_plots(runs_plots_scores=test_plots, ticks=x_axis, subplot=True, save_dir="./test_naterq") + """ if matrix : run_1_chlorine = {'chlorine': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9256738243031312, 'MAE': 0.8788758766429177, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0009789466857910156, 'optimization': 0, 'imputation': 0.000560760498046875}}, '0.1': {'scores': {'RMSE': 0.8239629739455251, 'MAE': 0.7297827051195541, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.002305746078491211, 'optimization': 0, 'imputation': 0.0004634857177734375}}, '0.2': {'scores': {'RMSE': 0.8317409760747367, 'MAE': 0.7138664942301458, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.007703065872192383, 'optimization': 0, 'imputation': 0.0004649162292480469}}, '0.4': {'scores': {'RMSE': 0.866178542847881, 'MAE': 0.744937943856253, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.047789812088012695, 'optimization': 0, 'imputation': 0.0005023479461669922}}, '0.6': {'scores': {'RMSE': 0.8906205973878023, 'MAE': 0.7677632103385671, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.19488763809204102, 'optimization': 0, 'imputation': 0.0005488395690917969}}, '0.8': {'scores': {'RMSE': 0.9231926867636093, 'MAE': 0.7897697041316387, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.6890411376953125, 'optimization': 0, 'imputation': 0.0005776882171630859}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 
0.19554703625817557, 'MAE': 0.1437913973228053, 'MI': 1.3195962394272744, 'CORRELATION': 0.9770406565915004}, 'times': {'contamination': 0.0009171962738037109, 'optimization': 0, 'imputation': 0.05464982986450195}}, '0.1': {'scores': {'RMSE': 0.22212985201492597, 'MAE': 0.1368378161074427, 'MI': 1.225240202380491, 'CORRELATION': 0.9627706895400587}, 'times': {'contamination': 0.004944562911987305, 'optimization': 0, 'imputation': 0.070037841796875}}, '0.2': {'scores': {'RMSE': 0.268910630576598, 'MAE': 0.16983805083071585, 'MI': 1.0636573662919013, 'CORRELATION': 0.9453283753208437}, 'times': {'contamination': 0.01749396324157715, 'optimization': 0, 'imputation': 0.07790756225585938}}, '0.4': {'scores': {'RMSE': 0.31430310541683426, 'MAE': 0.2041005558473225, 'MI': 0.9124259582934485, 'CORRELATION': 0.9309696942537548}, 'times': {'contamination': 0.11426258087158203, 'optimization': 0, 'imputation': 0.1478443145751953}}, '0.6': {'scores': {'RMSE': 0.3737964229023613, 'MAE': 0.22131322530176772, 'MI': 0.7775995167572279, 'CORRELATION': 0.9083977308218121}, 'times': {'contamination': 0.2614400386810303, 'optimization': 0, 'imputation': 0.4230384826660156}}, '0.8': {'scores': {'RMSE': 0.9290440261799385, 'MAE': 0.4933255678502781, 'MI': 0.2021428083194056, 'CORRELATION': 0.6461059842947307}, 'times': {'contamination': 0.7493531703948975, 'optimization': 0, 'imputation': 4.412551164627075}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.16435641817881824, 'MAE': 0.13990340223545955, 'MI': 1.3785977665357232, 'CORRELATION': 0.9868224741901116}, 'times': {'contamination': 0.0036211013793945312, 'optimization': 0, 'imputation': 39.150184869766235}}, '0.1': {'scores': {'RMSE': 0.2228247553722344, 'MAE': 0.16815959364081734, 'MI': 1.2340069760129087, 'CORRELATION': 0.9623151173186535}, 'times': {'contamination': 0.002553224563598633, 'optimization': 0, 'imputation': 39.25465536117554}}, '0.2': {'scores': {'RMSE': 0.27923604567760596, 'MAE': 0.19211165697030474, 'MI': 1.0043820035861775, 'CORRELATION': 0.9430094313080399}, 'times': {'contamination': 0.008016109466552734, 'optimization': 0, 'imputation': 39.86703276634216}}, '0.4': {'scores': {'RMSE': 0.3255775619246775, 'MAE': 0.2194073917812186, 'MI': 0.8847163339667148, 'CORRELATION': 0.9259001258177321}, 'times': {'contamination': 0.04792189598083496, 'optimization': 0, 'imputation': 41.36716914176941}}, '0.6': {'scores': {'RMSE': 0.44447910257331374, 'MAE': 0.30600741310945195, 'MI': 0.6723738452451481, 'CORRELATION': 0.857466472714002}, 'times': {'contamination': 0.19208693504333496, 'optimization': 0, 'imputation': 30.92500948905945}}, '0.8': {'scores': {'RMSE': 2.9806206255800913, 'MAE': 1.530963982498524, 'MI': 0.05121884841141813, 'CORRELATION': 0.2903624430928721}, 'times': {'contamination': 0.6799006462097168, 'optimization': 0, 'imputation': 28.389225006103516}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.1560886685592231, 'MAE': 0.10320394166419149, 'MI': 1.2780123906233032, 'CORRELATION': 0.9851724611327715}, 'times': {'contamination': 0.0010797977447509766, 'optimization': 0, 'imputation': 0.8159213066101074}}, '0.1': {'scores': {'RMSE': 0.3006324748663841, 'MAE': 0.17773178955210425, 'MI': 1.2119149147233643, 'CORRELATION': 0.9321993026569703}, 'times': {'contamination': 0.0021529197692871094, 'optimization': 0, 'imputation': 5.404278039932251}}, '0.2': {'scores': {'RMSE': 0.30708253455892426, 'MAE': 0.18786443029344255, 'MI': 1.0350247745925767, 'CORRELATION': 0.9270935540980816}, 'times': {'contamination': 
0.007862567901611328, 'optimization': 0, 'imputation': 39.23897194862366}}, '0.4': {'scores': {'RMSE': 0.36627844349732885, 'MAE': 0.23513471435395922, 'MI': 0.8536501396545491, 'CORRELATION': 0.9028949327632931}, 'times': {'contamination': 0.04749464988708496, 'optimization': 0, 'imputation': 291.0960524082184}}, '0.6': {'scores': {'RMSE': 0.44187263450733627, 'MAE': 0.3005295255111392, 'MI': 0.7070128664004881, 'CORRELATION': 0.8600506431175654}, 'times': {'contamination': 0.19056296348571777, 'optimization': 0, 'imputation': 961.3684046268463}}, '0.8': {'scores': {'RMSE': 0.6162987723847368, 'MAE': 0.4408568111584791, 'MI': 0.38562262881823584, 'CORRELATION': 0.7078269987710476}, 'times': {'contamination': 0.6741812229156494, 'optimization': 0, 'imputation': 2265.02947473526}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.2157597331971723, 'MAE': 1.0542417765804475, 'MI': 0.569311657025473, 'CORRELATION': -0.41037521809198385}, 'times': {'contamination': 0.0011243820190429688, 'optimization': 0, 'imputation': 18.271201610565186}}, '0.1': {'scores': {'RMSE': 1.1799455746309517, 'MAE': 1.0537900828112892, 'MI': 0.3698854611544671, 'CORRELATION': -0.30580392001607287}, 'times': {'contamination': 0.0025169849395751953, 'optimization': 0, 'imputation': 18.178789377212524}}, '0.2': {'scores': {'RMSE': 1.341883829249102, 'MAE': 1.1116623537227253, 'MI': 0.22703785144726024, 'CORRELATION': -0.13139818884461385}, 'times': {'contamination': 0.008020877838134766, 'optimization': 0, 'imputation': 18.227224111557007}}, '0.4': {'scores': {'RMSE': 1.4574773306729822, 'MAE': 1.221059892905018, 'MI': 0.1526121106442972, 'CORRELATION': -0.06171770589679702}, 'times': {'contamination': 0.04882335662841797, 'optimization': 0, 'imputation': 18.527106523513794}}, '0.6': {'scores': {'RMSE': 1.4501476980845394, 'MAE': 1.1589217747122664, 'MI': 0.08174182790842249, 'CORRELATION': -0.028201438478978574}, 'times': {'contamination': 0.19412755966186523, 'optimization': 0, 'imputation': 19.096518754959106}}, '0.8': {'scores': {'RMSE': 1.204799199247893, 'MAE': 1.002446633752256, 'MI': 0.08875526330977121, 'CORRELATION': -0.02097728376019728}, 'times': {'contamination': 0.6939215660095215, 'optimization': 0, 'imputation': 19.685445308685303}}}}}}} run_2_chlorine = {'chlorine': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9256738243031312, 'MAE': 0.8788758766429177, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.001043081283569336, 'optimization': 0, 'imputation': 0.0008816719055175781}}, '0.1': {'scores': {'RMSE': 0.8239629739455251, 'MAE': 0.7297827051195541, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.002270221710205078, 'optimization': 0, 'imputation': 0.00047469139099121094}}, '0.2': {'scores': {'RMSE': 0.8317409760747367, 'MAE': 0.7138664942301458, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.007225513458251953, 'optimization': 0, 'imputation': 0.0004715919494628906}}, '0.4': {'scores': {'RMSE': 0.866178542847881, 'MAE': 0.744937943856253, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.04382967948913574, 'optimization': 0, 'imputation': 0.0005059242248535156}}, '0.6': {'scores': {'RMSE': 0.8906205973878023, 'MAE': 0.7677632103385671, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.17531085014343262, 'optimization': 0, 'imputation': 0.0005536079406738281}}, '0.8': {'scores': {'RMSE': 0.9231926867636093, 'MAE': 0.7897697041316387, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.6192398071289062, 'optimization': 0, 
'imputation': 0.0005943775177001953}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.19554703625817557, 'MAE': 0.1437913973228053, 'MI': 1.3195962394272744, 'CORRELATION': 0.9770406565915004}, 'times': {'contamination': 0.0009462833404541016, 'optimization': 0, 'imputation': 0.060041189193725586}}, '0.1': {'scores': {'RMSE': 0.22212985201492597, 'MAE': 0.1368378161074427, 'MI': 1.225240202380491, 'CORRELATION': 0.9627706895400587}, 'times': {'contamination': 0.004572868347167969, 'optimization': 0, 'imputation': 0.0699300765991211}}, '0.2': {'scores': {'RMSE': 0.268910630576598, 'MAE': 0.16983805083071585, 'MI': 1.0636573662919013, 'CORRELATION': 0.9453283753208437}, 'times': {'contamination': 0.016742944717407227, 'optimization': 0, 'imputation': 0.07609176635742188}}, '0.4': {'scores': {'RMSE': 0.31430310541683426, 'MAE': 0.2041005558473225, 'MI': 0.9124259582934485, 'CORRELATION': 0.9309696942537548}, 'times': {'contamination': 0.10860323905944824, 'optimization': 0, 'imputation': 0.15946102142333984}}, '0.6': {'scores': {'RMSE': 0.3737964229023613, 'MAE': 0.22131322530176772, 'MI': 0.7775995167572279, 'CORRELATION': 0.9083977308218121}, 'times': {'contamination': 0.2411816120147705, 'optimization': 0, 'imputation': 0.43070363998413086}}, '0.8': {'scores': {'RMSE': 0.9290440261799385, 'MAE': 0.4933255678502781, 'MI': 0.2021428083194056, 'CORRELATION': 0.6461059842947307}, 'times': {'contamination': 0.6789627075195312, 'optimization': 0, 'imputation': 4.46994161605835}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.16435641817881824, 'MAE': 0.13990340223545955, 'MI': 1.3785977665357232, 'CORRELATION': 0.9868224741901116}, 'times': {'contamination': 0.002485513687133789, 'optimization': 0, 'imputation': 39.508928298950195}}, '0.1': {'scores': {'RMSE': 0.2228247553722344, 'MAE': 0.16815959364081734, 'MI': 1.2340069760129087, 'CORRELATION': 0.9623151173186535}, 'times': {'contamination': 0.0023517608642578125, 'optimization': 0, 'imputation': 39.52970552444458}}, '0.2': {'scores': {'RMSE': 0.27923604567760596, 'MAE': 0.19211165697030474, 'MI': 1.0043820035861775, 'CORRELATION': 0.9430094313080399}, 'times': {'contamination': 0.007275581359863281, 'optimization': 0, 'imputation': 39.95721387863159}}, '0.4': {'scores': {'RMSE': 0.3255775619246775, 'MAE': 0.2194073917812186, 'MI': 0.8847163339667148, 'CORRELATION': 0.9259001258177321}, 'times': {'contamination': 0.042914390563964844, 'optimization': 0, 'imputation': 41.303142786026}}, '0.6': {'scores': {'RMSE': 0.44447910257331374, 'MAE': 0.30600741310945195, 'MI': 0.6723738452451481, 'CORRELATION': 0.857466472714002}, 'times': {'contamination': 0.17032194137573242, 'optimization': 0, 'imputation': 30.968651294708252}}, '0.8': {'scores': {'RMSE': 2.9806206255800913, 'MAE': 1.530963982498524, 'MI': 0.05121884841141813, 'CORRELATION': 0.2903624430928721}, 'times': {'contamination': 0.6045393943786621, 'optimization': 0, 'imputation': 28.36435556411743}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.1560886685592231, 'MAE': 0.10320394166419149, 'MI': 1.2780123906233032, 'CORRELATION': 0.9851724611327715}, 'times': {'contamination': 0.000980377197265625, 'optimization': 0, 'imputation': 0.7417066097259521}}, '0.1': {'scores': {'RMSE': 0.3006324748663841, 'MAE': 0.17773178955210425, 'MI': 1.2119149147233643, 'CORRELATION': 0.9321993026569703}, 'times': {'contamination': 0.0019462108612060547, 'optimization': 0, 'imputation': 4.773505687713623}}, '0.2': {'scores': {'RMSE': 0.30708253455892426, 'MAE': 0.18786443029344255, 
'MI': 1.0350247745925767, 'CORRELATION': 0.9270935540980816}, 'times': {'contamination': 0.008093833923339844, 'optimization': 0, 'imputation': 34.58026099205017}}, '0.4': {'scores': {'RMSE': 0.36627844349732885, 'MAE': 0.23513471435395922, 'MI': 0.8536501396545491, 'CORRELATION': 0.9028949327632931}, 'times': {'contamination': 0.04369974136352539, 'optimization': 0, 'imputation': 253.98769640922546}}, '0.6': {'scores': {'RMSE': 0.44187263450733627, 'MAE': 0.3005295255111392, 'MI': 0.7070128664004881, 'CORRELATION': 0.8600506431175654}, 'times': {'contamination': 0.16975879669189453, 'optimization': 0, 'imputation': 835.3046026229858}}, '0.8': {'scores': {'RMSE': 0.6162987723847368, 'MAE': 0.4408568111584791, 'MI': 0.38562262881823584, 'CORRELATION': 0.7078269987710476}, 'times': {'contamination': 0.5958583354949951, 'optimization': 0, 'imputation': 1967.7639136314392}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0136434251178998, 'MAE': 0.8848324237744947, 'MI': 0.569311657025473, 'CORRELATION': 0.29914348963401916}, 'times': {'contamination': 0.0010654926300048828, 'optimization': 0, 'imputation': 18.329164743423462}}, '0.1': {'scores': {'RMSE': 1.2969727084789213, 'MAE': 1.096550700485976, 'MI': 0.4844113002067355, 'CORRELATION': -0.14524582877234712}, 'times': {'contamination': 0.0023641586303710938, 'optimization': 0, 'imputation': 18.21089506149292}}, '0.2': {'scores': {'RMSE': 1.0905397356299984, 'MAE': 0.8836097265712998, 'MI': 0.173773514607323, 'CORRELATION': -0.11890703333812934}, 'times': {'contamination': 0.007399082183837891, 'optimization': 0, 'imputation': 18.337430715560913}}, '0.4': {'scores': {'RMSE': 1.4069154761905174, 'MAE': 1.1643367090708647, 'MI': 0.09571825537518668, 'CORRELATION': -0.022364037624607463}, 'times': {'contamination': 0.043769121170043945, 'optimization': 0, 'imputation': 18.840161323547363}}, '0.6': {'scores': {'RMSE': 1.382829866742193, 'MAE': 1.1269958882289104, 'MI': 0.09215558384208698, 'CORRELATION': -0.032372544249182615}, 'times': {'contamination': 0.1728811264038086, 'optimization': 0, 'imputation': 19.076626300811768}}, '0.8': {'scores': {'RMSE': 1.5039750591991847, 'MAE': 1.2211771463532568, 'MI': 0.08522464337328965, 'CORRELATION': 0.002752327584939554}, 'times': {'contamination': 0.6081700325012207, 'optimization': 0, 'imputation': 19.578737258911133}}}}}}} @@ -59,9 +65,9 @@ run_2_fmri_s = {'fmristoptask': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0591754233439183, 'MAE': 0.8811507908679529, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0017461776733398438, 'optimization': 0, 'imputation': 0.001100778579711914}}, '0.1': {'scores': {'RMSE': 0.9651108444122715, 'MAE': 0.784231196318496, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0038170814514160156, 'optimization': 0, 'imputation': 0.0006277561187744141}}, '0.2': {'scores': {'RMSE': 0.9932773680676918, 'MAE': 0.8034395750738844, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.009348630905151367, 'optimization': 0, 'imputation': 0.0006661415100097656}}, '0.4': {'scores': {'RMSE': 1.0058748440484344, 'MAE': 0.8113341021149199, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.03160667419433594, 'optimization': 0, 'imputation': 0.0009412765502929688}}, '0.6': {'scores': {'RMSE': 0.9944066185522102, 'MAE': 0.8023296982336051, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.07952380180358887, 'optimization': 0, 'imputation': 0.001110076904296875}}, '0.8': {'scores': {'RMSE': 0.9979990505486313, 'MAE': 
0.8062359186814159, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.18988037109375, 'optimization': 0, 'imputation': 0.0012199878692626953}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0815739858856455, 'MAE': 0.8947163048898044, 'MI': 0.23576973507164212, 'CORRELATION': -0.12274682282048005}, 'times': {'contamination': 0.00153350830078125, 'optimization': 222.17338752746582, 'imputation': 0.007252931594848633}}, '0.1': {'scores': {'RMSE': 0.9695699729418912, 'MAE': 0.7898385707592198, 'MI': 0.06571976951128125, 'CORRELATION': 0.016476991654415008}, 'times': {'contamination': 0.00762939453125, 'optimization': 222.17338752746582, 'imputation': 0.006178379058837891}}, '0.2': {'scores': {'RMSE': 1.0023712131611957, 'MAE': 0.8108602788128816, 'MI': 0.02538765630290373, 'CORRELATION': -0.016656543511887868}, 'times': {'contamination': 0.020302534103393555, 'optimization': 222.17338752746582, 'imputation': 0.006856203079223633}}, '0.4': {'scores': {'RMSE': 1.0138537110215022, 'MAE': 0.8167419153197173, 'MI': 0.0038274804707874484, 'CORRELATION': 0.002717578068034049}, 'times': {'contamination': 0.07085132598876953, 'optimization': 222.17338752746582, 'imputation': 0.006796836853027344}}, '0.6': {'scores': {'RMSE': 1.0022937958385385, 'MAE': 0.807293318305244, 'MI': 0.0018376453669024168, 'CORRELATION': 0.004596695453371254}, 'times': {'contamination': 0.14228367805480957, 'optimization': 222.17338752746582, 'imputation': 0.006799221038818359}}, '0.8': {'scores': {'RMSE': 1.0104537937047533, 'MAE': 0.8149091851781165, 'MI': 0.0008945376054130945, 'CORRELATION': -0.0013082054469119196}, 'times': {'contamination': 0.24705862998962402, 'optimization': 222.17338752746582, 'imputation': 0.005573272705078125}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 1.1715750158207363, 'MAE': 0.9389573934580852, 'MI': 0.30612963701823526, 'CORRELATION': -0.22056411372111834}, 'times': {'contamination': 0.002927064895629883, 'optimization': 109.24887442588806, 'imputation': 10.111968994140625}}, '0.1': {'scores': {'RMSE': 1.0588476372168147, 'MAE': 0.8437403156914149, 'MI': 0.08955991417984446, 'CORRELATION': -0.1963089605999627}, 'times': {'contamination': 0.0034782886505126953, 'optimization': 109.24887442588806, 'imputation': 10.12447738647461}}, '0.2': {'scores': {'RMSE': 1.0391969620815695, 'MAE': 0.8364861943065512, 'MI': 0.02582105408815175, 'CORRELATION': -0.09232453336176588}, 'times': {'contamination': 0.009154081344604492, 'optimization': 109.24887442588806, 'imputation': 10.325854778289795}}, '0.4': {'scores': {'RMSE': 1.0340455393837413, 'MAE': 0.832400199311948, 'MI': 0.00520789381175344, 'CORRELATION': -0.04499260926820861}, 'times': {'contamination': 0.031117677688598633, 'optimization': 109.24887442588806, 'imputation': 11.087183237075806}}, '0.6': {'scores': {'RMSE': 4.011139383889788, 'MAE': 3.152797499531786, 'MI': 0.003672509477371519, 'CORRELATION': -0.05413975121078511}, 'times': {'contamination': 0.07905244827270508, 'optimization': 109.24887442588806, 'imputation': 8.649941444396973}}, '0.8': {'scores': {'RMSE': 2.97893158705676, 'MAE': 1.0602936132635719, 'MI': 0.00079094933311715, 'CORRELATION': 0.006947773983399647}, 'times': {'contamination': 0.18860864639282227, 'optimization': 109.24887442588806, 'imputation': 8.43183708190918}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0692148314478316, 'MAE': 0.873400733402723, 'MI': 0.2787388945371119, 'CORRELATION': -0.02021145481191946}, 'times': {'contamination': 0.0014863014221191406, 'optimization': 
5088.581882238388, 'imputation': 10.688252687454224}}, '0.1': {'scores': {'RMSE': 0.9719895445677292, 'MAE': 0.7851843420896756, 'MI': 0.0830808565046283, 'CORRELATION': 0.003268635254181307}, 'times': {'contamination': 0.0037031173706054688, 'optimization': 5088.581882238388, 'imputation': 50.06313109397888}}, '0.2': {'scores': {'RMSE': 0.99753636840165, 'MAE': 0.8012616128674659, 'MI': 0.019093143495502334, 'CORRELATION': 0.02540361203010324}, 'times': {'contamination': 0.00922083854675293, 'optimization': 5088.581882238388, 'imputation': 257.213321685791}}, '0.4': {'scores': {'RMSE': 1.0155975152475738, 'MAE': 0.8140496119700683, 'MI': 0.004260439955627443, 'CORRELATION': 0.0006423716677864647}, 'times': {'contamination': 0.03141498565673828, 'optimization': 5088.581882238388, 'imputation': 1488.7819337844849}}, '0.6': {'scores': {'RMSE': 1.0040752264526889, 'MAE': 0.8052914143043017, 'MI': 0.0018099723977603893, 'CORRELATION': -0.006621752869444718}, 'times': {'contamination': 0.07847213745117188, 'optimization': 5088.581882238388, 'imputation': 4525.959330558777}}, '0.8': {'scores': {'RMSE': 1.0078811833781343, 'MAE': 0.8090736592195691, 'MI': 0.001033941419470956, 'CORRELATION': -0.003099173821807945}, 'times': {'contamination': 0.18671298027038574, 'optimization': 5088.581882238388, 'imputation': 9460.7878510952}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.122220535003296, 'MAE': 0.9644508995813553, 'MI': 0.2759436355942961, 'CORRELATION': 0.09245761750327637}, 'times': {'contamination': 0.0015985965728759766, 'optimization': 4112.733412027359, 'imputation': 338.0099182128906}}, '0.1': {'scores': {'RMSE': 1.0832970108643896, 'MAE': 0.8823888940960694, 'MI': 0.0722893609050923, 'CORRELATION': -0.019930274489311815}, 'times': {'contamination': 0.0035643577575683594, 'optimization': 4112.733412027359, 'imputation': 337.6157658100128}}, '0.2': {'scores': {'RMSE': 1.0767155565632924, 'MAE': 0.8684991669552922, 'MI': 0.009245255133377466, 'CORRELATION': 0.0027516812337193518}, 'times': {'contamination': 0.009638309478759766, 'optimization': 4112.733412027359, 'imputation': 328.19135212898254}}, '0.4': {'scores': {'RMSE': 1.0934522863869605, 'MAE': 0.8840570779852788, 'MI': 0.003369568798431563, 'CORRELATION': -0.021061682051014274}, 'times': {'contamination': 0.03281116485595703, 'optimization': 4112.733412027359, 'imputation': 346.8224673271179}}, '0.6': {'scores': {'RMSE': 1.0783671319985777, 'MAE': 0.8704278560665365, 'MI': 0.00169355769499049, 'CORRELATION': -0.019325646685601}, 'times': {'contamination': 0.08042550086975098, 'optimization': 4112.733412027359, 'imputation': 340.2620213031769}}, '0.8': {'scores': {'RMSE': 1.081513280302422, 'MAE': 0.8746519908670293, 'MI': 0.0011728245783709944, 'CORRELATION': -0.016826349565356294}, 'times': {'contamination': 0.18836283683776855, 'optimization': 4112.733412027359, 'imputation': 341.4021186828613}}}}}}} run_3_fmri_s = {'fmristoptask': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0591754233439183, 'MAE': 0.8811507908679529, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.001561880111694336, 'optimization': 0, 'imputation': 0.0010650157928466797}}, '0.1': {'scores': {'RMSE': 0.9651108444122715, 'MAE': 0.784231196318496, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0035762786865234375, 'optimization': 0, 'imputation': 0.0006108283996582031}}, '0.2': {'scores': {'RMSE': 0.9932773680676918, 'MAE': 0.8034395750738844, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 
0.009912252426147461, 'optimization': 0, 'imputation': 0.000682830810546875}}, '0.4': {'scores': {'RMSE': 1.0058748440484344, 'MAE': 0.8113341021149199, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.033663034439086914, 'optimization': 0, 'imputation': 0.0008401870727539062}}, '0.6': {'scores': {'RMSE': 0.9944066185522102, 'MAE': 0.8023296982336051, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.08425664901733398, 'optimization': 0, 'imputation': 0.0010020732879638672}}, '0.8': {'scores': {'RMSE': 0.9979990505486313, 'MAE': 0.8062359186814159, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.1884922981262207, 'optimization': 0, 'imputation': 0.0009903907775878906}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0815739858856455, 'MAE': 0.8947163048898044, 'MI': 0.23576973507164212, 'CORRELATION': -0.12274682282048005}, 'times': {'contamination': 0.0014843940734863281, 'optimization': 216.33094692230225, 'imputation': 0.006989240646362305}}, '0.1': {'scores': {'RMSE': 0.9695699729418912, 'MAE': 0.7898385707592198, 'MI': 0.06571976951128125, 'CORRELATION': 0.016476991654415008}, 'times': {'contamination': 0.008179664611816406, 'optimization': 216.33094692230225, 'imputation': 0.0062677860260009766}}, '0.2': {'scores': {'RMSE': 1.0023712131611957, 'MAE': 0.8108602788128816, 'MI': 0.02538765630290373, 'CORRELATION': -0.016656543511887868}, 'times': {'contamination': 0.02096843719482422, 'optimization': 216.33094692230225, 'imputation': 0.006853580474853516}}, '0.4': {'scores': {'RMSE': 1.0138537110215022, 'MAE': 0.8167419153197173, 'MI': 0.0038274804707874484, 'CORRELATION': 0.002717578068034049}, 'times': {'contamination': 0.07195258140563965, 'optimization': 216.33094692230225, 'imputation': 0.00666499137878418}}, '0.6': {'scores': {'RMSE': 1.0022937958385385, 'MAE': 0.807293318305244, 'MI': 0.0018376453669024168, 'CORRELATION': 0.004596695453371254}, 'times': {'contamination': 0.14317655563354492, 'optimization': 216.33094692230225, 'imputation': 0.006315708160400391}}, '0.8': {'scores': {'RMSE': 1.0104537937047533, 'MAE': 0.8149091851781165, 'MI': 0.0008945376054130945, 'CORRELATION': -0.0013082054469119196}, 'times': {'contamination': 0.2480306625366211, 'optimization': 216.33094692230225, 'imputation': 0.005487203598022461}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 1.1715750158207363, 'MAE': 0.9389573934580852, 'MI': 0.30612963701823526, 'CORRELATION': -0.22056411372111834}, 'times': {'contamination': 0.0031473636627197266, 'optimization': 110.04800581932068, 'imputation': 10.122947692871094}}, '0.1': {'scores': {'RMSE': 1.0588476372168147, 'MAE': 0.8437403156914149, 'MI': 0.08955991417984446, 'CORRELATION': -0.1963089605999627}, 'times': {'contamination': 0.003419160842895508, 'optimization': 110.04800581932068, 'imputation': 10.181205034255981}}, '0.2': {'scores': {'RMSE': 1.0391969620815695, 'MAE': 0.8364861943065512, 'MI': 0.02582105408815175, 'CORRELATION': -0.09232453336176588}, 'times': {'contamination': 0.009185314178466797, 'optimization': 110.04800581932068, 'imputation': 10.448293685913086}}, '0.4': {'scores': {'RMSE': 1.0340455393837413, 'MAE': 0.832400199311948, 'MI': 0.00520789381175344, 'CORRELATION': -0.04499260926820861}, 'times': {'contamination': 0.030958890914916992, 'optimization': 110.04800581932068, 'imputation': 11.198593139648438}}, '0.6': {'scores': {'RMSE': 4.011139383889788, 'MAE': 3.152797499531786, 'MI': 0.003672509477371519, 'CORRELATION': -0.05413975121078511}, 'times': {'contamination': 
0.07897067070007324, 'optimization': 110.04800581932068, 'imputation': 8.581665992736816}}, '0.8': {'scores': {'RMSE': 2.97893158705676, 'MAE': 1.0602936132635719, 'MI': 0.00079094933311715, 'CORRELATION': 0.006947773983399647}, 'times': {'contamination': 0.18915915489196777, 'optimization': 110.04800581932068, 'imputation': 8.440712690353394}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0692148314478316, 'MAE': 0.873400733402723, 'MI': 0.2787388945371119, 'CORRELATION': -0.02021145481191946}, 'times': {'contamination': 0.0015034675598144531, 'optimization': 5124.588714838028, 'imputation': 10.759928226470947}}, '0.1': {'scores': {'RMSE': 0.9719895445677292, 'MAE': 0.7851843420896756, 'MI': 0.0830808565046283, 'CORRELATION': 0.003268635254181307}, 'times': {'contamination': 0.003936767578125, 'optimization': 5124.588714838028, 'imputation': 50.354418992996216}}, '0.2': {'scores': {'RMSE': 0.99753636840165, 'MAE': 0.8012616128674659, 'MI': 0.019093143495502334, 'CORRELATION': 0.02540361203010324}, 'times': {'contamination': 0.009255409240722656, 'optimization': 5124.588714838028, 'imputation': 259.3400568962097}}, '0.4': {'scores': {'RMSE': 1.0155975152475738, 'MAE': 0.8140496119700683, 'MI': 0.004260439955627443, 'CORRELATION': 0.0006423716677864647}, 'times': {'contamination': 0.0312647819519043, 'optimization': 5124.588714838028, 'imputation': 1500.3178548812866}}, '0.6': {'scores': {'RMSE': 1.0040752264526889, 'MAE': 0.8052914143043017, 'MI': 0.0018099723977603893, 'CORRELATION': -0.006621752869444718}, 'times': {'contamination': 0.07852554321289062, 'optimization': 5124.588714838028, 'imputation': 4581.28284406662}}, '0.8': {'scores': {'RMSE': 1.0078811833781343, 'MAE': 0.8090736592195691, 'MI': 0.001033941419470956, 'CORRELATION': -0.003099173821807945}, 'times': {'contamination': 0.18776154518127441, 'optimization': 5124.588714838028, 'imputation': 9590.927385091782}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.146433389804167, 'MAE': 0.9770400477715633, 'MI': 0.3372765709259859, 'CORRELATION': 0.0330859633180261}, 'times': {'contamination': 0.001608133316040039, 'optimization': 4109.78501701355, 'imputation': 347.9514887332916}}, '0.1': {'scores': {'RMSE': 1.0805589422598818, 'MAE': 0.8789487774083494, 'MI': 0.06450452519706741, 'CORRELATION': 0.0050948685955938995}, 'times': {'contamination': 0.0037963390350341797, 'optimization': 4109.78501701355, 'imputation': 342.1326117515564}}, '0.2': {'scores': {'RMSE': 1.113302451577659, 'MAE': 0.8972310309254206, 'MI': 0.013539230335286593, 'CORRELATION': -0.010746184336502297}, 'times': {'contamination': 0.010583162307739258, 'optimization': 4109.78501701355, 'imputation': 347.8061354160309}}, '0.4': {'scores': {'RMSE': 1.1059062825212693, 'MAE': 0.8920096539260874, 'MI': 0.0039427922204060845, 'CORRELATION': -0.021280076256874978}, 'times': {'contamination': 0.03199410438537598, 'optimization': 4109.78501701355, 'imputation': 351.9458327293396}}, '0.6': {'scores': {'RMSE': 1.0740866766668984, 'MAE': 0.8664850080628724, 'MI': 0.0015316126887234942, 'CORRELATION': -0.021487493774034198}, 'times': {'contamination': 0.08084416389465332, 'optimization': 4109.78501701355, 'imputation': 349.9893400669098}}, '0.8': {'scores': {'RMSE': 1.075891210325233, 'MAE': 0.8695393935351904, 'MI': 0.0011319165672490211, 'CORRELATION': -0.017885852991857847}, 'times': {'contamination': 0.19720029830932617, 'optimization': 4109.78501701355, 'imputation': 349.96222448349}}}}}}} - scores_list, algos, sets = 
Benchmarking().avg_results(run_1_chlorine, run_2_chlorine, run_3_chlorine, run_1_drift, run_2_drift, run_3_drift, run_1_eeg_a, run_2_eeg_a, run_3_eeg_a, run_1_eeg_r, run_2_eeg_r, run_3_eeg_r, run_1_fmri_o, run_2_fmri_o, run_3_fmri_o, run_1_fmri_s, run_2_fmri_s, run_3_fmri_s) + scores_list, algos, sets = Benchmark().avg_results(run_1_chlorine, run_2_chlorine, run_3_chlorine, run_1_drift, run_2_drift, run_3_drift, run_1_eeg_a, run_2_eeg_a, run_3_eeg_a, run_1_eeg_r, run_2_eeg_r, run_3_eeg_r, run_1_fmri_o, run_2_fmri_o, run_3_fmri_o, run_1_fmri_s, run_2_fmri_s, run_3_fmri_s) - result = Benchmarking().generate_matrix(scores_list, algos, sets) + result = Benchmark().generate_heatmap(scores_list, algos, sets) diff --git a/build/lib/imputegap/runner_benchmarking.py b/build/lib/imputegap/runner_benchmarking.py index 53aaaf1..86ce30d 100644 --- a/build/lib/imputegap/runner_benchmarking.py +++ b/build/lib/imputegap/runner_benchmarking.py @@ -1,4 +1,4 @@ -from imputegap.recovery.benchmarking import Benchmarking +from imputegap.recovery.benchmark import Benchmark reconstruction = True matrix = True @@ -19,20 +19,20 @@ x_axis = [0.05, 0.1, 0.2, 0.4, 0.6, 0.8] if not reconstruction: - results = Benchmarking().comprehensive_evaluation(datasets=datasets_full, optimizers=optimizers, algorithms=algorithms_full, scenarios=scenarios_small, x_axis=x_axis, already_optimized=False, reports=3) + results = Benchmark().eval(datasets=datasets_full, optimizers=optimizers, algorithms=algorithms_full, patterns=scenarios_small, x_axis=x_axis, already_optimized=False, runs=3) print("\n\n\nresults:", results) elif reconstruction and not matrix: test_plots = {'chlorine': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9256738243031312, 'MAE': 0.8788758766429177, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.001201629638671875, 'optimization': 0, 'imputation': 0.0005724430084228516}}, '0.1': {'scores': {'RMSE': 0.8239629739455251, 'MAE': 0.7297827051195541, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.001814126968383789, 'optimization': 0, 'imputation': 0.0004563331604003906}}, '0.2': {'scores': {'RMSE': 0.8317409760747367, 'MAE': 0.7138664942301458, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.005623817443847656, 'optimization': 0, 'imputation': 0.0004363059997558594}}, '0.4': {'scores': {'RMSE': 0.866178542847881, 'MAE': 0.744937943856253, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.03413963317871094, 'optimization': 0, 'imputation': 0.0005552768707275391}}, '0.6': {'scores': {'RMSE': 0.8906205973878023, 'MAE': 0.7677632103385671, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.13074183464050293, 'optimization': 0, 'imputation': 0.0005936622619628906}}, '0.8': {'scores': {'RMSE': 0.9231926867636093, 'MAE': 0.7897697041316387, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.4494190216064453, 'optimization': 0, 'imputation': 0.0005834102630615234}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.19555801767314038, 'MAE': 0.14379634965165344, 'MI': 1.3195962394272744, 'CORRELATION': 0.9770377315860114}, 'times': {'contamination': 0.0010943412780761719, 'optimization': 1.6249148845672607, 'imputation': 0.09233546257019043}}, '0.1': {'scores': {'RMSE': 0.22211329096601584, 'MAE': 0.13682609208383795, 'MI': 1.225240202380491, 'CORRELATION': 0.9627754587047338}, 'times': {'contamination': 0.005602359771728516, 'optimization': 1.6249148845672607, 'imputation': 0.1246938705444336}}, '0.2': {'scores': {'RMSE': 0.26890140517000855, 
'MAE': 0.16983555417798818, 'MI': 1.0650037012869458, 'CORRELATION': 0.945331872005451}, 'times': {'contamination': 0.017725229263305664, 'optimization': 1.6249148845672607, 'imputation': 0.1363234519958496}}, '0.4': {'scores': {'RMSE': 0.3143181342292365, 'MAE': 0.2041263696093189, 'MI': 0.9133456774887369, 'CORRELATION': 0.9309636417166443}, 'times': {'contamination': 0.1031486988067627, 'optimization': 1.6249148845672607, 'imputation': 0.2686195373535156}}, '0.6': {'scores': {'RMSE': 0.37514780116434926, 'MAE': 0.22156474038385332, 'MI': 0.7775541845220788, 'CORRELATION': 0.9078517283026865}, 'times': {'contamination': 0.20231366157531738, 'optimization': 1.6249148845672607, 'imputation': 0.8690693378448486}}, '0.8': {'scores': {'RMSE': 0.9117409046445515, 'MAE': 0.4801132374733116, 'MI': 0.2576488533530952, 'CORRELATION': 0.6589813814462316}, 'times': {'contamination': 0.5354366302490234, 'optimization': 1.6249148845672607, 'imputation': 2.865450143814087}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.3033328648259709, 'MAE': 0.2644983508914945, 'MI': 1.2263963519649825, 'CORRELATION': 0.9611641055318173}, 'times': {'contamination': 0.0029397010803222656, 'optimization': 500.0222601890564, 'imputation': 23.88236165046692}}, '0.1': {'scores': {'RMSE': 0.27434099749552526, 'MAE': 0.22744969879475732, 'MI': 1.0873378350271077, 'CORRELATION': 0.9481608575454046}, 'times': {'contamination': 0.001943349838256836, 'optimization': 500.0222601890564, 'imputation': 24.082878351211548}}, '0.2': {'scores': {'RMSE': 0.3354154243946063, 'MAE': 0.2667902544729111, 'MI': 0.9040935528948765, 'CORRELATION': 0.9224394175345223}, 'times': {'contamination': 0.007236480712890625, 'optimization': 500.0222601890564, 'imputation': 27.05676031112671}}, '0.4': {'scores': {'RMSE': 0.3663147584695216, 'MAE': 0.2683992893683706, 'MI': 0.7945562213511235, 'CORRELATION': 0.9086873163095024}, 'times': {'contamination': 0.03319692611694336, 'optimization': 500.0222601890564, 'imputation': 24.969536066055298}}, '0.6': {'scores': {'RMSE': 0.49178356901493514, 'MAE': 0.3590429489696727, 'MI': 0.568068131156551, 'CORRELATION': 0.8240735290572155}, 'times': {'contamination': 0.13401484489440918, 'optimization': 500.0222601890564, 'imputation': 17.722254991531372}}, '0.8': {'scores': {'RMSE': 5.286373452119497, 'MAE': 3.0120315981628085, 'MI': 0.0877803352414065, 'CORRELATION': 0.4417418016734377}, 'times': {'contamination': 0.46097803115844727, 'optimization': 500.0222601890564, 'imputation': 17.994383335113525}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.2246776140243064, 'MAE': 0.16265112492381306, 'MI': 1.0875116207955637, 'CORRELATION': 0.9694504836799154}, 'times': {'contamination': 0.0009558200836181641, 'optimization': 4871.80725812912, 'imputation': 1.680412769317627}}, '0.1': {'scores': {'RMSE': 0.3034580006710775, 'MAE': 0.20388299260278156, 'MI': 1.0526306210784155, 'CORRELATION': 0.9337303655141744}, 'times': {'contamination': 0.0018503665924072266, 'optimization': 4871.80725812912, 'imputation': 10.345388412475586}}, '0.2': {'scores': {'RMSE': 0.4104578379330223, 'MAE': 0.2785159738696005, 'MI': 0.7986686024303655, 'CORRELATION': 0.8658822456465257}, 'times': {'contamination': 0.0055084228515625, 'optimization': 4871.80725812912, 'imputation': 65.17643117904663}}, '0.4': {'scores': {'RMSE': 0.4911437971846393, 'MAE': 0.32455728476996504, 'MI': 0.6429014104572732, 'CORRELATION': 0.8180219110130202}, 'times': {'contamination': 0.032411813735961914, 'optimization': 4871.80725812912, 
'imputation': 474.7696805000305}}, '0.6': {'scores': {'RMSE': 0.579715388344659, 'MAE': 0.4144431747763777, 'MI': 0.45413696197432313, 'CORRELATION': 0.7431519134806602}, 'times': {'contamination': 0.1278684139251709, 'optimization': 4871.80725812912, 'imputation': 1531.380850315094}}, '0.8': {'scores': {'RMSE': 0.8100585330320411, 'MAE': 0.6124983237048439, 'MI': 0.1600984202902365, 'CORRELATION': 0.48808679305097513}, 'times': {'contamination': 0.4592604637145996, 'optimization': 4871.80725812912, 'imputation': 3588.4590351581573}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0889986961845628, 'MAE': 0.8825193440526788, 'MI': 0.569311657025473, 'CORRELATION': 0.006110871130276294}, 'times': {'contamination': 0.0009238719940185547, 'optimization': 474.33066391944885, 'imputation': 37.89777088165283}}, '0.1': {'scores': {'RMSE': 0.8750845974360951, 'MAE': 0.7897191908914645, 'MI': 0.36542131337202255, 'CORRELATION': 0.1776164808833599}, 'times': {'contamination': 0.0020151138305664062, 'optimization': 474.33066391944885, 'imputation': 36.68788194656372}}, '0.2': {'scores': {'RMSE': 1.3935692458593014, 'MAE': 1.1278169009994172, 'MI': 0.23278876704617288, 'CORRELATION': -0.0043224216288866475}, 'times': {'contamination': 0.006083011627197266, 'optimization': 474.33066391944885, 'imputation': 34.238656997680664}}, '0.4': {'scores': {'RMSE': 1.2198343626008104, 'MAE': 1.004323747843723, 'MI': 0.11694146418635429, 'CORRELATION': -2.8855554502904036e-05}, 'times': {'contamination': 0.03404045104980469, 'optimization': 474.33066391944885, 'imputation': 37.132654428482056}}, '0.6': {'scores': {'RMSE': 1.1924360263528335, 'MAE': 0.9838535398356899, 'MI': 0.0794767096848362, 'CORRELATION': -0.06570944989748748}, 'times': {'contamination': 0.1405935287475586, 'optimization': 474.33066391944885, 'imputation': 37.741902351379395}}, '0.8': {'scores': {'RMSE': 1.3728850685938416, 'MAE': 1.1227443270722774, 'MI': 0.08611037233596197, 'CORRELATION': -0.012424819834313067}, 'times': {'contamination': 0.47881627082824707, 'optimization': 474.33066391944885, 'imputation': 37.675835847854614}}}}}}} - Benchmarking().generate_plots(runs_plots_scores=test_plots, s="50", v="1000") + Benchmark().generate_plots(runs_plots_scores=test_plots, s="50", v="1000") test_plots = {'eeg_reading': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.6937095315379215, 'MAE': 0.5871322524124026, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0013728141784667969, 'optimization': 0, 'imputation': 0.0005629062652587891}}, '0.1': {'scores': {'RMSE': 0.8825047928812179, 'MAE': 0.7058469910884912, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0016565322875976562, 'optimization': 0, 'imputation': 0.00047278404235839844}}, '0.2': {'scores': {'RMSE': 1.0076040625030085, 'MAE': 0.8133998806656898, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.00404810905456543, 'optimization': 0, 'imputation': 0.00045371055603027344}}, '0.4': {'scores': {'RMSE': 1.014301846668858, 'MAE': 0.8219008090987252, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.03703451156616211, 'optimization': 0, 'imputation': 0.0006351470947265625}}, '0.6': {'scores': {'RMSE': 1.0158383459630567, 'MAE': 0.8210620770500036, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.11827206611633301, 'optimization': 0, 'imputation': 0.000629425048828125}}, '0.8': {'scores': {'RMSE': 1.01877327240803, 'MAE': 0.8157442592731639, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.39914441108703613, 
'optimization': 0, 'imputation': 0.0005762577056884766}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.6092168096877171, 'MAE': 0.43725473329243575, 'MI': 0.8113862111415893, 'CORRELATION': 0.6669628813645995}, 'times': {'contamination': 0.0009872913360595703, 'optimization': -0.284501314163208, 'imputation': 0.19295310974121094}}, '0.1': {'scores': {'RMSE': 0.7694804794390454, 'MAE': 0.46934521855095135, 'MI': 0.6303931303314629, 'CORRELATION': 0.6338704662419556}, 'times': {'contamination': 0.004288911819458008, 'optimization': -0.284501314163208, 'imputation': 0.23847365379333496}}, '0.2': {'scores': {'RMSE': 0.54163559631001, 'MAE': 0.3838909357504076, 'MI': 0.6804417798137956, 'CORRELATION': 0.8550799708158655}, 'times': {'contamination': 0.01486515998840332, 'optimization': -0.284501314163208, 'imputation': 0.4856741428375244}}, '0.4': {'scores': {'RMSE': 0.6150678993354384, 'MAE': 0.3994113839683473, 'MI': 0.5964930437182837, 'CORRELATION': 0.8282842809048951}, 'times': {'contamination': 0.10318613052368164, 'optimization': -0.284501314163208, 'imputation': 0.5878500938415527}}, '0.6': {'scores': {'RMSE': 0.8559878849846194, 'MAE': 0.4875679606049892, 'MI': 0.4352238530939769, 'CORRELATION': 0.7114520144242487}, 'times': {'contamination': 0.1801285743713379, 'optimization': -0.284501314163208, 'imputation': 2.260394811630249}}, '0.8': {'scores': {'RMSE': 1.0028418021086185, 'MAE': 0.6478458585388304, 'MI': 0.26800404550676565, 'CORRELATION': 0.6191696179492259}, 'times': {'contamination': 0.45122456550598145, 'optimization': -0.284501314163208, 'imputation': 2.1127378940582275}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.38913260498789515, 'MAE': 0.28887981808629887, 'MI': 0.9003693740232723, 'CORRELATION': 0.8305251080007574}, 'times': {'contamination': 0.004096508026123047, 'optimization': 474.1963918209076, 'imputation': 28.061330318450928}}, '0.1': {'scores': {'RMSE': 0.42262809349896036, 'MAE': 0.31228067649415225, 'MI': 0.8330304029808546, 'CORRELATION': 0.8802327685495391}, 'times': {'contamination': 0.0014801025390625, 'optimization': 474.1963918209076, 'imputation': 28.245431184768677}}, '0.2': {'scores': {'RMSE': 0.4299898931605415, 'MAE': 0.2914674774962624, 'MI': 0.8303895697315763, 'CORRELATION': 0.9049819009058613}, 'times': {'contamination': 0.0040132999420166016, 'optimization': 474.1963918209076, 'imputation': 30.743361473083496}}, '0.4': {'scores': {'RMSE': 0.4658583297277367, 'MAE': 0.32456738916683475, 'MI': 0.7105191885562022, 'CORRELATION': 0.8900070341144635}, 'times': {'contamination': 0.02961254119873047, 'optimization': 474.1963918209076, 'imputation': 29.556389808654785}}, '0.6': {'scores': {'RMSE': 0.5970596677005412, 'MAE': 0.40317626348969443, 'MI': 0.5057637077329502, 'CORRELATION': 0.8092444114848254}, 'times': {'contamination': 0.10307097434997559, 'optimization': 474.1963918209076, 'imputation': 20.913992404937744}}, '0.8': {'scores': {'RMSE': 4.099584545523784, 'MAE': 1.4360755142687804, 'MI': 0.03924813725195477, 'CORRELATION': 0.21658071586750138}, 'times': {'contamination': 0.38839101791381836, 'optimization': 474.1963918209076, 'imputation': 18.921329736709595}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.738070963229811, 'MAE': 0.5586987523761138, 'MI': 0.9549505679325584, 'CORRELATION': 0.4719896373208298}, 'times': {'contamination': 0.00078582763671875, 'optimization': 3513.147577047348, 'imputation': 1.0417201519012451}}, '0.1': {'scores': {'RMSE': 0.6776044115374216, 'MAE': 0.4883939650690726, 'MI': 
0.6051652352756725, 'CORRELATION': 0.7008457537827716}, 'times': {'contamination': 0.001641988754272461, 'optimization': 3513.147577047348, 'imputation': 6.6430745124816895}}, '0.2': {'scores': {'RMSE': 0.642538776211307, 'MAE': 0.45068800736093795, 'MI': 0.5847752699836343, 'CORRELATION': 0.7870826275047371}, 'times': {'contamination': 0.003993034362792969, 'optimization': 3513.147577047348, 'imputation': 43.09042835235596}}, '0.4': {'scores': {'RMSE': 0.595402838774376, 'MAE': 0.4200046319465559, 'MI': 0.5780737300771779, 'CORRELATION': 0.8157719741321808}, 'times': {'contamination': 0.037882328033447266, 'optimization': 3513.147577047348, 'imputation': 325.3523244857788}}, '0.6': {'scores': {'RMSE': 0.6457758226280373, 'MAE': 0.465851861042097, 'MI': 0.4940897071221384, 'CORRELATION': 0.7797841684978442}, 'times': {'contamination': 0.1108400821685791, 'optimization': 3513.147577047348, 'imputation': 1001.1619775295258}}, '0.8': {'scores': {'RMSE': 0.7031022809975706, 'MAE': 0.5292159877681492, 'MI': 0.3802525627714059, 'CORRELATION': 0.7224487387493247}, 'times': {'contamination': 0.3698101043701172, 'optimization': 3513.147577047348, 'imputation': 2408.869615316391}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.347580032956689, 'MAE': 1.1601095334550815, 'MI': 0.1586603634624117, 'CORRELATION': -0.18399931939875533}, 'times': {'contamination': 0.0010280609130859375, 'optimization': 294.0501501560211, 'imputation': 24.799844980239868}}, '0.1': {'scores': {'RMSE': 1.4429520609208166, 'MAE': 1.1748671084301718, 'MI': 0.21482702111483185, 'CORRELATION': -0.3608701962716392}, 'times': {'contamination': 0.0016400814056396484, 'optimization': 294.0501501560211, 'imputation': 27.284573793411255}}, '0.2': {'scores': {'RMSE': 1.2876145426625936, 'MAE': 1.0269096650749077, 'MI': 0.07484247431469719, 'CORRELATION': -0.007533643863897717}, 'times': {'contamination': 0.004055500030517578, 'optimization': 294.0501501560211, 'imputation': 26.624001264572144}}, '0.4': {'scores': {'RMSE': 1.4608458720939472, 'MAE': 1.1594757078481346, 'MI': 0.03787981276790102, 'CORRELATION': 0.011173417358467087}, 'times': {'contamination': 0.03002309799194336, 'optimization': 294.0501501560211, 'imputation': 27.17277193069458}}, '0.6': {'scores': {'RMSE': 1.634424595829425, 'MAE': 1.3356121929070988, 'MI': 0.02225643037919471, 'CORRELATION': -0.02299352560191792}, 'times': {'contamination': 0.10050559043884277, 'optimization': 294.0501501560211, 'imputation': 27.999096632003784}}, '0.8': {'scores': {'RMSE': 1.3047427885621508, 'MAE': 1.05104242568594, 'MI': 0.01007462604941533, 'CORRELATION': -0.004969975534923902}, 'times': {'contamination': 0.3871951103210449, 'optimization': 294.0501501560211, 'imputation': 27.538389205932617}}}}}}} - Benchmarking().generate_plots(runs_plots_scores=test_plots, s="33", v="1201") + Benchmark().generate_plots(runs_plots_scores=test_plots, s="33", v="1201") test_plots = {'eeg_alcohol': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 1.107394798606378, 'MAE': 0.9036474830477748, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.008088350296020508, 'optimization': 0, 'imputation': 0.0003597736358642578}}, '0.1': {'scores': {'RMSE': 0.8569349076796438, 'MAE': 0.6416542359734557, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0013017654418945312, 'optimization': 0, 'imputation': 0.00038313865661621094}}, '0.2': {'scores': {'RMSE': 0.9609255264919324, 'MAE': 0.756013835497571, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 
0.0017611980438232422, 'optimization': 0, 'imputation': 0.00021719932556152344}}, '0.4': {'scores': {'RMSE': 1.0184989120725458, 'MAE': 0.8150966718352457, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.012012958526611328, 'optimization': 0, 'imputation': 0.0003046989440917969}}, '0.6': {'scores': {'RMSE': 0.9997401940199045, 'MAE': 0.7985721718600829, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.04199981689453125, 'optimization': 0, 'imputation': 0.000354766845703125}}, '0.8': {'scores': {'RMSE': 0.9895691678332014, 'MAE': 0.7901674118013952, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.11134958267211914, 'optimization': 0, 'imputation': 0.00029206275939941406}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.27658600512073456, 'MAE': 0.20204444801773774, 'MI': 1.6287285825717355, 'CORRELATION': 0.9837210171556283}, 'times': {'contamination': 0.0006604194641113281, 'optimization': 1.5429341793060303, 'imputation': 0.054087162017822266}}, '0.1': {'scores': {'RMSE': 0.2322153312143858, 'MAE': 0.1729082341483471, 'MI': 1.1990748751673153, 'CORRELATION': 0.9640732993793864}, 'times': {'contamination': 0.0025806427001953125, 'optimization': 1.5429341793060303, 'imputation': 0.07224416732788086}}, '0.2': {'scores': {'RMSE': 0.21796283300762773, 'MAE': 0.16255811567403466, 'MI': 1.184724280002774, 'CORRELATION': 0.9737521039022545}, 'times': {'contamination': 0.0056035518646240234, 'optimization': 1.5429341793060303, 'imputation': 0.039177656173706055}}, '0.4': {'scores': {'RMSE': 0.2852656711446442, 'MAE': 0.19577380664036, 'MI': 1.014828207927502, 'CORRELATION': 0.959485242427464}, 'times': {'contamination': 0.03652334213256836, 'optimization': 1.5429341793060303, 'imputation': 0.0999898910522461}}, '0.6': {'scores': {'RMSE': 0.3360171448119046, 'MAE': 0.23184686418998596, 'MI': 0.8789374924043876, 'CORRELATION': 0.9418882413737133}, 'times': {'contamination': 0.10041642189025879, 'optimization': 1.5429341793060303, 'imputation': 0.1369919776916504}}, '0.8': {'scores': {'RMSE': 0.5558362531202891, 'MAE': 0.37446346030237454, 'MI': 0.5772409317426037, 'CORRELATION': 0.8478935496183876}, 'times': {'contamination': 0.17512726783752441, 'optimization': 1.5429341793060303, 'imputation': 0.38109540939331055}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.7434750032306926, 'MAE': 0.5711687107703531, 'MI': 1.0614546580642759, 'CORRELATION': 0.7570103181096193}, 'times': {'contamination': 0.001224517822265625, 'optimization': 25.973577737808228, 'imputation': 1.827949047088623}}, '0.1': {'scores': {'RMSE': 0.6079049353979786, 'MAE': 0.4565071330548986, 'MI': 0.5897845472515851, 'CORRELATION': 0.7033347467102922}, 'times': {'contamination': 0.0011165142059326172, 'optimization': 25.973577737808228, 'imputation': 1.8397388458251953}}, '0.2': {'scores': {'RMSE': 0.5938200686690087, 'MAE': 0.4583475323523134, 'MI': 0.5238356117195857, 'CORRELATION': 0.789556744168648}, 'times': {'contamination': 0.0017132759094238281, 'optimization': 25.973577737808228, 'imputation': 1.8568992614746094}}, '0.4': {'scores': {'RMSE': 0.6922622994445695, 'MAE': 0.5327565871766037, 'MI': 0.3842117779328253, 'CORRELATION': 0.738304743934084}, 'times': {'contamination': 0.009068012237548828, 'optimization': 25.973577737808228, 'imputation': 2.0719306468963623}}, '0.6': {'scores': {'RMSE': 0.7719376402414535, 'MAE': 0.5756544384278333, 'MI': 0.268745121385816, 'CORRELATION': 0.6398387148302656}, 'times': {'contamination': 0.02822709083557129, 'optimization': 
25.973577737808228, 'imputation': 1.5673530101776123}}, '0.8': {'scores': {'RMSE': 1.0218833589128922, 'MAE': 0.8012134667654269, 'MI': 0.0051679642909252645, 'CORRELATION': 0.06083718960882358}, 'times': {'contamination': 0.09586524963378906, 'optimization': 25.973577737808228, 'imputation': 1.7056498527526855}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.26665906759668434, 'MAE': 0.21589657916392105, 'MI': 1.4930024107375521, 'CORRELATION': 0.9704001503125854}, 'times': {'contamination': 0.0005829334259033203, 'optimization': 563.972785949707, 'imputation': 0.05102872848510742}}, '0.1': {'scores': {'RMSE': 0.28425094570125403, 'MAE': 0.22787684897303442, 'MI': 1.0594854362146846, 'CORRELATION': 0.9444192673990515}, 'times': {'contamination': 0.0008502006530761719, 'optimization': 563.972785949707, 'imputation': 0.2425684928894043}}, '0.2': {'scores': {'RMSE': 0.334887339804727, 'MAE': 0.25851830743811066, 'MI': 0.9711245925356778, 'CORRELATION': 0.9390073163681255}, 'times': {'contamination': 0.001627206802368164, 'optimization': 563.972785949707, 'imputation': 1.4222276210784912}}, '0.4': {'scores': {'RMSE': 0.4719169787140248, 'MAE': 0.35026878431372477, 'MI': 0.7196112128770917, 'CORRELATION': 0.8858920655062363}, 'times': {'contamination': 0.008496761322021484, 'optimization': 563.972785949707, 'imputation': 12.960479974746704}}, '0.6': {'scores': {'RMSE': 0.47736733503847095, 'MAE': 0.35628454418236766, 'MI': 0.6157654491357567, 'CORRELATION': 0.8790867703136753}, 'times': {'contamination': 0.026967287063598633, 'optimization': 563.972785949707, 'imputation': 35.622944831848145}}, '0.8': {'scores': {'RMSE': 0.5747595088880484, 'MAE': 0.4242587159311907, 'MI': 0.4843046739917606, 'CORRELATION': 0.8188927905931169}, 'times': {'contamination': 0.08214735984802246, 'optimization': 563.972785949707, 'imputation': 87.41280603408813}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 2.5423293855369917, 'MAE': 2.2141103663578803, 'MI': 0.6072901854577394, 'CORRELATION': -0.6360397852133122}, 'times': {'contamination': 0.0005042552947998047, 'optimization': 59585.917899131775, 'imputation': 33.87153220176697}}, '0.1': {'scores': {'RMSE': 1.6013078224502717, 'MAE': 1.2201563721098412, 'MI': 0.2626533928770599, 'CORRELATION': -0.17448314526813025}, 'times': {'contamination': 0.0009407997131347656, 'optimization': 59585.917899131775, 'imputation': 33.780593156814575}}, '0.2': {'scores': {'RMSE': 1.1980243320030763, 'MAE': 0.9205437185390488, 'MI': 0.12523228756285484, 'CORRELATION': 0.053025850949979476}, 'times': {'contamination': 0.0020706653594970703, 'optimization': 59585.917899131775, 'imputation': 33.35025191307068}}, '0.4': {'scores': {'RMSE': 1.3154357320206076, 'MAE': 1.0563143800308983, 'MI': 0.04771994618237419, 'CORRELATION': 0.04644513674411651}, 'times': {'contamination': 0.009225606918334961, 'optimization': 59585.917899131775, 'imputation': 32.76318073272705}}, '0.6': {'scores': {'RMSE': 1.409745596231954, 'MAE': 1.1320098137715748, 'MI': 0.013200537946505414, 'CORRELATION': -0.051391074582830536}, 'times': {'contamination': 0.028786659240722656, 'optimization': 59585.917899131775, 'imputation': 32.95982527732849}}, '0.8': {'scores': {'RMSE': 1.3165198817323216, 'MAE': 1.0603105471734755, 'MI': 0.006831327215000855, 'CORRELATION': -0.010253125321586447}, 'times': {'contamination': 0.11717653274536133, 'optimization': 59585.917899131775, 'imputation': 31.418609857559204}}}}}}} - Benchmarking().generate_plots(runs_plots_scores=test_plots, s="64", v="256") + 
Benchmark().generate_plots(runs_plots_scores=test_plots, s="64", v="256") test_plots = {'drift': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9234927128429051, 'MAE': 0.7219362152785619, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.008000850677490234, 'optimization': 0, 'imputation': 0.0005795955657958984}}, '0.1': {'scores': {'RMSE': 0.9699990038879407, 'MAE': 0.7774057495176013, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0019245147705078125, 'optimization': 0, 'imputation': 0.0005664825439453125}}, '0.2': {'scores': {'RMSE': 0.9914069853975623, 'MAE': 0.8134840739732964, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.009830474853515625, 'optimization': 0, 'imputation': 0.0005776882171630859}}, '0.4': {'scores': {'RMSE': 1.0552448338389784, 'MAE': 0.7426695186604741, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.04627418518066406, 'optimization': 0, 'imputation': 0.0005333423614501953}}, '0.6': {'scores': {'RMSE': 1.0143105930114702, 'MAE': 0.7610548321723654, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.16058969497680664, 'optimization': 0, 'imputation': 0.0005693435668945312}}, '0.8': {'scores': {'RMSE': 1.010712060535523, 'MAE': 0.7641520748788702, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.49263739585876465, 'optimization': 0, 'imputation': 0.0005679130554199219}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.23303624184873972, 'MAE': 0.1361979723519773, 'MI': 1.2739817718416822, 'CORRELATION': 0.968435455112644}, 'times': {'contamination': 0.0011115074157714844, 'optimization': 2.84889817237854, 'imputation': 0.1434638500213623}}, '0.1': {'scores': {'RMSE': 0.18152059329152107, 'MAE': 0.09925566629402763, 'MI': 1.1516089897042538, 'CORRELATION': 0.982939835222072}, 'times': {'contamination': 0.004365444183349609, 'optimization': 2.84889817237854, 'imputation': 0.14118409156799316}}, '0.2': {'scores': {'RMSE': 0.13894771223733138, 'MAE': 0.0845903269210229, 'MI': 1.186191167936035, 'CORRELATION': 0.9901338133811375}, 'times': {'contamination': 0.01644587516784668, 'optimization': 2.84889817237854, 'imputation': 0.16940855979919434}}, '0.4': {'scores': {'RMSE': 0.7544523683503815, 'MAE': 0.1121804997359425, 'MI': 0.021165172206064526, 'CORRELATION': 0.8141205075707254}, 'times': {'contamination': 0.10604023933410645, 'optimization': 2.84889817237854, 'imputation': 2.0186331272125244}}, '0.6': {'scores': {'RMSE': 0.4355197572001314, 'MAE': 0.13808466247330484, 'MI': 0.10781252370591506, 'CORRELATION': 0.9166777087122918}, 'times': {'contamination': 0.2030637264251709, 'optimization': 2.84889817237854, 'imputation': 2.0608761310577393}}, '0.8': {'scores': {'RMSE': 0.7672558930795491, 'MAE': 0.3298896842843935, 'MI': 0.013509125598802707, 'CORRELATION': 0.7312998041323682}, 'times': {'contamination': 0.5499897003173828, 'optimization': 2.84889817237854, 'imputation': -0.47277092933654785}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.5434405584289141, 'MAE': 0.346560495723809, 'MI': 0.7328867182584357, 'CORRELATION': 0.8519431955571422}, 'times': {'contamination': 0.0021185874938964844, 'optimization': 514.5863847732544, 'imputation': 34.6202232837677}}, '0.1': {'scores': {'RMSE': 0.39007056542870916, 'MAE': 0.2753022759369617, 'MI': 0.8280959876205578, 'CORRELATION': 0.9180937736429735}, 'times': {'contamination': 0.0018591880798339844, 'optimization': 514.5863847732544, 'imputation': 35.190133810043335}}, '0.2': {'scores': {'RMSE': 0.37254427425455994, 'MAE': 
0.2730547993858495, 'MI': 0.7425412593844177, 'CORRELATION': 0.9293322959355041}, 'times': {'contamination': 0.005822181701660156, 'optimization': 514.5863847732544, 'imputation': 35.46649789810181}}, '0.4': {'scores': {'RMSE': 0.6027573766269363, 'MAE': 0.34494332493982044, 'MI': 0.11876685901414151, 'CORRELATION': 0.8390532279447225}, 'times': {'contamination': 0.03864097595214844, 'optimization': 514.5863847732544, 'imputation': 34.30042386054993}}, '0.6': {'scores': {'RMSE': 0.9004526656857551, 'MAE': 0.4924048353228427, 'MI': 0.011590260996247858, 'CORRELATION': 0.5650541301828254}, 'times': {'contamination': 0.14191699028015137, 'optimization': 514.5863847732544, 'imputation': 29.5026593208313}}, '0.8': {'scores': {'RMSE': 1.0112488396023014, 'MAE': 0.7646823531588104, 'MI': 0.00040669209664367576, 'CORRELATION': 0.0183962968474991}, 'times': {'contamination': 0.46815061569213867, 'optimization': 514.5863847732544, 'imputation': 22.864952564239502}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.4445625930776235, 'MAE': 0.2696133927362288, 'MI': 1.1167751522591498, 'CORRELATION': 0.8944975075266335}, 'times': {'contamination': 0.0008444786071777344, 'optimization': 5050.300735235214, 'imputation': 0.6499700546264648}}, '0.1': {'scores': {'RMSE': 0.2939506418814281, 'MAE': 0.16953644212278182, 'MI': 1.0160968166750064, 'CORRELATION': 0.9531900627237018}, 'times': {'contamination': 0.0019328594207763672, 'optimization': 5050.300735235214, 'imputation': 4.424615383148193}}, '0.2': {'scores': {'RMSE': 0.2366529609250008, 'MAE': 0.14709529129218185, 'MI': 1.064299483512458, 'CORRELATION': 0.9711348247027318}, 'times': {'contamination': 0.005669116973876953, 'optimization': 5050.300735235214, 'imputation': 28.64192819595337}}, '0.4': {'scores': {'RMSE': 0.4155649406397416, 'MAE': 0.22056702659999994, 'MI': 0.06616526470761779, 'CORRELATION': 0.919934494058292}, 'times': {'contamination': 0.03133583068847656, 'optimization': 5050.300735235214, 'imputation': 215.96445870399475}}, '0.6': {'scores': {'RMSE': 0.38695094864012947, 'MAE': 0.24340565131372927, 'MI': 0.06361822797740405, 'CORRELATION': 0.9249744935121553}, 'times': {'contamination': 0.1293776035308838, 'optimization': 5050.300735235214, 'imputation': 711.7917039394379}}, '0.8': {'scores': {'RMSE': 0.5862696375344495, 'MAE': 0.3968159514130716, 'MI': 0.13422239939628303, 'CORRELATION': 0.8178796825899766}, 'times': {'contamination': 0.45540356636047363, 'optimization': 5050.300735235214, 'imputation': 1666.3830137252808}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9458168886934889, 'MAE': 0.7087024488997395, 'MI': 0.11924522547609226, 'CORRELATION': -0.04225238590482719}, 'times': {'contamination': 0.0010085105895996094, 'optimization': 478.6599726676941, 'imputation': 41.931705474853516}}, '0.1': {'scores': {'RMSE': 1.012708832814332, 'MAE': 0.7612398956786116, 'MI': 0.125135259797581, 'CORRELATION': -0.037524204443007164}, 'times': {'contamination': 0.0019328594207763672, 'optimization': 478.6599726676941, 'imputation': 37.289856910705566}}, '0.2': {'scores': {'RMSE': 1.0293662762879399, 'MAE': 0.79543999581101, 'MI': 0.10908095436833125, 'CORRELATION': -0.03892162998680425}, 'times': {'contamination': 0.005481719970703125, 'optimization': 478.6599726676941, 'imputation': 39.732287645339966}}, '0.4': {'scores': {'RMSE': 1.08276653737942, 'MAE': 0.7324224949731254, 'MI': 0.008689250019683584, 'CORRELATION': -0.020719639766949276}, 'times': {'contamination': 0.032985687255859375, 'optimization': 478.6599726676941, 
'imputation': 40.06472086906433}}, '0.6': {'scores': {'RMSE': 1.0436806660629465, 'MAE': 0.7612577768282424, 'MI': 0.011650658060022669, 'CORRELATION': -0.0069952780339244845}, 'times': {'contamination': 0.13504815101623535, 'optimization': 478.6599726676941, 'imputation': 41.86172533035278}}, '0.8': {'scores': {'RMSE': 1.0386764847922278, 'MAE': 0.7580243538074385, 'MI': 0.0035404637707733143, 'CORRELATION': -0.0010165957084160128}, 'times': {'contamination': 0.4962472915649414, 'optimization': 478.6599726676941, 'imputation': 44.58724093437195}}}}}}} - Benchmarking().generate_plots(runs_plots_scores=test_plots, s="50", v="1000") + Benchmark().generate_plots(runs_plots_scores=test_plots, s="50", v="1000") if matrix : run_1_chlorine = {'chlorine': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9256738243031312, 'MAE': 0.8788758766429177, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0009789466857910156, 'optimization': 0, 'imputation': 0.000560760498046875}}, '0.1': {'scores': {'RMSE': 0.8239629739455251, 'MAE': 0.7297827051195541, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.002305746078491211, 'optimization': 0, 'imputation': 0.0004634857177734375}}, '0.2': {'scores': {'RMSE': 0.8317409760747367, 'MAE': 0.7138664942301458, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.007703065872192383, 'optimization': 0, 'imputation': 0.0004649162292480469}}, '0.4': {'scores': {'RMSE': 0.866178542847881, 'MAE': 0.744937943856253, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.047789812088012695, 'optimization': 0, 'imputation': 0.0005023479461669922}}, '0.6': {'scores': {'RMSE': 0.8906205973878023, 'MAE': 0.7677632103385671, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.19488763809204102, 'optimization': 0, 'imputation': 0.0005488395690917969}}, '0.8': {'scores': {'RMSE': 0.9231926867636093, 'MAE': 0.7897697041316387, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.6890411376953125, 'optimization': 0, 'imputation': 0.0005776882171630859}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.19554703625817557, 'MAE': 0.1437913973228053, 'MI': 1.3195962394272744, 'CORRELATION': 0.9770406565915004}, 'times': {'contamination': 0.0009171962738037109, 'optimization': 0, 'imputation': 0.05464982986450195}}, '0.1': {'scores': {'RMSE': 0.22212985201492597, 'MAE': 0.1368378161074427, 'MI': 1.225240202380491, 'CORRELATION': 0.9627706895400587}, 'times': {'contamination': 0.004944562911987305, 'optimization': 0, 'imputation': 0.070037841796875}}, '0.2': {'scores': {'RMSE': 0.268910630576598, 'MAE': 0.16983805083071585, 'MI': 1.0636573662919013, 'CORRELATION': 0.9453283753208437}, 'times': {'contamination': 0.01749396324157715, 'optimization': 0, 'imputation': 0.07790756225585938}}, '0.4': {'scores': {'RMSE': 0.31430310541683426, 'MAE': 0.2041005558473225, 'MI': 0.9124259582934485, 'CORRELATION': 0.9309696942537548}, 'times': {'contamination': 0.11426258087158203, 'optimization': 0, 'imputation': 0.1478443145751953}}, '0.6': {'scores': {'RMSE': 0.3737964229023613, 'MAE': 0.22131322530176772, 'MI': 0.7775995167572279, 'CORRELATION': 0.9083977308218121}, 'times': {'contamination': 0.2614400386810303, 'optimization': 0, 'imputation': 0.4230384826660156}}, '0.8': {'scores': {'RMSE': 0.9290440261799385, 'MAE': 0.4933255678502781, 'MI': 0.2021428083194056, 'CORRELATION': 0.6461059842947307}, 'times': {'contamination': 0.7493531703948975, 'optimization': 0, 'imputation': 4.412551164627075}}}}, 'stmvl': {'bayesian': {'0.05': 
{'scores': {'RMSE': 0.16435641817881824, 'MAE': 0.13990340223545955, 'MI': 1.3785977665357232, 'CORRELATION': 0.9868224741901116}, 'times': {'contamination': 0.0036211013793945312, 'optimization': 0, 'imputation': 39.150184869766235}}, '0.1': {'scores': {'RMSE': 0.2228247553722344, 'MAE': 0.16815959364081734, 'MI': 1.2340069760129087, 'CORRELATION': 0.9623151173186535}, 'times': {'contamination': 0.002553224563598633, 'optimization': 0, 'imputation': 39.25465536117554}}, '0.2': {'scores': {'RMSE': 0.27923604567760596, 'MAE': 0.19211165697030474, 'MI': 1.0043820035861775, 'CORRELATION': 0.9430094313080399}, 'times': {'contamination': 0.008016109466552734, 'optimization': 0, 'imputation': 39.86703276634216}}, '0.4': {'scores': {'RMSE': 0.3255775619246775, 'MAE': 0.2194073917812186, 'MI': 0.8847163339667148, 'CORRELATION': 0.9259001258177321}, 'times': {'contamination': 0.04792189598083496, 'optimization': 0, 'imputation': 41.36716914176941}}, '0.6': {'scores': {'RMSE': 0.44447910257331374, 'MAE': 0.30600741310945195, 'MI': 0.6723738452451481, 'CORRELATION': 0.857466472714002}, 'times': {'contamination': 0.19208693504333496, 'optimization': 0, 'imputation': 30.92500948905945}}, '0.8': {'scores': {'RMSE': 2.9806206255800913, 'MAE': 1.530963982498524, 'MI': 0.05121884841141813, 'CORRELATION': 0.2903624430928721}, 'times': {'contamination': 0.6799006462097168, 'optimization': 0, 'imputation': 28.389225006103516}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.1560886685592231, 'MAE': 0.10320394166419149, 'MI': 1.2780123906233032, 'CORRELATION': 0.9851724611327715}, 'times': {'contamination': 0.0010797977447509766, 'optimization': 0, 'imputation': 0.8159213066101074}}, '0.1': {'scores': {'RMSE': 0.3006324748663841, 'MAE': 0.17773178955210425, 'MI': 1.2119149147233643, 'CORRELATION': 0.9321993026569703}, 'times': {'contamination': 0.0021529197692871094, 'optimization': 0, 'imputation': 5.404278039932251}}, '0.2': {'scores': {'RMSE': 0.30708253455892426, 'MAE': 0.18786443029344255, 'MI': 1.0350247745925767, 'CORRELATION': 0.9270935540980816}, 'times': {'contamination': 0.007862567901611328, 'optimization': 0, 'imputation': 39.23897194862366}}, '0.4': {'scores': {'RMSE': 0.36627844349732885, 'MAE': 0.23513471435395922, 'MI': 0.8536501396545491, 'CORRELATION': 0.9028949327632931}, 'times': {'contamination': 0.04749464988708496, 'optimization': 0, 'imputation': 291.0960524082184}}, '0.6': {'scores': {'RMSE': 0.44187263450733627, 'MAE': 0.3005295255111392, 'MI': 0.7070128664004881, 'CORRELATION': 0.8600506431175654}, 'times': {'contamination': 0.19056296348571777, 'optimization': 0, 'imputation': 961.3684046268463}}, '0.8': {'scores': {'RMSE': 0.6162987723847368, 'MAE': 0.4408568111584791, 'MI': 0.38562262881823584, 'CORRELATION': 0.7078269987710476}, 'times': {'contamination': 0.6741812229156494, 'optimization': 0, 'imputation': 2265.02947473526}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.2157597331971723, 'MAE': 1.0542417765804475, 'MI': 0.569311657025473, 'CORRELATION': -0.41037521809198385}, 'times': {'contamination': 0.0011243820190429688, 'optimization': 0, 'imputation': 18.271201610565186}}, '0.1': {'scores': {'RMSE': 1.1799455746309517, 'MAE': 1.0537900828112892, 'MI': 0.3698854611544671, 'CORRELATION': -0.30580392001607287}, 'times': {'contamination': 0.0025169849395751953, 'optimization': 0, 'imputation': 18.178789377212524}}, '0.2': {'scores': {'RMSE': 1.341883829249102, 'MAE': 1.1116623537227253, 'MI': 0.22703785144726024, 'CORRELATION': -0.13139818884461385}, 'times': 
{'contamination': 0.008020877838134766, 'optimization': 0, 'imputation': 18.227224111557007}}, '0.4': {'scores': {'RMSE': 1.4574773306729822, 'MAE': 1.221059892905018, 'MI': 0.1526121106442972, 'CORRELATION': -0.06171770589679702}, 'times': {'contamination': 0.04882335662841797, 'optimization': 0, 'imputation': 18.527106523513794}}, '0.6': {'scores': {'RMSE': 1.4501476980845394, 'MAE': 1.1589217747122664, 'MI': 0.08174182790842249, 'CORRELATION': -0.028201438478978574}, 'times': {'contamination': 0.19412755966186523, 'optimization': 0, 'imputation': 19.096518754959106}}, '0.8': {'scores': {'RMSE': 1.204799199247893, 'MAE': 1.002446633752256, 'MI': 0.08875526330977121, 'CORRELATION': -0.02097728376019728}, 'times': {'contamination': 0.6939215660095215, 'optimization': 0, 'imputation': 19.685445308685303}}}}}}} @@ -59,9 +59,9 @@ run_2_fmri_s = {'fmristoptask': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0591754233439183, 'MAE': 0.8811507908679529, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0017461776733398438, 'optimization': 0, 'imputation': 0.001100778579711914}}, '0.1': {'scores': {'RMSE': 0.9651108444122715, 'MAE': 0.784231196318496, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0038170814514160156, 'optimization': 0, 'imputation': 0.0006277561187744141}}, '0.2': {'scores': {'RMSE': 0.9932773680676918, 'MAE': 0.8034395750738844, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.009348630905151367, 'optimization': 0, 'imputation': 0.0006661415100097656}}, '0.4': {'scores': {'RMSE': 1.0058748440484344, 'MAE': 0.8113341021149199, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.03160667419433594, 'optimization': 0, 'imputation': 0.0009412765502929688}}, '0.6': {'scores': {'RMSE': 0.9944066185522102, 'MAE': 0.8023296982336051, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.07952380180358887, 'optimization': 0, 'imputation': 0.001110076904296875}}, '0.8': {'scores': {'RMSE': 0.9979990505486313, 'MAE': 0.8062359186814159, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.18988037109375, 'optimization': 0, 'imputation': 0.0012199878692626953}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0815739858856455, 'MAE': 0.8947163048898044, 'MI': 0.23576973507164212, 'CORRELATION': -0.12274682282048005}, 'times': {'contamination': 0.00153350830078125, 'optimization': 222.17338752746582, 'imputation': 0.007252931594848633}}, '0.1': {'scores': {'RMSE': 0.9695699729418912, 'MAE': 0.7898385707592198, 'MI': 0.06571976951128125, 'CORRELATION': 0.016476991654415008}, 'times': {'contamination': 0.00762939453125, 'optimization': 222.17338752746582, 'imputation': 0.006178379058837891}}, '0.2': {'scores': {'RMSE': 1.0023712131611957, 'MAE': 0.8108602788128816, 'MI': 0.02538765630290373, 'CORRELATION': -0.016656543511887868}, 'times': {'contamination': 0.020302534103393555, 'optimization': 222.17338752746582, 'imputation': 0.006856203079223633}}, '0.4': {'scores': {'RMSE': 1.0138537110215022, 'MAE': 0.8167419153197173, 'MI': 0.0038274804707874484, 'CORRELATION': 0.002717578068034049}, 'times': {'contamination': 0.07085132598876953, 'optimization': 222.17338752746582, 'imputation': 0.006796836853027344}}, '0.6': {'scores': {'RMSE': 1.0022937958385385, 'MAE': 0.807293318305244, 'MI': 0.0018376453669024168, 'CORRELATION': 0.004596695453371254}, 'times': {'contamination': 0.14228367805480957, 'optimization': 222.17338752746582, 'imputation': 0.006799221038818359}}, '0.8': {'scores': {'RMSE': 1.0104537937047533, 'MAE': 
0.8149091851781165, 'MI': 0.0008945376054130945, 'CORRELATION': -0.0013082054469119196}, 'times': {'contamination': 0.24705862998962402, 'optimization': 222.17338752746582, 'imputation': 0.005573272705078125}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 1.1715750158207363, 'MAE': 0.9389573934580852, 'MI': 0.30612963701823526, 'CORRELATION': -0.22056411372111834}, 'times': {'contamination': 0.002927064895629883, 'optimization': 109.24887442588806, 'imputation': 10.111968994140625}}, '0.1': {'scores': {'RMSE': 1.0588476372168147, 'MAE': 0.8437403156914149, 'MI': 0.08955991417984446, 'CORRELATION': -0.1963089605999627}, 'times': {'contamination': 0.0034782886505126953, 'optimization': 109.24887442588806, 'imputation': 10.12447738647461}}, '0.2': {'scores': {'RMSE': 1.0391969620815695, 'MAE': 0.8364861943065512, 'MI': 0.02582105408815175, 'CORRELATION': -0.09232453336176588}, 'times': {'contamination': 0.009154081344604492, 'optimization': 109.24887442588806, 'imputation': 10.325854778289795}}, '0.4': {'scores': {'RMSE': 1.0340455393837413, 'MAE': 0.832400199311948, 'MI': 0.00520789381175344, 'CORRELATION': -0.04499260926820861}, 'times': {'contamination': 0.031117677688598633, 'optimization': 109.24887442588806, 'imputation': 11.087183237075806}}, '0.6': {'scores': {'RMSE': 4.011139383889788, 'MAE': 3.152797499531786, 'MI': 0.003672509477371519, 'CORRELATION': -0.05413975121078511}, 'times': {'contamination': 0.07905244827270508, 'optimization': 109.24887442588806, 'imputation': 8.649941444396973}}, '0.8': {'scores': {'RMSE': 2.97893158705676, 'MAE': 1.0602936132635719, 'MI': 0.00079094933311715, 'CORRELATION': 0.006947773983399647}, 'times': {'contamination': 0.18860864639282227, 'optimization': 109.24887442588806, 'imputation': 8.43183708190918}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0692148314478316, 'MAE': 0.873400733402723, 'MI': 0.2787388945371119, 'CORRELATION': -0.02021145481191946}, 'times': {'contamination': 0.0014863014221191406, 'optimization': 5088.581882238388, 'imputation': 10.688252687454224}}, '0.1': {'scores': {'RMSE': 0.9719895445677292, 'MAE': 0.7851843420896756, 'MI': 0.0830808565046283, 'CORRELATION': 0.003268635254181307}, 'times': {'contamination': 0.0037031173706054688, 'optimization': 5088.581882238388, 'imputation': 50.06313109397888}}, '0.2': {'scores': {'RMSE': 0.99753636840165, 'MAE': 0.8012616128674659, 'MI': 0.019093143495502334, 'CORRELATION': 0.02540361203010324}, 'times': {'contamination': 0.00922083854675293, 'optimization': 5088.581882238388, 'imputation': 257.213321685791}}, '0.4': {'scores': {'RMSE': 1.0155975152475738, 'MAE': 0.8140496119700683, 'MI': 0.004260439955627443, 'CORRELATION': 0.0006423716677864647}, 'times': {'contamination': 0.03141498565673828, 'optimization': 5088.581882238388, 'imputation': 1488.7819337844849}}, '0.6': {'scores': {'RMSE': 1.0040752264526889, 'MAE': 0.8052914143043017, 'MI': 0.0018099723977603893, 'CORRELATION': -0.006621752869444718}, 'times': {'contamination': 0.07847213745117188, 'optimization': 5088.581882238388, 'imputation': 4525.959330558777}}, '0.8': {'scores': {'RMSE': 1.0078811833781343, 'MAE': 0.8090736592195691, 'MI': 0.001033941419470956, 'CORRELATION': -0.003099173821807945}, 'times': {'contamination': 0.18671298027038574, 'optimization': 5088.581882238388, 'imputation': 9460.7878510952}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.122220535003296, 'MAE': 0.9644508995813553, 'MI': 0.2759436355942961, 'CORRELATION': 0.09245761750327637}, 'times': {'contamination': 
0.0015985965728759766, 'optimization': 4112.733412027359, 'imputation': 338.0099182128906}}, '0.1': {'scores': {'RMSE': 1.0832970108643896, 'MAE': 0.8823888940960694, 'MI': 0.0722893609050923, 'CORRELATION': -0.019930274489311815}, 'times': {'contamination': 0.0035643577575683594, 'optimization': 4112.733412027359, 'imputation': 337.6157658100128}}, '0.2': {'scores': {'RMSE': 1.0767155565632924, 'MAE': 0.8684991669552922, 'MI': 0.009245255133377466, 'CORRELATION': 0.0027516812337193518}, 'times': {'contamination': 0.009638309478759766, 'optimization': 4112.733412027359, 'imputation': 328.19135212898254}}, '0.4': {'scores': {'RMSE': 1.0934522863869605, 'MAE': 0.8840570779852788, 'MI': 0.003369568798431563, 'CORRELATION': -0.021061682051014274}, 'times': {'contamination': 0.03281116485595703, 'optimization': 4112.733412027359, 'imputation': 346.8224673271179}}, '0.6': {'scores': {'RMSE': 1.0783671319985777, 'MAE': 0.8704278560665365, 'MI': 0.00169355769499049, 'CORRELATION': -0.019325646685601}, 'times': {'contamination': 0.08042550086975098, 'optimization': 4112.733412027359, 'imputation': 340.2620213031769}}, '0.8': {'scores': {'RMSE': 1.081513280302422, 'MAE': 0.8746519908670293, 'MI': 0.0011728245783709944, 'CORRELATION': -0.016826349565356294}, 'times': {'contamination': 0.18836283683776855, 'optimization': 4112.733412027359, 'imputation': 341.4021186828613}}}}}}} run_3_fmri_s = {'fmristoptask': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0591754233439183, 'MAE': 0.8811507908679529, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.001561880111694336, 'optimization': 0, 'imputation': 0.0010650157928466797}}, '0.1': {'scores': {'RMSE': 0.9651108444122715, 'MAE': 0.784231196318496, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0035762786865234375, 'optimization': 0, 'imputation': 0.0006108283996582031}}, '0.2': {'scores': {'RMSE': 0.9932773680676918, 'MAE': 0.8034395750738844, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.009912252426147461, 'optimization': 0, 'imputation': 0.000682830810546875}}, '0.4': {'scores': {'RMSE': 1.0058748440484344, 'MAE': 0.8113341021149199, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.033663034439086914, 'optimization': 0, 'imputation': 0.0008401870727539062}}, '0.6': {'scores': {'RMSE': 0.9944066185522102, 'MAE': 0.8023296982336051, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.08425664901733398, 'optimization': 0, 'imputation': 0.0010020732879638672}}, '0.8': {'scores': {'RMSE': 0.9979990505486313, 'MAE': 0.8062359186814159, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.1884922981262207, 'optimization': 0, 'imputation': 0.0009903907775878906}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0815739858856455, 'MAE': 0.8947163048898044, 'MI': 0.23576973507164212, 'CORRELATION': -0.12274682282048005}, 'times': {'contamination': 0.0014843940734863281, 'optimization': 216.33094692230225, 'imputation': 0.006989240646362305}}, '0.1': {'scores': {'RMSE': 0.9695699729418912, 'MAE': 0.7898385707592198, 'MI': 0.06571976951128125, 'CORRELATION': 0.016476991654415008}, 'times': {'contamination': 0.008179664611816406, 'optimization': 216.33094692230225, 'imputation': 0.0062677860260009766}}, '0.2': {'scores': {'RMSE': 1.0023712131611957, 'MAE': 0.8108602788128816, 'MI': 0.02538765630290373, 'CORRELATION': -0.016656543511887868}, 'times': {'contamination': 0.02096843719482422, 'optimization': 216.33094692230225, 'imputation': 0.006853580474853516}}, '0.4': {'scores': 
{'RMSE': 1.0138537110215022, 'MAE': 0.8167419153197173, 'MI': 0.0038274804707874484, 'CORRELATION': 0.002717578068034049}, 'times': {'contamination': 0.07195258140563965, 'optimization': 216.33094692230225, 'imputation': 0.00666499137878418}}, '0.6': {'scores': {'RMSE': 1.0022937958385385, 'MAE': 0.807293318305244, 'MI': 0.0018376453669024168, 'CORRELATION': 0.004596695453371254}, 'times': {'contamination': 0.14317655563354492, 'optimization': 216.33094692230225, 'imputation': 0.006315708160400391}}, '0.8': {'scores': {'RMSE': 1.0104537937047533, 'MAE': 0.8149091851781165, 'MI': 0.0008945376054130945, 'CORRELATION': -0.0013082054469119196}, 'times': {'contamination': 0.2480306625366211, 'optimization': 216.33094692230225, 'imputation': 0.005487203598022461}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 1.1715750158207363, 'MAE': 0.9389573934580852, 'MI': 0.30612963701823526, 'CORRELATION': -0.22056411372111834}, 'times': {'contamination': 0.0031473636627197266, 'optimization': 110.04800581932068, 'imputation': 10.122947692871094}}, '0.1': {'scores': {'RMSE': 1.0588476372168147, 'MAE': 0.8437403156914149, 'MI': 0.08955991417984446, 'CORRELATION': -0.1963089605999627}, 'times': {'contamination': 0.003419160842895508, 'optimization': 110.04800581932068, 'imputation': 10.181205034255981}}, '0.2': {'scores': {'RMSE': 1.0391969620815695, 'MAE': 0.8364861943065512, 'MI': 0.02582105408815175, 'CORRELATION': -0.09232453336176588}, 'times': {'contamination': 0.009185314178466797, 'optimization': 110.04800581932068, 'imputation': 10.448293685913086}}, '0.4': {'scores': {'RMSE': 1.0340455393837413, 'MAE': 0.832400199311948, 'MI': 0.00520789381175344, 'CORRELATION': -0.04499260926820861}, 'times': {'contamination': 0.030958890914916992, 'optimization': 110.04800581932068, 'imputation': 11.198593139648438}}, '0.6': {'scores': {'RMSE': 4.011139383889788, 'MAE': 3.152797499531786, 'MI': 0.003672509477371519, 'CORRELATION': -0.05413975121078511}, 'times': {'contamination': 0.07897067070007324, 'optimization': 110.04800581932068, 'imputation': 8.581665992736816}}, '0.8': {'scores': {'RMSE': 2.97893158705676, 'MAE': 1.0602936132635719, 'MI': 0.00079094933311715, 'CORRELATION': 0.006947773983399647}, 'times': {'contamination': 0.18915915489196777, 'optimization': 110.04800581932068, 'imputation': 8.440712690353394}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0692148314478316, 'MAE': 0.873400733402723, 'MI': 0.2787388945371119, 'CORRELATION': -0.02021145481191946}, 'times': {'contamination': 0.0015034675598144531, 'optimization': 5124.588714838028, 'imputation': 10.759928226470947}}, '0.1': {'scores': {'RMSE': 0.9719895445677292, 'MAE': 0.7851843420896756, 'MI': 0.0830808565046283, 'CORRELATION': 0.003268635254181307}, 'times': {'contamination': 0.003936767578125, 'optimization': 5124.588714838028, 'imputation': 50.354418992996216}}, '0.2': {'scores': {'RMSE': 0.99753636840165, 'MAE': 0.8012616128674659, 'MI': 0.019093143495502334, 'CORRELATION': 0.02540361203010324}, 'times': {'contamination': 0.009255409240722656, 'optimization': 5124.588714838028, 'imputation': 259.3400568962097}}, '0.4': {'scores': {'RMSE': 1.0155975152475738, 'MAE': 0.8140496119700683, 'MI': 0.004260439955627443, 'CORRELATION': 0.0006423716677864647}, 'times': {'contamination': 0.0312647819519043, 'optimization': 5124.588714838028, 'imputation': 1500.3178548812866}}, '0.6': {'scores': {'RMSE': 1.0040752264526889, 'MAE': 0.8052914143043017, 'MI': 0.0018099723977603893, 'CORRELATION': -0.006621752869444718}, 'times': 
{'contamination': 0.07852554321289062, 'optimization': 5124.588714838028, 'imputation': 4581.28284406662}}, '0.8': {'scores': {'RMSE': 1.0078811833781343, 'MAE': 0.8090736592195691, 'MI': 0.001033941419470956, 'CORRELATION': -0.003099173821807945}, 'times': {'contamination': 0.18776154518127441, 'optimization': 5124.588714838028, 'imputation': 9590.927385091782}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.146433389804167, 'MAE': 0.9770400477715633, 'MI': 0.3372765709259859, 'CORRELATION': 0.0330859633180261}, 'times': {'contamination': 0.001608133316040039, 'optimization': 4109.78501701355, 'imputation': 347.9514887332916}}, '0.1': {'scores': {'RMSE': 1.0805589422598818, 'MAE': 0.8789487774083494, 'MI': 0.06450452519706741, 'CORRELATION': 0.0050948685955938995}, 'times': {'contamination': 0.0037963390350341797, 'optimization': 4109.78501701355, 'imputation': 342.1326117515564}}, '0.2': {'scores': {'RMSE': 1.113302451577659, 'MAE': 0.8972310309254206, 'MI': 0.013539230335286593, 'CORRELATION': -0.010746184336502297}, 'times': {'contamination': 0.010583162307739258, 'optimization': 4109.78501701355, 'imputation': 347.8061354160309}}, '0.4': {'scores': {'RMSE': 1.1059062825212693, 'MAE': 0.8920096539260874, 'MI': 0.0039427922204060845, 'CORRELATION': -0.021280076256874978}, 'times': {'contamination': 0.03199410438537598, 'optimization': 4109.78501701355, 'imputation': 351.9458327293396}}, '0.6': {'scores': {'RMSE': 1.0740866766668984, 'MAE': 0.8664850080628724, 'MI': 0.0015316126887234942, 'CORRELATION': -0.021487493774034198}, 'times': {'contamination': 0.08084416389465332, 'optimization': 4109.78501701355, 'imputation': 349.9893400669098}}, '0.8': {'scores': {'RMSE': 1.075891210325233, 'MAE': 0.8695393935351904, 'MI': 0.0011319165672490211, 'CORRELATION': -0.017885852991857847}, 'times': {'contamination': 0.19720029830932617, 'optimization': 4109.78501701355, 'imputation': 349.96222448349}}}}}}} - scores_list, algos, sets = Benchmarking().avg_results(run_1_chlorine, run_2_chlorine, run_3_chlorine, run_1_drift, run_2_drift, run_3_drift, run_1_eeg_a, run_2_eeg_a, run_3_eeg_a, run_1_eeg_r, run_2_eeg_r, run_3_eeg_r, run_1_fmri_o, run_2_fmri_o, run_3_fmri_o, run_1_fmri_s, run_2_fmri_s, run_3_fmri_s) + scores_list, algos, sets = Benchmark().avg_results(run_1_chlorine, run_2_chlorine, run_3_chlorine, run_1_drift, run_2_drift, run_3_drift, run_1_eeg_a, run_2_eeg_a, run_3_eeg_a, run_1_eeg_r, run_2_eeg_r, run_3_eeg_r, run_1_fmri_o, run_2_fmri_o, run_3_fmri_o, run_1_fmri_s, run_2_fmri_s, run_3_fmri_s) - result = Benchmarking().generate_matrix(scores_list, algos, sets) + result = Benchmark().generate_heatmap(scores_list, algos, sets) diff --git a/build/lib/imputegap/runner_contamination.py b/build/lib/imputegap/runner_contamination.py index ed426f9..b4971f7 100644 --- a/build/lib/imputegap/runner_contamination.py +++ b/build/lib/imputegap/runner_contamination.py @@ -8,9 +8,9 @@ ts_1.load_timeseries(utils.search_path("eeg-alcohol")) ts_1.normalize(normalizer="min_max") -# 3. contamination of the data with MCAR scenario -infected_data = ts_1.Contaminate.mcar(ts_1.data, series_impacted=0.4, missing_rate=0.2, use_seed=True) +# 3. 
contamination of the data with MCAR pattern +incomp_data = ts_1.Contamination.mcar(ts_1.data, series_rate=0.4, missing_rate=0.2, seed=True) # [OPTIONAL] you can plot your raw data / print the contamination -ts_1.print(limit=10) -ts_1.plot(ts_1.data, infected_data, title="contamination", max_series=1, save_path="./assets") \ No newline at end of file +ts_1.print(limit_series=10) +ts_1.plot(ts_1.data, incomp_data, max_series=1, save_path="./assets") diff --git a/build/lib/imputegap/runner_datasets.py b/build/lib/imputegap/runner_datasets.py index 4d03375..2302e22 100644 --- a/build/lib/imputegap/runner_datasets.py +++ b/build/lib/imputegap/runner_datasets.py @@ -8,10 +8,10 @@ # small one data_n = TimeSeries() data_n.load_timeseries(data=utils.search_path(dataset), max_series=20, max_values=400, header=False) - data_n.plot(raw_data=data_n.data, title="02_"+dataset + " - raw data 20x400", max_series=20, save_path="./dataset/docs/" + dataset + "", display=False) - data_n.plot(raw_data=data_n.data, title="03_"+dataset + " - raw data 01x400", max_series=1, save_path="./dataset/docs/" + dataset + "", display=False) + data_n.plot(input_data=data_n.data, max_series=20, save_path="./dataset/docs/" + dataset + "", display=False) + data_n.plot(input_data=data_n.data, max_series=1, save_path="./dataset/docs/" + dataset + "", display=False) data_n.normalize(normalizer="min_max") - data_n.plot(raw_data=data_n.data, title="04_"+dataset + " - norm min_max data 01x400", max_series=20, save_path="./dataset/docs/" + dataset + "", display=False) + data_n.plot(input_data=data_n.data, max_series=20, save_path="./dataset/docs/" + dataset + "", display=False) # 5x one data_n = TimeSeries() @@ -29,14 +29,14 @@ max_value = 400 data_n.load_timeseries(data=utils.search_path(dataset), max_series=max_series, max_values=max_value, header=False) - data_n.plot(raw_data=data_n.data, title="1_"+dataset + " - raw data - "+str(max_series)+"x"+str(max_value), save_path="./dataset/docs/" + dataset + "", display=False) + data_n.plot(input_data=data_n.data, save_path="./dataset/docs/" + dataset + "", display=False) data_n.normalize(normalizer="min_max") - data_n.plot(raw_data=data_n.data, title="2_" + dataset + " - norm min max "+str(max_series)+"x"+str(max_value), save_path="./dataset/docs/" + dataset + "", display=False) + data_n.plot(input_data=data_n.data, save_path="./dataset/docs/" + dataset + "", display=False) # full one data_n = TimeSeries() data_n.load_timeseries(data=utils.search_path(dataset), header=False) - data_n.plot(raw_data=data_n.data, title="01_"+dataset + " - raw data - NxM", save_path="./dataset/docs/" + dataset + "", display=False) + data_n.plot(input_data=data_n.data, save_path="./dataset/docs/" + dataset + "", display=False) categories, features = Explainer.load_configuration() characteristics, descriptions = Explainer.extract_features(data=data_n.data, features_categories=categories, features_list=features, do_catch24=False) diff --git a/build/lib/imputegap/runner_explainer.py b/build/lib/imputegap/runner_explainer.py index 302d64c..f99c0d7 100644 --- a/build/lib/imputegap/runner_explainer.py +++ b/build/lib/imputegap/runner_explainer.py @@ -9,7 +9,12 @@ ts_1.load_timeseries(utils.search_path("chlorine")) # 3. 
call the explanation of your dataset with a specific algorithm to gain insight on the Imputation results -shap_values, shap_details = Explainer.shap_explainer(raw_data=ts_1.data, missing_rate=0.25, limitation=50, splitter=35, file_name="chlorine", algorithm="cdrec") +shap_values, shap_details = Explainer.shap_explainer(input_data=ts_1.data, + missing_rate=0.25, + limit_ratio=1, + split_ratio=0.7, + file_name="chlorine", + algorithm="cdrec") # [OPTIONAL] print the results with the impact of each feature. Explainer.print(shap_values, shap_details) \ No newline at end of file diff --git a/build/lib/imputegap/runner_imputation.py b/build/lib/imputegap/runner_imputation.py index fbc0e21..ffd4b39 100644 --- a/build/lib/imputegap/runner_imputation.py +++ b/build/lib/imputegap/runner_imputation.py @@ -10,10 +10,10 @@ ts_1.normalize(normalizer="min_max") # 3. contamination of the data -infected_data = ts_1.Contaminate.mcar(ts_1.data) +incomp_data = ts_1.Contamination.mcar(ts_1.data) # [OPTIONAL] save your results in a new Time Series object -ts_2 = TimeSeries().import_matrix(infected_data) +ts_2 = TimeSeries().import_matrix(incomp_data) # 4. imputation of the contaminated data # choice of the algorithm, and their parameters (default, automl, or defined by the user) @@ -25,11 +25,11 @@ # >>> cdrec.impute(params={"rank": 5, "epsilon": 0.01, "iterations": 100}) # [OPTIONAL] save your results in a new Time Series object -ts_3 = TimeSeries().import_matrix(cdrec.imputed_matrix) +ts_3 = TimeSeries().import_matrix(cdrec.recov_data) # 5. score the imputation with the raw_data cdrec.score(ts_1.data, ts_3.data) # 6. display the results ts_3.print_results(cdrec.metrics, algorithm="cdrec") -ts_3.plot(raw_data=ts_1.data, infected_data=ts_2.data, imputed_data=ts_3.data, title="imputation", max_series=1, save_path="./assets", display=True) +ts_3.plot(input_data=ts_1.data, incomp_data=ts_2.data, recov_data=ts_3.data, max_series=1, save_path="./assets") diff --git a/build/lib/imputegap/runner_loading.py b/build/lib/imputegap/runner_loading.py index b94edb3..9df0ee9 100644 --- a/build/lib/imputegap/runner_loading.py +++ b/build/lib/imputegap/runner_loading.py @@ -9,5 +9,5 @@ ts_1.normalize(normalizer="z_score") # [OPTIONAL] you can plot your raw data / print the information -ts_1.plot(raw_data=ts_1.data, title="raw data", max_series=10, max_values=100, save_path="./imputegap/assets") -ts_1.print(limit=10) \ No newline at end of file +ts_1.plot(input_data=ts_1.data, max_series=10, max_values=100, save_path="./imputegap/assets") +ts_1.print(limit_series=10) \ No newline at end of file diff --git a/build/lib/imputegap/runner_optimization.py b/build/lib/imputegap/runner_optimization.py index e3eaa2f..7cccf97 100644 --- a/build/lib/imputegap/runner_optimization.py +++ b/build/lib/imputegap/runner_optimization.py @@ -10,18 +10,18 @@ ts_1.normalize(normalizer="min_max") # 3. contamination of the data -infected_data = ts_1.Contaminate.mcar(ts_1.data) +infected_data = ts_1.Contamination.mcar(ts_1.data) # 4. imputation of the contaminated data # imputation with AutoML which will discover the optimal hyperparameters for your dataset and your algorithm -cdrec = Imputation.MatrixCompletion.CDRec(infected_data).impute(user_defined=False, params={"ground_truth": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 3}}) +cdrec = Imputation.MatrixCompletion.CDRec(infected_data).impute(user_def=False, params={"ground_truth": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 3}}) # 5. 
score the imputation with the raw_data -cdrec.score(ts_1.data, cdrec.imputed_matrix) +cdrec.score(ts_1.data, cdrec.recov_data) # 6. display the results ts_1.print_results(cdrec.metrics) -ts_1.plot(raw_data=ts_1.data, infected_data=infected_data, imputed_data=cdrec.imputed_matrix, title="imputation", max_series=1, save_path="./assets", display=True) +ts_1.plot(input_data=ts_1.data, incomp_data=infected_data, imputed_data=cdrec.recov_data, title="imputation", max_series=1, save_path="./assets", display=True) # 7. save hyperparameters utils.save_optimization(optimal_params=cdrec.parameters, algorithm="cdrec", dataset="eeg", optimizer="t") \ No newline at end of file diff --git a/build/lib/imputegap/test_naterq/chlorine_mcar_metrics_subplot.jpg b/build/lib/imputegap/test_naterq/chlorine_mcar_metrics_subplot.jpg new file mode 100644 index 0000000..05ed8de Binary files /dev/null and b/build/lib/imputegap/test_naterq/chlorine_mcar_metrics_subplot.jpg differ diff --git a/build/lib/imputegap/test_naterq/report_chlorine.txt b/build/lib/imputegap/test_naterq/report_chlorine.txt new file mode 100644 index 0000000..cb7112a --- /dev/null +++ b/build/lib/imputegap/test_naterq/report_chlorine.txt @@ -0,0 +1,155 @@ +Report for Dataset: chlorine +Generated on: 2025-01-06 17:29:26 +Run number: 0 +======================================================================================================================== + +RMSE: Root Mean Square Error - Measures the average magnitude of error. + ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| Dataset | Algorithm | Optimizer | Pattern | X Value | RMSE | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| chlorine | mcar | mean | bayesian | 0.05 | 0.9256738243 | +| chlorine | mcar | mean | bayesian | 0.1 | 0.8239629739 | +| chlorine | mcar | mean | bayesian | 0.2 | 0.8317409761 | +| chlorine | mcar | mean | bayesian | 0.4 | 0.8661785428 | +| chlorine | mcar | mean | bayesian | 0.6 | 0.8906205974 | +| chlorine | mcar | mean | bayesian | 0.8 | 0.9231926868 | +| chlorine | mcar | cdrec | bayesian | 0.05 | 0.1955580177 | +| chlorine | mcar | cdrec | bayesian | 0.1 | 0.2221132910 | +| chlorine | mcar | cdrec | bayesian | 0.2 | 0.2689014052 | +| chlorine | mcar | cdrec | bayesian | 0.4 | 0.3143181342 | +| chlorine | mcar | cdrec | bayesian | 0.6 | 0.3751478012 | +| chlorine | mcar | cdrec | bayesian | 0.8 | 0.9117409046 | +| chlorine | mcar | stmvl | bayesian | 0.05 | 0.3033328648 | +| chlorine | mcar | stmvl | bayesian | 0.1 | 0.2743409975 | +| chlorine | mcar | stmvl | bayesian | 0.2 | 0.3354154244 | +| chlorine | mcar | stmvl | bayesian | 0.4 | 0.3663147585 | +| chlorine | mcar | stmvl | bayesian | 0.6 | 0.4917835690 | +| chlorine | mcar | stmvl | bayesian | 0.8 | 5.2863734521 | +| chlorine | mcar | iim | bayesian | 0.05 | 0.2246776140 | +| chlorine | mcar | iim | bayesian | 0.1 | 0.3034580007 | +| chlorine | mcar | iim | bayesian | 0.2 | 0.4104578379 | +| chlorine | mcar | iim | bayesian | 0.4 | 0.4911437972 | +| chlorine | mcar | iim | bayesian | 0.6 | 0.5797153883 | +| chlorine | mcar | iim | bayesian | 0.8 | 0.8100585330 | +| chlorine | mcar | mrnn | bayesian | 0.05 | 1.0889986962 | +| chlorine | mcar | mrnn | bayesian | 0.1 | 0.8750845974 | +| chlorine | mcar | mrnn | bayesian | 0.2 | 1.3935692459 | +| chlorine | mcar | mrnn | bayesian | 0.4 | 1.2198343626 | +| chlorine | mcar | mrnn | bayesian | 0.6 | 1.1924360264 | +| 
chlorine | mcar | mrnn | bayesian | 0.8 | 1.3728850686 | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ + +MAE: Mean Absolute Error - Measures the average absolute error. + ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| Dataset | Algorithm | Optimizer | Pattern | X Value | MAE | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| chlorine | mcar | mean | bayesian | 0.05 | 0.8788758766 | +| chlorine | mcar | mean | bayesian | 0.1 | 0.7297827051 | +| chlorine | mcar | mean | bayesian | 0.2 | 0.7138664942 | +| chlorine | mcar | mean | bayesian | 0.4 | 0.7449379439 | +| chlorine | mcar | mean | bayesian | 0.6 | 0.7677632103 | +| chlorine | mcar | mean | bayesian | 0.8 | 0.7897697041 | +| chlorine | mcar | cdrec | bayesian | 0.05 | 0.1437963497 | +| chlorine | mcar | cdrec | bayesian | 0.1 | 0.1368260921 | +| chlorine | mcar | cdrec | bayesian | 0.2 | 0.1698355542 | +| chlorine | mcar | cdrec | bayesian | 0.4 | 0.2041263696 | +| chlorine | mcar | cdrec | bayesian | 0.6 | 0.2215647404 | +| chlorine | mcar | cdrec | bayesian | 0.8 | 0.4801132375 | +| chlorine | mcar | stmvl | bayesian | 0.05 | 0.2644983509 | +| chlorine | mcar | stmvl | bayesian | 0.1 | 0.2274496988 | +| chlorine | mcar | stmvl | bayesian | 0.2 | 0.2667902545 | +| chlorine | mcar | stmvl | bayesian | 0.4 | 0.2683992894 | +| chlorine | mcar | stmvl | bayesian | 0.6 | 0.3590429490 | +| chlorine | mcar | stmvl | bayesian | 0.8 | 3.0120315982 | +| chlorine | mcar | iim | bayesian | 0.05 | 0.1626511249 | +| chlorine | mcar | iim | bayesian | 0.1 | 0.2038829926 | +| chlorine | mcar | iim | bayesian | 0.2 | 0.2785159739 | +| chlorine | mcar | iim | bayesian | 0.4 | 0.3245572848 | +| chlorine | mcar | iim | bayesian | 0.6 | 0.4144431748 | +| chlorine | mcar | iim | bayesian | 0.8 | 0.6124983237 | +| chlorine | mcar | mrnn | bayesian | 0.05 | 0.8825193441 | +| chlorine | mcar | mrnn | bayesian | 0.1 | 0.7897191909 | +| chlorine | mcar | mrnn | bayesian | 0.2 | 1.1278169010 | +| chlorine | mcar | mrnn | bayesian | 0.4 | 1.0043237478 | +| chlorine | mcar | mrnn | bayesian | 0.6 | 0.9838535398 | +| chlorine | mcar | mrnn | bayesian | 0.8 | 1.1227443271 | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ + +MI: Mutual Information - Indicates dependency between variables. 
+ ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| Dataset | Algorithm | Optimizer | Pattern | X Value | MI | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| chlorine | mcar | mean | bayesian | 0.05 | 0.0000000000 | +| chlorine | mcar | mean | bayesian | 0.1 | 0.0000000000 | +| chlorine | mcar | mean | bayesian | 0.2 | 0.0000000000 | +| chlorine | mcar | mean | bayesian | 0.4 | 0.0000000000 | +| chlorine | mcar | mean | bayesian | 0.6 | 0.0000000000 | +| chlorine | mcar | mean | bayesian | 0.8 | 0.0000000000 | +| chlorine | mcar | cdrec | bayesian | 0.05 | 1.3195962394 | +| chlorine | mcar | cdrec | bayesian | 0.1 | 1.2252402024 | +| chlorine | mcar | cdrec | bayesian | 0.2 | 1.0650037013 | +| chlorine | mcar | cdrec | bayesian | 0.4 | 0.9133456775 | +| chlorine | mcar | cdrec | bayesian | 0.6 | 0.7775541845 | +| chlorine | mcar | cdrec | bayesian | 0.8 | 0.2576488534 | +| chlorine | mcar | stmvl | bayesian | 0.05 | 1.2263963520 | +| chlorine | mcar | stmvl | bayesian | 0.1 | 1.0873378350 | +| chlorine | mcar | stmvl | bayesian | 0.2 | 0.9040935529 | +| chlorine | mcar | stmvl | bayesian | 0.4 | 0.7945562214 | +| chlorine | mcar | stmvl | bayesian | 0.6 | 0.5680681312 | +| chlorine | mcar | stmvl | bayesian | 0.8 | 0.0877803352 | +| chlorine | mcar | iim | bayesian | 0.05 | 1.0875116208 | +| chlorine | mcar | iim | bayesian | 0.1 | 1.0526306211 | +| chlorine | mcar | iim | bayesian | 0.2 | 0.7986686024 | +| chlorine | mcar | iim | bayesian | 0.4 | 0.6429014105 | +| chlorine | mcar | iim | bayesian | 0.6 | 0.4541369620 | +| chlorine | mcar | iim | bayesian | 0.8 | 0.1600984203 | +| chlorine | mcar | mrnn | bayesian | 0.05 | 0.5693116570 | +| chlorine | mcar | mrnn | bayesian | 0.1 | 0.3654213134 | +| chlorine | mcar | mrnn | bayesian | 0.2 | 0.2327887670 | +| chlorine | mcar | mrnn | bayesian | 0.4 | 0.1169414642 | +| chlorine | mcar | mrnn | bayesian | 0.6 | 0.0794767097 | +| chlorine | mcar | mrnn | bayesian | 0.8 | 0.0861103723 | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ + +CORRELATION: Correlation Coefficient - Indicates linear relationship between variables. 
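For reference, the four metrics glossed above (RMSE, MAE, MI, CORRELATION) can be reproduced on the contaminated cells roughly as follows, before the correlation table below. This is only a minimal sketch, not ImputeGAP's evaluation code: the helper name `report_metrics`, its `mask` argument, and the histogram-binning estimate of mutual information are illustrative assumptions, and the estimators actually used to generate this report may differ.

```python
# Illustrative sketch only -- not the package's internal evaluation module.
import numpy as np
from sklearn.metrics import mutual_info_score

def report_metrics(input_data, recov_data, mask, bins=10):
    """Compute RMSE, MAE, MI and Pearson correlation on the contaminated cells only."""
    y_true = input_data[mask]    # ground-truth values at the masked positions
    y_hat = recov_data[mask]     # imputed values at the same positions

    rmse = float(np.sqrt(np.mean((y_true - y_hat) ** 2)))   # average error magnitude
    mae = float(np.mean(np.abs(y_true - y_hat)))            # average absolute error
    corr = float(np.corrcoef(y_true, y_hat)[0, 1])          # linear relationship

    # Mutual information needs discrete labels, so bin the continuous values first
    # (assumed estimator; the report's MI values may come from a different one).
    edges_t = np.histogram_bin_edges(y_true, bins=bins)
    edges_h = np.histogram_bin_edges(y_hat, bins=bins)
    mi = float(mutual_info_score(np.digitize(y_true, edges_t),
                                 np.digitize(y_hat, edges_h)))

    return {"RMSE": rmse, "MAE": mae, "MI": mi, "CORRELATION": corr}

# Toy usage: two cells were contaminated (hypothetical mask) and then imputed.
truth = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
recovered = np.array([[1.0, 2.1, 3.0], [4.0, 4.8, 6.0]])
mask = np.isnan(np.array([[np.nan, 1, 1], [1, np.nan, 1]]))
print(report_metrics(truth, recovered, mask))
```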
+ ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| Dataset | Algorithm | Optimizer | Pattern | X Value | CORRELATION | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| chlorine | mcar | mean | bayesian | 0.05 | 0.0000000000 | +| chlorine | mcar | mean | bayesian | 0.1 | 0.0000000000 | +| chlorine | mcar | mean | bayesian | 0.2 | 0.0000000000 | +| chlorine | mcar | mean | bayesian | 0.4 | 0.0000000000 | +| chlorine | mcar | mean | bayesian | 0.6 | 0.0000000000 | +| chlorine | mcar | mean | bayesian | 0.8 | 0.0000000000 | +| chlorine | mcar | cdrec | bayesian | 0.05 | 0.9770377316 | +| chlorine | mcar | cdrec | bayesian | 0.1 | 0.9627754587 | +| chlorine | mcar | cdrec | bayesian | 0.2 | 0.9453318720 | +| chlorine | mcar | cdrec | bayesian | 0.4 | 0.9309636417 | +| chlorine | mcar | cdrec | bayesian | 0.6 | 0.9078517283 | +| chlorine | mcar | cdrec | bayesian | 0.8 | 0.6589813814 | +| chlorine | mcar | stmvl | bayesian | 0.05 | 0.9611641055 | +| chlorine | mcar | stmvl | bayesian | 0.1 | 0.9481608575 | +| chlorine | mcar | stmvl | bayesian | 0.2 | 0.9224394175 | +| chlorine | mcar | stmvl | bayesian | 0.4 | 0.9086873163 | +| chlorine | mcar | stmvl | bayesian | 0.6 | 0.8240735291 | +| chlorine | mcar | stmvl | bayesian | 0.8 | 0.4417418017 | +| chlorine | mcar | iim | bayesian | 0.05 | 0.9694504837 | +| chlorine | mcar | iim | bayesian | 0.1 | 0.9337303655 | +| chlorine | mcar | iim | bayesian | 0.2 | 0.8658822456 | +| chlorine | mcar | iim | bayesian | 0.4 | 0.8180219110 | +| chlorine | mcar | iim | bayesian | 0.6 | 0.7431519135 | +| chlorine | mcar | iim | bayesian | 0.8 | 0.4880867931 | +| chlorine | mcar | mrnn | bayesian | 0.05 | 0.0061108711 | +| chlorine | mcar | mrnn | bayesian | 0.1 | 0.1776164809 | +| chlorine | mcar | mrnn | bayesian | 0.2 | -0.0043224216 | +| chlorine | mcar | mrnn | bayesian | 0.4 | -0.0000288556 | +| chlorine | mcar | mrnn | bayesian | 0.6 | -0.0657094499 | +| chlorine | mcar | mrnn | bayesian | 0.8 | -0.0124248198 | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ + +Dictionary of Results: +{'chlorine': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9256738243031312, 'MAE': 0.8788758766429177, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.001201629638671875, 'optimization': 0, 'imputation': 0.0005724430084228516}}, '0.1': {'scores': {'RMSE': 0.8239629739455251, 'MAE': 0.7297827051195541, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.001814126968383789, 'optimization': 0, 'imputation': 0.0004563331604003906}}, '0.2': {'scores': {'RMSE': 0.8317409760747367, 'MAE': 0.7138664942301458, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.005623817443847656, 'optimization': 0, 'imputation': 0.0004363059997558594}}, '0.4': {'scores': {'RMSE': 0.866178542847881, 'MAE': 0.744937943856253, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.03413963317871094, 'optimization': 0, 'imputation': 0.0005552768707275391}}, '0.6': {'scores': {'RMSE': 0.8906205973878023, 'MAE': 0.7677632103385671, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.13074183464050293, 'optimization': 0, 'imputation': 0.0005936622619628906}}, '0.8': {'scores': {'RMSE': 0.9231926867636093, 'MAE': 0.7897697041316387, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.4494190216064453, 'optimization': 0, 'imputation': 
0.0005834102630615234}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.19555801767314038, 'MAE': 0.14379634965165344, 'MI': 1.3195962394272744, 'CORRELATION': 0.9770377315860114}, 'times': {'contamination': 0.0010943412780761719, 'optimization': 1.6249148845672607, 'imputation': 0.09233546257019043}}, '0.1': {'scores': {'RMSE': 0.22211329096601584, 'MAE': 0.13682609208383795, 'MI': 1.225240202380491, 'CORRELATION': 0.9627754587047338}, 'times': {'contamination': 0.005602359771728516, 'optimization': 1.6249148845672607, 'imputation': 0.1246938705444336}}, '0.2': {'scores': {'RMSE': 0.26890140517000855, 'MAE': 0.16983555417798818, 'MI': 1.0650037012869458, 'CORRELATION': 0.945331872005451}, 'times': {'contamination': 0.017725229263305664, 'optimization': 1.6249148845672607, 'imputation': 0.1363234519958496}}, '0.4': {'scores': {'RMSE': 0.3143181342292365, 'MAE': 0.2041263696093189, 'MI': 0.9133456774887369, 'CORRELATION': 0.9309636417166443}, 'times': {'contamination': 0.1031486988067627, 'optimization': 1.6249148845672607, 'imputation': 0.2686195373535156}}, '0.6': {'scores': {'RMSE': 0.37514780116434926, 'MAE': 0.22156474038385332, 'MI': 0.7775541845220788, 'CORRELATION': 0.9078517283026865}, 'times': {'contamination': 0.20231366157531738, 'optimization': 1.6249148845672607, 'imputation': 0.8690693378448486}}, '0.8': {'scores': {'RMSE': 0.9117409046445515, 'MAE': 0.4801132374733116, 'MI': 0.2576488533530952, 'CORRELATION': 0.6589813814462316}, 'times': {'contamination': 0.5354366302490234, 'optimization': 1.6249148845672607, 'imputation': 2.865450143814087}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.3033328648259709, 'MAE': 0.2644983508914945, 'MI': 1.2263963519649825, 'CORRELATION': 0.9611641055318173}, 'times': {'contamination': 0.0029397010803222656, 'optimization': 500.0222601890564, 'imputation': 23.88236165046692}}, '0.1': {'scores': {'RMSE': 0.27434099749552526, 'MAE': 0.22744969879475732, 'MI': 1.0873378350271077, 'CORRELATION': 0.9481608575454046}, 'times': {'contamination': 0.001943349838256836, 'optimization': 500.0222601890564, 'imputation': 24.082878351211548}}, '0.2': {'scores': {'RMSE': 0.3354154243946063, 'MAE': 0.2667902544729111, 'MI': 0.9040935528948765, 'CORRELATION': 0.9224394175345223}, 'times': {'contamination': 0.007236480712890625, 'optimization': 500.0222601890564, 'imputation': 27.05676031112671}}, '0.4': {'scores': {'RMSE': 0.3663147584695216, 'MAE': 0.2683992893683706, 'MI': 0.7945562213511235, 'CORRELATION': 0.9086873163095024}, 'times': {'contamination': 0.03319692611694336, 'optimization': 500.0222601890564, 'imputation': 24.969536066055298}}, '0.6': {'scores': {'RMSE': 0.49178356901493514, 'MAE': 0.3590429489696727, 'MI': 0.568068131156551, 'CORRELATION': 0.8240735290572155}, 'times': {'contamination': 0.13401484489440918, 'optimization': 500.0222601890564, 'imputation': 17.722254991531372}}, '0.8': {'scores': {'RMSE': 5.286373452119497, 'MAE': 3.0120315981628085, 'MI': 0.0877803352414065, 'CORRELATION': 0.4417418016734377}, 'times': {'contamination': 0.46097803115844727, 'optimization': 500.0222601890564, 'imputation': 17.994383335113525}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.2246776140243064, 'MAE': 0.16265112492381306, 'MI': 1.0875116207955637, 'CORRELATION': 0.9694504836799154}, 'times': {'contamination': 0.0009558200836181641, 'optimization': 4871.80725812912, 'imputation': 1.680412769317627}}, '0.1': {'scores': {'RMSE': 0.3034580006710775, 'MAE': 0.20388299260278156, 'MI': 1.0526306210784155, 'CORRELATION': 
0.9337303655141744}, 'times': {'contamination': 0.0018503665924072266, 'optimization': 4871.80725812912, 'imputation': 10.345388412475586}}, '0.2': {'scores': {'RMSE': 0.4104578379330223, 'MAE': 0.2785159738696005, 'MI': 0.7986686024303655, 'CORRELATION': 0.8658822456465257}, 'times': {'contamination': 0.0055084228515625, 'optimization': 4871.80725812912, 'imputation': 65.17643117904663}}, '0.4': {'scores': {'RMSE': 0.4911437971846393, 'MAE': 0.32455728476996504, 'MI': 0.6429014104572732, 'CORRELATION': 0.8180219110130202}, 'times': {'contamination': 0.032411813735961914, 'optimization': 4871.80725812912, 'imputation': 474.7696805000305}}, '0.6': {'scores': {'RMSE': 0.579715388344659, 'MAE': 0.4144431747763777, 'MI': 0.45413696197432313, 'CORRELATION': 0.7431519134806602}, 'times': {'contamination': 0.1278684139251709, 'optimization': 4871.80725812912, 'imputation': 1531.380850315094}}, '0.8': {'scores': {'RMSE': 0.8100585330320411, 'MAE': 0.6124983237048439, 'MI': 0.1600984202902365, 'CORRELATION': 0.48808679305097513}, 'times': {'contamination': 0.4592604637145996, 'optimization': 4871.80725812912, 'imputation': 3588.4590351581573}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0889986961845628, 'MAE': 0.8825193440526788, 'MI': 0.569311657025473, 'CORRELATION': 0.006110871130276294}, 'times': {'contamination': 0.0009238719940185547, 'optimization': 474.33066391944885, 'imputation': 37.89777088165283}}, '0.1': {'scores': {'RMSE': 0.8750845974360951, 'MAE': 0.7897191908914645, 'MI': 0.36542131337202255, 'CORRELATION': 0.1776164808833599}, 'times': {'contamination': 0.0020151138305664062, 'optimization': 474.33066391944885, 'imputation': 36.68788194656372}}, '0.2': {'scores': {'RMSE': 1.3935692458593014, 'MAE': 1.1278169009994172, 'MI': 0.23278876704617288, 'CORRELATION': -0.0043224216288866475}, 'times': {'contamination': 0.006083011627197266, 'optimization': 474.33066391944885, 'imputation': 34.238656997680664}}, '0.4': {'scores': {'RMSE': 1.2198343626008104, 'MAE': 1.004323747843723, 'MI': 0.11694146418635429, 'CORRELATION': -2.8855554502904036e-05}, 'times': {'contamination': 0.03404045104980469, 'optimization': 474.33066391944885, 'imputation': 37.132654428482056}}, '0.6': {'scores': {'RMSE': 1.1924360263528335, 'MAE': 0.9838535398356899, 'MI': 0.0794767096848362, 'CORRELATION': -0.06570944989748748}, 'times': {'contamination': 0.1405935287475586, 'optimization': 474.33066391944885, 'imputation': 37.741902351379395}}, '0.8': {'scores': {'RMSE': 1.3728850685938416, 'MAE': 1.1227443270722774, 'MI': 0.08611037233596197, 'CORRELATION': -0.012424819834313067}, 'times': {'contamination': 0.47881627082824707, 'optimization': 474.33066391944885, 'imputation': 37.675835847854614}}}}}}} diff --git a/build/lib/imputegap/test_naterq/report_chlorine.xlsx b/build/lib/imputegap/test_naterq/report_chlorine.xlsx new file mode 100644 index 0000000..73b451b Binary files /dev/null and b/build/lib/imputegap/test_naterq/report_chlorine.xlsx differ diff --git a/build/lib/imputegap/tools/utils.py b/build/lib/imputegap/tools/utils.py index 09eb8a2..8b2e8f9 100644 --- a/build/lib/imputegap/tools/utils.py +++ b/build/lib/imputegap/tools/utils.py @@ -140,14 +140,14 @@ def load_parameters(query: str = "default", algorithm: str = "cdrec", dataset: s return (hidden_dim, learning_rate, iterations) elif algorithm == "greedy": n_calls = int(config['greedy']['n_calls']) - selected_metrics = config['greedy']['selected_metrics'] - return (n_calls, [selected_metrics]) + metrics = config['greedy']['metrics'] + 
return (n_calls, [metrics]) elif algorithm == "bayesian": n_calls = int(config['bayesian']['n_calls']) n_random_starts = int(config['bayesian']['n_random_starts']) acq_func = str(config['bayesian']['acq_func']) - selected_metrics = config['bayesian']['selected_metrics'] - return (n_calls, n_random_starts, acq_func, [selected_metrics]) + metrics = config['bayesian']['metrics'] + return (n_calls, n_random_starts, acq_func, [metrics]) elif algorithm == "pso": n_particles = int(config['pso']['n_particles']) c1 = float(config['pso']['c1']) @@ -155,14 +155,14 @@ def load_parameters(query: str = "default", algorithm: str = "cdrec", dataset: s w = float(config['pso']['w']) iterations = int(config['pso']['iterations']) n_processes = int(config['pso']['n_processes']) - selected_metrics = config['pso']['selected_metrics'] - return (n_particles, c1, c2, w, iterations, n_processes, [selected_metrics]) + metrics = config['pso']['metrics'] + return (n_particles, c1, c2, w, iterations, n_processes, [metrics]) elif algorithm == "sh": num_configs = int(config['sh']['num_configs']) num_iterations = int(config['sh']['num_iterations']) reduction_factor = int(config['sh']['reduction_factor']) - selected_metrics = config['sh']['selected_metrics'] - return (num_configs, num_iterations, reduction_factor, [selected_metrics]) + metrics = config['sh']['metrics'] + return (num_configs, num_iterations, reduction_factor, [metrics]) elif algorithm == "colors": colors = config['colors']['plot'] return colors diff --git a/dist/imputegap-1.0.2-py3-none-any.whl b/dist/imputegap-1.0.2-py3-none-any.whl index 704d7f6..94fe720 100644 Binary files a/dist/imputegap-1.0.2-py3-none-any.whl and b/dist/imputegap-1.0.2-py3-none-any.whl differ diff --git a/dist/imputegap-1.0.2.tar.gz b/dist/imputegap-1.0.2.tar.gz index 3e30d4d..ec01425 100644 Binary files a/dist/imputegap-1.0.2.tar.gz and b/dist/imputegap-1.0.2.tar.gz differ diff --git a/imputegap.egg-info/PKG-INFO b/imputegap.egg-info/PKG-INFO index 43f03b9..ce0255f 100644 --- a/imputegap.egg-info/PKG-INFO +++ b/imputegap.egg-info/PKG-INFO @@ -28,6 +28,7 @@ Requires-Dist: pycatch22==0.4.5 Requires-Dist: scikit-optimize==0.10.2 Requires-Dist: pyswarms==1.3.0 Requires-Dist: pytest==8.3.3 +Requires-Dist: xlsxwriter==3.2.0 Requires-Dist: types-toml Requires-Dist: types-setuptools Requires-Dist: wheel @@ -40,7 +41,7 @@ Requires-Dist: wheel ImputeGAP is a comprehensive framework designed for time series imputation algorithms. It offers a streamlined interface that bridges algorithm evaluation and parameter tuning, utilizing datasets from diverse fields such as neuroscience, medicine, and energy. The framework includes advanced imputation algorithms from five different families, supports various patterns of missing data, and provides multiple evaluation metrics. Additionally, ImputeGAP enables AutoML optimization, feature extraction, and feature analysis. The framework enables easy integration of new algorithms, datasets, and evaluation metrics. 
![Python](https://img.shields.io/badge/Python-v3.12-blue) -![Release](https://img.shields.io/badge/Release-v0.2.2-brightgreen) +![Release](https://img.shields.io/badge/Release-v1.0.2-brightgreen) ![License](https://img.shields.io/badge/License-GPLv3-blue?style=flat&logo=gnu) ![Coverage](https://img.shields.io/badge/Coverage-93%25-brightgreen) ![PyPI](https://img.shields.io/pypi/v/imputegap?label=PyPI&color=blue) @@ -123,7 +124,7 @@ ts_1.load_timeseries(utils.search_path("eeg-alcohol"), max_series=5, max_values= ts_1.normalize(normalizer="z_score") # [OPTIONAL] you can plot your raw data / print the information -ts_1.plot(raw_data=ts_1.data, title="raw data", max_series=10, max_values=100, save_path="./imputegap/assets") +ts_1.plot(input_data=ts_1.data, title="raw data", max_series=10, max_values=100, save_path="./imputegap/assets") ts_1.print(limit=10) ``` @@ -150,7 +151,7 @@ ts_1.load_timeseries(utils.search_path("eeg-alcohol")) ts_1.normalize(normalizer="min_max") # 3. contamination of the data with MCAR scenario -infected_data = ts_1.Contaminate.mcar(ts_1.data, series_impacted=0.4, missing_rate=0.2, use_seed=True) +infected_data = ts_1.Contamination.mcar(ts_1.data, series_rate=0.4, missing_rate=0.2, use_seed=True) # [OPTIONAL] you can plot your raw data / print the contamination ts_1.print(limit=10) @@ -180,7 +181,7 @@ ts_1.load_timeseries(utils.search_path("eeg-alcohol")) ts_1.normalize(normalizer="min_max") # 3. contamination of the data -infected_data = ts_1.Contaminate.mcar(ts_1.data) +infected_data = ts_1.Contamination.mcar(ts_1.data) # 4. imputation of the contaminated data # choice of the algorithm, and their parameters (default, automl, or defined by the user) @@ -224,20 +225,20 @@ ts_1.load_timeseries(utils.search_path("eeg-alcohol")) ts_1.normalize(normalizer="min_max") # 3. contamination of the data -infected_data = ts_1.Contaminate.mcar(ts_1.data) +infected_data = ts_1.Contamination.mcar(ts_1.data) # 4. imputation of the contaminated data # imputation with AutoML which will discover the optimal hyperparameters for your dataset and your algorithm -cdrec = Imputation.MatrixCompletion.CDRec(infected_data).impute(user_defined=False, params={"ground_truth": ts_1.data, - "optimizer": "bayesian", - "options": {"n_calls": 5}}) +cdrec = Imputation.MatrixCompletion.CDRec(infected_data).impute(user_def=False, params={"ground_truth": ts_1.data, + "optimizer": "bayesian", + "options": {"n_calls": 5}}) # 5. score the imputation with the raw_data -cdrec.score(ts_1.data, cdrec.imputed_matrix) +cdrec.score(ts_1.data, cdrec.recov_data) # 6. [OPTIONAL] display the results ts_1.print_results(cdrec.metrics) -ts_1.plot(raw_data=ts_1.data, infected_data=infected_data, imputed_data=cdrec.imputed_matrix, title="imputation", +ts_1.plot(input_data=ts_1.data, incomp_data=infected_data, imputed_data=cdrec.recov_data, title="imputation", max_series=1, save_path="./imputegap/assets", display=True) # 7. [OPTIONAL] save hyperparameters @@ -268,7 +269,7 @@ ts_1 = TimeSeries() ts_1.load_timeseries(utils.search_path("eeg-alcohol")) # 3. call the explanation of your dataset with a specific algorithm to gain insight on the Imputation results -shap_values, shap_details = Explainer.shap_explainer(raw_data=ts_1.data, file_name="eeg-alcohol", algorithm="cdrec") +shap_values, shap_details = Explainer.shap_explainer(input_data=ts_1.data, file_name="eeg-alcohol", algorithm="cdrec") # [OPTIONAL] print the results with the impact of each feature. 
Explainer.print(shap_values, shap_details) @@ -285,9 +286,9 @@ To add your own imputation algorithm in Python or C++, please refer to the detai ## References -Mourad Khayati, Quentin Nater, and Jacques Pasquier. “ImputeVIS: An Interactive Evaluator to Benchmark Imputation Techniques for Time Series Data.” Proceedings of the VLDB Endowment (PVLDB). Demo Track 17, no. 1 (2024): 4329–32. +Mourad Khayati, Quentin Nater, and Jacques Pasquier. ImputeVIS: An Interactive Evaluator to Benchmark Imputation Techniques for Time Series Data. Proceedings of the VLDB Endowment (PVLDB). Demo Track 17, no. 1 (2024): 4329–32. -Mourad Khayati, Alberto Lerner, Zakhar Tymchenko, and Philippe Cudre-Mauroux. “Mind the Gap: An Experimental Evaluation of Imputation of Missing Values Techniques in Time Series.” In Proceedings of the VLDB Endowment (PVLDB), Vol. 13, 2020. +Mourad Khayati, Alberto Lerner, Zakhar Tymchenko, and Philippe Cudre-Mauroux. Mind the Gap: An Experimental Evaluation of Imputation of Missing Values Techniques in Time Series. In Proceedings of the VLDB Endowment (PVLDB), Vol. 13, 2020. --- diff --git a/imputegap.egg-info/SOURCES.txt b/imputegap.egg-info/SOURCES.txt index 4b63edb..929879d 100644 --- a/imputegap.egg-info/SOURCES.txt +++ b/imputegap.egg-info/SOURCES.txt @@ -6,7 +6,7 @@ requirements.txt setup.py imputegap/__init__.py imputegap/report.log -imputegap/runner_benchmarking.py +imputegap/runner_benchmark.py imputegap/runner_contamination.py imputegap/runner_datasets.py imputegap/runner_display.py @@ -33,16 +33,8 @@ imputegap/algorithms/lib/lib_algo.dll imputegap/algorithms/lib/lib_algo.so imputegap/algorithms/lib/lib_cdrec.so imputegap/algorithms/lib/lib_stmvl.so -imputegap/assets/BLACKOUTContamination_graph.jpg -imputegap/assets/EEG-MCARContamination_graph.jpg -imputegap/assets/MCARContamination_graph.jpg -imputegap/assets/MISSINGPERCENTANGEContamination_graph.jpg -imputegap/assets/contamination_graph.jpg -imputegap/assets/contamination_plot.jpg -imputegap/assets/imputation_graph.jpg -imputegap/assets/imputation_plot.jpg -imputegap/assets/raw_data_plot.jpg -imputegap/assets/rawdata_plot.jpg +imputegap/assets/25_01_06_14_32_52_plot.jpg +imputegap/assets/logo_imputegab.png imputegap/assets/shap/.gitkeep imputegap/assets/shap/chlorine_cdrec_DTL_Beeswarm.png imputegap/assets/shap/chlorine_cdrec_DTL_Waterfall.png @@ -173,6 +165,7 @@ imputegap/dataset/docs/meteo/2_meteo-normminmax3x500_graph.jpg imputegap/dataset/docs/meteo/features_meteo.txt imputegap/env/default_explainer.toml imputegap/env/default_values.toml +imputegap/imputegap/assets/rawdata_plot.jpg imputegap/params/optimal_parameters_b_bafu_iim.toml imputegap/params/optimal_parameters_b_bafu_mrnn.toml imputegap/params/optimal_parameters_b_bafu_stmvl.toml @@ -213,9 +206,10 @@ imputegap/params/optimal_parameters_e_fmri-stoptask_cdrec.toml imputegap/params/optimal_parameters_e_fmri-stoptask_stmvl.toml imputegap/params/optimal_parameters_g_eeg-alcohol_cdrec.toml imputegap/params/optimal_parameters_sh_eeg-reading_cdrec.toml +imputegap/params/optimal_parameters_t_eeg_cdrec.toml imputegap/recovery/README.md imputegap/recovery/__init__.py -imputegap/recovery/benchmarking.py +imputegap/recovery/benchmark.py imputegap/recovery/evaluation.py imputegap/recovery/explainer.py imputegap/recovery/imputation.py @@ -368,6 +362,9 @@ imputegap/reports/report_03/report_eeg-alcohol.txt imputegap/reports/report_03/report_eeg-reading.txt imputegap/reports/report_03/report_fmri-objectviewing.txt imputegap/reports/report_03/report_fmri-stoptask.txt 
+imputegap/test_naterq/chlorine_mcar_metrics_subplot.jpg +imputegap/test_naterq/report_chlorine.txt +imputegap/test_naterq/report_chlorine.xlsx imputegap/tools/__init__.py imputegap/tools/algorithm_parameters.py imputegap/tools/evaluator.py diff --git a/imputegap.egg-info/requires.txt b/imputegap.egg-info/requires.txt index 16efd3f..5bee54d 100644 --- a/imputegap.egg-info/requires.txt +++ b/imputegap.egg-info/requires.txt @@ -10,6 +10,7 @@ pycatch22==0.4.5 scikit-optimize==0.10.2 pyswarms==1.3.0 pytest==8.3.3 +xlsxwriter==3.2.0 types-toml types-setuptools wheel diff --git a/imputegap/__pycache__/__init__.cpython-312.pyc b/imputegap/__pycache__/__init__.cpython-312.pyc index 2141673..720262c 100644 Binary files a/imputegap/__pycache__/__init__.cpython-312.pyc and b/imputegap/__pycache__/__init__.cpython-312.pyc differ diff --git a/imputegap/algorithms/__pycache__/cdrec.cpython-312.pyc b/imputegap/algorithms/__pycache__/cdrec.cpython-312.pyc index 019b6d1..b3757b2 100644 Binary files a/imputegap/algorithms/__pycache__/cdrec.cpython-312.pyc and b/imputegap/algorithms/__pycache__/cdrec.cpython-312.pyc differ diff --git a/imputegap/algorithms/__pycache__/iim.cpython-312.pyc b/imputegap/algorithms/__pycache__/iim.cpython-312.pyc index 8aeb40c..6c3a32b 100644 Binary files a/imputegap/algorithms/__pycache__/iim.cpython-312.pyc and b/imputegap/algorithms/__pycache__/iim.cpython-312.pyc differ diff --git a/imputegap/algorithms/__pycache__/mean_impute.cpython-312.pyc b/imputegap/algorithms/__pycache__/mean_impute.cpython-312.pyc index 4714509..bd2a6eb 100644 Binary files a/imputegap/algorithms/__pycache__/mean_impute.cpython-312.pyc and b/imputegap/algorithms/__pycache__/mean_impute.cpython-312.pyc differ diff --git a/imputegap/algorithms/__pycache__/min_impute.cpython-312.pyc b/imputegap/algorithms/__pycache__/min_impute.cpython-312.pyc index 38731d5..67563b0 100644 Binary files a/imputegap/algorithms/__pycache__/min_impute.cpython-312.pyc and b/imputegap/algorithms/__pycache__/min_impute.cpython-312.pyc differ diff --git a/imputegap/algorithms/__pycache__/mrnn.cpython-312.pyc b/imputegap/algorithms/__pycache__/mrnn.cpython-312.pyc index cbaac9f..3e820a5 100644 Binary files a/imputegap/algorithms/__pycache__/mrnn.cpython-312.pyc and b/imputegap/algorithms/__pycache__/mrnn.cpython-312.pyc differ diff --git a/imputegap/algorithms/__pycache__/stmvl.cpython-312.pyc b/imputegap/algorithms/__pycache__/stmvl.cpython-312.pyc index 9e7995a..66c9c33 100644 Binary files a/imputegap/algorithms/__pycache__/stmvl.cpython-312.pyc and b/imputegap/algorithms/__pycache__/stmvl.cpython-312.pyc differ diff --git a/imputegap/algorithms/__pycache__/zero_impute.cpython-312.pyc b/imputegap/algorithms/__pycache__/zero_impute.cpython-312.pyc index 5cd17d3..492f8f3 100644 Binary files a/imputegap/algorithms/__pycache__/zero_impute.cpython-312.pyc and b/imputegap/algorithms/__pycache__/zero_impute.cpython-312.pyc differ diff --git a/imputegap/algorithms/cdrec.py b/imputegap/algorithms/cdrec.py index 93f6deb..86e92fd 100644 --- a/imputegap/algorithms/cdrec.py +++ b/imputegap/algorithms/cdrec.py @@ -101,13 +101,13 @@ def native_cdrec(__py_matrix, __py_rank, __py_epsilon, __py_iterations): return __py_imputed_matrix; -def cdrec(contamination, truncation_rank, iterations, epsilon, logs=True, lib_path=None): +def cdrec(incomp_data, truncation_rank, iterations, epsilon, logs=True, lib_path=None): """ CDRec algorithm for matrix imputation of missing values using Centroid Decomposition. 
Parameters ---------- - contamination : numpy.ndarray + incomp_data : numpy.ndarray The input matrix with contamination (missing values represented as NaNs). truncation_rank : int The truncation rank for matrix decomposition (must be greater than 1 and smaller than the number of series). @@ -127,18 +127,18 @@ def cdrec(contamination, truncation_rank, iterations, epsilon, logs=True, lib_pa Example ------- - >>> imputed_data = cdrec(contamination=contamination_matrix, truncation_rank=1, iterations=100, epsilon=0.000001, logs=True) - >>> print(imputed_data) + >>> recov_data = cdrec(incomp_data=incomp_data, truncation_rank=1, iterations=100, epsilon=0.000001, logs=True) + >>> print(recov_data) """ start_time = time.time() # Record start time # Call the C++ function to perform recovery - imputed_matrix = native_cdrec(contamination, truncation_rank, epsilon, iterations) + recov_data = native_cdrec(incomp_data, truncation_rank, epsilon, iterations) end_time = time.time() if logs: print(f"\n\t\t> logs, imputation cdrec - Execution Time: {(end_time - start_time):.4f} seconds\n") - return imputed_matrix + return recov_data diff --git a/imputegap/algorithms/cpp_integration.py b/imputegap/algorithms/cpp_integration.py index b9e1f7c..ec302ee 100644 --- a/imputegap/algorithms/cpp_integration.py +++ b/imputegap/algorithms/cpp_integration.py @@ -119,11 +119,11 @@ def your_algo(contamination, param, logs=True): start_time = time.time() # Record start time # Call the C++ function to perform recovery - imputed_matrix = native_algo(contamination, param) + recov_data = native_algo(contamination, param) end_time = time.time() if logs: print(f"\n\t\t> logs, imputation algo - Execution Time: {(end_time - start_time):.4f} seconds\n") - return imputed_matrix + return recov_data diff --git a/imputegap/algorithms/iim.py b/imputegap/algorithms/iim.py index 485b04b..2ac16dc 100644 --- a/imputegap/algorithms/iim.py +++ b/imputegap/algorithms/iim.py @@ -2,13 +2,13 @@ from imputegap.wrapper.AlgoPython.IIM.testerIIM import impute_with_algorithm -def iim(contamination, number_neighbor, algo_code, logs=True): +def iim(incomp_data, number_neighbor, algo_code, logs=True): """ Perform imputation using the Iterative Imputation Method (IIM) algorithm. Parameters ---------- - contamination : numpy.ndarray + incomp_data : numpy.ndarray The input matrix with contamination (missing values represented as NaNs). number_neighbor : int The number of neighbors to use for the K-Nearest Neighbors (KNN) classifier (default is 10). 
@@ -31,8 +31,8 @@ def iim(contamination, number_neighbor, algo_code, logs=True): Example ------- - >>> imputed_data = iim(contamination_matrix, number_neighbor=10, algo_code="iim 2") - >>> print(imputed_data) + >>> recov_data = iim(incomp_data, number_neighbor=10, algo_code="iim 2") + >>> print(recov_data) References ---------- @@ -41,10 +41,10 @@ def iim(contamination, number_neighbor, algo_code, logs=True): """ start_time = time.time() # Record start time - imputed_matrix = impute_with_algorithm(algo_code, contamination.copy(), number_neighbor) + recov_data = impute_with_algorithm(algo_code, incomp_data.copy(), number_neighbor) end_time = time.time() if logs: print(f"\n\t\t> logs, imputation iim - Execution Time: {(end_time - start_time):.4f} seconds\n") - return imputed_matrix + return recov_data diff --git a/imputegap/algorithms/mean_impute.py b/imputegap/algorithms/mean_impute.py index c5d8b3b..58fbe66 100644 --- a/imputegap/algorithms/mean_impute.py +++ b/imputegap/algorithms/mean_impute.py @@ -1,13 +1,13 @@ import numpy as np -def mean_impute(contamination, params=None): +def mean_impute(incomp_data, params=None): """ Impute NaN values with the mean value of the time series. Parameters ---------- - contamination : numpy.ndarray + incomp_data : numpy.ndarray The input time series with contamination (missing values represented as NaNs). params : dict, optional Optional parameters for the algorithm. If None, the minimum value from the contamination is used (default is None). @@ -24,18 +24,18 @@ def mean_impute(contamination, params=None): Example ------- - >>> contamination = np.array([[5, 2, np.nan], [3, np.nan, 6]]) - >>> imputed_matrix = mean_impute(contamination) - >>> print(imputed_matrix) + >>> incomp_data = np.array([[5, 2, np.nan], [3, np.nan, 6]]) + >>> recov_data = mean_impute(incomp_data) + >>> print(recov_data) array([[5., 2., 4.], [3., 4., 6.]]) """ # logic - mean_value = np.nanmean(contamination) + mean_value = np.nanmean(incomp_data) # Imputation - imputed_matrix = np.nan_to_num(contamination, nan=mean_value) + recov_data = np.nan_to_num(incomp_data, nan=mean_value) - return imputed_matrix + return recov_data diff --git a/imputegap/algorithms/min_impute.py b/imputegap/algorithms/min_impute.py index adf782c..c6b56d6 100644 --- a/imputegap/algorithms/min_impute.py +++ b/imputegap/algorithms/min_impute.py @@ -1,13 +1,13 @@ import numpy as np -def min_impute(contamination, params=None): +def min_impute(incomp_data, params=None): """ Impute NaN values with the minimum value of the time series. Parameters ---------- - contamination : numpy.ndarray + incomp_data : numpy.ndarray The input time series with contamination (missing values represented as NaNs). params : dict, optional Optional parameters for the algorithm. If None, the minimum value from the contamination is used (default is None). 
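The statistics wrappers above (mean_impute, min_impute) and the algorithm wrappers such as cdrec and iim all follow the same renamed shape after this change: take incomp_data, run the recovery step, and return recov_data, with optional timing logs. A minimal standalone sketch of that shape, using a stand-in recovery step rather than any real ImputeGAP solver:

```
import time
import numpy as np

def generic_wrapper(incomp_data, logs=True):
    # incomp_data: matrix with missing values encoded as NaNs
    start_time = time.time()
    # stand-in recovery step; the real wrappers call their native or Python solver here
    recov_data = np.nan_to_num(incomp_data, nan=np.nanmean(incomp_data))
    end_time = time.time()
    if logs:
        print(f"\n\t\t> logs, imputation generic - Execution Time: {(end_time - start_time):.4f} seconds\n")
    return recov_data
```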
@@ -24,18 +24,18 @@ def min_impute(contamination, params=None): Example ------- - >>> contamination = np.array([[1, 2, np.nan], [4, np.nan, 6]]) - >>> imputed_matrix = min_impute(contamination) - >>> print(imputed_matrix) + >>> incomp_data = np.array([[1, 2, np.nan], [4, np.nan, 6]]) + >>> recov_data = min_impute(incomp_data) + >>> print(recov_data) array([[1., 2., 1.], [4., 1., 6.]]) """ # logic - min_value = np.nanmin(contamination) + min_value = np.nanmin(incomp_data) # Imputation - imputed_matrix = np.nan_to_num(contamination, nan=min_value) + recov_data = np.nan_to_num(incomp_data, nan=min_value) - return imputed_matrix + return recov_data diff --git a/imputegap/algorithms/mrnn.py b/imputegap/algorithms/mrnn.py index ed4eb32..3b43b58 100644 --- a/imputegap/algorithms/mrnn.py +++ b/imputegap/algorithms/mrnn.py @@ -2,13 +2,13 @@ from imputegap.wrapper.AlgoPython.MRNN.testerMRNN import mrnn_recov -def mrnn(contamination, hidden_dim, learning_rate, iterations, sequence_length, logs=True): +def mrnn(incomp_data, hidden_dim, learning_rate, iterations, sequence_length, logs=True): """ Perform imputation using the Multivariate Recurrent Neural Network (MRNN) algorithm. Parameters ---------- - contamination : numpy.ndarray + incomp_data : numpy.ndarray The input matrix with contamination (missing values represented as NaNs). hidden_dim : int The number of hidden dimensions in the MRNN model. @@ -34,8 +34,8 @@ def mrnn(contamination, hidden_dim, learning_rate, iterations, sequence_length, Example ------- - >>> imputed_data = mrnn(contamination_matrix, hidden_dim=64, learning_rate=0.001, iterations=1000, sequence_length=7) - >>> print(imputed_data) + >>> recov_data = mrnn(incomp_data, hidden_dim=64, learning_rate=0.001, iterations=1000, sequence_length=7) + >>> print(recov_data) References ---------- @@ -43,11 +43,11 @@ def mrnn(contamination, hidden_dim, learning_rate, iterations, sequence_length, """ start_time = time.time() # Record start time - imputed_matrix = mrnn_recov(matrix_in=contamination, hidden_dim=hidden_dim, learning_rate=learning_rate, - iterations=iterations, seq_length=sequence_length) + recov_data = mrnn_recov(matrix_in=incomp_data, hidden_dim=hidden_dim, learning_rate=learning_rate, + iterations=iterations, seq_length=sequence_length) end_time = time.time() if logs: print(f"\n\t\t> logs, imputation mrnn - Execution Time: {(end_time - start_time):.4f} seconds\n") - return imputed_matrix + return recov_data diff --git a/imputegap/algorithms/stmvl.py b/imputegap/algorithms/stmvl.py index c97272b..93bf409 100644 --- a/imputegap/algorithms/stmvl.py +++ b/imputegap/algorithms/stmvl.py @@ -76,8 +76,8 @@ def native_stmvl(__py_matrix, __py_window, __py_gamma, __py_alpha): Example ------- - >>> imputed_data = stmvl(contamination=contamination_matrix, window_size=2, gamma=0.85, alpha=7) - >>> print(imputed_data) + >>> recov_data = stmvl(incomp_data=incomp_data, window_size=2, gamma=0.85, alpha=7) + >>> print(recov_data) References ---------- @@ -120,28 +120,28 @@ def native_stmvl(__py_matrix, __py_window, __py_gamma, __py_alpha): return __py_recovered; -def stmvl(contamination, window_size, gamma, alpha, logs=True): +def stmvl(incomp_data, window_size, gamma, alpha, logs=True): """ CDREC algorithm for imputation of missing data :author: Quentin Nater - :param contamination: time series with contamination + :param incomp_data: time series with contamination :param window_size: window size for temporal component :param gamma: smoothing parameter for temporal weight :param alpha: power for 
spatial weight :param logs: print logs of time execution - :return: imputed_matrix, metrics : all time series with imputation data and their metrics + :return: recov_data, metrics : all time series with imputation data and their metrics """ start_time = time.time() # Record start time # Call the C++ function to perform recovery - imputed_matrix = native_stmvl(contamination, window_size, gamma, alpha) + recov_data = native_stmvl(incomp_data, window_size, gamma, alpha) end_time = time.time() if logs: print(f"\n\t\t> logs, imputation stvml - Execution Time: {(end_time - start_time):.4f} seconds\n") - return imputed_matrix + return recov_data diff --git a/imputegap/algorithms/zero_impute.py b/imputegap/algorithms/zero_impute.py index e6a64d3..da456e4 100644 --- a/imputegap/algorithms/zero_impute.py +++ b/imputegap/algorithms/zero_impute.py @@ -1,13 +1,13 @@ import numpy as np -def zero_impute(contamination, params=None): +def zero_impute(incomp_data, params=None): """ Impute missing values (NaNs) with zeros in the time series. Parameters ---------- - contamination : numpy.ndarray + incomp_data : numpy.ndarray The input time series matrix with missing values represented as NaNs. params : dict, optional Optional parameters for the algorithm. This is not used in the current implementation but can be passed for future extensions (default is None). @@ -23,14 +23,14 @@ def zero_impute(contamination, params=None): Example ------- - >>> contamination = np.array([[1, 2, np.nan], [4, np.nan, 6]]) - >>> imputed_matrix = zero_impute(contamination) - >>> print(imputed_matrix) + >>> incomp_data = np.array([[1, 2, np.nan], [4, np.nan, 6]]) + >>> recov_data = zero_impute(incomp_data) + >>> print(recov_data) array([[1., 2., 0.], [4., 0., 6.]]) :author: Quentin Nater """ - imputed_matrix = np.nan_to_num(contamination, nan=0) + recov_data = np.nan_to_num(incomp_data, nan=0) - return imputed_matrix + return recov_data diff --git a/imputegap/assets/BLACKOUTContamination_graph.jpg b/imputegap/assets/BLACKOUTContamination_graph.jpg deleted file mode 100644 index 0a8ef8b..0000000 Binary files a/imputegap/assets/BLACKOUTContamination_graph.jpg and /dev/null differ diff --git a/imputegap/assets/EEG-MCARContamination_graph.jpg b/imputegap/assets/EEG-MCARContamination_graph.jpg deleted file mode 100644 index ddc4dc5..0000000 Binary files a/imputegap/assets/EEG-MCARContamination_graph.jpg and /dev/null differ diff --git a/imputegap/assets/MCARContamination_graph.jpg b/imputegap/assets/MCARContamination_graph.jpg deleted file mode 100644 index 2b30af4..0000000 Binary files a/imputegap/assets/MCARContamination_graph.jpg and /dev/null differ diff --git a/imputegap/assets/MISSINGPERCENTANGEContamination_graph.jpg b/imputegap/assets/MISSINGPERCENTANGEContamination_graph.jpg deleted file mode 100644 index 8042899..0000000 Binary files a/imputegap/assets/MISSINGPERCENTANGEContamination_graph.jpg and /dev/null differ diff --git a/imputegap/assets/contamination_graph.jpg b/imputegap/assets/contamination_graph.jpg deleted file mode 100644 index aab2efb..0000000 Binary files a/imputegap/assets/contamination_graph.jpg and /dev/null differ diff --git a/imputegap/assets/contamination_plot.jpg b/imputegap/assets/contamination_plot.jpg deleted file mode 100644 index dd6f9fc..0000000 Binary files a/imputegap/assets/contamination_plot.jpg and /dev/null differ diff --git a/imputegap/assets/imputation_graph.jpg b/imputegap/assets/imputation_graph.jpg deleted file mode 100644 index 7ffdcf3..0000000 Binary files 
a/imputegap/assets/imputation_graph.jpg and /dev/null differ diff --git a/imputegap/assets/imputation_plot.jpg b/imputegap/assets/imputation_plot.jpg deleted file mode 100644 index 7ffdcf3..0000000 Binary files a/imputegap/assets/imputation_plot.jpg and /dev/null differ diff --git a/imputegap/assets/logo_imputegab.png b/imputegap/assets/logo_imputegab.png new file mode 100644 index 0000000..7a9473b Binary files /dev/null and b/imputegap/assets/logo_imputegab.png differ diff --git a/imputegap/assets/raw_data_plot.jpg b/imputegap/assets/raw_data_plot.jpg deleted file mode 100644 index dbfb5e9..0000000 Binary files a/imputegap/assets/raw_data_plot.jpg and /dev/null differ diff --git a/imputegap/assets/rawdata_plot.jpg b/imputegap/assets/rawdata_plot.jpg deleted file mode 100644 index 6fab034..0000000 Binary files a/imputegap/assets/rawdata_plot.jpg and /dev/null differ diff --git a/imputegap/assets/shap/chlorine_cdrec_DTL_Beeswarm.png b/imputegap/assets/shap/chlorine_cdrec_DTL_Beeswarm.png index 1b94847..fc1bed5 100644 Binary files a/imputegap/assets/shap/chlorine_cdrec_DTL_Beeswarm.png and b/imputegap/assets/shap/chlorine_cdrec_DTL_Beeswarm.png differ diff --git a/imputegap/assets/shap/chlorine_cdrec_DTL_Waterfall.png b/imputegap/assets/shap/chlorine_cdrec_DTL_Waterfall.png index 826dda3..ab7d8fc 100644 Binary files a/imputegap/assets/shap/chlorine_cdrec_DTL_Waterfall.png and b/imputegap/assets/shap/chlorine_cdrec_DTL_Waterfall.png differ diff --git a/imputegap/assets/shap/chlorine_cdrec_results.txt b/imputegap/assets/shap/chlorine_cdrec_results.txt index 2265b48..015b837 100644 --- a/imputegap/assets/shap/chlorine_cdrec_results.txt +++ b/imputegap/assets/shap/chlorine_cdrec_results.txt @@ -1,22 +1,22 @@ -Feature : 1 cdrec with a score of 90.54 Geometry 10-bin histogram mode DN_HistogramMode_10 -Feature : 12 cdrec with a score of 3.99 Correlation Change in autocorrelation timescale after incremental differencing FC_LocalSimple_mean1_tauresrat -Feature : 5 cdrec with a score of 3.83 Correlation Time reversibility CO_trev_1_num -Feature : 18 cdrec with a score of 0.57 Geometry Rescaled range fluctuation analysis (low-scale scaling) SC_FluctAnal_2_rsrangefit_50_1_logi_prop_r1 -Feature : 13 cdrec with a score of 0.37 Geometry Positive outlier timing DN_OutlierInclude_p_001_mdrmd -Feature : 3 cdrec with a score of 0.33 Correlation First minimum of the ACF CO_FirstMin_ac -Feature : 14 cdrec with a score of 0.29 Geometry Negative outlier timing DN_OutlierInclude_n_001_mdrmd -Feature : 6 cdrec with a score of 0.09 Geometry Proportion of high incremental changes in the series MD_hrv_classic_pnn40 -Feature : 0 cdrec with a score of 0.0 Geometry 5-bin histogram mode DN_HistogramMode_5 -Feature : 2 cdrec with a score of 0.0 Correlation First 1/e crossing of the ACF CO_f1ecac -Feature : 4 cdrec with a score of 0.0 Correlation Histogram-based automutual information (lag 2, 5 bins) CO_HistogramAMI_even_2_5 +Feature : 6 cdrec with a score of 58.09 Geometry Proportion of high incremental changes in the series MD_hrv_classic_pnn40 +Feature : 5 cdrec with a score of 9.13 Correlation Time reversibility CO_trev_1_num +Feature : 2 cdrec with a score of 6.2 Correlation First 1/e crossing of the ACF CO_f1ecac +Feature : 15 cdrec with a score of 5.34 Transformation Power in the lowest 20% of frequencies SP_Summaries_welch_rect_area_5_1 +Feature : 10 cdrec with a score of 4.95 Geometry Goodness of exponential fit to embedding distance distribution CO_Embed2_Dist_tau_d_expfit_meandiff +Feature : 1 cdrec with a score 
of 3.76 Geometry 10-bin histogram mode DN_HistogramMode_10 +Feature : 12 cdrec with a score of 3.39 Correlation Change in autocorrelation timescale after incremental differencing FC_LocalSimple_mean1_tauresrat +Feature : 0 cdrec with a score of 2.41 Geometry 5-bin histogram mode DN_HistogramMode_5 +Feature : 17 cdrec with a score of 2.32 Trend Entropy of successive pairs in symbolized series SB_MotifThree_quantile_hh +Feature : 21 cdrec with a score of 2.24 Trend Error of 3-point rolling mean forecast FC_LocalSimple_mean3_stderr +Feature : 4 cdrec with a score of 1.42 Correlation Histogram-based automutual information (lag 2, 5 bins) CO_HistogramAMI_even_2_5 +Feature : 8 cdrec with a score of 0.38 Geometry Transition matrix column variance SB_TransitionMatrix_3ac_sumdiagcov +Feature : 13 cdrec with a score of 0.36 Geometry Positive outlier timing DN_OutlierInclude_p_001_mdrmd +Feature : 14 cdrec with a score of 0.01 Geometry Negative outlier timing DN_OutlierInclude_n_001_mdrmd +Feature : 3 cdrec with a score of 0.0 Correlation First minimum of the ACF CO_FirstMin_ac Feature : 7 cdrec with a score of 0.0 Geometry Longest stretch of above-mean values SB_BinaryStats_mean_longstretch1 -Feature : 8 cdrec with a score of 0.0 Geometry Transition matrix column variance SB_TransitionMatrix_3ac_sumdiagcov Feature : 9 cdrec with a score of 0.0 Trend Wangs periodicity metric PD_PeriodicityWang_th0_01 -Feature : 10 cdrec with a score of 0.0 Geometry Goodness of exponential fit to embedding distance distribution CO_Embed2_Dist_tau_d_expfit_meandiff Feature : 11 cdrec with a score of 0.0 Correlation First minimum of the AMI function IN_AutoMutualInfoStats_40_gaussian_fmmi -Feature : 15 cdrec with a score of 0.0 Transformation Power in the lowest 20% of frequencies SP_Summaries_welch_rect_area_5_1 Feature : 16 cdrec with a score of 0.0 Geometry Longest stretch of decreasing values SB_BinaryStats_diff_longstretch0 -Feature : 17 cdrec with a score of 0.0 Trend Entropy of successive pairs in symbolized series SB_MotifThree_quantile_hh +Feature : 18 cdrec with a score of 0.0 Geometry Rescaled range fluctuation analysis (low-scale scaling) SC_FluctAnal_2_rsrangefit_50_1_logi_prop_r1 Feature : 19 cdrec with a score of 0.0 Geometry Detrended fluctuation analysis (low-scale scaling) SC_FluctAnal_2_dfa_50_1_2_logi_prop_r1 Feature : 20 cdrec with a score of 0.0 Transformation Centroid frequency SP_Summaries_welch_rect_centroid -Feature : 21 cdrec with a score of 0.0 Trend Error of 3-point rolling mean forecast FC_LocalSimple_mean3_stderr diff --git a/imputegap/assets/shap/chlorine_cdrec_shap_aggregate_plot.png b/imputegap/assets/shap/chlorine_cdrec_shap_aggregate_plot.png index 136ae00..d127afd 100644 Binary files a/imputegap/assets/shap/chlorine_cdrec_shap_aggregate_plot.png and b/imputegap/assets/shap/chlorine_cdrec_shap_aggregate_plot.png differ diff --git a/imputegap/assets/shap/chlorine_cdrec_shap_aggregate_reverse_plot.png b/imputegap/assets/shap/chlorine_cdrec_shap_aggregate_reverse_plot.png index 842ad10..89e7fbf 100644 Binary files a/imputegap/assets/shap/chlorine_cdrec_shap_aggregate_reverse_plot.png and b/imputegap/assets/shap/chlorine_cdrec_shap_aggregate_reverse_plot.png differ diff --git a/imputegap/assets/shap/chlorine_cdrec_shap_correlation_plot.png b/imputegap/assets/shap/chlorine_cdrec_shap_correlation_plot.png index e83e663..b5f67ad 100644 Binary files a/imputegap/assets/shap/chlorine_cdrec_shap_correlation_plot.png and b/imputegap/assets/shap/chlorine_cdrec_shap_correlation_plot.png differ diff 
--git a/imputegap/assets/shap/chlorine_cdrec_shap_geometry_plot.png b/imputegap/assets/shap/chlorine_cdrec_shap_geometry_plot.png index f0eca2d..4b01c30 100644 Binary files a/imputegap/assets/shap/chlorine_cdrec_shap_geometry_plot.png and b/imputegap/assets/shap/chlorine_cdrec_shap_geometry_plot.png differ diff --git a/imputegap/assets/shap/chlorine_cdrec_shap_plot.png b/imputegap/assets/shap/chlorine_cdrec_shap_plot.png index b64bb11..c05c662 100644 Binary files a/imputegap/assets/shap/chlorine_cdrec_shap_plot.png and b/imputegap/assets/shap/chlorine_cdrec_shap_plot.png differ diff --git a/imputegap/assets/shap/chlorine_cdrec_shap_reverse_plot.png b/imputegap/assets/shap/chlorine_cdrec_shap_reverse_plot.png index 30ad3b5..1ee24f1 100644 Binary files a/imputegap/assets/shap/chlorine_cdrec_shap_reverse_plot.png and b/imputegap/assets/shap/chlorine_cdrec_shap_reverse_plot.png differ diff --git a/imputegap/assets/shap/chlorine_cdrec_shap_transformation_plot.png b/imputegap/assets/shap/chlorine_cdrec_shap_transformation_plot.png index 9e1c31a..e273f90 100644 Binary files a/imputegap/assets/shap/chlorine_cdrec_shap_transformation_plot.png and b/imputegap/assets/shap/chlorine_cdrec_shap_transformation_plot.png differ diff --git a/imputegap/assets/shap/chlorine_cdrec_shap_trend_plot.png b/imputegap/assets/shap/chlorine_cdrec_shap_trend_plot.png index 75a03f7..1ed3924 100644 Binary files a/imputegap/assets/shap/chlorine_cdrec_shap_trend_plot.png and b/imputegap/assets/shap/chlorine_cdrec_shap_trend_plot.png differ diff --git a/imputegap/dataset/README.md b/imputegap/dataset/README.md index 18e5abb..149f35d 100644 --- a/imputegap/dataset/README.md +++ b/imputegap/dataset/README.md @@ -577,3 +577,5 @@ Finally, BAFU - normalized 20x400 demonstrates the impact of "MIN-MAX" normaliza | Granularity | 30 minutes | | Observations | spans years 1974 to 2015 | | Dataset dimensions | N=50000 M=10 | + + diff --git a/imputegap/env/default_values.toml b/imputegap/env/default_values.toml index 7135392..1fe62f3 100644 --- a/imputegap/env/default_values.toml +++ b/imputegap/env/default_values.toml @@ -21,13 +21,13 @@ sequence_length = 7 [greedy] n_calls = 250 -selected_metrics='RMSE' +metrics='RMSE' [bayesian] n_calls = 2 n_random_starts = 50 acq_func = 'gp_hedge' -selected_metrics='RMSE' +metrics='RMSE' [pso] n_particles = 50 @@ -36,13 +36,13 @@ c2 = 0.3 w = 0.9 iterations=10 n_processes=1 -selected_metrics='RMSE' +metrics='RMSE' [sh] num_configs = 10 num_iterations = 2 reduction_factor = 10 -selected_metrics="RMSE" +metrics="RMSE" [explainer] diff --git a/imputegap/imputegap/assets/rawdata_plot.jpg b/imputegap/imputegap/assets/rawdata_plot.jpg new file mode 100644 index 0000000..5c7b06f Binary files /dev/null and b/imputegap/imputegap/assets/rawdata_plot.jpg differ diff --git a/imputegap/params/optimal_parameters_t_eeg_cdrec.toml b/imputegap/params/optimal_parameters_t_eeg_cdrec.toml new file mode 100644 index 0000000..e47a8de --- /dev/null +++ b/imputegap/params/optimal_parameters_t_eeg_cdrec.toml @@ -0,0 +1,4 @@ +[cdrec] +rank = 6 +epsilon = 7.662399122383144e-5 +iteration = 329 diff --git a/imputegap/recovery/README.md b/imputegap/recovery/README.md index 8325be0..96570b2 100644 --- a/imputegap/recovery/README.md +++ b/imputegap/recovery/README.md @@ -2,7 +2,7 @@

# CONTAMINATION -## Scenarios +## Patterns
@@ -14,7 +14,7 @@ (parameter table; columns: parameter / Definition; unchanged rows: M = Number of time series, S = percentage of series selected, P = Starting position (protection))
-R | Missing rate of the scenario
+R | Missing rate of the pattern
@@ -29,7 +29,7 @@ ### MCAR MCAR selects random series and removes blocks at random positions until a total of W of all points of the time series are missing.
-This scenario uses a random number generator with a fixed seed and will produce the same blocks every run.
+This pattern uses a random number generator with a fixed seed and will produce the same blocks every run.
@@ -82,7 +82,7 @@ ### BLACKOUT
-The **BLACKOUT** scenario selects all time series to introduce missing values. It removes a set percentage of data points from all series, creating gaps for further analysis.
+The **BLACKOUT** pattern selects all time series to introduce missing values. It removes a set percentage of data points from all series, creating gaps for further analysis.
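For reference, a minimal sketch of how these renamed patterns are invoked; the argument names mirror the calls in the benchmark code of this changeset, while the dataset and rates are only illustrative:

```
from imputegap.recovery.manager import TimeSeries
from imputegap.tools import utils

ts = TimeSeries()
ts.load_timeseries(data=utils.search_path("eeg-alcohol"), max_series=10, max_values=200)
ts.normalize()

# MCAR: random series, random block positions, reproducible with a fixed seed
mcar_data = ts.Contamination.mcar(input_data=ts.data, series_rate=0.4, missing_rate=0.2, block_size=10, seed=True)

# MISSING PERCENTAGE: a share of the values of the selected series is removed
mp_data = ts.Contamination.missing_percentage(input_data=ts.data, series_rate=0.4, missing_rate=0.2)

# BLACKOUT: every series loses the same share of values
blackout_data = ts.Contamination.blackout(input_data=ts.data, missing_rate=0.2)
```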
diff --git a/imputegap/recovery/__pycache__/benchmark.cpython-312.pyc b/imputegap/recovery/__pycache__/benchmark.cpython-312.pyc new file mode 100644 index 0000000..46527c9 Binary files /dev/null and b/imputegap/recovery/__pycache__/benchmark.cpython-312.pyc differ diff --git a/imputegap/recovery/__pycache__/benchmarking.cpython-312.pyc b/imputegap/recovery/__pycache__/benchmarking.cpython-312.pyc deleted file mode 100644 index 36af878..0000000 Binary files a/imputegap/recovery/__pycache__/benchmarking.cpython-312.pyc and /dev/null differ diff --git a/imputegap/recovery/__pycache__/evaluation.cpython-312.pyc b/imputegap/recovery/__pycache__/evaluation.cpython-312.pyc index cd22c13..814ff24 100644 Binary files a/imputegap/recovery/__pycache__/evaluation.cpython-312.pyc and b/imputegap/recovery/__pycache__/evaluation.cpython-312.pyc differ diff --git a/imputegap/recovery/__pycache__/explainer.cpython-312.pyc b/imputegap/recovery/__pycache__/explainer.cpython-312.pyc index c221aea..f4763a9 100644 Binary files a/imputegap/recovery/__pycache__/explainer.cpython-312.pyc and b/imputegap/recovery/__pycache__/explainer.cpython-312.pyc differ diff --git a/imputegap/recovery/__pycache__/imputation.cpython-312.pyc b/imputegap/recovery/__pycache__/imputation.cpython-312.pyc index 7efc13b..ffe9bda 100644 Binary files a/imputegap/recovery/__pycache__/imputation.cpython-312.pyc and b/imputegap/recovery/__pycache__/imputation.cpython-312.pyc differ diff --git a/imputegap/recovery/__pycache__/manager.cpython-312.pyc b/imputegap/recovery/__pycache__/manager.cpython-312.pyc index 1991277..ce0d141 100644 Binary files a/imputegap/recovery/__pycache__/manager.cpython-312.pyc and b/imputegap/recovery/__pycache__/manager.cpython-312.pyc differ diff --git a/imputegap/recovery/__pycache__/optimization.cpython-312.pyc b/imputegap/recovery/__pycache__/optimization.cpython-312.pyc index faae109..fdb774b 100644 Binary files a/imputegap/recovery/__pycache__/optimization.cpython-312.pyc and b/imputegap/recovery/__pycache__/optimization.cpython-312.pyc differ diff --git a/imputegap/recovery/benchmark.py b/imputegap/recovery/benchmark.py new file mode 100644 index 0000000..29a5d74 --- /dev/null +++ b/imputegap/recovery/benchmark.py @@ -0,0 +1,697 @@ +import datetime +import importlib +import os +import math +import time +import numpy as np +import matplotlib.pyplot as plt + +import xlsxwriter + +from imputegap.tools import utils +from imputegap.recovery.imputation import Imputation +from imputegap.recovery.manager import TimeSeries + + +class Benchmark: + """ + A class to evaluate the performance of imputation algorithms through benchmarking across datasets and patterns. + + Methods + ------- + _config_optimization(): + Configure and execute optimization for a selected imputation algorithm and contamination pattern. + avg_results(): + Calculate average metrics (e.g., RMSE) across multiple datasets and algorithm runs. + generate_matrix(): + Generate and save a heatmap visualization of RMSE scores for datasets and algorithms. + generate_reports(): + Create detailed text-based reports summarizing metrics and timing results for all evaluations. + generate_plots(): + Visualize metrics (e.g., RMSE, MAE) and timing (e.g., imputation, optimization) across patterns and datasets. + comprehensive_evaluation(): + Perform a complete benchmarking pipeline, including contamination, imputation, evaluation, and reporting. 
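Before the Example output that follows, a minimal sketch of how this class is meant to be driven; the argument values are illustrative, and "user_def" keeps the default parameters so no AutoML pass is triggered:

```
from imputegap.recovery.benchmark import Benchmark

bench = Benchmark()
runs, rmse_matrix = bench.eval(
    algorithms=["cdrec", "mean"],      # imputation algorithms to compare
    datasets=["eeg-alcohol"],          # datasets resolved through utils.search_path
    patterns=["mcar"],                 # contamination patterns to apply
    x_axis=[0.05, 0.1, 0.2, 0.4],      # missing rates
    optimizers=["user_def"],           # default parameters, no AutoML pass
    save_dir="./reports",
    runs=1,
)
# runs holds the per-run score dictionaries; rmse_matrix is the averaged
# RMSE matrix that also feeds generate_heatmap().
```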
+ + Example + ------- + output : {'drift': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9234927128429051, 'MAE': 0.7219362152785619, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0010309219360351562, 'optimization': 0, 'imputation': 0.0005755424499511719}}, '0.1': {'scores': {'RMSE': 0.9699990038879407, 'MAE': 0.7774057495176013, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0020699501037597656, 'optimization': 0, 'imputation': 0.00048422813415527344}}, '0.2': {'scores': {'RMSE': 0.9914069853975623, 'MAE': 0.8134840739732964, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.007096290588378906, 'optimization': 0, 'imputation': 0.000461578369140625}}, '0.4': {'scores': {'RMSE': 1.0552448338389784, 'MAE': 0.7426695186604741, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.043192148208618164, 'optimization': 0, 'imputation': 0.0005095005035400391}}, '0.6': {'scores': {'RMSE': 1.0143105930114702, 'MAE': 0.7610548321723654, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.17184901237487793, 'optimization': 0, 'imputation': 0.0005536079406738281}}, '0.8': {'scores': {'RMSE': 1.010712060535523, 'MAE': 0.7641520748788702, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.6064670085906982, 'optimization': 0, 'imputation': 0.0005743503570556641}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.23303624184873978, 'MAE': 0.13619797235197734, 'MI': 1.2739817718416822, 'CORRELATION': 0.968435455112644}, 'times': {'contamination': 0.0009615421295166016, 'optimization': 0, 'imputation': 0.09218788146972656}}, '0.1': {'scores': {'RMSE': 0.18152059329152104, 'MAE': 0.09925566629402761, 'MI': 1.1516089897042538, 'CORRELATION': 0.9829398352220718}, 'times': {'contamination': 0.00482487678527832, 'optimization': 0, 'imputation': 0.09549617767333984}}, '0.2': {'scores': {'RMSE': 0.13894771223733138, 'MAE': 0.08459032692102293, 'MI': 1.186191167936035, 'CORRELATION': 0.9901338133811375}, 'times': {'contamination': 0.01713728904724121, 'optimization': 0, 'imputation': 0.1129295825958252}}, '0.4': {'scores': {'RMSE': 0.7544523683503829, 'MAE': 0.11218049973594252, 'MI': 0.021165172206064526, 'CORRELATION': 0.814120507570725}, 'times': {'contamination': 0.10881781578063965, 'optimization': 0, 'imputation': 1.9378046989440918}}, '0.6': {'scores': {'RMSE': 0.4355197572001326, 'MAE': 0.1380846624733049, 'MI': 0.10781252370591506, 'CORRELATION': 0.9166777087122915}, 'times': {'contamination': 0.2380077838897705, 'optimization': 0, 'imputation': 1.8785057067871094}}, '0.8': {'scores': {'RMSE': 0.7672558930795506, 'MAE': 0.32988968428439397, 'MI': 0.013509125598802707, 'CORRELATION': 0.7312998041323675}, 'times': {'contamination': 0.6805167198181152, 'optimization': 0, 'imputation': 1.9562773704528809}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.5434405584289141, 'MAE': 0.346560495723809, 'MI': 0.7328867182584357, 'CORRELATION': 0.8519431955571422}, 'times': {'contamination': 0.0022056102752685547, 'optimization': 0, 'imputation': 52.07010293006897}}, '0.1': {'scores': {'RMSE': 0.39007056542870916, 'MAE': 0.2753022759369617, 'MI': 0.8280959876205578, 'CORRELATION': 0.9180937736429735}, 'times': {'contamination': 0.002231597900390625, 'optimization': 0, 'imputation': 52.543020248413086}}, '0.2': {'scores': {'RMSE': 0.37254427425455994, 'MAE': 0.2730547993858495, 'MI': 0.7425412593844177, 'CORRELATION': 0.9293322959355041}, 'times': {'contamination': 0.0072672367095947266, 'optimization': 0, 'imputation': 
52.88247036933899}}, '0.4': {'scores': {'RMSE': 0.6027573766269363, 'MAE': 0.34494332493982044, 'MI': 0.11876685901414151, 'CORRELATION': 0.8390532279447225}, 'times': {'contamination': 0.04321551322937012, 'optimization': 0, 'imputation': 54.10793352127075}}, '0.6': {'scores': {'RMSE': 0.9004526656857551, 'MAE': 0.4924048353228427, 'MI': 0.011590260996247858, 'CORRELATION': 0.5650541301828254}, 'times': {'contamination': 0.1728806495666504, 'optimization': 0, 'imputation': 40.53373336791992}}, '0.8': {'scores': {'RMSE': 1.0112488396023014, 'MAE': 0.7646823531588104, 'MI': 0.00040669209664367576, 'CORRELATION': 0.0183962968474991}, 'times': {'contamination': 0.6077785491943359, 'optimization': 0, 'imputation': 35.151907444000244}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.4445625930776235, 'MAE': 0.2696133927362288, 'MI': 1.1167751522591498, 'CORRELATION': 0.8944975075266335}, 'times': {'contamination': 0.0010058879852294922, 'optimization': 0, 'imputation': 0.7380530834197998}}, '0.1': {'scores': {'RMSE': 0.2939506418814281, 'MAE': 0.16953644212278182, 'MI': 1.0160968166750064, 'CORRELATION': 0.9531900627237018}, 'times': {'contamination': 0.0019745826721191406, 'optimization': 0, 'imputation': 4.7826457023620605}}, '0.2': {'scores': {'RMSE': 0.2366529609250008, 'MAE': 0.14709529129218185, 'MI': 1.064299483512458, 'CORRELATION': 0.9711348247027318}, 'times': {'contamination': 0.00801849365234375, 'optimization': 0, 'imputation': 33.94813060760498}}, '0.4': {'scores': {'RMSE': 0.4155649406397416, 'MAE': 0.22056702659999994, 'MI': 0.06616526470761779, 'CORRELATION': 0.919934494058292}, 'times': {'contamination': 0.04391813278198242, 'optimization': 0, 'imputation': 255.31524085998535}}, '0.6': {'scores': {'RMSE': 0.38695094864012947, 'MAE': 0.24340565131372927, 'MI': 0.06361822797740405, 'CORRELATION': 0.9249744935121553}, 'times': {'contamination': 0.17044353485107422, 'optimization': 0, 'imputation': 840.7470128536224}}, '0.8': {'scores': {'RMSE': 0.5862696375344495, 'MAE': 0.3968159514130716, 'MI': 0.13422239939628303, 'CORRELATION': 0.8178796825899766}, 'times': {'contamination': 0.5999574661254883, 'optimization': 0, 'imputation': 1974.6101157665253}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9458508648057621, 'MAE': 0.7019459696903068, 'MI': 0.11924522547609226, 'CORRELATION': 0.02915935932568557}, 'times': {'contamination': 0.001056671142578125, 'optimization': 0, 'imputation': 49.42237901687622}}, '0.1': {'scores': {'RMSE': 1.0125309431502871, 'MAE': 0.761136543268339, 'MI': 0.12567590499764303, 'CORRELATION': -0.037161060882302754}, 'times': {'contamination': 0.003415822982788086, 'optimization': 0, 'imputation': 49.04829454421997}}, '0.2': {'scores': {'RMSE': 1.0317754516097355, 'MAE': 0.7952869439926, 'MI': 0.10908095436833125, 'CORRELATION': -0.04155403791391449}, 'times': {'contamination': 0.007429599761962891, 'optimization': 0, 'imputation': 49.42568325996399}}, '0.4': {'scores': {'RMSE': 1.0807965786089415, 'MAE': 0.7326965517264863, 'MI': 0.006171770470542263, 'CORRELATION': -0.020630168509677818}, 'times': {'contamination': 0.042899370193481445, 'optimization': 0, 'imputation': 49.479795694351196}}, '0.6': {'scores': {'RMSE': 1.0441472017887297, 'MAE': 0.7599852461729673, 'MI': 0.01121013333181846, 'CORRELATION': -0.007513931343350665}, 'times': {'contamination': 0.17329692840576172, 'optimization': 0, 'imputation': 50.439927101135254}}, '0.8': {'scores': {'RMSE': 1.0379347892718205, 'MAE': 0.757440007226372, 'MI': 0.0035880775657246428, 
'CORRELATION': -0.0014975078469404196}, 'times': {'contamination': 0.6166613101959229, 'optimization': 0, 'imputation': 50.66455388069153}}}}}}} + """ + + def _config_optimization(self, opti_mean, ts_test, pattern, algorithm, block_size_mcar): + """ + Configure and execute optimization for selected imputation algorithm and pattern. + + Parameters + ---------- + opti_mean : float + Mean parameter for contamination. + ts_test : TimeSeries + TimeSeries object containing dataset. + pattern : str + Type of contamination pattern (e.g., "mcar", "mp", "blackout"). + algorithm : str + Imputation algorithm to use. + block_size_mcar : int + Size of blocks removed in MCAR + + Returns + ------- + BaseImputer + Configured imputer instance with optimal parameters. + """ + + if pattern == "mcar": + incomp_data_opti = ts_test.Contamination.mcar(input_data=ts_test.data, series_rate=opti_mean, + missing_rate=opti_mean, block_size=block_size_mcar, + seed=True) + elif pattern == "mp": + incomp_data_opti = ts_test.Contamination.missing_percentage(input_data=ts_test.data, series_rate=opti_mean, + missing_rate=opti_mean) + else: + incomp_data_opti = ts_test.Contamination.blackout(input_data=ts_test.data, missing_rate=opti_mean) + + i_opti = None + if algorithm == "cdrec": + i_opti = Imputation.MatrixCompletion.CDRec(incomp_data_opti) + elif algorithm == "stmvl": + i_opti = Imputation.PatternSearch.STMVL(incomp_data_opti) + elif algorithm == "iim": + i_opti = Imputation.Statistics.IIM(incomp_data_opti) + elif algorithm == "mrnn": + i_opti = Imputation.DeepLearning.MRNN(incomp_data_opti) + elif algorithm == "mean": + i_opti = Imputation.Statistics.MeanImpute(incomp_data_opti) + + return i_opti + + def avg_results(self, *datasets): + """ + Calculate the average of all metrics and times across multiple datasets. + + Parameters + ---------- + datasets : dict + Multiple dataset dictionaries to be averaged. 
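For illustration, a tiny hand-built input in the nested layout avg_results() walks (dataset, then pattern, then algorithm, then optimizer, then missing rate); only scores["RMSE"] is read, and the numbers are made up:

```
run_a = {"eegalcohol": {"mcar": {"cdrec": {"bayesian": {
    "0.1": {"scores": {"RMSE": 0.18}},
    "0.2": {"scores": {"RMSE": 0.14}}}}}}}
run_b = {"eegalcohol": {"mcar": {"cdrec": {"bayesian": {
    "0.1": {"scores": {"RMSE": 0.20}},
    "0.2": {"scores": {"RMSE": 0.16}}}}}}}

# matrix, algos, sets = Benchmark().avg_results(run_a, run_b)
# matrix would be a 1x1 array holding mean([0.18, 0.14, 0.20, 0.16]) = 0.17
# for the (eegalcohol, cdrec) cell; algos == ["cdrec"], sets == ["eegalcohol"]
```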
+ + Returns + ------- + List + Matrix with averaged scores and times for all levels, list of algorithms, list of datasets + """ + + # Step 1: Compute average RMSE across runs for each dataset and algorithm + aggregated_data = {} + + for runs in datasets: + for dataset, dataset_items in runs.items(): + if dataset not in aggregated_data: + aggregated_data[dataset] = {} + + for pattern, pattern_items in dataset_items.items(): + for algo, algo_data in pattern_items.items(): + if algo not in aggregated_data[dataset]: + aggregated_data[dataset][algo] = [] + + for missing_values, missing_values_item in algo_data.items(): + for param, param_data in missing_values_item.items(): + rmse = param_data["scores"]["RMSE"] + aggregated_data[dataset][algo].append(rmse) + + # Step 2: Compute averages using NumPy + average_rmse_matrix = {} + for dataset, algos in aggregated_data.items(): + average_rmse_matrix[dataset] = {} + for algo, rmse_values in algos.items(): + rmse_array = np.array(rmse_values) + avg_rmse = np.mean(rmse_array) + average_rmse_matrix[dataset][algo] = avg_rmse + + # Step 3: Create a matrix representation of datasets and algorithms + datasets_list = list(average_rmse_matrix.keys()) + algorithms = {algo for algos in average_rmse_matrix.values() for algo in algos} + algorithms_list = sorted(algorithms) + + # Prepare a NumPy matrix + comprehensive_matrix = np.zeros((len(datasets_list), len(algorithms_list))) + + for i, dataset in enumerate(datasets_list): + for j, algo in enumerate(algorithms_list): + comprehensive_matrix[i, j] = average_rmse_matrix[dataset].get(algo, np.nan) + + print("Visualization of datasets:", datasets_list) + print("Visualization of algorithms:", algorithms_list) + print("Visualization of matrix:\n", comprehensive_matrix) + + return comprehensive_matrix, algorithms_list, datasets_list + + def generate_heatmap(self, scores_list, algos, sets, save_dir="./reports", display=True): + """ + Generate and save RMSE matrix in HD quality. + + Parameters + ---------- + scores_list : np.ndarray + 2D numpy array containing RMSE values. + algos : list of str + List of algorithm names (columns of the heatmap). + sets : list of str + List of dataset names (rows of the heatmap). + save_dir : str, optional + Directory to save the generated plot (default is "./reports"). 
+ display : bool, optional + Display or not the plot + + Returns + ------- + Bool + True if the matrix has been generated + """ + if not os.path.exists(save_dir): + os.makedirs(save_dir) + + fig, ax = plt.subplots(figsize=(10, 6)) + cmap = plt.cm.Greys + norm = plt.Normalize(vmin=0, vmax=2) # Normalizing values between 0 and 2 (RMSE) + + # Create the heatmap + heatmap = ax.imshow(scores_list, cmap=cmap, norm=norm, aspect='auto') + + # Add color bar for reference + cbar = plt.colorbar(heatmap, ax=ax, orientation='vertical') + cbar.set_label('RMSE', rotation=270, labelpad=15) + + # Set the tick labels + ax.set_xticks(np.arange(len(algos))) + ax.set_xticklabels(algos) + ax.set_yticks(np.arange(len(sets))) + ax.set_yticklabels(sets) + + # Add titles and labels + ax.set_title('ImputeGAP Algorithms Comparison') + ax.set_xlabel('Algorithms') + ax.set_ylabel('Datasets') + + # Show values on the heatmap + for i in range(len(sets)): + for j in range(len(algos)): + ax.text(j, i, f"{scores_list[i, j]:.2f}", + ha='center', va='center', + color="black" if scores_list[i, j] < 1 else "white") # for visibility + + filename = f"benchmarking_rmse.jpg" + filepath = os.path.join(save_dir, filename) + plt.savefig(filepath, dpi=300, bbox_inches='tight') # Save in HD with tight layout + + # Show the plot + if display : + plt.tight_layout() + plt.show() + plt.close() + + return True + + def generate_reports_txt(self, runs_plots_scores, save_dir="./reports", dataset="", run=-1): + """ + Generate and save a text report of metrics and timing for each dataset, algorithm, and pattern. + + Parameters + ---------- + runs_plots_scores : dict + Dictionary containing scores and timing information for each dataset, pattern, and algorithm. + save_dir : str, optional + Directory to save the reports file (default is "./reports"). + dataset : str, optional + Name of the data for the report name. + run : int, optional + Number of the run. + + Returns + ------- + None + + Notes + ----- + The report is saved in a "report.txt" file in `save_dir`, organized in sections with headers and results. + """ + print("run", run) + os.makedirs(save_dir, exist_ok=True) + save_path = os.path.join(save_dir, f"report_{dataset}.txt") + current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + with open(save_path, "w") as file: + # Write an overall header for the report + file.write(f"Report for Dataset: {dataset}\n") + file.write(f"Generated on: {current_time}\n") + if run >= 0: + file.write(f"Run number: {run}\n") + file.write("=" * 120 + "\n\n") + + metrics = { + "RMSE": "Root Mean Square Error - Measures the average magnitude of error.", + "MAE": "Mean Absolute Error - Measures the average absolute error.", + "MI": "Mutual Information - Indicates dependency between variables.", + "CORRELATION": "Correlation Coefficient - Indicates linear relationship between variables." 
+ } + + for metric, description in metrics.items(): + # Write the metric description + file.write(f"{metric}: {description}\n\n") + + column_widths = [15, 15, 15, 15, 12, 25] + + # Create a table header + headers = ["Dataset", "Algorithm", "Optimizer", "Pattern", "X Value", metric] + header_row = "|".join(f" {header:^{width}} " for header, width in zip(headers, column_widths)) + separator_row = "+" + "+".join(f"{'-' * (width + 2)}" for width in column_widths) + "+" + file.write(f"{separator_row}\n") + file.write(f"|{header_row}|\n") + file.write(f"{separator_row}\n") + + # Extract and write results for the current metric + for dataset, algo_items in runs_plots_scores.items(): + for algorithm, optimizer_items in algo_items.items(): + for optimizer, pattern_data in optimizer_items.items(): + for pattern, x_data_items in pattern_data.items(): + for x, values in x_data_items.items(): + value = values.get("scores", {}).get(metric, None) + if value is not None: + value = f"{value:.10f}" # Limit to 10 decimal places + row_values = [dataset, algorithm, optimizer, pattern, str(x), value] + row = "|".join(f" {value:^{width}} " for value, width in zip(row_values, column_widths)) + file.write(f"|{row}|\n") + file.write(f"{separator_row}\n\n") + + file.write("Dictionary of Results:\n") + file.write(str(runs_plots_scores) + "\n") + + print(f"\nReport recorded in {save_path}") + + def generate_reports_excel(self, runs_plots_scores, save_dir="./reports", dataset="", run=-1): + """ + Generate and save an Excel-like text report of metrics and timing for each dataset, algorithm, and pattern. + + Parameters + ---------- + runs_plots_scores : dict + Dictionary containing scores and timing information for each dataset, pattern, and algorithm. + save_dir : str, optional + Directory to save the Excel-like file (default is "./reports"). + dataset : str, optional + Name of the data for the Excel-like file name. + run : int, optional + Number of the run + + Returns + ------- + None + """ + os.makedirs(save_dir, exist_ok=True) + save_path = os.path.join(save_dir, f"report_{dataset}.xlsx") + + # Create an Excel workbook + workbook = xlsxwriter.Workbook(save_path) + + # Add a summary sheet with the header, creation date, dictionary content, and links to other sheets + summary_sheet = workbook.add_worksheet("Summary") + current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + summary_sheet.set_column(0, 1, 50) + + # Add the logo using importlib.resources + logo_path = importlib.resources.files("imputegap.assets").joinpath("logo_imputegap.png") + summary_sheet.insert_image("A1", str(logo_path), {"x_scale": 0.5, "y_scale": 0.5}) + + # Title and header + summary_sheet.write(5, 0, "IMPUTEGAP") # Title below the logo + summary_sheet.write(7, 0, "Report for Dataset") + summary_sheet.write(7, 1, dataset) + summary_sheet.write(8, 0, "Generated on") + summary_sheet.write(8, 1, current_time) + if run >= 0: + summary_sheet.write(9, 0, "Run Number") + summary_sheet.write(9, 1, run) + + # Add links to metric sheets + row = 12 + summary_sheet.write(row, 0, "Metric Sheets:") + row += 1 + metrics = { + "RMSE": "Root Mean Square Error - Measures the average magnitude of error.", + "MAE": "Mean Absolute Error - Measures the average absolute error.", + "MI": "Mutual Information - Indicates dependency between variables.", + "CORRELATION": "Correlation Coefficient - Indicates linear relationship between variables." 
+ } + for metric in metrics.keys(): + summary_sheet.write_url(row, 0, f"internal:'{metric}'!A1", string=f"Go to {metric} Sheet") + row += 1 + + # Write the dictionary content + summary_sheet.write(row + 1, 0, "Dictionary of Results") + row += 2 + + for key, value in runs_plots_scores.items(): + summary_sheet.write(row, 0, str(key)) + summary_sheet.write(row, 1, str(value)) + row += 1 + + for metric, description in metrics.items(): + # Create a worksheet for each metric + worksheet = workbook.add_worksheet(metric) + + # Add the logo to each metric sheet + worksheet.insert_image("A1", str(logo_path), {"x_scale": 0.5, "y_scale": 0.5}) + + # Write the metric description at the top and add IMPUTEGAP header + worksheet.write(5, 0, "IMPUTEGAP") + worksheet.write(7, 0, f"{metric}: {description}") + + # Define consistent column headers and widths + headers = ["Dataset", "Algorithm", "Optimizer", "Pattern", "X Value", metric] + column_widths = [15, 15, 15, 15, 12, 20] # Adjust widths for Excel + + # Write the headers + for col, (header, width) in enumerate(zip(headers, column_widths)): + worksheet.set_column(col, col, width) + worksheet.write(8, col, header) + + # Populate the data + row = 9 + for dataset, algo_items in runs_plots_scores.items(): + for algorithm, optimizer_items in algo_items.items(): + for optimizer, pattern_data in optimizer_items.items(): + for pattern, x_data_items in pattern_data.items(): + for x, values in x_data_items.items(): + value = values.get("scores", {}).get(metric, None) + if value is not None: + value = f"{value:.10f}" + data = [dataset, algorithm, optimizer, pattern, str(x), value] + for col, cell_value in enumerate(data): + worksheet.write(row, col, cell_value) + row += 1 + + # Close the workbook + workbook.close() + + print(f"\nExcel report recorded in {save_path}") + + def generate_plots(self, runs_plots_scores, ticks, subplot=False, save_dir="./reports"): + """ + Generate and save plots for each metric and pattern based on provided scores. + + Parameters + ---------- + runs_plots_scores : dict + Dictionary containing scores and timing information for each dataset, pattern, and algorithm. + ticks : list of float + List of missing rates for contamination. + subplot : bool, optional + If True, generates a single figure with subplots for all metrics (default is False). + save_dir : str, optional + Directory to save generated plots (default is "./reports"). + + Returns + ------- + None + + Notes + ----- + Saves generated plots in `save_dir`, categorized by dataset, pattern, and metric. 
+ """ + os.makedirs(save_dir, exist_ok=True) + metrics = ["RMSE", "MAE", "MI", "CORRELATION", "imputation_time", "log_imputation"] + + for dataset, pattern_items in runs_plots_scores.items(): + for pattern, algo_items in pattern_items.items(): + + if subplot: + fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(10, 12)) # Adjusted figsize + axes = axes.ravel() # Flatten the 2D array of axes to a 1D array + + # Iterate over each metric, generating separate plots, including new timing metrics + for i, metric in enumerate(metrics): + + if subplot: + if i < len(axes): + ax = axes[i] + else: + break # Prevent index out of bounds if metrics exceed subplot slots + else: + plt.figure(figsize=(10, 4)) + ax = plt.gca() + + has_data = False # Flag to check if any data is added to the plot + + # Iterate over each algorithm and plot them in the same figure + for algorithm, optimizer_items in algo_items.items(): + x_vals = [] + y_vals = [] + for optimizer, x_data in optimizer_items.items(): + for x, values in x_data.items(): + # Differentiate between score metrics and timing metrics + if metric == "imputation_time" and "imputation" in values["times"]: + x_vals.append(float(x)) + y_vals.append(values["times"]["imputation"]) + elif metric == "log_imputation" and "log_imputation" in values["times"]: + x_vals.append(float(x)) + y_vals.append(values["times"]["log_imputation"]) + elif metric in values["scores"]: + x_vals.append(float(x)) + y_vals.append(values["scores"][metric]) + + # Only plot if there are values to plot + if x_vals and y_vals: + # Sort x and y values by x for correct spacing + sorted_pairs = sorted(zip(x_vals, y_vals)) + x_vals, y_vals = zip(*sorted_pairs) + + # Plot each algorithm as a line with scattered points + ax.plot(x_vals, y_vals, label=f"{algorithm}") + ax.scatter(x_vals, y_vals) + has_data = True + + # Save plot only if there is data to display + if has_data: + ylabel_metric = { + "imputation_time": "Imputation Time (sec)", + "log_imputation": "Imputation Time (log)", + }.get(metric, metric) + + ax.set_title(metric) + ax.set_xlabel("Rates") + ax.set_ylabel(ylabel_metric) + ax.set_xlim(0.0, 0.85) + + # Set y-axis limits with padding below 0 for visibility + if metric == "imputation_time": + ax.set_ylim(-10, 90) + elif metric == "log_imputation": + ax.set_ylim(-10, 90) + elif metric == "MAE": + ax.set_ylim(-0.1, 2.4) + elif metric == "MI": + ax.set_ylim(-0.1, 1.85) + elif metric == "RMSE": + ax.set_ylim(-0.1, 2.6) + elif metric == "CORRELATION": + ax.set_ylim(-0.75, 1.1) + + # Customize x-axis ticks + ax.set_xticks(ticks) + ax.set_xticklabels([f"{int(tick * 100)}%" for tick in ticks]) + ax.grid(True, zorder=0) + ax.legend(loc='upper left', bbox_to_anchor=(1, 1)) + + if not subplot: + filename = f"{dataset}_{pattern}_{metric}.jpg" + filepath = os.path.join(save_dir, filename) + plt.savefig(filepath) + plt.close() + + if subplot: + plt.tight_layout() + filename = f"{dataset}_{pattern}_metrics_subplot.jpg" + filepath = os.path.join(save_dir, filename) + plt.savefig(filepath) + plt.close() + + print("\nAll plots recorded in", save_dir) + + def eval(self, algorithms=["cdrec"], datasets=["eeg-alcohol"], patterns=["mcar"], x_axis=[0.05, 0.1, 0.2, 0.4, 0.6, 0.8], optimizers=["user_def"], save_dir="./reports", runs=1): + """ + Execute a comprehensive evaluation of imputation algorithms over multiple datasets and patterns. + + Parameters + ---------- + algorithms : list of str + List of imputation algorithms to test. + datasets : list of str + List of dataset names to evaluate. 
+ patterns : list of str + List of contamination patterns to apply. + x_axis : list of float + List of missing rates for contamination. + optimizers : list of dict + List of optimizers with their configurations. + save_dir : str, optional + Directory to save reports and plots (default is "./reports"). + runs : int, optional + Number of executions with a view to averaging them + + Returns + ------- + List + List of all runs results, matrix with averaged scores and times for all levels + + Notes + ----- + Runs contamination, imputation, and evaluation, then generates plots and a summary reports. + """ + + print("initialization of the comprehensive evaluation. It can take time...\n") + run_storage = [] + scores_list, algos, sets = None, None, None + + for i_run in range(0, abs(runs)): + for dataset in datasets: + runs_plots_scores = {} + limitation_series, limitation_values = 100, 1000 + block_size_mcar = 10 + + print("1. evaluation launch for", dataset, + "========================================================\n\n\n") + ts_test = TimeSeries() + + header = False + if dataset == "eeg-reading": + header = True + elif dataset == "drift": + limitation_series = 50 + elif dataset == "fmri-objectviewing": + limitation_series = 360 + elif dataset == "fmri-stoptask": + limitation_series = 360 + + if runs == -1: + limitation_series = 10 + limitation_values = 110 + print("TEST LOADED...") + + ts_test.load_timeseries(data=utils.search_path(dataset), max_series=limitation_series, + max_values=limitation_values, header=header) + + start_time_opti, end_time_opti = 0, 0 + M, N = ts_test.data.shape + + if N < 250: + block_size_mcar = 2 + + print("1. normalization of ", dataset, "\n") + ts_test.normalize() + + for pattern in patterns: + print("\t2. contamination of", dataset, "with pattern", pattern, "\n") + + for algorithm in algorithms: + has_been_optimized = False + print("\t3. algorithm selected", algorithm, "\n") + + for x in x_axis: + print("\t\t4. missing values (series&values) set to", x, "for x_axis\n") + + start_time_contamination = time.time() # Record start time + if pattern == "mcar": + incomp_data = ts_test.Contamination.mcar(input_data=ts_test.data, series_rate=x, + missing_rate=x, block_size=block_size_mcar, + seed=True) + elif pattern == "mp": + incomp_data = ts_test.Contamination.missing_percentage(input_data=ts_test.data, + series_rate=x, + missing_rate=x) + else: + incomp_data = ts_test.Contamination.blackout(input_data=ts_test.data, missing_rate=x) + end_time_contamination = time.time() + + for optimizer in optimizers: + algo = None + if algorithm == "cdrec": + algo = Imputation.MatrixCompletion.CDRec(incomp_data) + elif algorithm == "stmvl": + algo = Imputation.PatternSearch.STMVL(incomp_data) + elif algorithm == "iim": + algo = Imputation.Statistics.IIM(incomp_data) + elif algorithm == "mrnn": + algo = Imputation.DeepLearning.MRNN(incomp_data) + elif algorithm == "mean": + algo = Imputation.Statistics.MeanImpute(incomp_data) + + if isinstance(optimizer, dict): + optimizer_gt = {"input_data": ts_test.data, **optimizer} + optimizer_value = optimizer.get('optimizer') # or optimizer['optimizer'] + + if not has_been_optimized and algorithm != "mean": + print("\t\t5. 
AutoML to set the parameters", optimizer, "\n") + start_time_opti = time.time() # Record start time + i_opti = self._config_optimization(0.25, ts_test, pattern, algorithm, + block_size_mcar) + i_opti.impute(user_def=False, params=optimizer_gt) + utils.save_optimization(optimal_params=i_opti.parameters, algorithm=algorithm, + dataset=dataset, optimizer="e") + + has_been_optimized = True + end_time_opti = time.time() + else: + print("\t\t5. AutoML already optimized : ", optimizer, "\n") + + if algorithm != "mean": + opti_params = utils.load_parameters(query="optimal", algorithm=algorithm, + dataset=dataset, optimizer="e") + print("\t\t6. imputation", algorithm, "with optimal parameters", *opti_params) + + else: + print("\t\t5. MeanImpute launches without optimal params", optimizer, "for", algorithm, "\n") + opti_params = None + else: + print("\t\t5. Default parameters have been set the parameters", optimizer, "for", algorithm, "\n") + optimizer_value = optimizer + opti_params = None + + start_time_imputation = time.time() + algo.impute(params=opti_params) + end_time_imputation = time.time() + + algo.score(input_data=ts_test.data, recov_data=algo.recov_data) + + time_contamination = end_time_contamination - start_time_contamination + time_opti = end_time_opti - start_time_opti + time_imputation = end_time_imputation - start_time_imputation + log_time_imputation = math.log(time_imputation) if time_imputation > 0 else None + + dic_timing = {"contamination": time_contamination, "optimization": time_opti, "imputation": time_imputation, "log_imputation": log_time_imputation} + + dataset_s = dataset + if "-" in dataset: + dataset_s = dataset.replace("-", "") + + runs_plots_scores.setdefault(str(dataset_s), {}).setdefault(str(pattern), + {}).setdefault( + str(algorithm), {}).setdefault(str(optimizer_value), {})[str(x)] = { + "scores": algo.metrics, + "times": dic_timing + } + + print("\t\truns_plots_scores", runs_plots_scores) + + print("\truns_plots_scores : ", runs_plots_scores) + save_dir_runs = save_dir + "/run_" + str(i_run) + print("\truns saved in : ", save_dir_runs) + self.generate_plots(runs_plots_scores=runs_plots_scores, ticks=x_axis, subplot=True, save_dir=save_dir_runs) + self.generate_plots(runs_plots_scores=runs_plots_scores, ticks=x_axis, subplot=False, save_dir=save_dir_runs) + self.generate_reports_txt(runs_plots_scores, save_dir_runs, dataset, i_run) + self.generate_reports_excel(runs_plots_scores, save_dir_runs, dataset, i_run) + run_storage.append(runs_plots_scores) + + print("============================================================================\n\n\n\n\n\n") + + scores_list, algos, sets = self.avg_results(*run_storage) + _ = Benchmark().generate_heatmap(scores_list, algos, sets, save_dir=save_dir) + + return run_storage, scores_list diff --git a/imputegap/recovery/evaluation.py b/imputegap/recovery/evaluation.py index 3f42194..1b89d16 100644 --- a/imputegap/recovery/evaluation.py +++ b/imputegap/recovery/evaluation.py @@ -9,7 +9,7 @@ class Evaluation: Methods ------- - metrics_computation(): + compute_all_metrics(): Compute various evaluation metrics (RMSE, MAE, MI, CORRELATION) for the imputation. compute_rmse(): Compute the Root Mean Squared Error (RMSE) between the ground truth and the imputed values. @@ -22,28 +22,28 @@ class Evaluation: """ - def __init__(self, ground_truth, imputation, contamination): + def __init__(self, input_data, recov_data, incomp_data): """ - Initialize the Evaluation class with ground truth, imputation, and contamination time series. 
+ Initialize the Evaluation class with ground truth, imputation, and incomp_data time series. Parameters ---------- - ground_truth : numpy.ndarray + input_data : numpy.ndarray The original time series without contamination. - imputation : numpy.ndarray + recov_data : numpy.ndarray The imputed time series. - contamination : numpy.ndarray + incomp_data : numpy.ndarray The time series with contamination (NaN values). Returns ------- None """ - self.ground_truth = ground_truth - self.imputation = imputation - self.contamination = contamination + self.input_data = input_data + self.recov_data = recov_data + self.incomp_data = incomp_data - def metrics_computation(self): + def compute_all_metrics(self): """ Compute a set of evaluation metrics for the imputation based on the ground truth and contamination data. @@ -79,9 +79,9 @@ def compute_rmse(self): float The RMSE value for NaN positions in the contamination dataset. """ - nan_locations = np.isnan(self.contamination) + nan_locations = np.isnan(self.incomp_data) - mse = np.mean((self.ground_truth[nan_locations] - self.imputation[nan_locations]) ** 2) + mse = np.mean((self.input_data[nan_locations] - self.recov_data[nan_locations]) ** 2) rmse = np.sqrt(mse) return float(rmse) @@ -97,9 +97,9 @@ def compute_mae(self): float The MAE value for NaN positions in the contamination dataset. """ - nan_locations = np.isnan(self.contamination) + nan_locations = np.isnan(self.incomp_data) - absolute_error = np.abs(self.ground_truth[nan_locations] - self.imputation[nan_locations]) + absolute_error = np.abs(self.input_data[nan_locations] - self.recov_data[nan_locations]) mean_absolute_error = np.mean(absolute_error) return mean_absolute_error @@ -116,16 +116,16 @@ def compute_mi(self): float The mutual information (MI) score for NaN positions in the contamination dataset. """ - nan_locations = np.isnan(self.contamination) + nan_locations = np.isnan(self.incomp_data) # Discretize the continuous data into bins - ground_truth_binned = np.digitize(self.ground_truth[nan_locations], - bins=np.histogram_bin_edges(self.ground_truth[nan_locations], bins=10)) - imputation_binned = np.digitize(self.imputation[nan_locations], - bins=np.histogram_bin_edges(self.imputation[nan_locations], bins=10)) + input_data_binned = np.digitize(self.input_data[nan_locations], + bins=np.histogram_bin_edges(self.input_data[nan_locations], bins=10)) + imputation_binned = np.digitize(self.recov_data[nan_locations], + bins=np.histogram_bin_edges(self.recov_data[nan_locations], bins=10)) - mi_discrete = mutual_info_score(ground_truth_binned, imputation_binned) - # mi_continuous = mutual_info_score(self.ground_truth[nan_locations], self.ground_truth[nan_locations]) + mi_discrete = mutual_info_score(input_data_binned, imputation_binned) + # mi_continuous = mutual_info_score(self.input_data[nan_locations], self.input_data[nan_locations]) return mi_discrete @@ -141,11 +141,11 @@ def compute_correlation(self): float The Pearson correlation coefficient for NaN positions in the contamination dataset. 
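Concretely, all of these metrics are computed only over the NaN positions of incomp_data. A small self-contained sketch of that masked-error idea with toy numbers, mirroring compute_rmse and compute_mae above:

```
import numpy as np

input_data = np.array([[5.0, 2.0, 4.0], [3.0, 4.0, 6.0]])         # ground truth
incomp_data = np.array([[5.0, 2.0, np.nan], [3.0, np.nan, 6.0]])  # contaminated copy
recov_data = np.array([[5.0, 2.0, 4.5], [3.0, 3.5, 6.0]])         # imputed result

nan_locations = np.isnan(incomp_data)  # score only the cells that were imputed
rmse = np.sqrt(np.mean((input_data[nan_locations] - recov_data[nan_locations]) ** 2))
mae = np.mean(np.abs(input_data[nan_locations] - recov_data[nan_locations]))
print(rmse, mae)  # 0.5 and 0.5 for this toy example
```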
""" - nan_locations = np.isnan(self.contamination) - ground_truth_values = self.ground_truth[nan_locations] - imputed_values = self.imputation[nan_locations] + nan_locations = np.isnan(self.incomp_data) + input_data_values = self.input_data[nan_locations] + imputed_values = self.recov_data[nan_locations] - correlation, _ = pearsonr(ground_truth_values, imputed_values) + correlation, _ = pearsonr(input_data_values, imputed_values) if np.isnan(correlation): correlation = 0 diff --git a/imputegap/recovery/explainer.py b/imputegap/recovery/explainer.py index e39ec29..df1aefa 100644 --- a/imputegap/recovery/explainer.py +++ b/imputegap/recovery/explainer.py @@ -32,11 +32,11 @@ class Explainer: convert_results(tmp, file, algo, descriptions, features, categories, mean_features, to_save) Convert SHAP raw results into a refined format for display. - launch_shap_model(x_dataset, x_information, y_dataset, file, algorithm, splitter=10, display=False, verbose=False) + execute_shap_model(x_dataset, x_information, y_dataset, file, algorithm, splitter=10, display=False, verbose=False) Launch the SHAP model to explain the dataset features. - shap_explainer(raw_data, algorithm="cdrec", params=None, contamination="mcar", missing_rate=0.4, - block_size=10, protection=0.1, use_seed=True, seed=42, limitation=15, splitter=0, + shap_explainer(input_data, algorithm="cdrec", params=None, incomp_data="mcar", missing_rate=0.4, + block_size=10, offset=0.1, seed=True, limitation=15, splitter=0, file_name="ts", display=False, verbose=False) Handle parameters and set variables to launch the SHAP model. @@ -213,8 +213,8 @@ def convert_results(tmp, file, algo, descriptions, features, categories, mean_fe return result_shap - def launch_shap_model(x_dataset, x_information, y_dataset, file, algorithm, splitter=10, display=False, - verbose=False): + def execute_shap_model(x_dataset, x_information, y_dataset, file, algorithm, splitter=10, display=False, + verbose=False): """ Launch the SHAP model for explaining the features of the dataset. @@ -467,36 +467,34 @@ def launch_shap_model(x_dataset, x_information, y_dataset, file, algorithm, spli return results_shap - def shap_explainer(raw_data, algorithm="cdrec", params=None, contamination="mcar", missing_rate=0.4, - block_size=10, protection=0.1, use_seed=True, seed=42, limitation=15, splitter=0, + def shap_explainer(input_data, algorithm="cdrec", params=None, pattern="mcar", missing_rate=0.4, + block_size=10, offset=0.1, seed=True, limit_ratio=1, split_ratio=0.6, file_name="ts", display=False, verbose=False): """ Handle parameters and set variables to launch the SHAP model. Parameters ---------- - raw_data : numpy.ndarray + input_data : numpy.ndarray The original time series dataset. algorithm : str, optional The algorithm used for imputation (default is 'cdrec'). Valid values: 'cdrec', 'stmvl', 'iim', 'mrnn'. params : dict, optional Parameters for the algorithm. - contamination : str, optional - Contamination scenario to apply (default is 'mcar'). + pattern : str, optional + Contamination pattern to apply (default is 'mcar'). missing_rate : float, optional Percentage of missing values per series (default is 0.4). block_size : int, optional Size of the block to remove at each random position selected (default is 10). - protection : float, optional + offset : float, optional Size of the uncontaminated section at the beginning of the time series (default is 0.1). - use_seed : bool, optional + seed : bool, optional Whether to use a seed for reproducibility (default is True). 
- seed : int, optional - Seed value for reproducibility (default is 42). - limitation : int, optional - Limitation on the number of series for the model (default is 15). - splitter : int, optional - Limitation on the training series for the model (default is 0). + limit_ratio : float, optional + Limitation on the number of series for the model (default is 1). + split_ratio : float, optional + Limitation on the training series for the model (default is 0.6). file_name : str, optional Name of the dataset file (default is 'ts'). display : bool, optional @@ -522,43 +520,52 @@ def shap_explainer(raw_data, algorithm="cdrec", params=None, contamination="mcar start_time = time.time() # Record start time - if limitation > raw_data.shape[0]: - limitation = int(raw_data.shape[0] * 0.75) + if limit_ratio < 0.05 or limit_ratio > 1: + print("\nlimit_ratio outside the range [0.05, 1], set to 100% of the dataset") + limit_ratio = 1 + + M = input_data.shape[0] + limit = math.ceil(M * limit_ratio) + + if split_ratio < 0.05 or split_ratio > 0.95: + print("\nsplit_ratio too small or too high, set to 60% of the dataset") + split_ratio = 0.6 + + training_ratio = int(limit * split_ratio) - if splitter == 0 or splitter >= limitation - 1: - splitter = int(limitation * 0.60) + print("\nFrom", limit, "/", M, "elements, the training dataset has been set with", training_ratio, "elements and the testing dataset with", (limit-training_ratio), "elements") if verbose: print("SHAP Explainer has been called\n\t", "missing_values (", missing_rate * 100, "%)\n\t", - "for a contamination (", contamination, "), \n\t", + "for a contamination (", pattern, "), \n\t", "imputated by (", algorithm, ") with params (", params, ")\n\t", - "with limitation and splitter after verification of (", limitation, ") and (", splitter, ") for ", - raw_data.shape, "...\n\n\tGeneration of the dataset with the time series...") + "with limitation and splitter after verification of (", limit, ") and (", training_ratio, ") for ", + input_data.shape, "...\n\n\tGeneration of the dataset with the time series...") - ground_truth_matrices, obfuscated_matrices = [], [] + input_data_matrices, obfuscated_matrices = [], [] output_metrics, output_rmse, input_params, input_params_full = [], [], [], [] categories, features = Explainer.load_configuration() - for current_series in range(0, limitation): + for current_series in range(0, limit): - print("Generation ", current_series, "/", limitation, "(", int((current_series / limitation) * 100), "%)________________________________________________________") + print("Generation ", current_series, "/", limit, "(", int((current_series / limit) * 100), "%)________________________________________________________") print("\tContamination ", current_series, "...") - if contamination == "mcar": - obfuscated_matrix = TimeSeries().Contaminate.mcar(ts=raw_data, series_impacted=current_series, - missing_rate=missing_rate, block_size=block_size, - protection=protection, use_seed=use_seed, seed=seed, - explainer=True) + if pattern == "mcar": + incomp_data = TimeSeries().Contamination.mcar(input_data=input_data, series_rate=current_series, + missing_rate=missing_rate, block_size=block_size, + offset=offset, seed=seed, + explainer=True) else: - print("Contamination proposed not found : ", contamination, " >> BREAK") + print("Contamination pattern not found: ", pattern, " >> BREAK") return None - ground_truth_matrices.append(raw_data) - obfuscated_matrices.append(obfuscated_matrix) + input_data_matrices.append(input_data) + 
obfuscated_matrices.append(incomp_data) - catch_fct, descriptions = Explainer.extract_features(np.array(obfuscated_matrix), categories, features, False) + catch_fct, descriptions = Explainer.extract_features(np.array(incomp_data), categories, features, False) extracted_features = np.array(list(catch_fct.values())) input_params.append(extracted_features) @@ -566,17 +573,17 @@ def shap_explainer(raw_data, algorithm="cdrec", params=None, contamination="mcar print("\tImputation ", current_series, "...") if algorithm == "cdrec": - algo = Imputation.MatrixCompletion.CDRec(obfuscated_matrix) + algo = Imputation.MatrixCompletion.CDRec(incomp_data) elif algorithm == "stmvl": - algo = Imputation.PatternSearch.STMVL(obfuscated_matrix) + algo = Imputation.PatternSearch.STMVL(incomp_data) elif algorithm == "iim": - algo = Imputation.Statistics.IIM(obfuscated_matrix) + algo = Imputation.Statistics.IIM(incomp_data) elif algorithm == "mrnn": - algo = Imputation.DeepLearning.MRNN(obfuscated_matrix) + algo = Imputation.DeepLearning.MRNN(incomp_data) algo.logs = False - algo.impute(user_defined=True, params=params) - algo.score(raw_data) + algo.impute(user_def=True, params=params) + algo.score(input_data) imputation_results = algo.metrics output_metrics.append(imputation_results) @@ -586,8 +593,8 @@ def shap_explainer(raw_data, algorithm="cdrec", params=None, contamination="mcar for input, output in zip(input_params, output_metrics): shap_details.append((input, output["RMSE"])) - shap_values = Explainer.launch_shap_model(input_params, input_params_full, output_rmse, file_name, algorithm, - splitter, display, verbose) + shap_values = Explainer.execute_shap_model(input_params, input_params_full, output_rmse, file_name, algorithm, + training_ratio, display, verbose) print("\n\nSHAP Explainer succeeded without fail, please find the results in : ./assets/shap/*\n") diff --git a/imputegap/recovery/imputation.py b/imputegap/recovery/imputation.py index c7187d2..0d9ef79 100644 --- a/imputegap/recovery/imputation.py +++ b/imputegap/recovery/imputation.py @@ -22,9 +22,9 @@ class BaseImputer: ------- impute(params=None): Abstract method to perform the imputation. - score(raw_matrix, imputed_matrix=None): + score(input_data, recov_data=None): Compute metrics for the imputed time series. - _check_params(user_defined, params): + _check_params(user_def, params): Check and format parameters for imputation. _optimize(parameters={}): Optimize hyperparameters for the imputation algorithm. @@ -32,17 +32,17 @@ class BaseImputer: algorithm = "" # Class variable to hold the algorithm name logs = True - def __init__(self, infected_matrix): + def __init__(self, incomp_data): """ Initialize the BaseImputer with an infected time series matrix. Parameters ---------- - infected_matrix : numpy.ndarray + incomp_data : numpy.ndarray Matrix used during the imputation of the time series. """ - self.infected_matrix = infected_matrix - self.imputed_matrix = None + self.incomp_data = incomp_data + self.recov_data = None self.metrics = None self.parameters = None @@ -62,33 +62,33 @@ def impute(self, params=None): """ raise NotImplementedError("This method should be overridden by subclasses") - def score(self, raw_matrix, imputed_matrix=None): + def score(self, input_data, recov_data=None): """ Compute evaluation metrics for the imputed time series. Parameters ---------- - raw_matrix : numpy.ndarray + input_data : numpy.ndarray The original time series without contamination. 
- imputed_matrix : numpy.ndarray, optional + recov_data : numpy.ndarray, optional The imputed time series (default is None). Returns ------- None """ - if self.imputed_matrix is None: - self.imputed_matrix = imputed_matrix + if self.recov_data is None: + self.recov_data = recov_data - self.metrics = Evaluation(raw_matrix, self.imputed_matrix, self.infected_matrix).metrics_computation() + self.metrics = Evaluation(input_data, self.recov_data, self.incomp_data).compute_all_metrics() - def _check_params(self, user_defined, params): + def _check_params(self, user_def, params): """ Format the parameters for optimization or imputation. Parameters ---------- - user_defined : bool + user_def : bool Whether the parameters are user-defined or not. params : dict or list List or dictionary of parameters. @@ -100,7 +100,7 @@ def _check_params(self, user_defined, params): """ if params is not None: - if not user_defined: + if not user_def: self._optimize(params) if isinstance(self.parameters, dict): @@ -133,7 +133,7 @@ def _optimize(self, parameters={}): Parameters ---------- parameters : dict - Dictionary containing optimization configurations such as ground_truth, optimizer, and options. + Dictionary containing optimization configurations such as input_data, optimizer, and options. Returns ------- @@ -141,9 +141,9 @@ def _optimize(self, parameters={}): """ from imputegap.recovery.optimization import Optimization - raw_data = parameters.get('ground_truth') - if raw_data is None: - raise ValueError(f"Need ground_truth to be able to adapt the hyper-parameters: {raw_data}") + input_data = parameters.get('input_data') + if input_data is None: + raise ValueError(f"Need input_data to be able to adapt the hyper-parameters: {input_data}") optimizer = parameters.get('optimizer', "bayesian") defaults = utils.load_parameters(query="default", algorithm=optimizer) @@ -157,13 +157,13 @@ def _optimize(self, parameters={}): n_calls = options.get('n_calls', n_calls_d) random_starts = options.get('n_random_starts', n_random_starts_d) func = options.get('acq_func', acq_func_d) - metrics = options.get('selected_metrics', selected_metrics_d) + metrics = options.get('metrics', selected_metrics_d) bo_optimizer = Optimization.Bayesian() - optimal_params, _ = bo_optimizer.optimize(ground_truth=raw_data, - contamination=self.infected_matrix, - selected_metrics=metrics, + optimal_params, _ = bo_optimizer.optimize(input_data=input_data, + incomp_data=self.incomp_data, + metrics=metrics, algorithm=self.algorithm, n_calls=n_calls, n_random_starts=random_starts, @@ -179,13 +179,13 @@ def _optimize(self, parameters={}): w = options.get('w', w_d) iterations = options.get('iterations', iterations_d) n_processes = options.get('n_processes', n_processes_d) - metrics = options.get('selected_metrics', selected_metrics_d) + metrics = options.get('metrics', selected_metrics_d) swarm_optimizer = Optimization.ParticleSwarm() - optimal_params, _ = swarm_optimizer.optimize(ground_truth=raw_data, - contamination=self.infected_matrix, - selected_metrics=metrics, algorithm=self.algorithm, + optimal_params, _ = swarm_optimizer.optimize(input_data=input_data, + incomp_data=self.incomp_data, + metrics=metrics, algorithm=self.algorithm, n_particles=n_particles, c1=c1, c2=c2, w=w, iterations=iterations, n_processes=n_processes) @@ -197,13 +197,13 @@ def _optimize(self, parameters={}): num_configs = options.get('num_configs', num_configs_d) num_iterations = options.get('num_iterations', num_iterations_d) reduction_factor = options.get('reduction_factor', 
reduction_factor_d) - metrics = options.get('selected_metrics', selected_metrics_d) + metrics = options.get('metrics', selected_metrics_d) sh_optimizer = Optimization.SuccessiveHalving() - optimal_params, _ = sh_optimizer.optimize(ground_truth=raw_data, - contamination=self.infected_matrix, - selected_metrics=metrics, algorithm=self.algorithm, + optimal_params, _ = sh_optimizer.optimize(input_data=input_data, + incomp_data=self.incomp_data, + metrics=metrics, algorithm=self.algorithm, num_configs=num_configs, num_iterations=num_iterations, reduction_factor=reduction_factor) @@ -212,13 +212,13 @@ def _optimize(self, parameters={}): options = parameters.get('options', {}) n_calls = options.get('n_calls', n_calls_d) - metrics = options.get('selected_metrics', selected_metrics_d) + metrics = options.get('metrics', selected_metrics_d) go_optimizer = Optimization.Greedy() - optimal_params, _ = go_optimizer.optimize(ground_truth=raw_data, - contamination=self.infected_matrix, - selected_metrics=metrics, algorithm=self.algorithm, + optimal_params, _ = go_optimizer.optimize(input_data=input_data, + incomp_data=self.incomp_data, + metrics=metrics, algorithm=self.algorithm, n_calls=n_calls) self.parameters = optimal_params @@ -230,19 +230,19 @@ class Imputation: Methods ------- - evaluate_params(ground_truth, contamination, configuration, algorithm="cdrec"): + evaluate_params(input_data, incomp_data, configuration, algorithm="cdrec"): Evaluate imputation performance using given parameters and algorithm. """ - def evaluate_params(ground_truth, contamination, configuration, algorithm="cdrec"): + def evaluate_params(input_data, incomp_data, configuration, algorithm="cdrec"): """ Evaluate various metrics for given parameters and imputation algorithm. Parameters ---------- - ground_truth : numpy.ndarray + input_data : numpy.ndarray The original time series without contamination. - contamination : numpy.ndarray + incomp_data : numpy.ndarray The time series with contamination. configuration : tuple Tuple of the configuration of the algorithm. 
@@ -260,9 +260,9 @@ def evaluate_params(ground_truth, contamination, configuration, algorithm="cdrec if algorithm == 'cdrec': rank, epsilon, iterations = configuration - algo = Imputation.MatrixCompletion.CDRec(contamination) + algo = Imputation.MatrixCompletion.CDRec(incomp_data) algo.logs = False - algo.impute(user_defined=True, params={"rank": rank, "epsilon": epsilon, "iterations": iterations}) + algo.impute(user_def=True, params={"rank": rank, "epsilon": epsilon, "iterations": iterations}) elif algorithm == 'iim': if not isinstance(configuration, list): @@ -270,30 +270,30 @@ def evaluate_params(ground_truth, contamination, configuration, algorithm="cdrec learning_neighbours = configuration[0] alg_code = "iim " + re.sub(r'[\W_]', '', str(learning_neighbours)) - algo = Imputation.Statistics.IIM(contamination) + algo = Imputation.Statistics.IIM(incomp_data) algo.logs = False - algo.impute(user_defined=True, params={"learning_neighbours": learning_neighbours, "alg_code": alg_code}) + algo.impute(user_def=True, params={"learning_neighbours": learning_neighbours, "alg_code": alg_code}) elif algorithm == 'mrnn': hidden_dim, learning_rate, iterations = configuration - algo = Imputation.DeepLearning.MRNN(contamination) + algo = Imputation.DeepLearning.MRNN(incomp_data) algo.logs = False - algo.impute(user_defined=True, + algo.impute(user_def=True, params={"hidden_dim": hidden_dim, "learning_rate": learning_rate, "iterations": iterations, "seq_length": 7}) elif algorithm == 'stmvl': window_size, gamma, alpha = configuration - algo = Imputation.PatternSearch.STMVL(contamination) + algo = Imputation.PatternSearch.STMVL(incomp_data) algo.logs = False - algo.impute(user_defined=True, params={"window_size": window_size, "gamma": gamma, "alpha": alpha}) + algo.impute(user_def=True, params={"window_size": window_size, "gamma": gamma, "alpha": alpha}) else: raise ValueError(f"Invalid algorithm: {algorithm}") - algo.score(ground_truth) + algo.score(input_data) error_measures = algo.metrics return error_measures @@ -334,9 +334,9 @@ def impute(self, params=None): Returns ------- self : ZeroImpute - The object with `imputed_matrix` set. + The object with `recov_data` set. """ - self.imputed_matrix = zero_impute(self.infected_matrix, params) + self.recov_data = zero_impute(self.incomp_data, params) return self @@ -364,9 +364,9 @@ def impute(self, params=None): Returns ------- self : MinImpute - The object with `imputed_matrix` set. + The object with `recov_data` set. """ - self.imputed_matrix = min_impute(self.infected_matrix, params) + self.recov_data = min_impute(self.incomp_data, params) return self @@ -394,9 +394,9 @@ def impute(self, params=None): Returns ------- self : MinImpute - The object with `imputed_matrix` set. + The object with `recov_data` set. """ - self.imputed_matrix = mean_impute(self.infected_matrix, params) + self.recov_data = mean_impute(self.incomp_data, params) return self @@ -406,18 +406,18 @@ class IIM(BaseImputer): Methods ------- - impute(self, user_defined=True, params=None): + impute(self, user_def=True, params=None): Perform imputation using the IIM algorithm. """ algorithm = "iim" - def impute(self, user_defined=True, params=None): + def impute(self, user_def=True, params=None): """ Perform imputation using the IIM algorithm. Parameters ---------- - user_defined : bool, optional + user_def : bool, optional Whether to use user-defined or default parameters (default is True). params : dict, optional Parameters of the IIM algorithm, if None, default ones are loaded. 
@@ -430,15 +430,15 @@ def impute(self, user_defined=True, params=None): Returns ------- self : IIM - The object with `imputed_matrix` set. + The object with `recov_data` set. Example ------- - >>> iim_imputer = Imputation.Statistics.IIM(infected_matrix) + >>> iim_imputer = Imputation.Statistics.IIM(incomp_data) >>> iim_imputer.impute() # default parameters for imputation > or - >>> iim_imputer.impute(user_defined=True, params={'learning_neighbors': 10}) # user-defined > or - >>> iim_imputer.impute(user_defined=False, params={"ground_truth": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 2}}) # auto-ml with bayesian - >>> imputed_data = iim_imputer.imputed_matrix + >>> iim_imputer.impute(user_def=True, params={'learning_neighbors': 10}) # user-defined > or + >>> iim_imputer.impute(user_def=False, params={"input_data": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 2}}) # auto-ml with bayesian + >>> recov_data = iim_imputer.recov_data References ---------- @@ -446,12 +446,12 @@ def impute(self, user_defined=True, params=None): keywords: {Data models;Adaptation models;Computational modeling;Predictive models;Numerical models;Aggregates;Regression tree analysis;Missing values;Data imputation} """ if params is not None: - learning_neighbours, algo_code = self._check_params(user_defined, params) + learning_neighbours, algo_code = self._check_params(user_def, params) else: learning_neighbours, algo_code = utils.load_parameters(query="default", algorithm=self.algorithm) - self.imputed_matrix = iim(contamination=self.infected_matrix, number_neighbor=learning_neighbours, - algo_code=algo_code, logs=self.logs) + self.recov_data = iim(incomp_data=self.incomp_data, number_neighbor=learning_neighbours, + algo_code=algo_code, logs=self.logs) return self @@ -471,19 +471,19 @@ class CDRec(BaseImputer): Methods ------- - impute(self, user_defined=True, params=None): + impute(self, user_def=True, params=None): Perform imputation using the CDRec algorithm. """ algorithm = "cdrec" - def impute(self, user_defined=True, params=None): + def impute(self, user_def=True, params=None): """ Perform imputation using the CDRec algorithm. Parameters ---------- - user_defined : bool, optional + user_def : bool, optional Whether to use user-defined or default parameters (default is True). params : dict, optional Parameters of the CDRec algorithm or Auto-ML configuration, if None, default ones are loaded. @@ -499,7 +499,7 @@ def impute(self, user_defined=True, params=None): **Auto-ML parameters:** - - ground_truth : numpy.ndarray + - input_data : numpy.ndarray The original time series dataset without contamination. - optimizer : str The optimizer to use for parameter optimization. Valid values are "bayesian", "greedy", "pso", or "sh". @@ -510,7 +510,7 @@ def impute(self, user_defined=True, params=None): - n_calls : int, optional Number of calls to the objective function. Default is 3. - - selected_metrics : list, optional + - metrics : list, optional List of selected metrics to consider for optimization. Default is ["RMSE"]. - n_random_starts : int, optional Number of initial calls to the objective function, from random points. Default is 50. @@ -521,7 +521,7 @@ def impute(self, user_defined=True, params=None): - n_calls : int, optional Number of calls to the objective function. Default is 3. - - selected_metrics : list, optional + - metrics : list, optional List of selected metrics to consider for optimization. Default is ["RMSE"]. 
**PSO:** @@ -551,15 +551,15 @@ def impute(self, user_defined=True, params=None): Returns ------- self : CDRec - CDRec object with `imputed_matrix` set. + CDRec object with `recov_data` set. Example ------- - >>> cdrec_imputer = Imputation.MatrixCompletion.CDRec(infected_matrix) + >>> cdrec_imputer = Imputation.MatrixCompletion.CDRec(incomp_data) >>> cdrec_imputer.impute() # default parameters for imputation > or - >>> cdrec_imputer.impute(user_defined=True, params={'rank': 5, 'epsilon': 0.01, 'iterations': 100}) # user-defined > or - >>> cdrec_imputer.impute(user_defined=False, params={"ground_truth": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 2}}) # auto-ml with bayesian - >>> imputed_data = cdrec_imputer.imputed_matrix + >>> cdrec_imputer.impute(user_def=True, params={'rank': 5, 'epsilon': 0.01, 'iterations': 100}) # user-defined > or + >>> cdrec_imputer.impute(user_def=False, params={"input_data": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 2}}) # auto-ml with bayesian + >>> recov_data = cdrec_imputer.recov_data References ---------- @@ -567,12 +567,12 @@ def impute(self, user_defined=True, params=None): """ if params is not None: - rank, epsilon, iterations = self._check_params(user_defined, params) + rank, epsilon, iterations = self._check_params(user_def, params) else: rank, epsilon, iterations = utils.load_parameters(query="default", algorithm=self.algorithm) - self.imputed_matrix = cdrec(contamination=self.infected_matrix, truncation_rank=rank, - iterations=iterations, epsilon=epsilon, logs=self.logs) + self.recov_data = cdrec(incomp_data=self.incomp_data, truncation_rank=rank, + iterations=iterations, epsilon=epsilon, logs=self.logs) return self @@ -593,18 +593,18 @@ class MRNN(BaseImputer): Methods ------- - impute(self, user_defined=True, params=None): + impute(self, user_def=True, params=None): Perform imputation using the MRNN algorithm. """ algorithm = "mrnn" - def impute(self, user_defined=True, params=None): + def impute(self, user_def=True, params=None): """ Perform imputation using the MRNN algorithm. Parameters ---------- - user_defined : bool, optional + user_def : bool, optional Whether to use user-defined or default parameters (default is True). params : dict, optional Parameters of the MRNN algorithm, if None, default ones are loaded. @@ -621,29 +621,29 @@ def impute(self, user_defined=True, params=None): Returns ------- self : MRNN - The object with `imputed_matrix` set. + The object with `recov_data` set. Example ------- - >>> mrnn_imputer = Imputation.DeepLearning.MRNN(infected_matrix) + >>> mrnn_imputer = Imputation.DeepLearning.MRNN(incomp_data) >>> mrnn_imputer.impute() # default parameters for imputation > or - >>> mrnn_imputer.impute(user_defined=True, params={'hidden_dim': 10, 'learning_rate':0.01, 'iterations':50, 'sequence_length': 7}) # user-defined > or - >>> mrnn_imputer.impute(user_defined=False, params={"ground_truth": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 2}}) # auto-ml with bayesian - >>> imputed_data = mrnn_imputer.imputed_matrix + >>> mrnn_imputer.impute(user_def=True, params={'hidden_dim': 10, 'learning_rate':0.01, 'iterations':50, 'sequence_length': 7}) # user-defined > or + >>> mrnn_imputer.impute(user_def=False, params={"input_data": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 2}}) # auto-ml with bayesian + >>> recov_data = mrnn_imputer.recov_data References ---------- J. Yoon, W. R. Zame and M. 
van der Schaar, "Estimating Missing Data in Temporal Data Streams Using Multi-Directional Recurrent Neural Networks," in IEEE Transactions on Biomedical Engineering, vol. 66, no. 5, pp. 1477-1490, May 2019, doi: 10.1109/TBME.2018.2874712. keywords: {Time measurement;Interpolation;Estimation;Medical diagnostic imaging;Correlation;Recurrent neural networks;Biomedical measurement;Missing data;temporal data streams;imputation;recurrent neural nets} """ if params is not None: - hidden_dim, learning_rate, iterations, sequence_length = self._check_params(user_defined, params) + hidden_dim, learning_rate, iterations, sequence_length = self._check_params(user_def, params) else: hidden_dim, learning_rate, iterations, sequence_length = utils.load_parameters(query="default", algorithm="mrnn") - self.imputed_matrix = mrnn(contamination=self.infected_matrix, hidden_dim=hidden_dim, - learning_rate=learning_rate, iterations=iterations, - sequence_length=sequence_length, logs=self.logs) + self.recov_data = mrnn(incomp_data=self.incomp_data, hidden_dim=hidden_dim, + learning_rate=learning_rate, iterations=iterations, + sequence_length=sequence_length, logs=self.logs) return self @@ -663,18 +663,18 @@ class STMVL(BaseImputer): Methods ------- - impute(self, user_defined=True, params=None): + impute(self, user_def=True, params=None): Perform imputation using the STMVL algorithm. """ algorithm = "stmvl" - def impute(self, user_defined=True, params=None): + def impute(self, user_def=True, params=None): """ Perform imputation using the STMVL algorithm. Parameters ---------- - user_defined : bool, optional + user_def : bool, optional Whether to use user-defined or default parameters (default is True). params : dict, optional Parameters of the STMVL algorithm, if None, default ones are loaded. @@ -689,15 +689,15 @@ def impute(self, user_defined=True, params=None): Returns ------- self : STMVL - The object with `imputed_matrix` set. + The object with `recov_data` set. Example ------- - >>> stmvl_imputer = Imputation.PatternSearch.STMVL(infected_matrix) + >>> stmvl_imputer = Imputation.PatternSearch.STMVL(incomp_data) >>> stmvl_imputer.impute() # default parameters for imputation > or - >>> stmvl_imputer.impute(user_defined=True, params={'window_size': 7, 'learning_rate':0.01, 'gamma':0.85, 'alpha': 7}) # user-defined > or - >>> stmvl_imputer.impute(user_defined=False, params={"ground_truth": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 2}}) # auto-ml with bayesian - >>> imputed_data = stmvl_imputer.imputed_matrix + >>> stmvl_imputer.impute(user_def=True, params={'window_size': 7, 'learning_rate':0.01, 'gamma':0.85, 'alpha': 7}) # user-defined > or + >>> stmvl_imputer.impute(user_def=False, params={"input_data": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 2}}) # auto-ml with bayesian + >>> recov_data = stmvl_imputer.recov_data References ---------- @@ -705,12 +705,12 @@ def impute(self, user_defined=True, params=None): School of Information Science and Technology, Southwest Jiaotong University; Microsoft Research; Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences. 
""" if params is not None: - window_size, gamma, alpha = self._check_params(user_defined, params) + window_size, gamma, alpha = self._check_params(user_def, params) else: window_size, gamma, alpha = utils.load_parameters(query="default", algorithm="stmvl") - self.imputed_matrix = stmvl(contamination=self.infected_matrix, window_size=window_size, gamma=gamma, - alpha=alpha, logs=self.logs) + self.recov_data = stmvl(incomp_data=self.incomp_data, window_size=window_size, gamma=gamma, + alpha=alpha, logs=self.logs) return self diff --git a/imputegap/recovery/manager.py b/imputegap/recovery/manager.py index 92437b2..7409fcf 100644 --- a/imputegap/recovery/manager.py +++ b/imputegap/recovery/manager.py @@ -1,3 +1,4 @@ +import datetime import os import time import numpy as np @@ -50,12 +51,11 @@ class TimeSeries: normalize(normalizer="z_score") : Normalizes the time series dataset. - plot(raw_data, infected_data=None, imputed_data=None, title="Time Series Data", - max_series=None, max_values=None, size=(16, 8), save_path="", display=True) : + plot(input_data, incomp_data=None, recov_data=None, max_series=None, max_values=None, size=(16, 8), save_path="", display=True) : Plots the time series data, including raw, contaminated, or imputed data. - Contaminate : - Class containing methods to contaminate time series data with missing values based on different scenarios. + Contamination : + Class containing methods to contaminate time series data with missing values based on different patterns. """ @@ -153,46 +153,53 @@ def load_timeseries(self, data, max_series=None, max_values=None, header=False): return self - def print(self, limit=10, view_by_series=False): + def print(self, limit_timestamps=10, limit_series=7, view_by_series=False): """ Prints a limited number of time series from the dataset. Parameters ---------- - limit : int, optional - The number of series to print. Default is 10. + limit_timestamps : int, optional + The number of timestamps to print. Default is 15. Use -1 for no restriction. + limit_series : int, optional + The number of series to print. Default is 10. Use -1 for no restriction. view_by_series : bool, optional - Whether to view by series (True) or by values (False). + Whether to view by series (True) or by values (False). 
Returns ------- None """ - print("\nTime Series set :") to_print = self.data nbr_series, nbr_values = to_print.shape - print_col, print_row = "Values", "Series" + print_col, print_row = "Timestamp", "Series" + + if limit_timestamps == -1: + limit_timestamps = to_print.shape[1] + if limit_series == -1: + limit_series = to_print.shape[0] + to_print = to_print[:limit_series, :limit_timestamps] if not view_by_series: to_print = to_print.T - print_col, print_row = "Series", "Values" - - print(f"{' ':19}", end="") - for i, _ in enumerate(to_print[1]): - if i < 10: - print(f"{print_col} {i}", end=" " * 8) - elif i < 100: - print(f"{print_col} {i}", end=" " * 7) - else: - print(f"{print_col} {i}", end=" " * 6) + print_col, print_row = "Series", "Timestamp" + + header_format = "{:<15}" # Fixed size for headers + value_format = "{:>15.10f}" # Fixed size for values + # Print the header + print(f"{'':<18}", end="") # Empty space for the row labels + for i in range(to_print.shape[1]): + print(header_format.format(f"{print_col}_{i + 1}"), end="") print() - for i, series in enumerate(to_print[:limit]): - print(f"{print_row} {i} \t\t" + " ".join([f"{elem:15.10f}" for elem in series])) + # Print each limited series with fixed size + for i, series in enumerate(to_print): + print(header_format.format(f"{print_row} {i + 1}"), end="") + print("".join([value_format.format(elem) for elem in series])) - if limit < to_print.shape[0]: + if limit_series < nbr_series: print("...") print("\nshape of the time series :", to_print.shape, "\n\tnumber of series =", nbr_series, @@ -214,7 +221,7 @@ def print_results(self, metrics, algorithm=""): None """ - print("\n\nResults of the imputation ", algorithm, " :") + print("\n\nImputation Results of", algorithm, ":") for key, value in metrics.items(): print(f"{key:<20} = {value}") print("\n") @@ -286,27 +293,26 @@ def normalize(self, normalizer="z_score"): print(f"\n\t\t> logs, normalization {normalizer} - Execution Time: {(end_time - start_time):.4f} seconds\n") - def plot(self, raw_data, infected_data=None, imputed_data=None, title="Time Series Data", max_series=None, - max_values=None, series_x=None, size=(16, 8), save_path="", display=True): + def plot(self, input_data, incomp_data=None, recov_data=None, max_series=None, max_values=None, series_range=None, subplot=True, size=(16, 8), save_path="", display=True): """ Plot the time series data, including raw, contaminated, or imputed data. Parameters ---------- - raw_data : numpy.ndarray + input_data : numpy.ndarray The original time series data without contamination. - infected_data : numpy.ndarray, optional + incomp_data : numpy.ndarray, optional The contaminated time series data. - imputed_data : numpy.ndarray, optional + recov_data : numpy.ndarray, optional The imputed time series data. - title : str, optional - Title of the plot. Default is "Time Series Data". max_series : int, optional The maximum number of series to plot. max_values : int, optional The maximum number of values per series to plot. - series_x : int, optional + series_range : int, optional The index of a specific series to plot. If set, only this series will be plotted. + subplot : bool, optional + Print one time series by subplot or all in the same plot. size : tuple, optional Size of the plot in inches. Default is (16, 8). save_path : str, optional @@ -320,116 +326,147 @@ def plot(self, raw_data, infected_data=None, imputed_data=None, title="Time Seri The file path of the saved plot, if applicable. 
""" number_of_series = 0 - plt.figure(figsize=size) - plt.grid(True, linestyle='--', color='#d3d3d3', linewidth=0.6) + if max_series is None: - max_series, _ = raw_data.shape + max_series = input_data.shape[0] if max_values is None: - _, max_values = raw_data.shape + max_values = input_data.shape[1] - if raw_data is not None: + series_indices = [series_range] if series_range is not None else range(min(input_data.shape[0], max_series)) + n_series_to_plot = len(series_indices) + if subplot: + n_cols = min(3, n_series_to_plot) + n_rows = (n_series_to_plot + n_cols - 1) // n_cols + fig, axes = plt.subplots(n_rows, n_cols, figsize=size, squeeze=False) + axes = axes.flatten() + else: + plt.figure(figsize=size) + plt.grid(True, linestyle='--', color='#d3d3d3', linewidth=0.6) + + if input_data is not None: colors = utils.load_parameters("default", algorithm="colors") - # Determine range of series to plot - series_indices = [series_x] if series_x is not None else range(raw_data.shape[0]) + for idx, i in enumerate(series_indices): + + if subplot: + color = colors[0] + else: + color = colors[i % len(colors)] - for i in series_indices: - color = colors[i % len(colors)] + timestamps = np.arange(min(input_data.shape[1], max_values)) - if infected_data is None and imputed_data is None: # plot only raw matrix - plt.plot(np.arange(min(raw_data.shape[1], max_values)), raw_data[i, :max_values], linewidth=2.5, + # Select the current axes if using subplots + if subplot: + ax = axes[idx] + ax.grid(True, linestyle='--', color='#d3d3d3', linewidth=0.6) + else: + ax = plt + + if incomp_data is None and recov_data is None: # plot only raw matrix + ax.plot(timestamps, input_data[i, :max_values], linewidth=2.5, color=color, linestyle='-', label=f'TS {i + 1}') - if infected_data is not None and imputed_data is None: # plot infected matrix - if np.isnan(infected_data[i, :]).any(): - plt.plot(np.arange(min(raw_data.shape[1], max_values)), raw_data[i, :max_values], linewidth=1.5, + if incomp_data is not None and recov_data is None: # plot infected matrix + if np.isnan(incomp_data[i, :]).any(): + ax.plot(timestamps, input_data[i, :max_values], linewidth=1.5, color='r', linestyle='--', label=f'TS-MB {i + 1}') - plt.plot(np.arange(min(infected_data.shape[1], max_values)), infected_data[i, :max_values], + ax.plot(np.arange(min(incomp_data.shape[1], max_values)), incomp_data[i, :max_values], color=color, linewidth=2.5, linestyle='-', label=f'TS-RAW {i + 1}') - if imputed_data is not None: # plot imputed matrix - if np.isnan(infected_data[i, :]).any(): - plt.plot(np.arange(min(imputed_data.shape[1], max_values)), imputed_data[i, :max_values], + if recov_data is not None: # plot imputed matrix + if np.isnan(incomp_data[i, :]).any(): + ax.plot(np.arange(min(recov_data.shape[1], max_values)), recov_data[i, :max_values], linestyle='-', color="r", label=f'TS-IMP {i + 1}') - if np.isnan(infected_data[i, :]).any(): - plt.plot(np.arange(min(raw_data.shape[1], max_values)), raw_data[i, :max_values], linewidth=1.5, + if np.isnan(incomp_data[i, :]).any(): + ax.plot(timestamps, input_data[i, :max_values], linewidth=1.5, linestyle='--', color=color, label=f'TS-MB {i + 1}') - plt.plot(np.arange(min(infected_data.shape[1], max_values)), infected_data[i, :max_values], + ax.plot(np.arange(min(incomp_data.shape[1], max_values)), incomp_data[i, :max_values], color=color, linewidth=2.5, linestyle='-', label=f'TS-RAW {i + 1}') + # Label and legend for subplot + if subplot: + ax.set_xlabel('Timestamp') + ax.set_ylabel('Values') + 
ax.legend(loc='upper left', fontsize=8) + ax.set_title(f'Time Series {i + 1}') + number_of_series += 1 if number_of_series == max_series: break - plt.xlabel('Timestamp') - plt.ylabel('Values') - plt.title(title) - plt.legend( - loc='upper left', - fontsize=12, - frameon=True, - fancybox=True, - shadow=True, - borderpad=1.5, - bbox_to_anchor=(1.02, 1), # Adjusted to keep the legend inside the window - ) + if subplot: + for idx in range(len(series_indices), len(axes)): + axes[idx].axis('off') + + if not subplot: + plt.xlabel('Timestamp') + plt.ylabel('Values') + plt.legend( + loc='upper left', + fontsize=12, + frameon=True, + fancybox=True, + shadow=True, + borderpad=1.5, + bbox_to_anchor=(1.02, 1), # Adjusted to keep the legend inside the window + ) file_path = None + if save_path: os.makedirs(save_path, exist_ok=True) - file_path = os.path.join(save_path + "/" + title.replace(" ", "") + "_plot.jpg") + + now = datetime.datetime.now() + current_time = now.strftime("%y_%m_%d_%H_%M_%S") + + file_path = os.path.join(save_path + "/" + current_time + "_plot.jpg") plt.savefig(file_path, bbox_inches='tight') print("plots saved in ", file_path) if display: plt.show() - # plt.close() - return file_path - class Contaminate: + class Contamination: """ - Inner class to apply contamination scenarios to the time series data. + Inner class to apply contamination patterns to the time series data. Methods ------- - mcar(ts, series_impacted=0.2, missing_rate=0.2, block_size=10, protection=0.1, use_seed=True, seed=42, explainer=False) : + mcar(ts, series_rate=0.2, missing_rate=0.2, block_size=10, offset=0.1, seed=True, explainer=False) : Apply Missing Completely at Random (MCAR) contamination to the time series data. - missing_percentage(ts, series_impacted=0.2, missing_rate=0.2, protection=0.1) : + missing_percentage(ts, series_rate=0.2, missing_rate=0.2, offset=0.1) : Apply missing percentage contamination to the time series data. - blackout(ts, missing_rate=0.2, protection=0.1) : + blackout(ts, missing_rate=0.2, offset=0.1) : Apply blackout contamination to the time series data. """ - def mcar(ts, series_impacted=0.2, missing_rate=0.2, block_size=10, protection=0.1, use_seed=True, seed=42, - explainer=False): + def mcar(input_data, series_rate=0.2, missing_rate=0.2, block_size=10, offset=0.1, seed=True, explainer=False): """ Apply Missing Completely at Random (MCAR) contamination to the time series data. Parameters ---------- - ts : numpy.ndarray + input_data : numpy.ndarray The time series dataset to contaminate. - series_impacted : float, optional + series_rate : float, optional Percentage of series to contaminate (default is 0.2). missing_rate : float, optional Percentage of missing values per series (default is 0.2). block_size : int, optional Size of the block of missing data (default is 10). - protection : float, optional + offset : float, optional Size of the uncontaminated section at the beginning of the series (default is 0.1). - use_seed : bool, optional + seed : bool, optional Whether to use a seed for reproducibility (default is True). - seed : int, optional - The value of the seed (default is 42). explainer : bool, optional Whether to apply MCAR to specific series for explanation purposes (default is False). @@ -439,37 +476,38 @@ def mcar(ts, series_impacted=0.2, missing_rate=0.2, block_size=10, protection=0. The contaminated time series data. 
""" - if use_seed: - np.random.seed(seed) + if seed: + seed_value = 42 + np.random.seed(seed_value) - ts_contaminated = ts.copy() + ts_contaminated = input_data.copy() M, _ = ts_contaminated.shape if not explainer: # use random series missing_rate = utils.verification_limitation(missing_rate) - series_impacted = utils.verification_limitation(series_impacted) - protection = utils.verification_limitation(protection) + series_rate = utils.verification_limitation(series_rate) + offset = utils.verification_limitation(offset) - nbr_series_impacted = int(np.ceil(M * series_impacted)) + nbr_series_impacted = int(np.ceil(M * series_rate)) series_selected = [str(idx) for idx in np.random.choice(M, nbr_series_impacted, replace=False)] else: # use fix series - series_selected = [str(series_impacted)] + series_selected = [str(series_rate)] if not explainer: print("\n\nMCAR contamination has been called with :" - "\n\ta number of series impacted ", series_impacted * 100, "%", + "\n\ta number of series impacted ", series_rate * 100, "%", "\n\ta missing rate of ", missing_rate * 100, "%", - "\n\ta starting position at ", protection, + "\n\ta starting position at ", offset, "\n\ta block size of ", block_size, - "\n\twith a seed option set to ", use_seed, + "\n\twith a seed option set to ", seed, "\n\tshape of the set ", ts_contaminated.shape, "\n\tthis selection of series", *series_selected, "\n\n") for series in series_selected: S = int(series) N = len(ts_contaminated[S]) # number of values in the series - P = int(N * protection) # values to protect in the beginning of the series + P = int(N * offset) # values to protect in the beginning of the series W = int((N - P) * missing_rate) # number of data to remove B = int(W / block_size) # number of block to remove @@ -498,19 +536,19 @@ def mcar(ts, series_impacted=0.2, missing_rate=0.2, block_size=10, protection=0. return ts_contaminated - def missing_percentage(ts, series_impacted=0.2, missing_rate=0.2, protection=0.1): + def missing_percentage(input_data, series_rate=0.2, missing_rate=0.2, offset=0.1): """ Apply missing percentage contamination to the time series data. Parameters ---------- - ts : numpy.ndarray + input_data : numpy.ndarray The time series dataset to contaminate. - series_impacted : float, optional + series_rate : float, optional Percentage of series to contaminate (default is 0.2). missing_rate : float, optional Percentage of missing values per series (default is 0.2). - protection : float, optional + offset : float, optional Size of the uncontaminated section at the beginning of the series (default is 0.1). Returns @@ -519,26 +557,26 @@ def missing_percentage(ts, series_impacted=0.2, missing_rate=0.2, protection=0.1 The contaminated time series data. 
""" - ts_contaminated = ts.copy() + ts_contaminated = input_data.copy() M, _ = ts_contaminated.shape missing_rate = utils.verification_limitation(missing_rate) - series_impacted = utils.verification_limitation(series_impacted) - protection = utils.verification_limitation(protection) + series_rate = utils.verification_limitation(series_rate) + offset = utils.verification_limitation(offset) - nbr_series_impacted = int(np.ceil(M * series_impacted)) + nbr_series_impacted = int(np.ceil(M * series_rate)) print("\n\nMISSING PERCENTAGE contamination has been called with :" - "\n\ta number of series impacted ", series_impacted * 100, "%", + "\n\ta number of series impacted ", series_rate * 100, "%", "\n\ta missing rate of ", missing_rate * 100, "%", - "\n\ta starting position at ", protection, + "\n\ta starting position at ", offset, "\n\tshape of the set ", ts_contaminated.shape, "\n\tthis selection of series 0 to ", nbr_series_impacted, "\n\n") for series in range(0, nbr_series_impacted): S = int(series) N = len(ts_contaminated[S]) # number of values in the series - P = int(N * protection) # values to protect in the beginning of the series + P = int(N * offset) # values to protect in the beginning of the series W = int((N - P) * missing_rate) # number of data to remove for to_remove in range(0, W): @@ -547,17 +585,17 @@ def missing_percentage(ts, series_impacted=0.2, missing_rate=0.2, protection=0.1 return ts_contaminated - def blackout(ts, missing_rate=0.2, protection=0.1): + def blackout(input_data, missing_rate=0.2, offset=0.1): """ Apply blackout contamination to the time series data. Parameters ---------- - ts : numpy.ndarray + input_data : numpy.ndarray The time series dataset to contaminate. missing_rate : float, optional Percentage of missing values per series (default is 0.2). - protection : float, optional + offset : float, optional Size of the uncontaminated section at the beginning of the series (default is 0.1). Returns @@ -565,5 +603,5 @@ def blackout(ts, missing_rate=0.2, protection=0.1): numpy.ndarray The contaminated time series data. """ - return TimeSeries.Contaminate.missing_percentage(ts, series_impacted=1, missing_rate=missing_rate, - protection=protection) + return TimeSeries.Contamination.missing_percentage(input_data, series_rate=1, missing_rate=missing_rate, + offset=offset) diff --git a/imputegap/recovery/optimization.py b/imputegap/recovery/optimization.py index 9bf549d..bb212c5 100644 --- a/imputegap/recovery/optimization.py +++ b/imputegap/recovery/optimization.py @@ -28,7 +28,7 @@ class BaseOptimizer: _objective(**kwargs): Abstract method to evaluate the imputation algorithm with the provided parameters. Must be implemented by subclasses. - optimize(ground_truth, contamination, selected_metrics, algorithm, **kwargs): + optimize(input_data, incomp_data, metrics, algorithm, **kwargs): Abstract method for the main optimization process. Must be implemented by subclasses. """ @@ -50,13 +50,13 @@ def _objective(self, **kwargs): ---------- **kwargs : dict Parameters needed to evaluate the imputation algorithm, such as: - - ground_truth : numpy.ndarray + - input_data : numpy.ndarray The ground truth time series dataset. - contamination : numpy.ndarray The contaminated time series dataset to impute. - algorithm : str The imputation algorithm name. - - selected_metrics : list of str + - metrics : list of str List of selected metrics for optimization. - params : dict or list Parameter values for the optimization. 
@@ -68,7 +68,7 @@ def _objective(self, **kwargs): """ raise NotImplementedError("Subclasses must implement the _objective method") - def optimize(self, ground_truth, contamination, selected_metrics, algorithm, **kwargs): + def optimize(self, input_data, incomp_data, metrics, algorithm, **kwargs): """ Abstract method for optimization. Must be implemented in subclasses. @@ -78,11 +78,11 @@ def optimize(self, ground_truth, contamination, selected_metrics, algorithm, **k Parameters ---------- - ground_truth : numpy.ndarray + input_data : numpy.ndarray The ground truth time series dataset. - contamination : numpy.ndarray + incomp_data : numpy.ndarray The contaminated time series dataset to impute. - selected_metrics : list of str + metrics : list of str List of selected metrics for optimization. algorithm : str The imputation algorithm to optimize. @@ -106,16 +106,16 @@ class Optimization: Methods ------- - Greedy.optimize(ground_truth, contamination, selected_metrics=["RMSE"], algorithm="cdrec", n_calls=250): + Greedy.optimize(input_data, incomp_data, metrics=["RMSE"], algorithm="cdrec", n_calls=250): Perform greedy optimization for hyperparameters. - Bayesian.optimize(ground_truth, contamination, selected_metrics=["RMSE"], algorithm="cdrec", n_calls=100, n_random_starts=50, acq_func='gp_hedge'): + Bayesian.optimize(input_data, incomp_data, metrics=["RMSE"], algorithm="cdrec", n_calls=100, n_random_starts=50, acq_func='gp_hedge'): Perform Bayesian optimization for hyperparameters. - ParticleSwarm.optimize(ground_truth, contamination, selected_metrics, algorithm, n_particles, c1, c2, w, iterations, n_processes): + ParticleSwarm.optimize(input_data, incomp_data, metrics, algorithm, n_particles, c1, c2, w, iterations, n_processes): Perform Particle Swarm Optimization (PSO) for hyperparameters. - SuccessiveHalving.optimize(ground_truth, contamination, selected_metrics, algorithm, num_configs, num_iterations, reduction_factor): + SuccessiveHalving.optimize(input_data, incomp_data, metrics, algorithm, num_configs, num_iterations, reduction_factor): Perform Successive Halving optimization for hyperparameters. """ @@ -124,19 +124,19 @@ class Greedy(BaseOptimizer): Greedy optimization strategy for hyperparameters. """ - def _objective(self, ground_truth, contamination, algorithm, selected_metrics, params): + def _objective(self, input_data, incomp_data, algorithm, metrics, params): """ Objective function for Greedy optimization. Parameters ---------- - ground_truth : numpy.ndarray + input_data : numpy.ndarray The ground truth time series dataset. - contamination : numpy.ndarray + incomp_data : numpy.ndarray The contaminated time series dataset to impute. algorithm : str The imputation algorithm name. - selected_metrics : list of str + metrics : list of str List of selected metrics for optimization. params : dict The parameters for the imputation algorithm. @@ -146,24 +146,24 @@ def _objective(self, ground_truth, contamination, algorithm, selected_metrics, p float Mean error for the selected metrics. 
""" - errors = Imputation.evaluate_params(ground_truth, contamination, params, algorithm) + errors = Imputation.evaluate_params(input_data, incomp_data, params, algorithm) - if not isinstance(selected_metrics, list): - selected_metrics = [selected_metrics] + if not isinstance(metrics, list): + metrics = [metrics] - return np.mean([errors[metric] for metric in selected_metrics]) + return np.mean([errors[metric] for metric in metrics]) - def optimize(self, ground_truth, contamination, selected_metrics=["RMSE"], algorithm="cdrec", n_calls=250): + def optimize(self, input_data, incomp_data, metrics=["RMSE"], algorithm="cdrec", n_calls=250): """ Perform greedy optimization for hyperparameters. Parameters ---------- - ground_truth : numpy.ndarray + input_data : numpy.ndarray The ground truth time series dataset. - contamination : numpy.ndarray + incomp_data : numpy.ndarray The contaminated time series dataset to impute. - selected_metrics : list of str, optional + metrics : list of str, optional List of selected metrics for optimization (default is ["RMSE"]). algorithm : str, optional The imputation algorithm to optimize (default is 'cdrec'). @@ -202,7 +202,7 @@ def optimize(self, ground_truth, contamination, selected_metrics=["RMSE"], algor params_dict = {name: value for name, value in zip(param_names, params)} # Calculate the score for the current set of parameters - score = self._objective(ground_truth, contamination, algorithm, selected_metrics, params_dict) + score = self._objective(input_data, incomp_data, algorithm, metrics, params_dict) # Update the best parameters if the current score is better if score < best_score: @@ -222,19 +222,19 @@ class Bayesian(BaseOptimizer): Bayesian optimization strategy for hyperparameters. """ - def _objective(self, ground_truth, contamination, algorithm, selected_metrics, params): + def _objective(self, input_data, incomp_data, algorithm, metrics, params): """ Objective function for Bayesian optimization. Parameters ---------- - ground_truth : numpy.ndarray + input_data : numpy.ndarray The ground truth time series dataset. - contamination : numpy.ndarray + incomp_data : numpy.ndarray The contaminated time series dataset to impute. algorithm : str The imputation algorithm name. - selected_metrics : list of str + metrics : list of str List of selected metrics for optimization. params : dict Parameter values for the optimization. @@ -250,24 +250,24 @@ def _objective(self, ground_truth, contamination, algorithm, selected_metrics, p else: param_values = tuple(params) - if not isinstance(selected_metrics, list): - selected_metrics = [selected_metrics] + if not isinstance(metrics, list): + metrics = [metrics] - errors = Imputation.evaluate_params(ground_truth, contamination, param_values, algorithm) - return np.mean([errors[metric] for metric in selected_metrics]) + errors = Imputation.evaluate_params(input_data, incomp_data, param_values, algorithm) + return np.mean([errors[metric] for metric in metrics]) - def optimize(self, ground_truth, contamination, selected_metrics=["RMSE"], algorithm="cdrec", n_calls=100, + def optimize(self, input_data, incomp_data, metrics=["RMSE"], algorithm="cdrec", n_calls=100, n_random_starts=50, acq_func='gp_hedge'): """ Perform Bayesian optimization for hyperparameters. Parameters ---------- - ground_truth : numpy.ndarray + input_data : numpy.ndarray The ground truth time series dataset. - contamination : numpy.ndarray + incomp_data : numpy.ndarray The contaminated time series dataset to impute. 
- selected_metrics : list of str, optional + metrics : list of str, optional List of selected metrics for optimization (default is ["RMSE"]). algorithm : str, optional The imputation algorithm to optimize (default is 'cdrec'). @@ -287,9 +287,9 @@ def optimize(self, ground_truth, contamination, selected_metrics=["RMSE"], algor search_spaces = SEARCH_SPACES - # Adjust the search space for 'cdrec' based on obfuscated_matrix + # Adjust the search space for 'cdrec' based on incomp_data if algorithm == 'cdrec': - max_rank = contamination.shape[1] - 1 + max_rank = incomp_data.shape[1] - 1 SEARCH_SPACES['cdrec'][0] = Integer(0, min(9, max_rank), name='rank') # Update the rank range # Define the search space @@ -299,7 +299,7 @@ def optimize(self, ground_truth, contamination, selected_metrics=["RMSE"], algor optimizer = skopt.Optimizer(dimensions=space, n_initial_points=n_random_starts, acq_func=acq_func) for i in range(n_calls): suggested_params = optimizer.ask() - score = self._objective(ground_truth, contamination, algorithm, selected_metrics, suggested_params) + score = self._objective(input_data, incomp_data, algorithm, metrics, suggested_params) optimizer.tell(suggested_params, score) # Optimal parameters @@ -343,19 +343,19 @@ def _format_params(self, particle_params, algorithm): return particle_params - def _objective(self, ground_truth, contamination, algorithm, selected_metrics, params): + def _objective(self, input_data, incomp_data, algorithm, metrics, params): """ Objective function for Particle Swarm Optimization. Parameters ---------- - ground_truth : numpy.ndarray + input_data : numpy.ndarray The ground truth time series dataset. - contamination : numpy.ndarray + incomp_data : numpy.ndarray The contaminated time series dataset to impute. algorithm : str The imputation algorithm name. - selected_metrics : list of str + metrics : list of str List of selected metrics for optimization. params : numpy.ndarray Parameter values for the optimization. @@ -373,22 +373,22 @@ def _objective(self, ground_truth, contamination, algorithm, selected_metrics, p for i in range(n_particles): # Iterate over each particle particle_params = self._format_params(params[i], algorithm) # Get the parameters for this particle - errors = Imputation.evaluate_params(ground_truth, contamination, tuple(particle_params), algorithm) - errors_for_all_particles[i] = np.mean([errors[metric] for metric in selected_metrics]) + errors = Imputation.evaluate_params(input_data, incomp_data, tuple(particle_params), algorithm) + errors_for_all_particles[i] = np.mean([errors[metric] for metric in metrics]) return errors_for_all_particles - def optimize(self, ground_truth, contamination, selected_metrics, algorithm, n_particles, c1, c2, w, iterations, + def optimize(self, input_data, incomp_data, metrics, algorithm, n_particles, c1, c2, w, iterations, n_processes): """ Perform Particle Swarm Optimization for hyperparameters. Parameters ---------- - ground_truth : numpy.ndarray + input_data : numpy.ndarray The ground truth time series dataset. - contamination : numpy.ndarray + incomp_data : numpy.ndarray The contaminated time series dataset to impute. - selected_metrics : list of str, optional + metrics : list of str, optional List of selected metrics for optimization (default is ["RMSE"]). algorithm : str, optional The imputation algorithm to optimize (default is 'cdrec'). 
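# --- Illustrative sketch (not part of the diff) ---------------------------------------
# Direct call to the Bayesian optimizer with the renamed keyword arguments documented in
# the hunk above. A minimal sketch, assuming the import paths and the README's
# "eeg-alcohol" dataset; the n_calls / n_random_starts values are arbitrary.
from imputegap.recovery.manager import TimeSeries
from imputegap.recovery.optimization import Optimization
from imputegap.tools import utils

ts = TimeSeries()
ts.load_timeseries(utils.search_path("eeg-alcohol"))
incomp_data = ts.Contamination.mcar(ts.data)

bo = Optimization.Bayesian()
optimal_params, best_score = bo.optimize(input_data=ts.data,       # ground-truth matrix
                                         incomp_data=incomp_data,  # contaminated matrix
                                         metrics=["RMSE"], algorithm="cdrec",
                                         n_calls=5, n_random_starts=2, acq_func="gp_hedge")
# ---------------------------------------------------------------------------------------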
@@ -412,14 +412,14 @@ def optimize(self, ground_truth, contamination, selected_metrics, algorithm, n_p """ start_time = time.time() # Record start time - if not isinstance(selected_metrics, list): - selected_metrics = [selected_metrics] + if not isinstance(metrics, list): + metrics = [metrics] # Define the search space search_space = SEARCH_SPACES_PSO if algorithm == 'cdrec': - max_rank = contamination.shape[1] - 1 + max_rank = incomp_data.shape[1] - 1 search_space['cdrec'][0] = (search_space['cdrec'][0][0], min(search_space['cdrec'][0][1], max_rank)) # Select the correct search space based on the algorithm @@ -434,7 +434,7 @@ def optimize(self, ground_truth, contamination, selected_metrics, algorithm, n_p options={'c1': c1, 'c2': c2, 'w': w}, bounds=bounds) # Perform optimization - objective_with_args = partial(self._objective, ground_truth, contamination, algorithm, selected_metrics) + objective_with_args = partial(self._objective, input_data, incomp_data, algorithm, metrics) cost, pos = optimizer.optimize(objective_with_args, iters=iterations, n_processes=n_processes) param_names = PARAM_NAMES @@ -450,7 +450,7 @@ def optimize(self, ground_truth, contamination, selected_metrics, algorithm, n_p class SuccessiveHalving(BaseOptimizer): - def _objective(self, errors_dict, selected_metrics): + def _objective(self, errors_dict, metrics): """ Objective function for Successive Halving optimization. @@ -458,7 +458,7 @@ def _objective(self, errors_dict, selected_metrics): ---------- errors_dict : dict Dictionary containing error metrics. - selected_metrics : list of str + metrics : list of str List of selected metrics for optimization. Returns @@ -466,21 +466,21 @@ def _objective(self, errors_dict, selected_metrics): float Mean error for the selected metrics. """ - selected_errors = [errors_dict[metric] for metric in selected_metrics] + selected_errors = [errors_dict[metric] for metric in metrics] return np.mean(selected_errors) - def optimize(self, ground_truth, contamination, selected_metrics, algorithm, num_configs, num_iterations, + def optimize(self, input_data, incomp_data, metrics, algorithm, num_configs, num_iterations, reduction_factor): """ Perform Successive Halving optimization for hyperparameters. Parameters ---------- - ground_truth : numpy.ndarray + input_data : numpy.ndarray The ground truth time series dataset. - contamination : numpy.ndarray + incomp_data : numpy.ndarray The contaminated time series dataset to impute. - selected_metrics : list of str, optional + metrics : list of str, optional List of selected metrics for optimization (default is ["RMSE"]). algorithm : str, optional The imputation algorithm to optimize (default is 'cdrec'). 
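The PSO hunks above bind the dataset arguments into the objective with `functools.partial` and hand it to pyswarms, which evaluates the whole swarm at once: one row of parameters per particle in, one error per particle out. A minimal sketch of that contract, with a toy objective standing in for `Imputation.evaluate_params` (the bounds and data are assumptions for illustration):

```python
# Sketch only: the objective receives params of shape (n_particles, n_dims)
# and must return one score per particle, as _objective does in the diff above.
from functools import partial
import numpy as np
import pyswarms as ps

def objective(data, params):
    # toy error: distance of each particle's parameters from the data mean
    return np.sum((params - data.mean()) ** 2, axis=1)

data = np.random.rand(50)
bounds = (np.zeros(3), np.ones(3) * 10)
optimizer = ps.single.GlobalBestPSO(n_particles=20, dimensions=3,
                                    options={'c1': 0.5, 'c2': 0.3, 'w': 0.9},
                                    bounds=bounds)
cost, pos = optimizer.optimize(partial(objective, data), iters=10)
```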
@@ -498,18 +498,18 @@ def optimize(self, ground_truth, contamination, selected_metrics, algorithm, num """ start_time = time.time() # Record start time - if not isinstance(selected_metrics, list): - selected_metrics = [selected_metrics] + if not isinstance(metrics, list): + metrics = [metrics] # Define the parameter names for each algorithm param_names = PARAM_NAMES - data_length = len(ground_truth) + data_length = len(input_data) chunk_size = data_length // num_iterations # prepare configurations for each algorithm separately if algorithm == 'cdrec': - max_rank = contamination.shape[1] - 1 + max_rank = incomp_data.shape[1] - 1 temp_rank_range = [i for i in sh_params.CDREC_RANK_RANGE if i < max_rank] if not temp_rank_range: @@ -535,12 +535,12 @@ def optimize(self, ground_truth, contamination, selected_metrics, algorithm, num for i in range(num_iterations): # Calculate how much data to use in this iteration end_idx = (i + 1) * chunk_size - partial_ground_truth = ground_truth[:end_idx] - partial_obfuscated = contamination[:end_idx] + partial_input_data = input_data[:end_idx] + partial_obfuscated = incomp_data[:end_idx] scores = [self._objective( - Imputation.evaluate_params(partial_ground_truth, partial_obfuscated, config, algorithm), - selected_metrics) for config in configs] + Imputation.evaluate_params(partial_input_data, partial_obfuscated, config, algorithm), + metrics) for config in configs] top_configs_idx = np.argsort(scores)[:max(1, len(configs) // reduction_factor)] configs = [configs[i] for i in top_configs_idx] @@ -552,14 +552,14 @@ def optimize(self, ground_truth, contamination, selected_metrics, algorithm, num if algorithm == 'iim': best_config = min(configs, key=lambda single_config: self._objective( - Imputation.evaluate_params(ground_truth, contamination, [single_config], algorithm), - selected_metrics)) + Imputation.evaluate_params(input_data, incomp_data, [single_config], algorithm), + metrics)) else: best_config = min(configs, key=lambda config: self._objective( - Imputation.evaluate_params(ground_truth, contamination, config, algorithm), selected_metrics)) + Imputation.evaluate_params(input_data, incomp_data, config, algorithm), metrics)) best_score = self._objective( - Imputation.evaluate_params(ground_truth, contamination, best_config, algorithm), selected_metrics) + Imputation.evaluate_params(input_data, incomp_data, best_config, algorithm), metrics) # Check the size of param_names[algorithm] if len(param_names[algorithm]) == 1: diff --git a/imputegap/report.log b/imputegap/report.log index c845a08..142fb64 100644 --- a/imputegap/report.log +++ b/imputegap/report.log @@ -1,2 +1,14 @@ 2024-11-05 13:35:23,270 - pyswarms.single.global_best - INFO - Optimize for 2 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9} 2024-11-05 13:35:25,844 - pyswarms.single.global_best - INFO - Optimization finished | best cost: 0.30811485946437683, best pos: [8.02117616e+00 4.16577767e-02 4.57882822e+02] +2025-01-06 15:35:47,715 - matplotlib.legend - WARNING - No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. +2025-01-06 15:35:48,117 - matplotlib.legend - WARNING - No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. +2025-01-06 15:35:48,272 - matplotlib.legend - WARNING - No artists with labels found to put in legend. 
Note that artists whose label start with an underscore are ignored when legend() is called with no argument. +2025-01-06 15:35:48,417 - matplotlib.legend - WARNING - No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. +2025-01-06 15:36:29,357 - matplotlib.legend - WARNING - No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. +2025-01-06 15:36:30,118 - matplotlib.legend - WARNING - No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. +2025-01-06 15:36:30,554 - matplotlib.legend - WARNING - No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. +2025-01-06 15:36:31,014 - matplotlib.legend - WARNING - No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. +2025-01-06 15:38:01,844 - matplotlib.legend - WARNING - No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. +2025-01-06 15:38:02,387 - matplotlib.legend - WARNING - No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. +2025-01-06 15:38:02,642 - matplotlib.legend - WARNING - No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. +2025-01-06 15:38:02,826 - matplotlib.legend - WARNING - No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. 
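The SuccessiveHalving hunks in the optimizer diff above grow the data budget each round (a larger prefix of the series per iteration) and keep only the top `1/reduction_factor` of the surviving configurations. A minimal, self-contained sketch of that loop, with a hypothetical `score` function in place of `Imputation.evaluate_params`:

```python
# Sketch only: successive halving over toy configurations, following the
# chunking and reduction logic shown in the diff hunks above.
import numpy as np

def score(partial_input, partial_incomp, config):
    # hypothetical scoring: lower is better
    return abs(config - 3.7) + 0.01 * len(partial_input)

input_data = np.random.rand(120, 5)
incomp_data = input_data.copy()
configs = list(range(10))                  # 10 candidate configurations
num_iterations, reduction_factor = 4, 2
chunk_size = len(input_data) // num_iterations

for i in range(num_iterations):
    end_idx = (i + 1) * chunk_size         # grow the data budget each round
    scores = [score(input_data[:end_idx], incomp_data[:end_idx], c) for c in configs]
    keep = np.argsort(scores)[:max(1, len(configs) // reduction_factor)]
    configs = [configs[k] for k in keep]   # survivors advance to the next round

best_config = configs[0]
```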
diff --git a/imputegap/reports/benchmarking_rmse.jpg b/imputegap/reports/benchmarking_rmse.jpg index 0a14d94..75b3ed7 100644 Binary files a/imputegap/reports/benchmarking_rmse.jpg and b/imputegap/reports/benchmarking_rmse.jpg differ diff --git a/imputegap/runner_benchmark.py b/imputegap/runner_benchmark.py new file mode 100644 index 0000000..3ac76aa --- /dev/null +++ b/imputegap/runner_benchmark.py @@ -0,0 +1,73 @@ +from imputegap.recovery.benchmark import Benchmark + +reconstruction = False +matrix = False + +datasets_full = ["eeg-alcohol", "eeg-reading", "fmri-objectviewing", "fmri-stoptask", "chlorine", "drift"] +dataset_test = ["eeg-alcohol"] + +opti_bayesian = {"optimizer": "bayesian", "options": {"n_calls": 15, "n_random_starts": 50, "acq_func": "gp_hedge", "metrics": "RMSE"}} +opti_greedy = {"optimizer": "greedy", "options": {"n_calls": 250, "metrics": "RMSE"}} +opti_pso = {"optimizer": "pso", "options": {"n_particles": 50, "iterations": 10, "metrics": "RMSE"}} +opti_sh = {"optimizer": "sh", "options": {"num_configs": 10, "num_iterations": 5, "metrics": "RMSE"}} +optimizers = [opti_bayesian] + +algorithms_full = ["mean", "cdrec", "stmvl", "iim", "mrnn"] +algorithms_test = ["mean", "cdrec", "stmvl"] + +patterns_small = ["mcar"] +patterns_full = ["mcar", "missing_percentage"] + +x_axis = [0.05, 0.1, 0.2, 0.4, 0.6, 0.8] + +if not reconstruction: + runs_results, avg_scores_list = Benchmark().eval(algorithms=algorithms_test, datasets=dataset_test, patterns=patterns_small, x_axis=x_axis, optimizers=optimizers, save_dir="test_naterq", runs=3) + print("\n\n\nresults:", runs_results) + +elif reconstruction and not matrix: + test_plots = {'chlorine': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9256738243031312, 'MAE': 0.8788758766429177, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.001201629638671875, 'optimization': 0, 'imputation': 0.0005724430084228516}}, '0.1': {'scores': {'RMSE': 0.8239629739455251, 'MAE': 0.7297827051195541, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.001814126968383789, 'optimization': 0, 'imputation': 0.0004563331604003906}}, '0.2': {'scores': {'RMSE': 0.8317409760747367, 'MAE': 0.7138664942301458, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.005623817443847656, 'optimization': 0, 'imputation': 0.0004363059997558594}}, '0.4': {'scores': {'RMSE': 0.866178542847881, 'MAE': 0.744937943856253, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.03413963317871094, 'optimization': 0, 'imputation': 0.0005552768707275391}}, '0.6': {'scores': {'RMSE': 0.8906205973878023, 'MAE': 0.7677632103385671, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.13074183464050293, 'optimization': 0, 'imputation': 0.0005936622619628906}}, '0.8': {'scores': {'RMSE': 0.9231926867636093, 'MAE': 0.7897697041316387, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.4494190216064453, 'optimization': 0, 'imputation': 0.0005834102630615234}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.19555801767314038, 'MAE': 0.14379634965165344, 'MI': 1.3195962394272744, 'CORRELATION': 0.9770377315860114}, 'times': {'contamination': 0.0010943412780761719, 'optimization': 1.6249148845672607, 'imputation': 0.09233546257019043}}, '0.1': {'scores': {'RMSE': 0.22211329096601584, 'MAE': 0.13682609208383795, 'MI': 1.225240202380491, 'CORRELATION': 0.9627754587047338}, 'times': {'contamination': 0.005602359771728516, 'optimization': 1.6249148845672607, 'imputation': 0.1246938705444336}}, '0.2': {'scores': {'RMSE': 
0.26890140517000855, 'MAE': 0.16983555417798818, 'MI': 1.0650037012869458, 'CORRELATION': 0.945331872005451}, 'times': {'contamination': 0.017725229263305664, 'optimization': 1.6249148845672607, 'imputation': 0.1363234519958496}}, '0.4': {'scores': {'RMSE': 0.3143181342292365, 'MAE': 0.2041263696093189, 'MI': 0.9133456774887369, 'CORRELATION': 0.9309636417166443}, 'times': {'contamination': 0.1031486988067627, 'optimization': 1.6249148845672607, 'imputation': 0.2686195373535156}}, '0.6': {'scores': {'RMSE': 0.37514780116434926, 'MAE': 0.22156474038385332, 'MI': 0.7775541845220788, 'CORRELATION': 0.9078517283026865}, 'times': {'contamination': 0.20231366157531738, 'optimization': 1.6249148845672607, 'imputation': 0.8690693378448486}}, '0.8': {'scores': {'RMSE': 0.9117409046445515, 'MAE': 0.4801132374733116, 'MI': 0.2576488533530952, 'CORRELATION': 0.6589813814462316}, 'times': {'contamination': 0.5354366302490234, 'optimization': 1.6249148845672607, 'imputation': 2.865450143814087}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.3033328648259709, 'MAE': 0.2644983508914945, 'MI': 1.2263963519649825, 'CORRELATION': 0.9611641055318173}, 'times': {'contamination': 0.0029397010803222656, 'optimization': 500.0222601890564, 'imputation': 23.88236165046692}}, '0.1': {'scores': {'RMSE': 0.27434099749552526, 'MAE': 0.22744969879475732, 'MI': 1.0873378350271077, 'CORRELATION': 0.9481608575454046}, 'times': {'contamination': 0.001943349838256836, 'optimization': 500.0222601890564, 'imputation': 24.082878351211548}}, '0.2': {'scores': {'RMSE': 0.3354154243946063, 'MAE': 0.2667902544729111, 'MI': 0.9040935528948765, 'CORRELATION': 0.9224394175345223}, 'times': {'contamination': 0.007236480712890625, 'optimization': 500.0222601890564, 'imputation': 27.05676031112671}}, '0.4': {'scores': {'RMSE': 0.3663147584695216, 'MAE': 0.2683992893683706, 'MI': 0.7945562213511235, 'CORRELATION': 0.9086873163095024}, 'times': {'contamination': 0.03319692611694336, 'optimization': 500.0222601890564, 'imputation': 24.969536066055298}}, '0.6': {'scores': {'RMSE': 0.49178356901493514, 'MAE': 0.3590429489696727, 'MI': 0.568068131156551, 'CORRELATION': 0.8240735290572155}, 'times': {'contamination': 0.13401484489440918, 'optimization': 500.0222601890564, 'imputation': 17.722254991531372}}, '0.8': {'scores': {'RMSE': 5.286373452119497, 'MAE': 3.0120315981628085, 'MI': 0.0877803352414065, 'CORRELATION': 0.4417418016734377}, 'times': {'contamination': 0.46097803115844727, 'optimization': 500.0222601890564, 'imputation': 17.994383335113525}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.2246776140243064, 'MAE': 0.16265112492381306, 'MI': 1.0875116207955637, 'CORRELATION': 0.9694504836799154}, 'times': {'contamination': 0.0009558200836181641, 'optimization': 4871.80725812912, 'imputation': 1.680412769317627}}, '0.1': {'scores': {'RMSE': 0.3034580006710775, 'MAE': 0.20388299260278156, 'MI': 1.0526306210784155, 'CORRELATION': 0.9337303655141744}, 'times': {'contamination': 0.0018503665924072266, 'optimization': 4871.80725812912, 'imputation': 10.345388412475586}}, '0.2': {'scores': {'RMSE': 0.4104578379330223, 'MAE': 0.2785159738696005, 'MI': 0.7986686024303655, 'CORRELATION': 0.8658822456465257}, 'times': {'contamination': 0.0055084228515625, 'optimization': 4871.80725812912, 'imputation': 65.17643117904663}}, '0.4': {'scores': {'RMSE': 0.4911437971846393, 'MAE': 0.32455728476996504, 'MI': 0.6429014104572732, 'CORRELATION': 0.8180219110130202}, 'times': {'contamination': 0.032411813735961914, 'optimization': 
4871.80725812912, 'imputation': 474.7696805000305}}, '0.6': {'scores': {'RMSE': 0.579715388344659, 'MAE': 0.4144431747763777, 'MI': 0.45413696197432313, 'CORRELATION': 0.7431519134806602}, 'times': {'contamination': 0.1278684139251709, 'optimization': 4871.80725812912, 'imputation': 1531.380850315094}}, '0.8': {'scores': {'RMSE': 0.8100585330320411, 'MAE': 0.6124983237048439, 'MI': 0.1600984202902365, 'CORRELATION': 0.48808679305097513}, 'times': {'contamination': 0.4592604637145996, 'optimization': 4871.80725812912, 'imputation': 3588.4590351581573}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0889986961845628, 'MAE': 0.8825193440526788, 'MI': 0.569311657025473, 'CORRELATION': 0.006110871130276294}, 'times': {'contamination': 0.0009238719940185547, 'optimization': 474.33066391944885, 'imputation': 37.89777088165283}}, '0.1': {'scores': {'RMSE': 0.8750845974360951, 'MAE': 0.7897191908914645, 'MI': 0.36542131337202255, 'CORRELATION': 0.1776164808833599}, 'times': {'contamination': 0.0020151138305664062, 'optimization': 474.33066391944885, 'imputation': 36.68788194656372}}, '0.2': {'scores': {'RMSE': 1.3935692458593014, 'MAE': 1.1278169009994172, 'MI': 0.23278876704617288, 'CORRELATION': -0.0043224216288866475}, 'times': {'contamination': 0.006083011627197266, 'optimization': 474.33066391944885, 'imputation': 34.238656997680664}}, '0.4': {'scores': {'RMSE': 1.2198343626008104, 'MAE': 1.004323747843723, 'MI': 0.11694146418635429, 'CORRELATION': -2.8855554502904036e-05}, 'times': {'contamination': 0.03404045104980469, 'optimization': 474.33066391944885, 'imputation': 37.132654428482056}}, '0.6': {'scores': {'RMSE': 1.1924360263528335, 'MAE': 0.9838535398356899, 'MI': 0.0794767096848362, 'CORRELATION': -0.06570944989748748}, 'times': {'contamination': 0.1405935287475586, 'optimization': 474.33066391944885, 'imputation': 37.741902351379395}}, '0.8': {'scores': {'RMSE': 1.3728850685938416, 'MAE': 1.1227443270722774, 'MI': 0.08611037233596197, 'CORRELATION': -0.012424819834313067}, 'times': {'contamination': 0.47881627082824707, 'optimization': 474.33066391944885, 'imputation': 37.675835847854614}}}}}}} + Benchmark().generate_plots(runs_plots_scores=test_plots, ticks=x_axis, subplot=True, save_dir="./test_naterq") + Benchmark().generate_reports_txt(runs_plots_scores=test_plots, save_dir="./test_naterq", dataset="chlorine", run=0) + Benchmark().generate_reports_excel(runs_plots_scores=test_plots, save_dir="./test_naterq", dataset="chlorine", run=0) + + """ + test_plots = {'eeg_reading': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.6937095315379215, 'MAE': 0.5871322524124026, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0013728141784667969, 'optimization': 0, 'imputation': 0.0005629062652587891}}, '0.1': {'scores': {'RMSE': 0.8825047928812179, 'MAE': 0.7058469910884912, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0016565322875976562, 'optimization': 0, 'imputation': 0.00047278404235839844}}, '0.2': {'scores': {'RMSE': 1.0076040625030085, 'MAE': 0.8133998806656898, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.00404810905456543, 'optimization': 0, 'imputation': 0.00045371055603027344}}, '0.4': {'scores': {'RMSE': 1.014301846668858, 'MAE': 0.8219008090987252, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.03703451156616211, 'optimization': 0, 'imputation': 0.0006351470947265625}}, '0.6': {'scores': {'RMSE': 1.0158383459630567, 'MAE': 0.8210620770500036, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 
0.11827206611633301, 'optimization': 0, 'imputation': 0.000629425048828125}}, '0.8': {'scores': {'RMSE': 1.01877327240803, 'MAE': 0.8157442592731639, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.39914441108703613, 'optimization': 0, 'imputation': 0.0005762577056884766}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.6092168096877171, 'MAE': 0.43725473329243575, 'MI': 0.8113862111415893, 'CORRELATION': 0.6669628813645995}, 'times': {'contamination': 0.0009872913360595703, 'optimization': -0.284501314163208, 'imputation': 0.19295310974121094}}, '0.1': {'scores': {'RMSE': 0.7694804794390454, 'MAE': 0.46934521855095135, 'MI': 0.6303931303314629, 'CORRELATION': 0.6338704662419556}, 'times': {'contamination': 0.004288911819458008, 'optimization': -0.284501314163208, 'imputation': 0.23847365379333496}}, '0.2': {'scores': {'RMSE': 0.54163559631001, 'MAE': 0.3838909357504076, 'MI': 0.6804417798137956, 'CORRELATION': 0.8550799708158655}, 'times': {'contamination': 0.01486515998840332, 'optimization': -0.284501314163208, 'imputation': 0.4856741428375244}}, '0.4': {'scores': {'RMSE': 0.6150678993354384, 'MAE': 0.3994113839683473, 'MI': 0.5964930437182837, 'CORRELATION': 0.8282842809048951}, 'times': {'contamination': 0.10318613052368164, 'optimization': -0.284501314163208, 'imputation': 0.5878500938415527}}, '0.6': {'scores': {'RMSE': 0.8559878849846194, 'MAE': 0.4875679606049892, 'MI': 0.4352238530939769, 'CORRELATION': 0.7114520144242487}, 'times': {'contamination': 0.1801285743713379, 'optimization': -0.284501314163208, 'imputation': 2.260394811630249}}, '0.8': {'scores': {'RMSE': 1.0028418021086185, 'MAE': 0.6478458585388304, 'MI': 0.26800404550676565, 'CORRELATION': 0.6191696179492259}, 'times': {'contamination': 0.45122456550598145, 'optimization': -0.284501314163208, 'imputation': 2.1127378940582275}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.38913260498789515, 'MAE': 0.28887981808629887, 'MI': 0.9003693740232723, 'CORRELATION': 0.8305251080007574}, 'times': {'contamination': 0.004096508026123047, 'optimization': 474.1963918209076, 'imputation': 28.061330318450928}}, '0.1': {'scores': {'RMSE': 0.42262809349896036, 'MAE': 0.31228067649415225, 'MI': 0.8330304029808546, 'CORRELATION': 0.8802327685495391}, 'times': {'contamination': 0.0014801025390625, 'optimization': 474.1963918209076, 'imputation': 28.245431184768677}}, '0.2': {'scores': {'RMSE': 0.4299898931605415, 'MAE': 0.2914674774962624, 'MI': 0.8303895697315763, 'CORRELATION': 0.9049819009058613}, 'times': {'contamination': 0.0040132999420166016, 'optimization': 474.1963918209076, 'imputation': 30.743361473083496}}, '0.4': {'scores': {'RMSE': 0.4658583297277367, 'MAE': 0.32456738916683475, 'MI': 0.7105191885562022, 'CORRELATION': 0.8900070341144635}, 'times': {'contamination': 0.02961254119873047, 'optimization': 474.1963918209076, 'imputation': 29.556389808654785}}, '0.6': {'scores': {'RMSE': 0.5970596677005412, 'MAE': 0.40317626348969443, 'MI': 0.5057637077329502, 'CORRELATION': 0.8092444114848254}, 'times': {'contamination': 0.10307097434997559, 'optimization': 474.1963918209076, 'imputation': 20.913992404937744}}, '0.8': {'scores': {'RMSE': 4.099584545523784, 'MAE': 1.4360755142687804, 'MI': 0.03924813725195477, 'CORRELATION': 0.21658071586750138}, 'times': {'contamination': 0.38839101791381836, 'optimization': 474.1963918209076, 'imputation': 18.921329736709595}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.738070963229811, 'MAE': 0.5586987523761138, 'MI': 0.9549505679325584, 'CORRELATION': 
0.4719896373208298}, 'times': {'contamination': 0.00078582763671875, 'optimization': 3513.147577047348, 'imputation': 1.0417201519012451}}, '0.1': {'scores': {'RMSE': 0.6776044115374216, 'MAE': 0.4883939650690726, 'MI': 0.6051652352756725, 'CORRELATION': 0.7008457537827716}, 'times': {'contamination': 0.001641988754272461, 'optimization': 3513.147577047348, 'imputation': 6.6430745124816895}}, '0.2': {'scores': {'RMSE': 0.642538776211307, 'MAE': 0.45068800736093795, 'MI': 0.5847752699836343, 'CORRELATION': 0.7870826275047371}, 'times': {'contamination': 0.003993034362792969, 'optimization': 3513.147577047348, 'imputation': 43.09042835235596}}, '0.4': {'scores': {'RMSE': 0.595402838774376, 'MAE': 0.4200046319465559, 'MI': 0.5780737300771779, 'CORRELATION': 0.8157719741321808}, 'times': {'contamination': 0.037882328033447266, 'optimization': 3513.147577047348, 'imputation': 325.3523244857788}}, '0.6': {'scores': {'RMSE': 0.6457758226280373, 'MAE': 0.465851861042097, 'MI': 0.4940897071221384, 'CORRELATION': 0.7797841684978442}, 'times': {'contamination': 0.1108400821685791, 'optimization': 3513.147577047348, 'imputation': 1001.1619775295258}}, '0.8': {'scores': {'RMSE': 0.7031022809975706, 'MAE': 0.5292159877681492, 'MI': 0.3802525627714059, 'CORRELATION': 0.7224487387493247}, 'times': {'contamination': 0.3698101043701172, 'optimization': 3513.147577047348, 'imputation': 2408.869615316391}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.347580032956689, 'MAE': 1.1601095334550815, 'MI': 0.1586603634624117, 'CORRELATION': -0.18399931939875533}, 'times': {'contamination': 0.0010280609130859375, 'optimization': 294.0501501560211, 'imputation': 24.799844980239868}}, '0.1': {'scores': {'RMSE': 1.4429520609208166, 'MAE': 1.1748671084301718, 'MI': 0.21482702111483185, 'CORRELATION': -0.3608701962716392}, 'times': {'contamination': 0.0016400814056396484, 'optimization': 294.0501501560211, 'imputation': 27.284573793411255}}, '0.2': {'scores': {'RMSE': 1.2876145426625936, 'MAE': 1.0269096650749077, 'MI': 0.07484247431469719, 'CORRELATION': -0.007533643863897717}, 'times': {'contamination': 0.004055500030517578, 'optimization': 294.0501501560211, 'imputation': 26.624001264572144}}, '0.4': {'scores': {'RMSE': 1.4608458720939472, 'MAE': 1.1594757078481346, 'MI': 0.03787981276790102, 'CORRELATION': 0.011173417358467087}, 'times': {'contamination': 0.03002309799194336, 'optimization': 294.0501501560211, 'imputation': 27.17277193069458}}, '0.6': {'scores': {'RMSE': 1.634424595829425, 'MAE': 1.3356121929070988, 'MI': 0.02225643037919471, 'CORRELATION': -0.02299352560191792}, 'times': {'contamination': 0.10050559043884277, 'optimization': 294.0501501560211, 'imputation': 27.999096632003784}}, '0.8': {'scores': {'RMSE': 1.3047427885621508, 'MAE': 1.05104242568594, 'MI': 0.01007462604941533, 'CORRELATION': -0.004969975534923902}, 'times': {'contamination': 0.3871951103210449, 'optimization': 294.0501501560211, 'imputation': 27.538389205932617}}}}}}} + Benchmark().generate_plots(runs_plots_scores=test_plots, ticks=x_axis, subplot=True, save_dir="./test_naterq") + + test_plots = {'eeg_alcohol': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 1.107394798606378, 'MAE': 0.9036474830477748, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.008088350296020508, 'optimization': 0, 'imputation': 0.0003597736358642578}}, '0.1': {'scores': {'RMSE': 0.8569349076796438, 'MAE': 0.6416542359734557, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0013017654418945312, 'optimization': 0, 
'imputation': 0.00038313865661621094}}, '0.2': {'scores': {'RMSE': 0.9609255264919324, 'MAE': 0.756013835497571, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0017611980438232422, 'optimization': 0, 'imputation': 0.00021719932556152344}}, '0.4': {'scores': {'RMSE': 1.0184989120725458, 'MAE': 0.8150966718352457, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.012012958526611328, 'optimization': 0, 'imputation': 0.0003046989440917969}}, '0.6': {'scores': {'RMSE': 0.9997401940199045, 'MAE': 0.7985721718600829, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.04199981689453125, 'optimization': 0, 'imputation': 0.000354766845703125}}, '0.8': {'scores': {'RMSE': 0.9895691678332014, 'MAE': 0.7901674118013952, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.11134958267211914, 'optimization': 0, 'imputation': 0.00029206275939941406}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.27658600512073456, 'MAE': 0.20204444801773774, 'MI': 1.6287285825717355, 'CORRELATION': 0.9837210171556283}, 'times': {'contamination': 0.0006604194641113281, 'optimization': 1.5429341793060303, 'imputation': 0.054087162017822266}}, '0.1': {'scores': {'RMSE': 0.2322153312143858, 'MAE': 0.1729082341483471, 'MI': 1.1990748751673153, 'CORRELATION': 0.9640732993793864}, 'times': {'contamination': 0.0025806427001953125, 'optimization': 1.5429341793060303, 'imputation': 0.07224416732788086}}, '0.2': {'scores': {'RMSE': 0.21796283300762773, 'MAE': 0.16255811567403466, 'MI': 1.184724280002774, 'CORRELATION': 0.9737521039022545}, 'times': {'contamination': 0.0056035518646240234, 'optimization': 1.5429341793060303, 'imputation': 0.039177656173706055}}, '0.4': {'scores': {'RMSE': 0.2852656711446442, 'MAE': 0.19577380664036, 'MI': 1.014828207927502, 'CORRELATION': 0.959485242427464}, 'times': {'contamination': 0.03652334213256836, 'optimization': 1.5429341793060303, 'imputation': 0.0999898910522461}}, '0.6': {'scores': {'RMSE': 0.3360171448119046, 'MAE': 0.23184686418998596, 'MI': 0.8789374924043876, 'CORRELATION': 0.9418882413737133}, 'times': {'contamination': 0.10041642189025879, 'optimization': 1.5429341793060303, 'imputation': 0.1369919776916504}}, '0.8': {'scores': {'RMSE': 0.5558362531202891, 'MAE': 0.37446346030237454, 'MI': 0.5772409317426037, 'CORRELATION': 0.8478935496183876}, 'times': {'contamination': 0.17512726783752441, 'optimization': 1.5429341793060303, 'imputation': 0.38109540939331055}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.7434750032306926, 'MAE': 0.5711687107703531, 'MI': 1.0614546580642759, 'CORRELATION': 0.7570103181096193}, 'times': {'contamination': 0.001224517822265625, 'optimization': 25.973577737808228, 'imputation': 1.827949047088623}}, '0.1': {'scores': {'RMSE': 0.6079049353979786, 'MAE': 0.4565071330548986, 'MI': 0.5897845472515851, 'CORRELATION': 0.7033347467102922}, 'times': {'contamination': 0.0011165142059326172, 'optimization': 25.973577737808228, 'imputation': 1.8397388458251953}}, '0.2': {'scores': {'RMSE': 0.5938200686690087, 'MAE': 0.4583475323523134, 'MI': 0.5238356117195857, 'CORRELATION': 0.789556744168648}, 'times': {'contamination': 0.0017132759094238281, 'optimization': 25.973577737808228, 'imputation': 1.8568992614746094}}, '0.4': {'scores': {'RMSE': 0.6922622994445695, 'MAE': 0.5327565871766037, 'MI': 0.3842117779328253, 'CORRELATION': 0.738304743934084}, 'times': {'contamination': 0.009068012237548828, 'optimization': 25.973577737808228, 'imputation': 2.0719306468963623}}, '0.6': {'scores': {'RMSE': 
0.7719376402414535, 'MAE': 0.5756544384278333, 'MI': 0.268745121385816, 'CORRELATION': 0.6398387148302656}, 'times': {'contamination': 0.02822709083557129, 'optimization': 25.973577737808228, 'imputation': 1.5673530101776123}}, '0.8': {'scores': {'RMSE': 1.0218833589128922, 'MAE': 0.8012134667654269, 'MI': 0.0051679642909252645, 'CORRELATION': 0.06083718960882358}, 'times': {'contamination': 0.09586524963378906, 'optimization': 25.973577737808228, 'imputation': 1.7056498527526855}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.26665906759668434, 'MAE': 0.21589657916392105, 'MI': 1.4930024107375521, 'CORRELATION': 0.9704001503125854}, 'times': {'contamination': 0.0005829334259033203, 'optimization': 563.972785949707, 'imputation': 0.05102872848510742}}, '0.1': {'scores': {'RMSE': 0.28425094570125403, 'MAE': 0.22787684897303442, 'MI': 1.0594854362146846, 'CORRELATION': 0.9444192673990515}, 'times': {'contamination': 0.0008502006530761719, 'optimization': 563.972785949707, 'imputation': 0.2425684928894043}}, '0.2': {'scores': {'RMSE': 0.334887339804727, 'MAE': 0.25851830743811066, 'MI': 0.9711245925356778, 'CORRELATION': 0.9390073163681255}, 'times': {'contamination': 0.001627206802368164, 'optimization': 563.972785949707, 'imputation': 1.4222276210784912}}, '0.4': {'scores': {'RMSE': 0.4719169787140248, 'MAE': 0.35026878431372477, 'MI': 0.7196112128770917, 'CORRELATION': 0.8858920655062363}, 'times': {'contamination': 0.008496761322021484, 'optimization': 563.972785949707, 'imputation': 12.960479974746704}}, '0.6': {'scores': {'RMSE': 0.47736733503847095, 'MAE': 0.35628454418236766, 'MI': 0.6157654491357567, 'CORRELATION': 0.8790867703136753}, 'times': {'contamination': 0.026967287063598633, 'optimization': 563.972785949707, 'imputation': 35.622944831848145}}, '0.8': {'scores': {'RMSE': 0.5747595088880484, 'MAE': 0.4242587159311907, 'MI': 0.4843046739917606, 'CORRELATION': 0.8188927905931169}, 'times': {'contamination': 0.08214735984802246, 'optimization': 563.972785949707, 'imputation': 87.41280603408813}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 2.5423293855369917, 'MAE': 2.2141103663578803, 'MI': 0.6072901854577394, 'CORRELATION': -0.6360397852133122}, 'times': {'contamination': 0.0005042552947998047, 'optimization': 59585.917899131775, 'imputation': 33.87153220176697}}, '0.1': {'scores': {'RMSE': 1.6013078224502717, 'MAE': 1.2201563721098412, 'MI': 0.2626533928770599, 'CORRELATION': -0.17448314526813025}, 'times': {'contamination': 0.0009407997131347656, 'optimization': 59585.917899131775, 'imputation': 33.780593156814575}}, '0.2': {'scores': {'RMSE': 1.1980243320030763, 'MAE': 0.9205437185390488, 'MI': 0.12523228756285484, 'CORRELATION': 0.053025850949979476}, 'times': {'contamination': 0.0020706653594970703, 'optimization': 59585.917899131775, 'imputation': 33.35025191307068}}, '0.4': {'scores': {'RMSE': 1.3154357320206076, 'MAE': 1.0563143800308983, 'MI': 0.04771994618237419, 'CORRELATION': 0.04644513674411651}, 'times': {'contamination': 0.009225606918334961, 'optimization': 59585.917899131775, 'imputation': 32.76318073272705}}, '0.6': {'scores': {'RMSE': 1.409745596231954, 'MAE': 1.1320098137715748, 'MI': 0.013200537946505414, 'CORRELATION': -0.051391074582830536}, 'times': {'contamination': 0.028786659240722656, 'optimization': 59585.917899131775, 'imputation': 32.95982527732849}}, '0.8': {'scores': {'RMSE': 1.3165198817323216, 'MAE': 1.0603105471734755, 'MI': 0.006831327215000855, 'CORRELATION': -0.010253125321586447}, 'times': {'contamination': 
0.11717653274536133, 'optimization': 59585.917899131775, 'imputation': 31.418609857559204}}}}}}} + Benchmark().generate_plots(runs_plots_scores=test_plots, ticks=x_axis, subplot=True, save_dir="./test_naterq") + + test_plots = {'drift': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9234927128429051, 'MAE': 0.7219362152785619, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.008000850677490234, 'optimization': 0, 'imputation': 0.0005795955657958984}}, '0.1': {'scores': {'RMSE': 0.9699990038879407, 'MAE': 0.7774057495176013, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0019245147705078125, 'optimization': 0, 'imputation': 0.0005664825439453125}}, '0.2': {'scores': {'RMSE': 0.9914069853975623, 'MAE': 0.8134840739732964, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.009830474853515625, 'optimization': 0, 'imputation': 0.0005776882171630859}}, '0.4': {'scores': {'RMSE': 1.0552448338389784, 'MAE': 0.7426695186604741, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.04627418518066406, 'optimization': 0, 'imputation': 0.0005333423614501953}}, '0.6': {'scores': {'RMSE': 1.0143105930114702, 'MAE': 0.7610548321723654, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.16058969497680664, 'optimization': 0, 'imputation': 0.0005693435668945312}}, '0.8': {'scores': {'RMSE': 1.010712060535523, 'MAE': 0.7641520748788702, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.49263739585876465, 'optimization': 0, 'imputation': 0.0005679130554199219}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.23303624184873972, 'MAE': 0.1361979723519773, 'MI': 1.2739817718416822, 'CORRELATION': 0.968435455112644}, 'times': {'contamination': 0.0011115074157714844, 'optimization': 2.84889817237854, 'imputation': 0.1434638500213623}}, '0.1': {'scores': {'RMSE': 0.18152059329152107, 'MAE': 0.09925566629402763, 'MI': 1.1516089897042538, 'CORRELATION': 0.982939835222072}, 'times': {'contamination': 0.004365444183349609, 'optimization': 2.84889817237854, 'imputation': 0.14118409156799316}}, '0.2': {'scores': {'RMSE': 0.13894771223733138, 'MAE': 0.0845903269210229, 'MI': 1.186191167936035, 'CORRELATION': 0.9901338133811375}, 'times': {'contamination': 0.01644587516784668, 'optimization': 2.84889817237854, 'imputation': 0.16940855979919434}}, '0.4': {'scores': {'RMSE': 0.7544523683503815, 'MAE': 0.1121804997359425, 'MI': 0.021165172206064526, 'CORRELATION': 0.8141205075707254}, 'times': {'contamination': 0.10604023933410645, 'optimization': 2.84889817237854, 'imputation': 2.0186331272125244}}, '0.6': {'scores': {'RMSE': 0.4355197572001314, 'MAE': 0.13808466247330484, 'MI': 0.10781252370591506, 'CORRELATION': 0.9166777087122918}, 'times': {'contamination': 0.2030637264251709, 'optimization': 2.84889817237854, 'imputation': 2.0608761310577393}}, '0.8': {'scores': {'RMSE': 0.7672558930795491, 'MAE': 0.3298896842843935, 'MI': 0.013509125598802707, 'CORRELATION': 0.7312998041323682}, 'times': {'contamination': 0.5499897003173828, 'optimization': 2.84889817237854, 'imputation': -0.47277092933654785}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.5434405584289141, 'MAE': 0.346560495723809, 'MI': 0.7328867182584357, 'CORRELATION': 0.8519431955571422}, 'times': {'contamination': 0.0021185874938964844, 'optimization': 514.5863847732544, 'imputation': 34.6202232837677}}, '0.1': {'scores': {'RMSE': 0.39007056542870916, 'MAE': 0.2753022759369617, 'MI': 0.8280959876205578, 'CORRELATION': 0.9180937736429735}, 'times': {'contamination': 
0.0018591880798339844, 'optimization': 514.5863847732544, 'imputation': 35.190133810043335}}, '0.2': {'scores': {'RMSE': 0.37254427425455994, 'MAE': 0.2730547993858495, 'MI': 0.7425412593844177, 'CORRELATION': 0.9293322959355041}, 'times': {'contamination': 0.005822181701660156, 'optimization': 514.5863847732544, 'imputation': 35.46649789810181}}, '0.4': {'scores': {'RMSE': 0.6027573766269363, 'MAE': 0.34494332493982044, 'MI': 0.11876685901414151, 'CORRELATION': 0.8390532279447225}, 'times': {'contamination': 0.03864097595214844, 'optimization': 514.5863847732544, 'imputation': 34.30042386054993}}, '0.6': {'scores': {'RMSE': 0.9004526656857551, 'MAE': 0.4924048353228427, 'MI': 0.011590260996247858, 'CORRELATION': 0.5650541301828254}, 'times': {'contamination': 0.14191699028015137, 'optimization': 514.5863847732544, 'imputation': 29.5026593208313}}, '0.8': {'scores': {'RMSE': 1.0112488396023014, 'MAE': 0.7646823531588104, 'MI': 0.00040669209664367576, 'CORRELATION': 0.0183962968474991}, 'times': {'contamination': 0.46815061569213867, 'optimization': 514.5863847732544, 'imputation': 22.864952564239502}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.4445625930776235, 'MAE': 0.2696133927362288, 'MI': 1.1167751522591498, 'CORRELATION': 0.8944975075266335}, 'times': {'contamination': 0.0008444786071777344, 'optimization': 5050.300735235214, 'imputation': 0.6499700546264648}}, '0.1': {'scores': {'RMSE': 0.2939506418814281, 'MAE': 0.16953644212278182, 'MI': 1.0160968166750064, 'CORRELATION': 0.9531900627237018}, 'times': {'contamination': 0.0019328594207763672, 'optimization': 5050.300735235214, 'imputation': 4.424615383148193}}, '0.2': {'scores': {'RMSE': 0.2366529609250008, 'MAE': 0.14709529129218185, 'MI': 1.064299483512458, 'CORRELATION': 0.9711348247027318}, 'times': {'contamination': 0.005669116973876953, 'optimization': 5050.300735235214, 'imputation': 28.64192819595337}}, '0.4': {'scores': {'RMSE': 0.4155649406397416, 'MAE': 0.22056702659999994, 'MI': 0.06616526470761779, 'CORRELATION': 0.919934494058292}, 'times': {'contamination': 0.03133583068847656, 'optimization': 5050.300735235214, 'imputation': 215.96445870399475}}, '0.6': {'scores': {'RMSE': 0.38695094864012947, 'MAE': 0.24340565131372927, 'MI': 0.06361822797740405, 'CORRELATION': 0.9249744935121553}, 'times': {'contamination': 0.1293776035308838, 'optimization': 5050.300735235214, 'imputation': 711.7917039394379}}, '0.8': {'scores': {'RMSE': 0.5862696375344495, 'MAE': 0.3968159514130716, 'MI': 0.13422239939628303, 'CORRELATION': 0.8178796825899766}, 'times': {'contamination': 0.45540356636047363, 'optimization': 5050.300735235214, 'imputation': 1666.3830137252808}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9458168886934889, 'MAE': 0.7087024488997395, 'MI': 0.11924522547609226, 'CORRELATION': -0.04225238590482719}, 'times': {'contamination': 0.0010085105895996094, 'optimization': 478.6599726676941, 'imputation': 41.931705474853516}}, '0.1': {'scores': {'RMSE': 1.012708832814332, 'MAE': 0.7612398956786116, 'MI': 0.125135259797581, 'CORRELATION': -0.037524204443007164}, 'times': {'contamination': 0.0019328594207763672, 'optimization': 478.6599726676941, 'imputation': 37.289856910705566}}, '0.2': {'scores': {'RMSE': 1.0293662762879399, 'MAE': 0.79543999581101, 'MI': 0.10908095436833125, 'CORRELATION': -0.03892162998680425}, 'times': {'contamination': 0.005481719970703125, 'optimization': 478.6599726676941, 'imputation': 39.732287645339966}}, '0.4': {'scores': {'RMSE': 1.08276653737942, 'MAE': 0.7324224949731254, 
'MI': 0.008689250019683584, 'CORRELATION': -0.020719639766949276}, 'times': {'contamination': 0.032985687255859375, 'optimization': 478.6599726676941, 'imputation': 40.06472086906433}}, '0.6': {'scores': {'RMSE': 1.0436806660629465, 'MAE': 0.7612577768282424, 'MI': 0.011650658060022669, 'CORRELATION': -0.0069952780339244845}, 'times': {'contamination': 0.13504815101623535, 'optimization': 478.6599726676941, 'imputation': 41.86172533035278}}, '0.8': {'scores': {'RMSE': 1.0386764847922278, 'MAE': 0.7580243538074385, 'MI': 0.0035404637707733143, 'CORRELATION': -0.0010165957084160128}, 'times': {'contamination': 0.4962472915649414, 'optimization': 478.6599726676941, 'imputation': 44.58724093437195}}}}}}} + Benchmark().generate_plots(runs_plots_scores=test_plots, ticks=x_axis, subplot=True, save_dir="./test_naterq") + """ +if matrix : + run_1_chlorine = {'chlorine': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9256738243031312, 'MAE': 0.8788758766429177, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0009789466857910156, 'optimization': 0, 'imputation': 0.000560760498046875}}, '0.1': {'scores': {'RMSE': 0.8239629739455251, 'MAE': 0.7297827051195541, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.002305746078491211, 'optimization': 0, 'imputation': 0.0004634857177734375}}, '0.2': {'scores': {'RMSE': 0.8317409760747367, 'MAE': 0.7138664942301458, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.007703065872192383, 'optimization': 0, 'imputation': 0.0004649162292480469}}, '0.4': {'scores': {'RMSE': 0.866178542847881, 'MAE': 0.744937943856253, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.047789812088012695, 'optimization': 0, 'imputation': 0.0005023479461669922}}, '0.6': {'scores': {'RMSE': 0.8906205973878023, 'MAE': 0.7677632103385671, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.19488763809204102, 'optimization': 0, 'imputation': 0.0005488395690917969}}, '0.8': {'scores': {'RMSE': 0.9231926867636093, 'MAE': 0.7897697041316387, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.6890411376953125, 'optimization': 0, 'imputation': 0.0005776882171630859}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.19554703625817557, 'MAE': 0.1437913973228053, 'MI': 1.3195962394272744, 'CORRELATION': 0.9770406565915004}, 'times': {'contamination': 0.0009171962738037109, 'optimization': 0, 'imputation': 0.05464982986450195}}, '0.1': {'scores': {'RMSE': 0.22212985201492597, 'MAE': 0.1368378161074427, 'MI': 1.225240202380491, 'CORRELATION': 0.9627706895400587}, 'times': {'contamination': 0.004944562911987305, 'optimization': 0, 'imputation': 0.070037841796875}}, '0.2': {'scores': {'RMSE': 0.268910630576598, 'MAE': 0.16983805083071585, 'MI': 1.0636573662919013, 'CORRELATION': 0.9453283753208437}, 'times': {'contamination': 0.01749396324157715, 'optimization': 0, 'imputation': 0.07790756225585938}}, '0.4': {'scores': {'RMSE': 0.31430310541683426, 'MAE': 0.2041005558473225, 'MI': 0.9124259582934485, 'CORRELATION': 0.9309696942537548}, 'times': {'contamination': 0.11426258087158203, 'optimization': 0, 'imputation': 0.1478443145751953}}, '0.6': {'scores': {'RMSE': 0.3737964229023613, 'MAE': 0.22131322530176772, 'MI': 0.7775995167572279, 'CORRELATION': 0.9083977308218121}, 'times': {'contamination': 0.2614400386810303, 'optimization': 0, 'imputation': 0.4230384826660156}}, '0.8': {'scores': {'RMSE': 0.9290440261799385, 'MAE': 0.4933255678502781, 'MI': 0.2021428083194056, 'CORRELATION': 0.6461059842947307}, 'times': 
{'contamination': 0.7493531703948975, 'optimization': 0, 'imputation': 4.412551164627075}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.16435641817881824, 'MAE': 0.13990340223545955, 'MI': 1.3785977665357232, 'CORRELATION': 0.9868224741901116}, 'times': {'contamination': 0.0036211013793945312, 'optimization': 0, 'imputation': 39.150184869766235}}, '0.1': {'scores': {'RMSE': 0.2228247553722344, 'MAE': 0.16815959364081734, 'MI': 1.2340069760129087, 'CORRELATION': 0.9623151173186535}, 'times': {'contamination': 0.002553224563598633, 'optimization': 0, 'imputation': 39.25465536117554}}, '0.2': {'scores': {'RMSE': 0.27923604567760596, 'MAE': 0.19211165697030474, 'MI': 1.0043820035861775, 'CORRELATION': 0.9430094313080399}, 'times': {'contamination': 0.008016109466552734, 'optimization': 0, 'imputation': 39.86703276634216}}, '0.4': {'scores': {'RMSE': 0.3255775619246775, 'MAE': 0.2194073917812186, 'MI': 0.8847163339667148, 'CORRELATION': 0.9259001258177321}, 'times': {'contamination': 0.04792189598083496, 'optimization': 0, 'imputation': 41.36716914176941}}, '0.6': {'scores': {'RMSE': 0.44447910257331374, 'MAE': 0.30600741310945195, 'MI': 0.6723738452451481, 'CORRELATION': 0.857466472714002}, 'times': {'contamination': 0.19208693504333496, 'optimization': 0, 'imputation': 30.92500948905945}}, '0.8': {'scores': {'RMSE': 2.9806206255800913, 'MAE': 1.530963982498524, 'MI': 0.05121884841141813, 'CORRELATION': 0.2903624430928721}, 'times': {'contamination': 0.6799006462097168, 'optimization': 0, 'imputation': 28.389225006103516}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.1560886685592231, 'MAE': 0.10320394166419149, 'MI': 1.2780123906233032, 'CORRELATION': 0.9851724611327715}, 'times': {'contamination': 0.0010797977447509766, 'optimization': 0, 'imputation': 0.8159213066101074}}, '0.1': {'scores': {'RMSE': 0.3006324748663841, 'MAE': 0.17773178955210425, 'MI': 1.2119149147233643, 'CORRELATION': 0.9321993026569703}, 'times': {'contamination': 0.0021529197692871094, 'optimization': 0, 'imputation': 5.404278039932251}}, '0.2': {'scores': {'RMSE': 0.30708253455892426, 'MAE': 0.18786443029344255, 'MI': 1.0350247745925767, 'CORRELATION': 0.9270935540980816}, 'times': {'contamination': 0.007862567901611328, 'optimization': 0, 'imputation': 39.23897194862366}}, '0.4': {'scores': {'RMSE': 0.36627844349732885, 'MAE': 0.23513471435395922, 'MI': 0.8536501396545491, 'CORRELATION': 0.9028949327632931}, 'times': {'contamination': 0.04749464988708496, 'optimization': 0, 'imputation': 291.0960524082184}}, '0.6': {'scores': {'RMSE': 0.44187263450733627, 'MAE': 0.3005295255111392, 'MI': 0.7070128664004881, 'CORRELATION': 0.8600506431175654}, 'times': {'contamination': 0.19056296348571777, 'optimization': 0, 'imputation': 961.3684046268463}}, '0.8': {'scores': {'RMSE': 0.6162987723847368, 'MAE': 0.4408568111584791, 'MI': 0.38562262881823584, 'CORRELATION': 0.7078269987710476}, 'times': {'contamination': 0.6741812229156494, 'optimization': 0, 'imputation': 2265.02947473526}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.2157597331971723, 'MAE': 1.0542417765804475, 'MI': 0.569311657025473, 'CORRELATION': -0.41037521809198385}, 'times': {'contamination': 0.0011243820190429688, 'optimization': 0, 'imputation': 18.271201610565186}}, '0.1': {'scores': {'RMSE': 1.1799455746309517, 'MAE': 1.0537900828112892, 'MI': 0.3698854611544671, 'CORRELATION': -0.30580392001607287}, 'times': {'contamination': 0.0025169849395751953, 'optimization': 0, 'imputation': 18.178789377212524}}, '0.2': {'scores': 
{'RMSE': 1.341883829249102, 'MAE': 1.1116623537227253, 'MI': 0.22703785144726024, 'CORRELATION': -0.13139818884461385}, 'times': {'contamination': 0.008020877838134766, 'optimization': 0, 'imputation': 18.227224111557007}}, '0.4': {'scores': {'RMSE': 1.4574773306729822, 'MAE': 1.221059892905018, 'MI': 0.1526121106442972, 'CORRELATION': -0.06171770589679702}, 'times': {'contamination': 0.04882335662841797, 'optimization': 0, 'imputation': 18.527106523513794}}, '0.6': {'scores': {'RMSE': 1.4501476980845394, 'MAE': 1.1589217747122664, 'MI': 0.08174182790842249, 'CORRELATION': -0.028201438478978574}, 'times': {'contamination': 0.19412755966186523, 'optimization': 0, 'imputation': 19.096518754959106}}, '0.8': {'scores': {'RMSE': 1.204799199247893, 'MAE': 1.002446633752256, 'MI': 0.08875526330977121, 'CORRELATION': -0.02097728376019728}, 'times': {'contamination': 0.6939215660095215, 'optimization': 0, 'imputation': 19.685445308685303}}}}}}} + run_2_chlorine = {'chlorine': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9256738243031312, 'MAE': 0.8788758766429177, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.001043081283569336, 'optimization': 0, 'imputation': 0.0008816719055175781}}, '0.1': {'scores': {'RMSE': 0.8239629739455251, 'MAE': 0.7297827051195541, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.002270221710205078, 'optimization': 0, 'imputation': 0.00047469139099121094}}, '0.2': {'scores': {'RMSE': 0.8317409760747367, 'MAE': 0.7138664942301458, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.007225513458251953, 'optimization': 0, 'imputation': 0.0004715919494628906}}, '0.4': {'scores': {'RMSE': 0.866178542847881, 'MAE': 0.744937943856253, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.04382967948913574, 'optimization': 0, 'imputation': 0.0005059242248535156}}, '0.6': {'scores': {'RMSE': 0.8906205973878023, 'MAE': 0.7677632103385671, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.17531085014343262, 'optimization': 0, 'imputation': 0.0005536079406738281}}, '0.8': {'scores': {'RMSE': 0.9231926867636093, 'MAE': 0.7897697041316387, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.6192398071289062, 'optimization': 0, 'imputation': 0.0005943775177001953}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.19554703625817557, 'MAE': 0.1437913973228053, 'MI': 1.3195962394272744, 'CORRELATION': 0.9770406565915004}, 'times': {'contamination': 0.0009462833404541016, 'optimization': 0, 'imputation': 0.060041189193725586}}, '0.1': {'scores': {'RMSE': 0.22212985201492597, 'MAE': 0.1368378161074427, 'MI': 1.225240202380491, 'CORRELATION': 0.9627706895400587}, 'times': {'contamination': 0.004572868347167969, 'optimization': 0, 'imputation': 0.0699300765991211}}, '0.2': {'scores': {'RMSE': 0.268910630576598, 'MAE': 0.16983805083071585, 'MI': 1.0636573662919013, 'CORRELATION': 0.9453283753208437}, 'times': {'contamination': 0.016742944717407227, 'optimization': 0, 'imputation': 0.07609176635742188}}, '0.4': {'scores': {'RMSE': 0.31430310541683426, 'MAE': 0.2041005558473225, 'MI': 0.9124259582934485, 'CORRELATION': 0.9309696942537548}, 'times': {'contamination': 0.10860323905944824, 'optimization': 0, 'imputation': 0.15946102142333984}}, '0.6': {'scores': {'RMSE': 0.3737964229023613, 'MAE': 0.22131322530176772, 'MI': 0.7775995167572279, 'CORRELATION': 0.9083977308218121}, 'times': {'contamination': 0.2411816120147705, 'optimization': 0, 'imputation': 0.43070363998413086}}, '0.8': {'scores': {'RMSE': 0.9290440261799385, 
'MAE': 0.4933255678502781, 'MI': 0.2021428083194056, 'CORRELATION': 0.6461059842947307}, 'times': {'contamination': 0.6789627075195312, 'optimization': 0, 'imputation': 4.46994161605835}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.16435641817881824, 'MAE': 0.13990340223545955, 'MI': 1.3785977665357232, 'CORRELATION': 0.9868224741901116}, 'times': {'contamination': 0.002485513687133789, 'optimization': 0, 'imputation': 39.508928298950195}}, '0.1': {'scores': {'RMSE': 0.2228247553722344, 'MAE': 0.16815959364081734, 'MI': 1.2340069760129087, 'CORRELATION': 0.9623151173186535}, 'times': {'contamination': 0.0023517608642578125, 'optimization': 0, 'imputation': 39.52970552444458}}, '0.2': {'scores': {'RMSE': 0.27923604567760596, 'MAE': 0.19211165697030474, 'MI': 1.0043820035861775, 'CORRELATION': 0.9430094313080399}, 'times': {'contamination': 0.007275581359863281, 'optimization': 0, 'imputation': 39.95721387863159}}, '0.4': {'scores': {'RMSE': 0.3255775619246775, 'MAE': 0.2194073917812186, 'MI': 0.8847163339667148, 'CORRELATION': 0.9259001258177321}, 'times': {'contamination': 0.042914390563964844, 'optimization': 0, 'imputation': 41.303142786026}}, '0.6': {'scores': {'RMSE': 0.44447910257331374, 'MAE': 0.30600741310945195, 'MI': 0.6723738452451481, 'CORRELATION': 0.857466472714002}, 'times': {'contamination': 0.17032194137573242, 'optimization': 0, 'imputation': 30.968651294708252}}, '0.8': {'scores': {'RMSE': 2.9806206255800913, 'MAE': 1.530963982498524, 'MI': 0.05121884841141813, 'CORRELATION': 0.2903624430928721}, 'times': {'contamination': 0.6045393943786621, 'optimization': 0, 'imputation': 28.36435556411743}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.1560886685592231, 'MAE': 0.10320394166419149, 'MI': 1.2780123906233032, 'CORRELATION': 0.9851724611327715}, 'times': {'contamination': 0.000980377197265625, 'optimization': 0, 'imputation': 0.7417066097259521}}, '0.1': {'scores': {'RMSE': 0.3006324748663841, 'MAE': 0.17773178955210425, 'MI': 1.2119149147233643, 'CORRELATION': 0.9321993026569703}, 'times': {'contamination': 0.0019462108612060547, 'optimization': 0, 'imputation': 4.773505687713623}}, '0.2': {'scores': {'RMSE': 0.30708253455892426, 'MAE': 0.18786443029344255, 'MI': 1.0350247745925767, 'CORRELATION': 0.9270935540980816}, 'times': {'contamination': 0.008093833923339844, 'optimization': 0, 'imputation': 34.58026099205017}}, '0.4': {'scores': {'RMSE': 0.36627844349732885, 'MAE': 0.23513471435395922, 'MI': 0.8536501396545491, 'CORRELATION': 0.9028949327632931}, 'times': {'contamination': 0.04369974136352539, 'optimization': 0, 'imputation': 253.98769640922546}}, '0.6': {'scores': {'RMSE': 0.44187263450733627, 'MAE': 0.3005295255111392, 'MI': 0.7070128664004881, 'CORRELATION': 0.8600506431175654}, 'times': {'contamination': 0.16975879669189453, 'optimization': 0, 'imputation': 835.3046026229858}}, '0.8': {'scores': {'RMSE': 0.6162987723847368, 'MAE': 0.4408568111584791, 'MI': 0.38562262881823584, 'CORRELATION': 0.7078269987710476}, 'times': {'contamination': 0.5958583354949951, 'optimization': 0, 'imputation': 1967.7639136314392}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0136434251178998, 'MAE': 0.8848324237744947, 'MI': 0.569311657025473, 'CORRELATION': 0.29914348963401916}, 'times': {'contamination': 0.0010654926300048828, 'optimization': 0, 'imputation': 18.329164743423462}}, '0.1': {'scores': {'RMSE': 1.2969727084789213, 'MAE': 1.096550700485976, 'MI': 0.4844113002067355, 'CORRELATION': -0.14524582877234712}, 'times': {'contamination': 
0.0023641586303710938, 'optimization': 0, 'imputation': 18.21089506149292}}, '0.2': {'scores': {'RMSE': 1.0905397356299984, 'MAE': 0.8836097265712998, 'MI': 0.173773514607323, 'CORRELATION': -0.11890703333812934}, 'times': {'contamination': 0.007399082183837891, 'optimization': 0, 'imputation': 18.337430715560913}}, '0.4': {'scores': {'RMSE': 1.4069154761905174, 'MAE': 1.1643367090708647, 'MI': 0.09571825537518668, 'CORRELATION': -0.022364037624607463}, 'times': {'contamination': 0.043769121170043945, 'optimization': 0, 'imputation': 18.840161323547363}}, '0.6': {'scores': {'RMSE': 1.382829866742193, 'MAE': 1.1269958882289104, 'MI': 0.09215558384208698, 'CORRELATION': -0.032372544249182615}, 'times': {'contamination': 0.1728811264038086, 'optimization': 0, 'imputation': 19.076626300811768}}, '0.8': {'scores': {'RMSE': 1.5039750591991847, 'MAE': 1.2211771463532568, 'MI': 0.08522464337328965, 'CORRELATION': 0.002752327584939554}, 'times': {'contamination': 0.6081700325012207, 'optimization': 0, 'imputation': 19.578737258911133}}}}}}} + run_3_chlorine = {'chlorine': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9256738243031312, 'MAE': 0.8788758766429177, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.001058340072631836, 'optimization': 0, 'imputation': 0.0009882450103759766}}, '0.1': {'scores': {'RMSE': 0.8239629739455251, 'MAE': 0.7297827051195541, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0022249221801757812, 'optimization': 0, 'imputation': 0.0004658699035644531}}, '0.2': {'scores': {'RMSE': 0.8317409760747367, 'MAE': 0.7138664942301458, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.00716400146484375, 'optimization': 0, 'imputation': 0.0004811286926269531}}, '0.4': {'scores': {'RMSE': 0.866178542847881, 'MAE': 0.744937943856253, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.04335308074951172, 'optimization': 0, 'imputation': 0.0005075931549072266}}, '0.6': {'scores': {'RMSE': 0.8906205973878023, 'MAE': 0.7677632103385671, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.1734917163848877, 'optimization': 0, 'imputation': 0.0005586147308349609}}, '0.8': {'scores': {'RMSE': 0.9231926867636093, 'MAE': 0.7897697041316387, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.6150054931640625, 'optimization': 0, 'imputation': 0.0005857944488525391}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.19554703625817557, 'MAE': 0.1437913973228053, 'MI': 1.3195962394272744, 'CORRELATION': 0.9770406565915004}, 'times': {'contamination': 0.0009434223175048828, 'optimization': 0, 'imputation': 0.05546450614929199}}, '0.1': {'scores': {'RMSE': 0.22212985201492597, 'MAE': 0.1368378161074427, 'MI': 1.225240202380491, 'CORRELATION': 0.9627706895400587}, 'times': {'contamination': 0.004517078399658203, 'optimization': 0, 'imputation': 0.06634163856506348}}, '0.2': {'scores': {'RMSE': 0.268910630576598, 'MAE': 0.16983805083071585, 'MI': 1.0636573662919013, 'CORRELATION': 0.9453283753208437}, 'times': {'contamination': 0.017187833786010742, 'optimization': 0, 'imputation': 0.07271552085876465}}, '0.4': {'scores': {'RMSE': 0.31430310541683426, 'MAE': 0.2041005558473225, 'MI': 0.9124259582934485, 'CORRELATION': 0.9309696942537548}, 'times': {'contamination': 0.10760045051574707, 'optimization': 0, 'imputation': 0.15883731842041016}}, '0.6': {'scores': {'RMSE': 0.3737964229023613, 'MAE': 0.22131322530176772, 'MI': 0.7775995167572279, 'CORRELATION': 0.9083977308218121}, 'times': {'contamination': 0.2422795295715332, 
'optimization': 0, 'imputation': 0.42018914222717285}}, '0.8': {'scores': {'RMSE': 0.9290440261799385, 'MAE': 0.4933255678502781, 'MI': 0.2021428083194056, 'CORRELATION': 0.6461059842947307}, 'times': {'contamination': 0.680551290512085, 'optimization': 0, 'imputation': 4.376981019973755}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.16435641817881824, 'MAE': 0.13990340223545955, 'MI': 1.3785977665357232, 'CORRELATION': 0.9868224741901116}, 'times': {'contamination': 0.003632068634033203, 'optimization': 0, 'imputation': 39.26680397987366}}, '0.1': {'scores': {'RMSE': 0.2228247553722344, 'MAE': 0.16815959364081734, 'MI': 1.2340069760129087, 'CORRELATION': 0.9623151173186535}, 'times': {'contamination': 0.002412080764770508, 'optimization': 0, 'imputation': 39.86172819137573}}, '0.2': {'scores': {'RMSE': 0.27923604567760596, 'MAE': 0.19211165697030474, 'MI': 1.0043820035861775, 'CORRELATION': 0.9430094313080399}, 'times': {'contamination': 0.007197856903076172, 'optimization': 0, 'imputation': 40.38218545913696}}, '0.4': {'scores': {'RMSE': 0.3255775619246775, 'MAE': 0.2194073917812186, 'MI': 0.8847163339667148, 'CORRELATION': 0.9259001258177321}, 'times': {'contamination': 0.04293513298034668, 'optimization': 0, 'imputation': 41.78527879714966}}, '0.6': {'scores': {'RMSE': 0.44447910257331374, 'MAE': 0.30600741310945195, 'MI': 0.6723738452451481, 'CORRELATION': 0.857466472714002}, 'times': {'contamination': 0.17196941375732422, 'optimization': 0, 'imputation': 31.38751482963562}}, '0.8': {'scores': {'RMSE': 2.9806206255800913, 'MAE': 1.530963982498524, 'MI': 0.05121884841141813, 'CORRELATION': 0.2903624430928721}, 'times': {'contamination': 0.6091823577880859, 'optimization': 0, 'imputation': 28.6495304107666}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.1560886685592231, 'MAE': 0.10320394166419149, 'MI': 1.2780123906233032, 'CORRELATION': 0.9851724611327715}, 'times': {'contamination': 0.0010943412780761719, 'optimization': 0, 'imputation': 0.7426207065582275}}, '0.1': {'scores': {'RMSE': 0.3006324748663841, 'MAE': 0.17773178955210425, 'MI': 1.2119149147233643, 'CORRELATION': 0.9321993026569703}, 'times': {'contamination': 0.001959562301635742, 'optimization': 0, 'imputation': 4.814657926559448}}, '0.2': {'scores': {'RMSE': 0.30708253455892426, 'MAE': 0.18786443029344255, 'MI': 1.0350247745925767, 'CORRELATION': 0.9270935540980816}, 'times': {'contamination': 0.008258581161499023, 'optimization': 0, 'imputation': 34.62715792655945}}, '0.4': {'scores': {'RMSE': 0.36627844349732885, 'MAE': 0.23513471435395922, 'MI': 0.8536501396545491, 'CORRELATION': 0.9028949327632931}, 'times': {'contamination': 0.0439915657043457, 'optimization': 0, 'imputation': 255.53748202323914}}, '0.6': {'scores': {'RMSE': 0.44187263450733627, 'MAE': 0.3005295255111392, 'MI': 0.7070128664004881, 'CORRELATION': 0.8600506431175654}, 'times': {'contamination': 0.16890335083007812, 'optimization': 0, 'imputation': 840.3006525039673}}, '0.8': {'scores': {'RMSE': 0.6162987723847368, 'MAE': 0.4408568111584791, 'MI': 0.38562262881823584, 'CORRELATION': 0.7078269987710476}, 'times': {'contamination': 0.6028788089752197, 'optimization': 0, 'imputation': 1966.1153359413147}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.029108480614302, 'MAE': 0.849997592084472, 'MI': 0.569311657025473, 'CORRELATION': -0.34135585229137194}, 'times': {'contamination': 0.0010657310485839844, 'optimization': 0, 'imputation': 17.878644943237305}}, '0.1': {'scores': {'RMSE': 1.8645374616997576, 'MAE': 
1.6163656821639383, 'MI': 0.4813306460709136, 'CORRELATION': -0.39944268926751514}, 'times': {'contamination': 0.0021545886993408203, 'optimization': 0, 'imputation': 18.227890968322754}}, '0.2': {'scores': {'RMSE': 1.1049704980896, 'MAE': 0.9144454227691684, 'MI': 0.17197985846530675, 'CORRELATION': -0.04348618452798679}, 'times': {'contamination': 0.0074846744537353516, 'optimization': 0, 'imputation': 18.08056640625}}, '0.4': {'scores': {'RMSE': 1.3718667824151887, 'MAE': 1.1222752970024972, 'MI': 0.07628371768993472, 'CORRELATION': 0.010136633181027283}, 'times': {'contamination': 0.04388308525085449, 'optimization': 0, 'imputation': 18.583510637283325}}, '0.6': {'scores': {'RMSE': 1.3178885712841644, 'MAE': 1.0812817111954678, 'MI': 0.0898108080896041, 'CORRELATION': -0.020124428247071557}, 'times': {'contamination': 0.17338800430297852, 'optimization': 0, 'imputation': 18.885586261749268}}, '0.8': {'scores': {'RMSE': 1.229438316008386, 'MAE': 0.9979298148457775, 'MI': 0.0590277390664926, 'CORRELATION': 0.011604314744011184}, 'times': {'contamination': 0.6120181083679199, 'optimization': 0, 'imputation': 19.552552461624146}}}}}}} + + run_1_drift = {'drift': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9234927128429051, 'MAE': 0.7219362152785619, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.000993967056274414, 'optimization': 0, 'imputation': 0.0006108283996582031}}, '0.1': {'scores': {'RMSE': 0.9699990038879407, 'MAE': 0.7774057495176013, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.002144336700439453, 'optimization': 0, 'imputation': 0.00045013427734375}}, '0.2': {'scores': {'RMSE': 0.9914069853975623, 'MAE': 0.8134840739732964, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.007688283920288086, 'optimization': 0, 'imputation': 0.00047278404235839844}}, '0.4': {'scores': {'RMSE': 1.0552448338389784, 'MAE': 0.7426695186604741, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.048220157623291016, 'optimization': 0, 'imputation': 0.0004978179931640625}}, '0.6': {'scores': {'RMSE': 1.0143105930114702, 'MAE': 0.7610548321723654, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.19398927688598633, 'optimization': 0, 'imputation': 0.0005459785461425781}}, '0.8': {'scores': {'RMSE': 1.010712060535523, 'MAE': 0.7641520748788702, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.6935787200927734, 'optimization': 0, 'imputation': 0.0005681514739990234}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.23303624184873978, 'MAE': 0.13619797235197734, 'MI': 1.2739817718416822, 'CORRELATION': 0.968435455112644}, 'times': {'contamination': 0.0009219646453857422, 'optimization': 0, 'imputation': 0.0894620418548584}}, '0.1': {'scores': {'RMSE': 0.18152059329152104, 'MAE': 0.09925566629402761, 'MI': 1.1516089897042538, 'CORRELATION': 0.9829398352220718}, 'times': {'contamination': 0.005087137222290039, 'optimization': 0, 'imputation': 0.09586262702941895}}, '0.2': {'scores': {'RMSE': 0.13894771223733138, 'MAE': 0.08459032692102293, 'MI': 1.186191167936035, 'CORRELATION': 0.9901338133811375}, 'times': {'contamination': 0.01807999610900879, 'optimization': 0, 'imputation': 0.11399722099304199}}, '0.4': {'scores': {'RMSE': 0.7544523683503829, 'MAE': 0.11218049973594252, 'MI': 0.021165172206064526, 'CORRELATION': 0.814120507570725}, 'times': {'contamination': 0.11642265319824219, 'optimization': 0, 'imputation': 1.951967716217041}}, '0.6': {'scores': {'RMSE': 0.4355197572001326, 'MAE': 0.1380846624733049, 'MI': 
0.10781252370591506, 'CORRELATION': 0.9166777087122915}, 'times': {'contamination': 0.261821985244751, 'optimization': 0, 'imputation': 1.9031248092651367}}, '0.8': {'scores': {'RMSE': 0.7672558930795506, 'MAE': 0.32988968428439397, 'MI': 0.013509125598802707, 'CORRELATION': 0.7312998041323675}, 'times': {'contamination': 0.7516331672668457, 'optimization': 0, 'imputation': 1.9795506000518799}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.5434405584289141, 'MAE': 0.346560495723809, 'MI': 0.7328867182584357, 'CORRELATION': 0.8519431955571422}, 'times': {'contamination': 0.0034203529357910156, 'optimization': 0, 'imputation': 52.270103931427}}, '0.1': {'scores': {'RMSE': 0.39007056542870916, 'MAE': 0.2753022759369617, 'MI': 0.8280959876205578, 'CORRELATION': 0.9180937736429735}, 'times': {'contamination': 0.002481222152709961, 'optimization': 0, 'imputation': 52.581149101257324}}, '0.2': {'scores': {'RMSE': 0.37254427425455994, 'MAE': 0.2730547993858495, 'MI': 0.7425412593844177, 'CORRELATION': 0.9293322959355041}, 'times': {'contamination': 0.008099079132080078, 'optimization': 0, 'imputation': 52.98776412010193}}, '0.4': {'scores': {'RMSE': 0.6027573766269363, 'MAE': 0.34494332493982044, 'MI': 0.11876685901414151, 'CORRELATION': 0.8390532279447225}, 'times': {'contamination': 0.048757076263427734, 'optimization': 0, 'imputation': 54.611621379852295}}, '0.6': {'scores': {'RMSE': 0.9004526656857551, 'MAE': 0.4924048353228427, 'MI': 0.011590260996247858, 'CORRELATION': 0.5650541301828254}, 'times': {'contamination': 0.1966409683227539, 'optimization': 0, 'imputation': 40.79859209060669}}, '0.8': {'scores': {'RMSE': 1.0112488396023014, 'MAE': 0.7646823531588104, 'MI': 0.00040669209664367576, 'CORRELATION': 0.0183962968474991}, 'times': {'contamination': 0.6912062168121338, 'optimization': 0, 'imputation': 35.179917097091675}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.4445625930776235, 'MAE': 0.2696133927362288, 'MI': 1.1167751522591498, 'CORRELATION': 0.8944975075266335}, 'times': {'contamination': 0.0011439323425292969, 'optimization': 0, 'imputation': 0.8133630752563477}}, '0.1': {'scores': {'RMSE': 0.2939506418814281, 'MAE': 0.16953644212278182, 'MI': 1.0160968166750064, 'CORRELATION': 0.9531900627237018}, 'times': {'contamination': 0.0021233558654785156, 'optimization': 0, 'imputation': 5.425678014755249}}, '0.2': {'scores': {'RMSE': 0.2366529609250008, 'MAE': 0.14709529129218185, 'MI': 1.064299483512458, 'CORRELATION': 0.9711348247027318}, 'times': {'contamination': 0.0077056884765625, 'optimization': 0, 'imputation': 38.82799983024597}}, '0.4': {'scores': {'RMSE': 0.4155649406397416, 'MAE': 0.22056702659999994, 'MI': 0.06616526470761779, 'CORRELATION': 0.919934494058292}, 'times': {'contamination': 0.048249244689941406, 'optimization': 0, 'imputation': 292.08297657966614}}, '0.6': {'scores': {'RMSE': 0.38695094864012947, 'MAE': 0.24340565131372927, 'MI': 0.06361822797740405, 'CORRELATION': 0.9249744935121553}, 'times': {'contamination': 0.1907951831817627, 'optimization': 0, 'imputation': 964.3528797626495}}, '0.8': {'scores': {'RMSE': 0.5862696375344495, 'MAE': 0.3968159514130716, 'MI': 0.13422239939628303, 'CORRELATION': 0.8178796825899766}, 'times': {'contamination': 0.6756997108459473, 'optimization': 0, 'imputation': 2268.7152428627014}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9446007612538053, 'MAE': 0.7071227725048369, 'MI': 0.11924522547609226, 'CORRELATION': -0.01944229566442761}, 'times': {'contamination': 0.0011224746704101562, 
'optimization': 0, 'imputation': 49.38418102264404}}, '0.1': {'scores': {'RMSE': 1.018077695981055, 'MAE': 0.7618772118737108, 'MI': 0.12567590499764303, 'CORRELATION': -0.044145607442978164}, 'times': {'contamination': 0.0033884048461914062, 'optimization': 0, 'imputation': 49.42437410354614}}, '0.2': {'scores': {'RMSE': 1.0328353866969924, 'MAE': 0.7947328876841107, 'MI': 0.10908095436833125, 'CORRELATION': -0.039066506384315955}, 'times': {'contamination': 0.009317874908447266, 'optimization': 0, 'imputation': 49.775628089904785}}, '0.4': {'scores': {'RMSE': 1.084645568714648, 'MAE': 0.7406884025277696, 'MI': 0.00864358706683903, 'CORRELATION': -0.01785022064101569}, 'times': {'contamination': 0.04832720756530762, 'optimization': 0, 'imputation': 50.35092234611511}}, '0.6': {'scores': {'RMSE': 1.0417931248055596, 'MAE': 0.7544412522908885, 'MI': 0.005311966575218846, 'CORRELATION': -0.007280975078268482}, 'times': {'contamination': 0.19607019424438477, 'optimization': 0, 'imputation': 50.96062636375427}}, '0.8': {'scores': {'RMSE': 1.037173921676863, 'MAE': 0.7567867082423407, 'MI': 0.0035129076969891296, 'CORRELATION': -0.001003745882306244}, 'times': {'contamination': 0.6899962425231934, 'optimization': 0, 'imputation': 51.77077889442444}}}}}}} + run_2_drift = {'drift': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9234927128429051, 'MAE': 0.7219362152785619, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0010313987731933594, 'optimization': 0, 'imputation': 0.0005719661712646484}}, '0.1': {'scores': {'RMSE': 0.9699990038879407, 'MAE': 0.7774057495176013, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0020673274993896484, 'optimization': 0, 'imputation': 0.0004525184631347656}}, '0.2': {'scores': {'RMSE': 0.9914069853975623, 'MAE': 0.8134840739732964, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0069735050201416016, 'optimization': 0, 'imputation': 0.0004620552062988281}}, '0.4': {'scores': {'RMSE': 1.0552448338389784, 'MAE': 0.7426695186604741, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.04297828674316406, 'optimization': 0, 'imputation': 0.0005059242248535156}}, '0.6': {'scores': {'RMSE': 1.0143105930114702, 'MAE': 0.7610548321723654, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.1704411506652832, 'optimization': 0, 'imputation': 0.0005540847778320312}}, '0.8': {'scores': {'RMSE': 1.010712060535523, 'MAE': 0.7641520748788702, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.6023464202880859, 'optimization': 0, 'imputation': 0.0005741119384765625}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.23303624184873978, 'MAE': 0.13619797235197734, 'MI': 1.2739817718416822, 'CORRELATION': 0.968435455112644}, 'times': {'contamination': 0.0009391307830810547, 'optimization': 0, 'imputation': 0.09287238121032715}}, '0.1': {'scores': {'RMSE': 0.18152059329152104, 'MAE': 0.09925566629402761, 'MI': 1.1516089897042538, 'CORRELATION': 0.9829398352220718}, 'times': {'contamination': 0.004915475845336914, 'optimization': 0, 'imputation': 0.09826517105102539}}, '0.2': {'scores': {'RMSE': 0.13894771223733138, 'MAE': 0.08459032692102293, 'MI': 1.186191167936035, 'CORRELATION': 0.9901338133811375}, 'times': {'contamination': 0.016765832901000977, 'optimization': 0, 'imputation': 0.11972737312316895}}, '0.4': {'scores': {'RMSE': 0.7544523683503829, 'MAE': 0.11218049973594252, 'MI': 0.021165172206064526, 'CORRELATION': 0.814120507570725}, 'times': {'contamination': 0.10848760604858398, 'optimization': 0, 
'imputation': 1.9038522243499756}}, '0.6': {'scores': {'RMSE': 0.4355197572001326, 'MAE': 0.1380846624733049, 'MI': 0.10781252370591506, 'CORRELATION': 0.9166777087122915}, 'times': {'contamination': 0.24063801765441895, 'optimization': 0, 'imputation': 2.0024468898773193}}, '0.8': {'scores': {'RMSE': 0.7672558930795506, 'MAE': 0.32988968428439397, 'MI': 0.013509125598802707, 'CORRELATION': 0.7312998041323675}, 'times': {'contamination': 0.6742842197418213, 'optimization': 0, 'imputation': 2.014857530593872}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.5434405584289141, 'MAE': 0.346560495723809, 'MI': 0.7328867182584357, 'CORRELATION': 0.8519431955571422}, 'times': {'contamination': 0.0018467903137207031, 'optimization': 0, 'imputation': 51.94705104827881}}, '0.1': {'scores': {'RMSE': 0.39007056542870916, 'MAE': 0.2753022759369617, 'MI': 0.8280959876205578, 'CORRELATION': 0.9180937736429735}, 'times': {'contamination': 0.0023064613342285156, 'optimization': 0, 'imputation': 52.207499265670776}}, '0.2': {'scores': {'RMSE': 0.37254427425455994, 'MAE': 0.2730547993858495, 'MI': 0.7425412593844177, 'CORRELATION': 0.9293322959355041}, 'times': {'contamination': 0.007190704345703125, 'optimization': 0, 'imputation': 52.6685905456543}}, '0.4': {'scores': {'RMSE': 0.6027573766269363, 'MAE': 0.34494332493982044, 'MI': 0.11876685901414151, 'CORRELATION': 0.8390532279447225}, 'times': {'contamination': 0.043596744537353516, 'optimization': 0, 'imputation': 54.60442757606506}}, '0.6': {'scores': {'RMSE': 0.9004526656857551, 'MAE': 0.4924048353228427, 'MI': 0.011590260996247858, 'CORRELATION': 0.5650541301828254}, 'times': {'contamination': 0.17136526107788086, 'optimization': 0, 'imputation': 40.812124252319336}}, '0.8': {'scores': {'RMSE': 1.0112488396023014, 'MAE': 0.7646823531588104, 'MI': 0.00040669209664367576, 'CORRELATION': 0.0183962968474991}, 'times': {'contamination': 0.6082520484924316, 'optimization': 0, 'imputation': 35.65152835845947}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.4445625930776235, 'MAE': 0.2696133927362288, 'MI': 1.1167751522591498, 'CORRELATION': 0.8944975075266335}, 'times': {'contamination': 0.0009832382202148438, 'optimization': 0, 'imputation': 0.736797571182251}}, '0.1': {'scores': {'RMSE': 0.2939506418814281, 'MAE': 0.16953644212278182, 'MI': 1.0160968166750064, 'CORRELATION': 0.9531900627237018}, 'times': {'contamination': 0.0019483566284179688, 'optimization': 0, 'imputation': 4.7518310546875}}, '0.2': {'scores': {'RMSE': 0.2366529609250008, 'MAE': 0.14709529129218185, 'MI': 1.064299483512458, 'CORRELATION': 0.9711348247027318}, 'times': {'contamination': 0.008207082748413086, 'optimization': 0, 'imputation': 33.73034954071045}}, '0.4': {'scores': {'RMSE': 0.4155649406397416, 'MAE': 0.22056702659999994, 'MI': 0.06616526470761779, 'CORRELATION': 0.919934494058292}, 'times': {'contamination': 0.04391932487487793, 'optimization': 0, 'imputation': 253.04820013046265}}, '0.6': {'scores': {'RMSE': 0.38695094864012947, 'MAE': 0.24340565131372927, 'MI': 0.06361822797740405, 'CORRELATION': 0.9249744935121553}, 'times': {'contamination': 0.1685347557067871, 'optimization': 0, 'imputation': 839.5933110713959}}, '0.8': {'scores': {'RMSE': 0.5862696375344495, 'MAE': 0.3968159514130716, 'MI': 0.13422239939628303, 'CORRELATION': 0.8178796825899766}, 'times': {'contamination': 0.5977535247802734, 'optimization': 0, 'imputation': 1972.2190878391266}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9438252697840334, 'MAE': 0.7074066748495141, 'MI': 
0.11924522547609226, 'CORRELATION': -0.021372042533312763}, 'times': {'contamination': 0.0010783672332763672, 'optimization': 0, 'imputation': 48.64605975151062}}, '0.1': {'scores': {'RMSE': 1.0158428722855914, 'MAE': 0.7616120997979859, 'MI': 0.12567590499764303, 'CORRELATION': -0.04445192812896842}, 'times': {'contamination': 0.0023419857025146484, 'optimization': 0, 'imputation': 49.69926905632019}}, '0.2': {'scores': {'RMSE': 1.0306600495361184, 'MAE': 0.7944217777859633, 'MI': 0.10908095436833125, 'CORRELATION': -0.038559181077793894}, 'times': {'contamination': 0.007108926773071289, 'optimization': 0, 'imputation': 49.629971504211426}}, '0.4': {'scores': {'RMSE': 1.084538358231139, 'MAE': 0.7413855539533567, 'MI': 0.00811911881953157, 'CORRELATION': -0.019716554461979955}, 'times': {'contamination': 0.04305720329284668, 'optimization': 0, 'imputation': 49.46027088165283}}, '0.6': {'scores': {'RMSE': 1.0438210426766994, 'MAE': 0.7593955061969807, 'MI': 0.011650658060022669, 'CORRELATION': -0.007809791713096682}, 'times': {'contamination': 0.1705613136291504, 'optimization': 0, 'imputation': 50.77842974662781}}, '0.8': {'scores': {'RMSE': 1.0386852722851065, 'MAE': 0.7587186222490558, 'MI': 0.0035129076969891296, 'CORRELATION': -0.0009994717223948948}, 'times': {'contamination': 0.6103465557098389, 'optimization': 0, 'imputation': 50.92327070236206}}}}}}} + run_3_drift = {'drift': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9234927128429051, 'MAE': 0.7219362152785619, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0010309219360351562, 'optimization': 0, 'imputation': 0.0005755424499511719}}, '0.1': {'scores': {'RMSE': 0.9699990038879407, 'MAE': 0.7774057495176013, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0020699501037597656, 'optimization': 0, 'imputation': 0.00048422813415527344}}, '0.2': {'scores': {'RMSE': 0.9914069853975623, 'MAE': 0.8134840739732964, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.007096290588378906, 'optimization': 0, 'imputation': 0.000461578369140625}}, '0.4': {'scores': {'RMSE': 1.0552448338389784, 'MAE': 0.7426695186604741, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.043192148208618164, 'optimization': 0, 'imputation': 0.0005095005035400391}}, '0.6': {'scores': {'RMSE': 1.0143105930114702, 'MAE': 0.7610548321723654, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.17184901237487793, 'optimization': 0, 'imputation': 0.0005536079406738281}}, '0.8': {'scores': {'RMSE': 1.010712060535523, 'MAE': 0.7641520748788702, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.6064670085906982, 'optimization': 0, 'imputation': 0.0005743503570556641}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.23303624184873978, 'MAE': 0.13619797235197734, 'MI': 1.2739817718416822, 'CORRELATION': 0.968435455112644}, 'times': {'contamination': 0.0009615421295166016, 'optimization': 0, 'imputation': 0.09218788146972656}}, '0.1': {'scores': {'RMSE': 0.18152059329152104, 'MAE': 0.09925566629402761, 'MI': 1.1516089897042538, 'CORRELATION': 0.9829398352220718}, 'times': {'contamination': 0.00482487678527832, 'optimization': 0, 'imputation': 0.09549617767333984}}, '0.2': {'scores': {'RMSE': 0.13894771223733138, 'MAE': 0.08459032692102293, 'MI': 1.186191167936035, 'CORRELATION': 0.9901338133811375}, 'times': {'contamination': 0.01713728904724121, 'optimization': 0, 'imputation': 0.1129295825958252}}, '0.4': {'scores': {'RMSE': 0.7544523683503829, 'MAE': 0.11218049973594252, 'MI': 
0.021165172206064526, 'CORRELATION': 0.814120507570725}, 'times': {'contamination': 0.10881781578063965, 'optimization': 0, 'imputation': 1.9378046989440918}}, '0.6': {'scores': {'RMSE': 0.4355197572001326, 'MAE': 0.1380846624733049, 'MI': 0.10781252370591506, 'CORRELATION': 0.9166777087122915}, 'times': {'contamination': 0.2380077838897705, 'optimization': 0, 'imputation': 1.8785057067871094}}, '0.8': {'scores': {'RMSE': 0.7672558930795506, 'MAE': 0.32988968428439397, 'MI': 0.013509125598802707, 'CORRELATION': 0.7312998041323675}, 'times': {'contamination': 0.6805167198181152, 'optimization': 0, 'imputation': 1.9562773704528809}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.5434405584289141, 'MAE': 0.346560495723809, 'MI': 0.7328867182584357, 'CORRELATION': 0.8519431955571422}, 'times': {'contamination': 0.0022056102752685547, 'optimization': 0, 'imputation': 52.07010293006897}}, '0.1': {'scores': {'RMSE': 0.39007056542870916, 'MAE': 0.2753022759369617, 'MI': 0.8280959876205578, 'CORRELATION': 0.9180937736429735}, 'times': {'contamination': 0.002231597900390625, 'optimization': 0, 'imputation': 52.543020248413086}}, '0.2': {'scores': {'RMSE': 0.37254427425455994, 'MAE': 0.2730547993858495, 'MI': 0.7425412593844177, 'CORRELATION': 0.9293322959355041}, 'times': {'contamination': 0.0072672367095947266, 'optimization': 0, 'imputation': 52.88247036933899}}, '0.4': {'scores': {'RMSE': 0.6027573766269363, 'MAE': 0.34494332493982044, 'MI': 0.11876685901414151, 'CORRELATION': 0.8390532279447225}, 'times': {'contamination': 0.04321551322937012, 'optimization': 0, 'imputation': 54.10793352127075}}, '0.6': {'scores': {'RMSE': 0.9004526656857551, 'MAE': 0.4924048353228427, 'MI': 0.011590260996247858, 'CORRELATION': 0.5650541301828254}, 'times': {'contamination': 0.1728806495666504, 'optimization': 0, 'imputation': 40.53373336791992}}, '0.8': {'scores': {'RMSE': 1.0112488396023014, 'MAE': 0.7646823531588104, 'MI': 0.00040669209664367576, 'CORRELATION': 0.0183962968474991}, 'times': {'contamination': 0.6077785491943359, 'optimization': 0, 'imputation': 35.151907444000244}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.4445625930776235, 'MAE': 0.2696133927362288, 'MI': 1.1167751522591498, 'CORRELATION': 0.8944975075266335}, 'times': {'contamination': 0.0010058879852294922, 'optimization': 0, 'imputation': 0.7380530834197998}}, '0.1': {'scores': {'RMSE': 0.2939506418814281, 'MAE': 0.16953644212278182, 'MI': 1.0160968166750064, 'CORRELATION': 0.9531900627237018}, 'times': {'contamination': 0.0019745826721191406, 'optimization': 0, 'imputation': 4.7826457023620605}}, '0.2': {'scores': {'RMSE': 0.2366529609250008, 'MAE': 0.14709529129218185, 'MI': 1.064299483512458, 'CORRELATION': 0.9711348247027318}, 'times': {'contamination': 0.00801849365234375, 'optimization': 0, 'imputation': 33.94813060760498}}, '0.4': {'scores': {'RMSE': 0.4155649406397416, 'MAE': 0.22056702659999994, 'MI': 0.06616526470761779, 'CORRELATION': 0.919934494058292}, 'times': {'contamination': 0.04391813278198242, 'optimization': 0, 'imputation': 255.31524085998535}}, '0.6': {'scores': {'RMSE': 0.38695094864012947, 'MAE': 0.24340565131372927, 'MI': 0.06361822797740405, 'CORRELATION': 0.9249744935121553}, 'times': {'contamination': 0.17044353485107422, 'optimization': 0, 'imputation': 840.7470128536224}}, '0.8': {'scores': {'RMSE': 0.5862696375344495, 'MAE': 0.3968159514130716, 'MI': 0.13422239939628303, 'CORRELATION': 0.8178796825899766}, 'times': {'contamination': 0.5999574661254883, 'optimization': 0, 'imputation': 
1974.6101157665253}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9458508648057621, 'MAE': 0.7019459696903068, 'MI': 0.11924522547609226, 'CORRELATION': 0.02915935932568557}, 'times': {'contamination': 0.001056671142578125, 'optimization': 0, 'imputation': 49.42237901687622}}, '0.1': {'scores': {'RMSE': 1.0125309431502871, 'MAE': 0.761136543268339, 'MI': 0.12567590499764303, 'CORRELATION': -0.037161060882302754}, 'times': {'contamination': 0.003415822982788086, 'optimization': 0, 'imputation': 49.04829454421997}}, '0.2': {'scores': {'RMSE': 1.0317754516097355, 'MAE': 0.7952869439926, 'MI': 0.10908095436833125, 'CORRELATION': -0.04155403791391449}, 'times': {'contamination': 0.007429599761962891, 'optimization': 0, 'imputation': 49.42568325996399}}, '0.4': {'scores': {'RMSE': 1.0807965786089415, 'MAE': 0.7326965517264863, 'MI': 0.006171770470542263, 'CORRELATION': -0.020630168509677818}, 'times': {'contamination': 0.042899370193481445, 'optimization': 0, 'imputation': 49.479795694351196}}, '0.6': {'scores': {'RMSE': 1.0441472017887297, 'MAE': 0.7599852461729673, 'MI': 0.01121013333181846, 'CORRELATION': -0.007513931343350665}, 'times': {'contamination': 0.17329692840576172, 'optimization': 0, 'imputation': 50.439927101135254}}, '0.8': {'scores': {'RMSE': 1.0379347892718205, 'MAE': 0.757440007226372, 'MI': 0.0035880775657246428, 'CORRELATION': -0.0014975078469404196}, 'times': {'contamination': 0.6166613101959229, 'optimization': 0, 'imputation': 50.66455388069153}}}}}}} + + run_1_eeg_a = {'eegalcohol': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 1.107394798606378, 'MAE': 0.9036474830477748, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0005795955657958984, 'optimization': 0, 'imputation': 0.0002789497375488281}}, '0.1': {'scores': {'RMSE': 0.8569349076796438, 'MAE': 0.6416542359734557, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0010943412780761719, 'optimization': 0, 'imputation': 0.00022482872009277344}}, '0.2': {'scores': {'RMSE': 0.9609255264919324, 'MAE': 0.756013835497571, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0021677017211914062, 'optimization': 0, 'imputation': 0.00021696090698242188}}, '0.4': {'scores': {'RMSE': 1.0184989120725458, 'MAE': 0.8150966718352457, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.012993097305297852, 'optimization': 0, 'imputation': 0.00023245811462402344}}, '0.6': {'scores': {'RMSE': 0.9997401940199045, 'MAE': 0.7985721718600829, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.04167890548706055, 'optimization': 0, 'imputation': 0.00025534629821777344}}, '0.8': {'scores': {'RMSE': 0.9895691678332014, 'MAE': 0.7901674118013952, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.15235233306884766, 'optimization': 0, 'imputation': 0.0004570484161376953}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.27658600512073456, 'MAE': 0.20204444801773774, 'MI': 1.6287285825717355, 'CORRELATION': 0.9837210171556283}, 'times': {'contamination': 0.0005352497100830078, 'optimization': 0, 'imputation': 0.02960658073425293}}, '0.1': {'scores': {'RMSE': 0.2322153312143858, 'MAE': 0.1729082341483471, 'MI': 1.1990748751673153, 'CORRELATION': 0.9640732993793864}, 'times': {'contamination': 0.0018682479858398438, 'optimization': 0, 'imputation': 0.03319096565246582}}, '0.2': {'scores': {'RMSE': 0.21796283300762773, 'MAE': 0.16255811567403466, 'MI': 1.184724280002774, 'CORRELATION': 0.9737521039022545}, 'times': {'contamination': 0.004289150238037109, 'optimization': 
0, 'imputation': 0.03893113136291504}}, '0.4': {'scores': {'RMSE': 0.2852656711446442, 'MAE': 0.19577380664036, 'MI': 1.014828207927502, 'CORRELATION': 0.959485242427464}, 'times': {'contamination': 0.028098106384277344, 'optimization': 0, 'imputation': 0.10860562324523926}}, '0.6': {'scores': {'RMSE': 0.3360171448119046, 'MAE': 0.23184686418998596, 'MI': 0.8789374924043876, 'CORRELATION': 0.9418882413737133}, 'times': {'contamination': 0.13066554069519043, 'optimization': 0, 'imputation': 0.23463678359985352}}, '0.8': {'scores': {'RMSE': 0.5558362531202891, 'MAE': 0.37446346030237454, 'MI': 0.5772409317426037, 'CORRELATION': 0.8478935496183876}, 'times': {'contamination': 0.20974469184875488, 'optimization': 0, 'imputation': 0.45677614212036133}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.7434750032306926, 'MAE': 0.5711687107703531, 'MI': 1.0614546580642759, 'CORRELATION': 0.7570103181096193}, 'times': {'contamination': 0.0016989707946777344, 'optimization': 0, 'imputation': 2.45868182182312}}, '0.1': {'scores': {'RMSE': 0.6079049353979786, 'MAE': 0.4565071330548986, 'MI': 0.5897845472515851, 'CORRELATION': 0.7033347467102922}, 'times': {'contamination': 0.0010311603546142578, 'optimization': 0, 'imputation': 2.412322521209717}}, '0.2': {'scores': {'RMSE': 0.5938200686690087, 'MAE': 0.4583475323523134, 'MI': 0.5238356117195857, 'CORRELATION': 0.789556744168648}, 'times': {'contamination': 0.0022623538970947266, 'optimization': 0, 'imputation': 2.4960315227508545}}, '0.4': {'scores': {'RMSE': 0.6922622994445695, 'MAE': 0.5327565871766037, 'MI': 0.3842117779328253, 'CORRELATION': 0.738304743934084}, 'times': {'contamination': 0.01298069953918457, 'optimization': 0, 'imputation': 2.7305381298065186}}, '0.6': {'scores': {'RMSE': 0.7719376402414535, 'MAE': 0.5756544384278333, 'MI': 0.268745121385816, 'CORRELATION': 0.6398387148302656}, 'times': {'contamination': 0.04132866859436035, 'optimization': 0, 'imputation': 2.097337245941162}}, '0.8': {'scores': {'RMSE': 1.0218833589128922, 'MAE': 0.8012134667654269, 'MI': 0.0051679642909252645, 'CORRELATION': 0.06083718960882358}, 'times': {'contamination': 0.14826583862304688, 'optimization': 0, 'imputation': 2.4140400886535645}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.26665906759668434, 'MAE': 0.21589657916392105, 'MI': 1.4930024107375521, 'CORRELATION': 0.9704001503125854}, 'times': {'contamination': 0.0005478858947753906, 'optimization': 0, 'imputation': 0.11310672760009766}}, '0.1': {'scores': {'RMSE': 0.28425094570125403, 'MAE': 0.22787684897303442, 'MI': 1.0594854362146846, 'CORRELATION': 0.9444192673990515}, 'times': {'contamination': 0.0010786056518554688, 'optimization': 0, 'imputation': 0.3150827884674072}}, '0.2': {'scores': {'RMSE': 0.334887339804727, 'MAE': 0.25851830743811066, 'MI': 0.9711245925356778, 'CORRELATION': 0.9390073163681255}, 'times': {'contamination': 0.0022890567779541016, 'optimization': 0, 'imputation': 2.11177921295166}}, '0.4': {'scores': {'RMSE': 0.4719169787140248, 'MAE': 0.35026878431372477, 'MI': 0.7196112128770917, 'CORRELATION': 0.8858920655062363}, 'times': {'contamination': 0.013253211975097656, 'optimization': 0, 'imputation': 16.908517837524414}}, '0.6': {'scores': {'RMSE': 0.47736733503847095, 'MAE': 0.35628454418236766, 'MI': 0.6157654491357567, 'CORRELATION': 0.8790867703136753}, 'times': {'contamination': 0.041519880294799805, 'optimization': 0, 'imputation': 50.78557777404785}}, '0.8': {'scores': {'RMSE': 0.5747595088880484, 'MAE': 0.4242587159311907, 'MI': 0.4843046739917606, 
'CORRELATION': 0.8188927905931169}, 'times': {'contamination': 0.14832043647766113, 'optimization': 0, 'imputation': 126.20078611373901}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.458357678276516, 'MAE': 1.1907412656856677, 'MI': 0.8219987547394441, 'CORRELATION': 0.32490952909349474}, 'times': {'contamination': 0.0005905628204345703, 'optimization': 0, 'imputation': 39.80203366279602}}, '0.1': {'scores': {'RMSE': 1.2850820431076562, 'MAE': 1.060164753499244, 'MI': 0.2778923026896115, 'CORRELATION': 0.20633535214093737}, 'times': {'contamination': 0.0011925697326660156, 'optimization': 0, 'imputation': 39.42339515686035}}, '0.2': {'scores': {'RMSE': 1.251878172036014, 'MAE': 0.99269752960842, 'MI': 0.13973052187935872, 'CORRELATION': 0.10410956282246875}, 'times': {'contamination': 0.002384662628173828, 'optimization': 0, 'imputation': 39.37762236595154}}, '0.4': {'scores': {'RMSE': 1.3824391140783348, 'MAE': 1.1213491541016083, 'MI': 0.041359464126164654, 'CORRELATION': 0.04142746993459159}, 'times': {'contamination': 0.01451253890991211, 'optimization': 0, 'imputation': 39.95635533332825}}, '0.6': {'scores': {'RMSE': 1.4105891423231767, 'MAE': 1.126363637928893, 'MI': 0.016249691557241253, 'CORRELATION': -0.06179933990411743}, 'times': {'contamination': 0.043706655502319336, 'optimization': 0, 'imputation': 40.308385610580444}}, '0.8': {'scores': {'RMSE': 1.2717422827656417, 'MAE': 1.018360187093311, 'MI': 0.006362338437000872, 'CORRELATION': -0.03134655880761642}, 'times': {'contamination': 0.15089750289916992, 'optimization': 0, 'imputation': 40.61707377433777}}}}}}} + run_2_eeg_a = {'eegalcohol': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 1.107394798606378, 'MAE': 0.9036474830477748, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0005807876586914062, 'optimization': 0, 'imputation': 0.00028061866760253906}}, '0.1': {'scores': {'RMSE': 0.8569349076796438, 'MAE': 0.6416542359734557, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0010428428649902344, 'optimization': 0, 'imputation': 0.00022554397583007812}}, '0.2': {'scores': {'RMSE': 0.9609255264919324, 'MAE': 0.756013835497571, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0020012855529785156, 'optimization': 0, 'imputation': 0.00021696090698242188}}, '0.4': {'scores': {'RMSE': 1.0184989120725458, 'MAE': 0.8150966718352457, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.011640310287475586, 'optimization': 0, 'imputation': 0.00023436546325683594}}, '0.6': {'scores': {'RMSE': 0.9997401940199045, 'MAE': 0.7985721718600829, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0368037223815918, 'optimization': 0, 'imputation': 0.00026416778564453125}}, '0.8': {'scores': {'RMSE': 0.9895691678332014, 'MAE': 0.7901674118013952, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.13379240036010742, 'optimization': 0, 'imputation': 0.0002701282501220703}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.27658600512073456, 'MAE': 0.20204444801773774, 'MI': 1.6287285825717355, 'CORRELATION': 0.9837210171556283}, 'times': {'contamination': 0.00048232078552246094, 'optimization': 0, 'imputation': 0.02683854103088379}}, '0.1': {'scores': {'RMSE': 0.2322153312143858, 'MAE': 0.1729082341483471, 'MI': 1.1990748751673153, 'CORRELATION': 0.9640732993793864}, 'times': {'contamination': 0.0033652782440185547, 'optimization': 0, 'imputation': 0.018512725830078125}}, '0.2': {'scores': {'RMSE': 0.21796283300762773, 'MAE': 0.16255811567403466, 'MI': 
1.184724280002774, 'CORRELATION': 0.9737521039022545}, 'times': {'contamination': 0.0062105655670166016, 'optimization': 0, 'imputation': 0.021807193756103516}}, '0.4': {'scores': {'RMSE': 0.2852656711446442, 'MAE': 0.19577380664036, 'MI': 1.014828207927502, 'CORRELATION': 0.959485242427464}, 'times': {'contamination': 0.029900074005126953, 'optimization': 0, 'imputation': 0.04688239097595215}}, '0.6': {'scores': {'RMSE': 0.3360171448119046, 'MAE': 0.23184686418998596, 'MI': 0.8789374924043876, 'CORRELATION': 0.9418882413737133}, 'times': {'contamination': 0.09384703636169434, 'optimization': 0, 'imputation': 0.12701702117919922}}, '0.8': {'scores': {'RMSE': 0.5558362531202891, 'MAE': 0.37446346030237454, 'MI': 0.5772409317426037, 'CORRELATION': 0.8478935496183876}, 'times': {'contamination': 0.20301151275634766, 'optimization': 0, 'imputation': 0.45037055015563965}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.7434750032306926, 'MAE': 0.5711687107703531, 'MI': 1.0614546580642759, 'CORRELATION': 0.7570103181096193}, 'times': {'contamination': 0.002616405487060547, 'optimization': 0, 'imputation': 2.4667086601257324}}, '0.1': {'scores': {'RMSE': 0.6079049353979786, 'MAE': 0.4565071330548986, 'MI': 0.5897845472515851, 'CORRELATION': 0.7033347467102922}, 'times': {'contamination': 0.0010187625885009766, 'optimization': 0, 'imputation': 2.4532482624053955}}, '0.2': {'scores': {'RMSE': 0.5938200686690087, 'MAE': 0.4583475323523134, 'MI': 0.5238356117195857, 'CORRELATION': 0.789556744168648}, 'times': {'contamination': 0.002056121826171875, 'optimization': 0, 'imputation': 2.4876415729522705}}, '0.4': {'scores': {'RMSE': 0.6922622994445695, 'MAE': 0.5327565871766037, 'MI': 0.3842117779328253, 'CORRELATION': 0.738304743934084}, 'times': {'contamination': 0.011551380157470703, 'optimization': 0, 'imputation': 2.7408502101898193}}, '0.6': {'scores': {'RMSE': 0.7719376402414535, 'MAE': 0.5756544384278333, 'MI': 0.268745121385816, 'CORRELATION': 0.6398387148302656}, 'times': {'contamination': 0.03650641441345215, 'optimization': 0, 'imputation': 2.079911708831787}}, '0.8': {'scores': {'RMSE': 1.0218833589128922, 'MAE': 0.8012134667654269, 'MI': 0.0051679642909252645, 'CORRELATION': 0.06083718960882358}, 'times': {'contamination': 0.1308588981628418, 'optimization': 0, 'imputation': 2.242360830307007}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.26665906759668434, 'MAE': 0.21589657916392105, 'MI': 1.4930024107375521, 'CORRELATION': 0.9704001503125854}, 'times': {'contamination': 0.00048542022705078125, 'optimization': 0, 'imputation': 0.10239624977111816}}, '0.1': {'scores': {'RMSE': 0.28425094570125403, 'MAE': 0.22787684897303442, 'MI': 1.0594854362146846, 'CORRELATION': 0.9444192673990515}, 'times': {'contamination': 0.0009329319000244141, 'optimization': 0, 'imputation': 0.289858341217041}}, '0.2': {'scores': {'RMSE': 0.334887339804727, 'MAE': 0.25851830743811066, 'MI': 0.9711245925356778, 'CORRELATION': 0.9390073163681255}, 'times': {'contamination': 0.001989603042602539, 'optimization': 0, 'imputation': 1.9265098571777344}}, '0.4': {'scores': {'RMSE': 0.4719169787140248, 'MAE': 0.35026878431372477, 'MI': 0.7196112128770917, 'CORRELATION': 0.8858920655062363}, 'times': {'contamination': 0.011867761611938477, 'optimization': 0, 'imputation': 15.123495101928711}}, '0.6': {'scores': {'RMSE': 0.47736733503847095, 'MAE': 0.35628454418236766, 'MI': 0.6157654491357567, 'CORRELATION': 0.8790867703136753}, 'times': {'contamination': 0.03667879104614258, 'optimization': 0, 'imputation': 
45.76521134376526}}, '0.8': {'scores': {'RMSE': 0.5747595088880484, 'MAE': 0.4242587159311907, 'MI': 0.4843046739917606, 'CORRELATION': 0.8188927905931169}, 'times': {'contamination': 0.1310558319091797, 'optimization': 0, 'imputation': 112.25521159172058}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 2.1730235998246674, 'MAE': 1.8953551708094873, 'MI': 0.6072901854577394, 'CORRELATION': -0.7841845292123013}, 'times': {'contamination': 0.0005171298980712891, 'optimization': 0, 'imputation': 39.46493577957153}}, '0.1': {'scores': {'RMSE': 1.2781883021632698, 'MAE': 1.0810892114204538, 'MI': 0.3833337309697582, 'CORRELATION': -0.0447827943706207}, 'times': {'contamination': 0.0010881423950195312, 'optimization': 0, 'imputation': 39.73745322227478}}, '0.2': {'scores': {'RMSE': 1.8180439658597276, 'MAE': 1.546079091559085, 'MI': 0.15454756214708848, 'CORRELATION': -0.08987518519265314}, 'times': {'contamination': 0.0021600723266601562, 'optimization': 0, 'imputation': 40.31570887565613}}, '0.4': {'scores': {'RMSE': 1.3179315405249528, 'MAE': 1.0613913921061846, 'MI': 0.03968232666893745, 'CORRELATION': -0.028655714356183734}, 'times': {'contamination': 0.011851787567138672, 'optimization': 0, 'imputation': 40.20182824134827}}, '0.6': {'scores': {'RMSE': 1.5741948536611974, 'MAE': 1.2941832818055137, 'MI': 0.019932950488884025, 'CORRELATION': -0.08962394375060187}, 'times': {'contamination': 0.03747963905334473, 'optimization': 0, 'imputation': 40.596800088882446}}, '0.8': {'scores': {'RMSE': 1.3051885951402709, 'MAE': 1.0437200819767127, 'MI': 0.008000857444708596, 'CORRELATION': -0.028450119401938145}, 'times': {'contamination': 0.13601994514465332, 'optimization': 0, 'imputation': 40.186676263809204}}}}}}} + run_3_eeg_a = {'eegalcohol': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 1.107394798606378, 'MAE': 0.9036474830477748, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0005962848663330078, 'optimization': 0, 'imputation': 0.0002779960632324219}}, '0.1': {'scores': {'RMSE': 0.8569349076796438, 'MAE': 0.6416542359734557, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0010771751403808594, 'optimization': 0, 'imputation': 0.000225067138671875}}, '0.2': {'scores': {'RMSE': 0.9609255264919324, 'MAE': 0.756013835497571, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0020322799682617188, 'optimization': 0, 'imputation': 0.0002205371856689453}}, '0.4': {'scores': {'RMSE': 1.0184989120725458, 'MAE': 0.8150966718352457, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.011725425720214844, 'optimization': 0, 'imputation': 0.0002334117889404297}}, '0.6': {'scores': {'RMSE': 0.9997401940199045, 'MAE': 0.7985721718600829, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.037230491638183594, 'optimization': 0, 'imputation': 0.00025773048400878906}}, '0.8': {'scores': {'RMSE': 0.9895691678332014, 'MAE': 0.7901674118013952, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.1363978385925293, 'optimization': 0, 'imputation': 0.000461578369140625}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.27658600512073456, 'MAE': 0.20204444801773774, 'MI': 1.6287285825717355, 'CORRELATION': 0.9837210171556283}, 'times': {'contamination': 0.0005204677581787109, 'optimization': 0, 'imputation': 0.027890920639038086}}, '0.1': {'scores': {'RMSE': 0.2322153312143858, 'MAE': 0.1729082341483471, 'MI': 1.1990748751673153, 'CORRELATION': 0.9640732993793864}, 'times': {'contamination': 0.007706642150878906, 'optimization': 0, 
'imputation': 0.02409815788269043}}, '0.2': {'scores': {'RMSE': 0.21796283300762773, 'MAE': 0.16255811567403466, 'MI': 1.184724280002774, 'CORRELATION': 0.9737521039022545}, 'times': {'contamination': 0.004461050033569336, 'optimization': 0, 'imputation': 0.01602649688720703}}, '0.4': {'scores': {'RMSE': 0.2852656711446442, 'MAE': 0.19577380664036, 'MI': 1.014828207927502, 'CORRELATION': 0.959485242427464}, 'times': {'contamination': 0.025922298431396484, 'optimization': 0, 'imputation': 0.03365063667297363}}, '0.6': {'scores': {'RMSE': 0.3360171448119046, 'MAE': 0.23184686418998596, 'MI': 0.8789374924043876, 'CORRELATION': 0.9418882413737133}, 'times': {'contamination': 0.08993721008300781, 'optimization': 0, 'imputation': 0.22972464561462402}}, '0.8': {'scores': {'RMSE': 0.5558362531202891, 'MAE': 0.37446346030237454, 'MI': 0.5772409317426037, 'CORRELATION': 0.8478935496183876}, 'times': {'contamination': 0.19976544380187988, 'optimization': 0, 'imputation': 0.5038683414459229}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.7434750032306926, 'MAE': 0.5711687107703531, 'MI': 1.0614546580642759, 'CORRELATION': 0.7570103181096193}, 'times': {'contamination': 0.0024564266204833984, 'optimization': 0, 'imputation': 2.451982259750366}}, '0.1': {'scores': {'RMSE': 0.6079049353979786, 'MAE': 0.4565071330548986, 'MI': 0.5897845472515851, 'CORRELATION': 0.7033347467102922}, 'times': {'contamination': 0.0009958744049072266, 'optimization': 0, 'imputation': 2.4210727214813232}}, '0.2': {'scores': {'RMSE': 0.5938200686690087, 'MAE': 0.4583475323523134, 'MI': 0.5238356117195857, 'CORRELATION': 0.789556744168648}, 'times': {'contamination': 0.0020804405212402344, 'optimization': 0, 'imputation': 2.4876914024353027}}, '0.4': {'scores': {'RMSE': 0.6922622994445695, 'MAE': 0.5327565871766037, 'MI': 0.3842117779328253, 'CORRELATION': 0.738304743934084}, 'times': {'contamination': 0.011591196060180664, 'optimization': 0, 'imputation': 2.704968214035034}}, '0.6': {'scores': {'RMSE': 0.7719376402414535, 'MAE': 0.5756544384278333, 'MI': 0.268745121385816, 'CORRELATION': 0.6398387148302656}, 'times': {'contamination': 0.037017822265625, 'optimization': 0, 'imputation': 2.085197925567627}}, '0.8': {'scores': {'RMSE': 1.0218833589128922, 'MAE': 0.8012134667654269, 'MI': 0.0051679642909252645, 'CORRELATION': 0.06083718960882358}, 'times': {'contamination': 0.13096380233764648, 'optimization': 0, 'imputation': 2.230935573577881}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.26665906759668434, 'MAE': 0.21589657916392105, 'MI': 1.4930024107375521, 'CORRELATION': 0.9704001503125854}, 'times': {'contamination': 0.0005035400390625, 'optimization': 0, 'imputation': 0.10373473167419434}}, '0.1': {'scores': {'RMSE': 0.28425094570125403, 'MAE': 0.22787684897303442, 'MI': 1.0594854362146846, 'CORRELATION': 0.9444192673990515}, 'times': {'contamination': 0.0009739398956298828, 'optimization': 0, 'imputation': 0.29041051864624023}}, '0.2': {'scores': {'RMSE': 0.334887339804727, 'MAE': 0.25851830743811066, 'MI': 0.9711245925356778, 'CORRELATION': 0.9390073163681255}, 'times': {'contamination': 0.0020368099212646484, 'optimization': 0, 'imputation': 1.9077272415161133}}, '0.4': {'scores': {'RMSE': 0.4719169787140248, 'MAE': 0.35026878431372477, 'MI': 0.7196112128770917, 'CORRELATION': 0.8858920655062363}, 'times': {'contamination': 0.011847257614135742, 'optimization': 0, 'imputation': 15.165152549743652}}, '0.6': {'scores': {'RMSE': 0.47736733503847095, 'MAE': 0.35628454418236766, 'MI': 0.6157654491357567, 
'CORRELATION': 0.8790867703136753}, 'times': {'contamination': 0.03836536407470703, 'optimization': 0, 'imputation': 45.737265825271606}}, '0.8': {'scores': {'RMSE': 0.5747595088880484, 'MAE': 0.4242587159311907, 'MI': 0.4843046739917606, 'CORRELATION': 0.8188927905931169}, 'times': {'contamination': 0.13164138793945312, 'optimization': 0, 'imputation': 112.77252864837646}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 0.9445218729390771, 'MAE': 0.7764946359758815, 'MI': 0.6072901854577393, 'CORRELATION': 0.4652123690672692}, 'times': {'contamination': 0.0005385875701904297, 'optimization': 0, 'imputation': 38.954432249069214}}, '0.1': {'scores': {'RMSE': 1.427133058650169, 'MAE': 1.1789942428859332, 'MI': 0.38333373096975815, 'CORRELATION': 0.08016892207576036}, 'times': {'contamination': 0.0010995864868164062, 'optimization': 0, 'imputation': 39.40138053894043}}, '0.2': {'scores': {'RMSE': 1.6247239269337725, 'MAE': 1.3524401299725073, 'MI': 0.14674431302845103, 'CORRELATION': -0.017095302513357058}, 'times': {'contamination': 0.0021851062774658203, 'optimization': 0, 'imputation': 39.57895803451538}}, '0.4': {'scores': {'RMSE': 1.3639633065606662, 'MAE': 1.104079996447652, 'MI': 0.03582578504493456, 'CORRELATION': -0.01363275389624549}, 'times': {'contamination': 0.01206207275390625, 'optimization': 0, 'imputation': 39.809614419937134}}, '0.6': {'scores': {'RMSE': 1.2890524803504195, 'MAE': 1.0350043378755456, 'MI': 0.015507809363244187, 'CORRELATION': 0.003350494577114745}, 'times': {'contamination': 0.0376124382019043, 'optimization': 0, 'imputation': 39.829596281051636}}, '0.8': {'scores': {'RMSE': 1.3190676928231357, 'MAE': 1.0701855884078935, 'MI': 0.009256360494384585, 'CORRELATION': -0.04252896499221319}, 'times': {'contamination': 0.13460206985473633, 'optimization': 0, 'imputation': 40.27882170677185}}}}}}} + + run_1_eeg_r ={'eegreading': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.6937095315379215, 'MAE': 0.5871322524124026, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0008804798126220703, 'optimization': 0, 'imputation': 0.000461578369140625}}, '0.1': {'scores': {'RMSE': 0.8825047928812179, 'MAE': 0.7058469910884912, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.001970052719116211, 'optimization': 0, 'imputation': 0.00037169456481933594}}, '0.2': {'scores': {'RMSE': 1.0076040625030085, 'MAE': 0.8133998806656898, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.005462169647216797, 'optimization': 0, 'imputation': 0.00038313865661621094}}, '0.4': {'scores': {'RMSE': 1.014301846668858, 'MAE': 0.8219008090987252, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.042899131774902344, 'optimization': 0, 'imputation': 0.0004208087921142578}}, '0.6': {'scores': {'RMSE': 1.0158383459630567, 'MAE': 0.8210620770500036, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.14824438095092773, 'optimization': 0, 'imputation': 0.00045800209045410156}}, '0.8': {'scores': {'RMSE': 1.01877327240803, 'MAE': 0.8157442592731639, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.5723309516906738, 'optimization': 0, 'imputation': 0.0004761219024658203}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.6092168096877171, 'MAE': 0.43725473329243575, 'MI': 0.8113862111415893, 'CORRELATION': 0.6669628813645996}, 'times': {'contamination': 0.0007905960083007812, 'optimization': 0, 'imputation': 0.13503527641296387}}, '0.1': {'scores': {'RMSE': 0.7694804794390452, 'MAE': 0.4693452185509513, 'MI': 0.6303931303314629, 
'CORRELATION': 0.6338704662419559}, 'times': {'contamination': 0.004297971725463867, 'optimization': 0, 'imputation': 0.1615924835205078}}, '0.2': {'scores': {'RMSE': 0.54163559631001, 'MAE': 0.3838909357504076, 'MI': 0.6804417798137956, 'CORRELATION': 0.8550799708158656}, 'times': {'contamination': 0.013224363327026367, 'optimization': 0, 'imputation': 0.26840829849243164}}, '0.4': {'scores': {'RMSE': 0.6150678993354385, 'MAE': 0.3994113839683473, 'MI': 0.5964930437182837, 'CORRELATION': 0.8282842809048951}, 'times': {'contamination': 0.10810661315917969, 'optimization': 0, 'imputation': 0.4349708557128906}}, '0.6': {'scores': {'RMSE': 0.8559878849846194, 'MAE': 0.48756796060498914, 'MI': 0.4352238530939769, 'CORRELATION': 0.7114520144242487}, 'times': {'contamination': 0.2178659439086914, 'optimization': 0, 'imputation': 1.5645251274108887}}, '0.8': {'scores': {'RMSE': 1.0028418021086187, 'MAE': 0.6478458585388305, 'MI': 0.26800404550676565, 'CORRELATION': 0.6191696179492256}, 'times': {'contamination': 0.6404330730438232, 'optimization': 0, 'imputation': 1.582526445388794}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.38913260498789515, 'MAE': 0.28887981808629887, 'MI': 0.9003693740232723, 'CORRELATION': 0.8305251080007574}, 'times': {'contamination': 0.001874685287475586, 'optimization': 0, 'imputation': 40.537591218948364}}, '0.1': {'scores': {'RMSE': 0.42262809349896036, 'MAE': 0.31228067649415225, 'MI': 0.8330304029808546, 'CORRELATION': 0.8802327685495391}, 'times': {'contamination': 0.0020978450775146484, 'optimization': 0, 'imputation': 40.59518313407898}}, '0.2': {'scores': {'RMSE': 0.4299898931605415, 'MAE': 0.2914674774962624, 'MI': 0.8303895697315763, 'CORRELATION': 0.9049819009058613}, 'times': {'contamination': 0.0057528018951416016, 'optimization': 0, 'imputation': 41.33087348937988}}, '0.4': {'scores': {'RMSE': 0.4658583297277367, 'MAE': 0.32456738916683475, 'MI': 0.7105191885562022, 'CORRELATION': 0.8900070341144635}, 'times': {'contamination': 0.04328608512878418, 'optimization': 0, 'imputation': 42.92091608047485}}, '0.6': {'scores': {'RMSE': 0.5970596677005412, 'MAE': 0.40317626348969443, 'MI': 0.5057637077329502, 'CORRELATION': 0.8092444114848254}, 'times': {'contamination': 0.14931845664978027, 'optimization': 0, 'imputation': 31.47854471206665}}, '0.8': {'scores': {'RMSE': 4.099584545523784, 'MAE': 1.4360755142687804, 'MI': 0.03924813725195477, 'CORRELATION': 0.21658071586750138}, 'times': {'contamination': 0.5668902397155762, 'optimization': 0, 'imputation': 29.39516806602478}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.738070963229811, 'MAE': 0.5586987523761138, 'MI': 0.9549505679325584, 'CORRELATION': 0.4719896373208298}, 'times': {'contamination': 0.0009539127349853516, 'optimization': 0, 'imputation': 1.3275701999664307}}, '0.1': {'scores': {'RMSE': 0.6776044115374216, 'MAE': 0.4883939650690726, 'MI': 0.6051652352756725, 'CORRELATION': 0.7008457537827716}, 'times': {'contamination': 0.0018296241760253906, 'optimization': 0, 'imputation': 8.570139646530151}}, '0.2': {'scores': {'RMSE': 0.642538776211307, 'MAE': 0.45068800736093795, 'MI': 0.5847752699836343, 'CORRELATION': 0.7870826275047371}, 'times': {'contamination': 0.0065457820892333984, 'optimization': 0, 'imputation': 58.84888243675232}}, '0.4': {'scores': {'RMSE': 0.595402838774376, 'MAE': 0.4200046319465559, 'MI': 0.5780737300771779, 'CORRELATION': 0.8157719741321808}, 'times': {'contamination': 0.04352903366088867, 'optimization': 0, 'imputation': 465.20550632476807}}, '0.6': 
{'scores': {'RMSE': 0.6457758226280373, 'MAE': 0.465851861042097, 'MI': 0.4940897071221384, 'CORRELATION': 0.7797841684978442}, 'times': {'contamination': 0.14708542823791504, 'optimization': 0, 'imputation': 1450.9498298168182}}, '0.8': {'scores': {'RMSE': 0.7031022809975706, 'MAE': 0.5292159877681492, 'MI': 0.3802525627714059, 'CORRELATION': 0.7224487387493247}, 'times': {'contamination': 0.5660431385040283, 'optimization': 0, 'imputation': 3508.416035413742}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.2460673155138917, 'MAE': 1.0634869086014367, 'MI': 0.1586603634624117, 'CORRELATION': -0.18399931939875536}, 'times': {'contamination': 0.0009419918060302734, 'optimization': 0, 'imputation': 32.42417812347412}}, '0.1': {'scores': {'RMSE': 2.001243933855385, 'MAE': 1.6893755004759776, 'MI': 0.21482702111483185, 'CORRELATION': -0.2726515578948081}, 'times': {'contamination': 0.002130746841430664, 'optimization': 0, 'imputation': 32.82063555717468}}, '0.2': {'scores': {'RMSE': 1.4968455488292518, 'MAE': 1.2561534606234561, 'MI': 0.11222599437685155, 'CORRELATION': -0.07970123586104311}, 'times': {'contamination': 0.005732536315917969, 'optimization': 0, 'imputation': 32.82860803604126}}, '0.4': {'scores': {'RMSE': 1.574972914996495, 'MAE': 1.3016797024316713, 'MI': 0.038422847890191146, 'CORRELATION': 0.0006988421734184097}, 'times': {'contamination': 0.04352855682373047, 'optimization': 0, 'imputation': 33.141374349594116}}, '0.6': {'scores': {'RMSE': 1.5683459025627484, 'MAE': 1.2794570160301593, 'MI': 0.028528807827602636, 'CORRELATION': 0.02617119288717672}, 'times': {'contamination': 0.15021848678588867, 'optimization': 0, 'imputation': 33.46182680130005}}, '0.8': {'scores': {'RMSE': 1.4904222943664387, 'MAE': 1.2130101032490719, 'MI': 0.01197603765471432, 'CORRELATION': -0.009770358122964968}, 'times': {'contamination': 0.5758330821990967, 'optimization': 0, 'imputation': 34.17011761665344}}}}}}} + run_2_eeg_r ={'eegreading': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.6937095315379215, 'MAE': 0.5871322524124026, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0010230541229248047, 'optimization': 0, 'imputation': 0.0006470680236816406}}, '0.1': {'scores': {'RMSE': 0.8825047928812179, 'MAE': 0.7058469910884912, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.001850128173828125, 'optimization': 0, 'imputation': 0.00038170814514160156}}, '0.2': {'scores': {'RMSE': 1.0076040625030085, 'MAE': 0.8133998806656898, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.005147457122802734, 'optimization': 0, 'imputation': 0.0003783702850341797}}, '0.4': {'scores': {'RMSE': 1.014301846668858, 'MAE': 0.8219008090987252, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.03834176063537598, 'optimization': 0, 'imputation': 0.00041866302490234375}}, '0.6': {'scores': {'RMSE': 1.0158383459630567, 'MAE': 0.8210620770500036, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.13167929649353027, 'optimization': 0, 'imputation': 0.0004432201385498047}}, '0.8': {'scores': {'RMSE': 1.01877327240803, 'MAE': 0.8157442592731639, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.5066664218902588, 'optimization': 0, 'imputation': 0.00046706199645996094}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.6092168096877171, 'MAE': 0.43725473329243575, 'MI': 0.8113862111415893, 'CORRELATION': 0.6669628813645996}, 'times': {'contamination': 0.0010445117950439453, 'optimization': 0, 'imputation': 0.1320631504058838}}, '0.1': 
{'scores': {'RMSE': 0.7694804794390452, 'MAE': 0.4693452185509513, 'MI': 0.6303931303314629, 'CORRELATION': 0.6338704662419559}, 'times': {'contamination': 0.003664255142211914, 'optimization': 0, 'imputation': 0.15209627151489258}}, '0.2': {'scores': {'RMSE': 0.54163559631001, 'MAE': 0.3838909357504076, 'MI': 0.6804417798137956, 'CORRELATION': 0.8550799708158656}, 'times': {'contamination': 0.012023210525512695, 'optimization': 0, 'imputation': 0.27214813232421875}}, '0.4': {'scores': {'RMSE': 0.6150678993354385, 'MAE': 0.3994113839683473, 'MI': 0.5964930437182837, 'CORRELATION': 0.8282842809048951}, 'times': {'contamination': 0.09858155250549316, 'optimization': 0, 'imputation': 0.46369361877441406}}, '0.6': {'scores': {'RMSE': 0.8559878849846194, 'MAE': 0.48756796060498914, 'MI': 0.4352238530939769, 'CORRELATION': 0.7114520144242487}, 'times': {'contamination': 0.20195341110229492, 'optimization': 0, 'imputation': 1.685807704925537}}, '0.8': {'scores': {'RMSE': 1.0028418021086187, 'MAE': 0.6478458585388305, 'MI': 0.26800404550676565, 'CORRELATION': 0.6191696179492256}, 'times': {'contamination': 0.5733423233032227, 'optimization': 0, 'imputation': 1.7129898071289062}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.38913260498789515, 'MAE': 0.28887981808629887, 'MI': 0.9003693740232723, 'CORRELATION': 0.8305251080007574}, 'times': {'contamination': 0.0027310848236083984, 'optimization': 0, 'imputation': 40.562196254730225}}, '0.1': {'scores': {'RMSE': 0.42262809349896036, 'MAE': 0.31228067649415225, 'MI': 0.8330304029808546, 'CORRELATION': 0.8802327685495391}, 'times': {'contamination': 0.0017714500427246094, 'optimization': 0, 'imputation': 40.81480288505554}}, '0.2': {'scores': {'RMSE': 0.4299898931605415, 'MAE': 0.2914674774962624, 'MI': 0.8303895697315763, 'CORRELATION': 0.9049819009058613}, 'times': {'contamination': 0.0050771236419677734, 'optimization': 0, 'imputation': 41.5697500705719}}, '0.4': {'scores': {'RMSE': 0.4658583297277367, 'MAE': 0.32456738916683475, 'MI': 0.7105191885562022, 'CORRELATION': 0.8900070341144635}, 'times': {'contamination': 0.038306474685668945, 'optimization': 0, 'imputation': 42.954428911209106}}, '0.6': {'scores': {'RMSE': 0.5970596677005412, 'MAE': 0.40317626348969443, 'MI': 0.5057637077329502, 'CORRELATION': 0.8092444114848254}, 'times': {'contamination': 0.13292908668518066, 'optimization': 0, 'imputation': 31.69899296760559}}, '0.8': {'scores': {'RMSE': 4.099584545523784, 'MAE': 1.4360755142687804, 'MI': 0.03924813725195477, 'CORRELATION': 0.21658071586750138}, 'times': {'contamination': 0.5040717124938965, 'optimization': 0, 'imputation': 29.3829562664032}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.738070963229811, 'MAE': 0.5586987523761138, 'MI': 0.9549505679325584, 'CORRELATION': 0.4719896373208298}, 'times': {'contamination': 0.0009276866912841797, 'optimization': 0, 'imputation': 1.1977205276489258}}, '0.1': {'scores': {'RMSE': 0.6776044115374216, 'MAE': 0.4883939650690726, 'MI': 0.6051652352756725, 'CORRELATION': 0.7008457537827716}, 'times': {'contamination': 0.0018367767333984375, 'optimization': 0, 'imputation': 7.56099271774292}}, '0.2': {'scores': {'RMSE': 0.642538776211307, 'MAE': 0.45068800736093795, 'MI': 0.5847752699836343, 'CORRELATION': 0.7870826275047371}, 'times': {'contamination': 0.006151914596557617, 'optimization': 0, 'imputation': 51.4622917175293}}, '0.4': {'scores': {'RMSE': 0.595402838774376, 'MAE': 0.4200046319465559, 'MI': 0.5780737300771779, 'CORRELATION': 0.8157719741321808}, 'times': 
{'contamination': 0.03921937942504883, 'optimization': 0, 'imputation': 403.01221108436584}}, '0.6': {'scores': {'RMSE': 0.6457758226280373, 'MAE': 0.465851861042097, 'MI': 0.4940897071221384, 'CORRELATION': 0.7797841684978442}, 'times': {'contamination': 0.1312117576599121, 'optimization': 0, 'imputation': 1252.7522320747375}}, '0.8': {'scores': {'RMSE': 0.7031022809975706, 'MAE': 0.5292159877681492, 'MI': 0.3802525627714059, 'CORRELATION': 0.7224487387493247}, 'times': {'contamination': 0.49939608573913574, 'optimization': 0, 'imputation': 3008.1611902713776}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 0.8912641762825817, 'MAE': 0.7084000070562698, 'MI': 0.1586603634624117, 'CORRELATION': -0.18399931939875533}, 'times': {'contamination': 0.0015058517456054688, 'optimization': 0, 'imputation': 33.12207007408142}}, '0.1': {'scores': {'RMSE': 1.6662202948969496, 'MAE': 1.4470331206045186, 'MI': 0.21482702111483182, 'CORRELATION': -0.011209970742084449}, 'times': {'contamination': 0.0019042491912841797, 'optimization': 0, 'imputation': 33.216208696365356}}, '0.2': {'scores': {'RMSE': 1.598321080858055, 'MAE': 1.3075151194358787, 'MI': 0.09009214652632663, 'CORRELATION': -0.04520366874188189}, 'times': {'contamination': 0.005328655242919922, 'optimization': 0, 'imputation': 33.373263120651245}}, '0.4': {'scores': {'RMSE': 1.6985130268767639, 'MAE': 1.4015302468193533, 'MI': 0.03320292670183938, 'CORRELATION': -0.032377436531015075}, 'times': {'contamination': 0.03847932815551758, 'optimization': 0, 'imputation': 33.49543786048889}}, '0.6': {'scores': {'RMSE': 1.3985294499806777, 'MAE': 1.1199428091645252, 'MI': 0.019976173249096316, 'CORRELATION': -0.011566392273658407}, 'times': {'contamination': 0.13303351402282715, 'optimization': 0, 'imputation': 33.58043313026428}}, '0.8': {'scores': {'RMSE': 1.3502402939508413, 'MAE': 1.0815513948221274, 'MI': 0.008849739620243624, 'CORRELATION': -0.007509192482157953}, 'times': {'contamination': 0.5092241764068604, 'optimization': 0, 'imputation': 34.3874249458313}}}}}}} + run_3_eeg_r ={'eegreading': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.6937095315379215, 'MAE': 0.5871322524124026, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0007941722869873047, 'optimization': 0, 'imputation': 0.0005123615264892578}}, '0.1': {'scores': {'RMSE': 0.8825047928812179, 'MAE': 0.7058469910884912, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0018286705017089844, 'optimization': 0, 'imputation': 0.0003674030303955078}}, '0.2': {'scores': {'RMSE': 1.0076040625030085, 'MAE': 0.8133998806656898, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0050699710845947266, 'optimization': 0, 'imputation': 0.00038504600524902344}}, '0.4': {'scores': {'RMSE': 1.014301846668858, 'MAE': 0.8219008090987252, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.03862404823303223, 'optimization': 0, 'imputation': 0.000408172607421875}}, '0.6': {'scores': {'RMSE': 1.0158383459630567, 'MAE': 0.8210620770500036, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.13412952423095703, 'optimization': 0, 'imputation': 0.00044083595275878906}}, '0.8': {'scores': {'RMSE': 1.01877327240803, 'MAE': 0.8157442592731639, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.5152099132537842, 'optimization': 0, 'imputation': 0.00046372413635253906}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.6092168096877171, 'MAE': 0.43725473329243575, 'MI': 0.8113862111415893, 'CORRELATION': 0.6669628813645996}, 'times': 
{'contamination': 0.0007405281066894531, 'optimization': 0, 'imputation': 0.12306523323059082}}, '0.1': {'scores': {'RMSE': 0.7694804794390452, 'MAE': 0.4693452185509513, 'MI': 0.6303931303314629, 'CORRELATION': 0.6338704662419559}, 'times': {'contamination': 0.003605365753173828, 'optimization': 0, 'imputation': 0.1472492218017578}}, '0.2': {'scores': {'RMSE': 0.54163559631001, 'MAE': 0.3838909357504076, 'MI': 0.6804417798137956, 'CORRELATION': 0.8550799708158656}, 'times': {'contamination': 0.012052297592163086, 'optimization': 0, 'imputation': 0.26918745040893555}}, '0.4': {'scores': {'RMSE': 0.6150678993354385, 'MAE': 0.3994113839683473, 'MI': 0.5964930437182837, 'CORRELATION': 0.8282842809048951}, 'times': {'contamination': 0.0991828441619873, 'optimization': 0, 'imputation': 0.42386364936828613}}, '0.6': {'scores': {'RMSE': 0.8559878849846194, 'MAE': 0.48756796060498914, 'MI': 0.4352238530939769, 'CORRELATION': 0.7114520144242487}, 'times': {'contamination': 0.20280027389526367, 'optimization': 0, 'imputation': 1.6087658405303955}}, '0.8': {'scores': {'RMSE': 1.0028418021086187, 'MAE': 0.6478458585388305, 'MI': 0.26800404550676565, 'CORRELATION': 0.6191696179492256}, 'times': {'contamination': 0.5785958766937256, 'optimization': 0, 'imputation': 1.6498932838439941}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.38913260498789515, 'MAE': 0.28887981808629887, 'MI': 0.9003693740232723, 'CORRELATION': 0.8305251080007574}, 'times': {'contamination': 0.0019073486328125, 'optimization': 0, 'imputation': 40.961910009384155}}, '0.1': {'scores': {'RMSE': 0.42262809349896036, 'MAE': 0.31228067649415225, 'MI': 0.8330304029808546, 'CORRELATION': 0.8802327685495391}, 'times': {'contamination': 0.0019218921661376953, 'optimization': 0, 'imputation': 40.72626280784607}}, '0.2': {'scores': {'RMSE': 0.4299898931605415, 'MAE': 0.2914674774962624, 'MI': 0.8303895697315763, 'CORRELATION': 0.9049819009058613}, 'times': {'contamination': 0.005232572555541992, 'optimization': 0, 'imputation': 41.4091100692749}}, '0.4': {'scores': {'RMSE': 0.4658583297277367, 'MAE': 0.32456738916683475, 'MI': 0.7105191885562022, 'CORRELATION': 0.8900070341144635}, 'times': {'contamination': 0.03888249397277832, 'optimization': 0, 'imputation': 42.588321924209595}}, '0.6': {'scores': {'RMSE': 0.5970596677005412, 'MAE': 0.40317626348969443, 'MI': 0.5057637077329502, 'CORRELATION': 0.8092444114848254}, 'times': {'contamination': 0.13315606117248535, 'optimization': 0, 'imputation': 31.377464771270752}}, '0.8': {'scores': {'RMSE': 4.099584545523784, 'MAE': 1.4360755142687804, 'MI': 0.03924813725195477, 'CORRELATION': 0.21658071586750138}, 'times': {'contamination': 0.5085079669952393, 'optimization': 0, 'imputation': 29.278695583343506}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.738070963229811, 'MAE': 0.5586987523761138, 'MI': 0.9549505679325584, 'CORRELATION': 0.4719896373208298}, 'times': {'contamination': 0.0008947849273681641, 'optimization': 0, 'imputation': 1.1914288997650146}}, '0.1': {'scores': {'RMSE': 0.6776044115374216, 'MAE': 0.4883939650690726, 'MI': 0.6051652352756725, 'CORRELATION': 0.7008457537827716}, 'times': {'contamination': 0.0017147064208984375, 'optimization': 0, 'imputation': 7.560357332229614}}, '0.2': {'scores': {'RMSE': 0.642538776211307, 'MAE': 0.45068800736093795, 'MI': 0.5847752699836343, 'CORRELATION': 0.7870826275047371}, 'times': {'contamination': 0.006022930145263672, 'optimization': 0, 'imputation': 51.04503679275513}}, '0.4': {'scores': {'RMSE': 0.595402838774376, 'MAE': 
0.4200046319465559, 'MI': 0.5780737300771779, 'CORRELATION': 0.8157719741321808}, 'times': {'contamination': 0.04010820388793945, 'optimization': 0, 'imputation': 398.83428263664246}}, '0.6': {'scores': {'RMSE': 0.6457758226280373, 'MAE': 0.465851861042097, 'MI': 0.4940897071221384, 'CORRELATION': 0.7797841684978442}, 'times': {'contamination': 0.13222098350524902, 'optimization': 0, 'imputation': 1240.5351004600525}}, '0.8': {'scores': {'RMSE': 0.7031022809975706, 'MAE': 0.5292159877681492, 'MI': 0.3802525627714059, 'CORRELATION': 0.7224487387493247}, 'times': {'contamination': 0.503342866897583, 'optimization': 0, 'imputation': 2992.4439420700073}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.412449198386205, 'MAE': 1.1797340316621463, 'MI': 0.1586603634624117, 'CORRELATION': 0.18399931939875536}, 'times': {'contamination': 0.0009257793426513672, 'optimization': 0, 'imputation': 32.816872358322144}}, '0.1': {'scores': {'RMSE': 1.214349438722391, 'MAE': 0.9489114962018727, 'MI': 0.21482702111483185, 'CORRELATION': 0.005788004027631091}, 'times': {'contamination': 0.0017857551574707031, 'optimization': 0, 'imputation': 32.76135730743408}}, '0.2': {'scores': {'RMSE': 1.782091818947266, 'MAE': 1.4153354843421815, 'MI': 0.08421561723802416, 'CORRELATION': -0.02189868391699484}, 'times': {'contamination': 0.005289793014526367, 'optimization': 0, 'imputation': 32.418723821640015}}, '0.4': {'scores': {'RMSE': 1.6103900570901488, 'MAE': 1.2938081259781642, 'MI': 0.033310620448000414, 'CORRELATION': -0.020322458032248837}, 'times': {'contamination': 0.03889966011047363, 'optimization': 0, 'imputation': 32.98813772201538}}, '0.6': {'scores': {'RMSE': 1.4819836317124, 'MAE': 1.1973233805055523, 'MI': 0.026208555593730396, 'CORRELATION': -0.007201146491943477}, 'times': {'contamination': 0.13570785522460938, 'optimization': 0, 'imputation': 33.78332853317261}}, '0.8': {'scores': {'RMSE': 1.4711114920296309, 'MAE': 1.186789819475679, 'MI': 0.011634931601549168, 'CORRELATION': 0.005259826021512169}, 'times': {'contamination': 0.5149116516113281, 'optimization': 0, 'imputation': 33.572168588638306}}}}}}} + + run_1_fmri_o = {'fmriobjectviewing': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0389734217605486, 'MAE': 0.8058577685345816, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0014805793762207031, 'optimization': 0, 'imputation': 0.0006771087646484375}}, '0.1': {'scores': {'RMSE': 1.039599691211445, 'MAE': 0.8190561891835487, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0026483535766601562, 'optimization': 0, 'imputation': 0.00042366981506347656}}, '0.2': {'scores': {'RMSE': 1.0062387656710172, 'MAE': 0.7979296742837627, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.006651163101196289, 'optimization': 0, 'imputation': 0.000431060791015625}}, '0.4': {'scores': {'RMSE': 0.9883754343533185, 'MAE': 0.7862876896101476, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.021806955337524414, 'optimization': 0, 'imputation': 0.0005443096160888672}}, '0.6': {'scores': {'RMSE': 0.987097660571777, 'MAE': 0.7834652940902236, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.053050994873046875, 'optimization': 0, 'imputation': 0.0006668567657470703}}, '0.8': {'scores': {'RMSE': 0.9871215644673538, 'MAE': 0.783016411575714, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.12003469467163086, 'optimization': 0, 'imputation': 0.0006778240203857422}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.7921010903466756, 
'MAE': 0.544583599318027, 'MI': 0.6452684348756488, 'CORRELATION': 0.6564536961355489}, 'times': {'contamination': 0.001192331314086914, 'optimization': 160.06343746185303, 'imputation': 0.01718902587890625}}, '0.1': {'scores': {'RMSE': 0.8734423329225157, 'MAE': 0.6770893621008395, 'MI': 0.17404003258531509, 'CORRELATION': 0.5463883586396225}, 'times': {'contamination': 0.00665283203125, 'optimization': 160.06343746185303, 'imputation': 0.023374319076538086}}, '0.2': {'scores': {'RMSE': 0.8860045404559919, 'MAE': 0.6822309993559906, 'MI': 0.13114386403484066, 'CORRELATION': 0.4879034991275287}, 'times': {'contamination': 0.015312671661376953, 'optimization': 160.06343746185303, 'imputation': 0.028914928436279297}}, '0.4': {'scores': {'RMSE': 0.85668086245811, 'MAE': 0.6554946643451944, 'MI': 0.13136521095105114, 'CORRELATION': 0.512333042486155}, 'times': {'contamination': 0.04993629455566406, 'optimization': 160.06343746185303, 'imputation': 0.025546550750732422}}, '0.6': {'scores': {'RMSE': 0.8734554290811476, 'MAE': 0.668388555663456, 'MI': 0.12207632358330225, 'CORRELATION': 0.4901910978698973}, 'times': {'contamination': 0.11890959739685059, 'optimization': 160.06343746185303, 'imputation': 0.029758214950561523}}, '0.8': {'scores': {'RMSE': 0.9101165604682941, 'MAE': 0.7007441393931623, 'MI': 0.08721517971477473, 'CORRELATION': 0.424583398592384}, 'times': {'contamination': 0.18577051162719727, 'optimization': 160.06343746185303, 'imputation': 0.03443717956542969}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.8717933443365717, 'MAE': 0.6555873875520205, 'MI': 0.5468713896761781, 'CORRELATION': 0.5360081770612317}, 'times': {'contamination': 0.002370595932006836, 'optimization': 47.9361207485199, 'imputation': 1.688127040863037}}, '0.1': {'scores': {'RMSE': 0.8625085002829386, 'MAE': 0.6689945733093743, 'MI': 0.2158458111360233, 'CORRELATION': 0.5598406577746278}, 'times': {'contamination': 0.0027587413787841797, 'optimization': 47.9361207485199, 'imputation': 1.7008082866668701}}, '0.2': {'scores': {'RMSE': 0.8974016981576581, 'MAE': 0.693506918834922, 'MI': 0.12539695399359563, 'CORRELATION': 0.4696294419377184}, 'times': {'contamination': 0.006667137145996094, 'optimization': 47.9361207485199, 'imputation': 1.7494020462036133}}, '0.4': {'scores': {'RMSE': 0.9058118302006622, 'MAE': 0.7072376811266821, 'MI': 0.09089971471437183, 'CORRELATION': 0.4307173907497016}, 'times': {'contamination': 0.021937847137451172, 'optimization': 47.9361207485199, 'imputation': 1.9955649375915527}}, '0.6': {'scores': {'RMSE': 0.9926298063877358, 'MAE': 0.7768854416569236, 'MI': 0.040884571524434955, 'CORRELATION': 0.2900818700028841}, 'times': {'contamination': 0.05295515060424805, 'optimization': 47.9361207485199, 'imputation': 1.580155849456787}}, '0.8': {'scores': {'RMSE': 1.1125302701236894, 'MAE': 0.8727960243823621, 'MI': 0.013958439365190834, 'CORRELATION': 0.16455618813674522}, 'times': {'contamination': 0.12042689323425293, 'optimization': 47.9361207485199, 'imputation': 2.0708627700805664}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.6263705795260325, 'MAE': 0.4548865753229437, 'MI': 0.781959674837021, 'CORRELATION': 0.7986062368219096}, 'times': {'contamination': 0.0012576580047607422, 'optimization': 2116.446483373642, 'imputation': 0.22168827056884766}}, '0.1': {'scores': {'RMSE': 0.6899987721177722, 'MAE': 0.5259878926891887, 'MI': 0.395810445074613, 'CORRELATION': 0.7477771679714831}, 'times': {'contamination': 0.002750396728515625, 'optimization': 
2116.446483373642, 'imputation': 1.365168809890747}}, '0.2': {'scores': {'RMSE': 0.7621016037924634, 'MAE': 0.5758589580651329, 'MI': 0.24919261959916233, 'CORRELATION': 0.658146326506337}, 'times': {'contamination': 0.006661415100097656, 'optimization': 2116.446483373642, 'imputation': 7.104876279830933}}, '0.4': {'scores': {'RMSE': 0.7902203838415963, 'MAE': 0.5922773198020501, 'MI': 0.19381374823819753, 'CORRELATION': 0.6157623089917651}, 'times': {'contamination': 0.0220491886138916, 'optimization': 2116.446483373642, 'imputation': 45.60467576980591}}, '0.6': {'scores': {'RMSE': 0.8606721167494161, 'MAE': 0.6509795391102093, 'MI': 0.14703461141268756, 'CORRELATION': 0.5349197031621258}, 'times': {'contamination': 0.0532374382019043, 'optimization': 2116.446483373642, 'imputation': 138.31860542297363}}, '0.8': {'scores': {'RMSE': 0.9473077321399332, 'MAE': 0.721873093140729, 'MI': 0.09210269321275755, 'CORRELATION': 0.41686255415646745}, 'times': {'contamination': 0.12064862251281738, 'optimization': 2116.446483373642, 'imputation': 310.311674118042}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.564373760409394, 'MAE': 1.221169321990917, 'MI': 0.5391032425183402, 'CORRELATION': 0.14029162735376388}, 'times': {'contamination': 0.001262664794921875, 'optimization': 4433.628644704819, 'imputation': 363.13744926452637}}, '0.1': {'scores': {'RMSE': 1.5003009239093386, 'MAE': 1.178061468837976, 'MI': 0.09732624088216657, 'CORRELATION': -0.03457815118265904}, 'times': {'contamination': 0.0030307769775390625, 'optimization': 4433.628644704819, 'imputation': 348.3805763721466}}, '0.2': {'scores': {'RMSE': 1.48756511282537, 'MAE': 1.211117150696572, 'MI': 0.03465816567888362, 'CORRELATION': -0.04992241665116051}, 'times': {'contamination': 0.00698542594909668, 'optimization': 4433.628644704819, 'imputation': 349.00921535491943}}, '0.4': {'scores': {'RMSE': 1.3053260371206012, 'MAE': 1.0395260022271195, 'MI': 0.006322235025890169, 'CORRELATION': -0.007140449894312156}, 'times': {'contamination': 0.022205352783203125, 'optimization': 4433.628644704819, 'imputation': 359.71013283729553}}, '0.6': {'scores': {'RMSE': 1.3648044261884822, 'MAE': 1.0965613308947504, 'MI': 0.0030644481561666144, 'CORRELATION': -0.022828258162777018}, 'times': {'contamination': 0.054125308990478516, 'optimization': 4433.628644704819, 'imputation': 363.40745854377747}}, '0.8': {'scores': {'RMSE': 1.397320341356025, 'MAE': 1.109946360446546, 'MI': 0.0028064424558294984, 'CORRELATION': -0.03067013067996843}, 'times': {'contamination': 0.12269306182861328, 'optimization': 4433.628644704819, 'imputation': 359.04918146133423}}}}}}} + run_2_fmri_o ={'fmriobjectviewing': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0389734217605486, 'MAE': 0.8058577685345816, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.001322031021118164, 'optimization': 0, 'imputation': 0.0010623931884765625}}, '0.1': {'scores': {'RMSE': 1.039599691211445, 'MAE': 0.8190561891835487, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0029027462005615234, 'optimization': 0, 'imputation': 0.0006194114685058594}}, '0.2': {'scores': {'RMSE': 1.0062387656710172, 'MAE': 0.7979296742837627, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0067632198333740234, 'optimization': 0, 'imputation': 0.0006532669067382812}}, '0.4': {'scores': {'RMSE': 0.9883754343533185, 'MAE': 0.7862876896101476, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0221099853515625, 'optimization': 0, 'imputation': 
0.0007634162902832031}}, '0.6': {'scores': {'RMSE': 0.987097660571777, 'MAE': 0.7834652940902236, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0535585880279541, 'optimization': 0, 'imputation': 0.0008707046508789062}}, '0.8': {'scores': {'RMSE': 0.9871215644673538, 'MAE': 0.783016411575714, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.1221921443939209, 'optimization': 0, 'imputation': 0.0009012222290039062}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.7921010903466756, 'MAE': 0.544583599318027, 'MI': 0.6452684348756488, 'CORRELATION': 0.6564536961355489}, 'times': {'contamination': 0.0013003349304199219, 'optimization': 159.3788959980011, 'imputation': 0.0172426700592041}}, '0.1': {'scores': {'RMSE': 0.8734423329225157, 'MAE': 0.6770893621008395, 'MI': 0.17404003258531509, 'CORRELATION': 0.5463883586396225}, 'times': {'contamination': 0.006696939468383789, 'optimization': 159.3788959980011, 'imputation': 0.023551225662231445}}, '0.2': {'scores': {'RMSE': 0.8860045404559919, 'MAE': 0.6822309993559906, 'MI': 0.13114386403484066, 'CORRELATION': 0.4879034991275287}, 'times': {'contamination': 0.01536703109741211, 'optimization': 159.3788959980011, 'imputation': 0.029254913330078125}}, '0.4': {'scores': {'RMSE': 0.85668086245811, 'MAE': 0.6554946643451944, 'MI': 0.13136521095105114, 'CORRELATION': 0.512333042486155}, 'times': {'contamination': 0.049208879470825195, 'optimization': 159.3788959980011, 'imputation': 0.02578568458557129}}, '0.6': {'scores': {'RMSE': 0.8734554290811476, 'MAE': 0.668388555663456, 'MI': 0.12207632358330225, 'CORRELATION': 0.4901910978698973}, 'times': {'contamination': 0.11803746223449707, 'optimization': 159.3788959980011, 'imputation': 0.01870417594909668}}, '0.8': {'scores': {'RMSE': 0.9101165604682941, 'MAE': 0.7007441393931623, 'MI': 0.08721517971477473, 'CORRELATION': 0.424583398592384}, 'times': {'contamination': 0.1880052089691162, 'optimization': 159.3788959980011, 'imputation': 0.03435778617858887}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.8717933443365717, 'MAE': 0.6555873875520205, 'MI': 0.5468713896761781, 'CORRELATION': 0.5360081770612317}, 'times': {'contamination': 0.002277374267578125, 'optimization': 48.99785590171814, 'imputation': 1.7287921905517578}}, '0.1': {'scores': {'RMSE': 0.8625085002829386, 'MAE': 0.6689945733093743, 'MI': 0.2158458111360233, 'CORRELATION': 0.5598406577746278}, 'times': {'contamination': 0.0032448768615722656, 'optimization': 48.99785590171814, 'imputation': 1.7275073528289795}}, '0.2': {'scores': {'RMSE': 0.8974016981576581, 'MAE': 0.693506918834922, 'MI': 0.12539695399359563, 'CORRELATION': 0.4696294419377184}, 'times': {'contamination': 0.007249355316162109, 'optimization': 48.99785590171814, 'imputation': 1.807462215423584}}, '0.4': {'scores': {'RMSE': 0.9058118302006622, 'MAE': 0.7072376811266821, 'MI': 0.09089971471437183, 'CORRELATION': 0.4307173907497016}, 'times': {'contamination': 0.022536754608154297, 'optimization': 48.99785590171814, 'imputation': 2.028677225112915}}, '0.6': {'scores': {'RMSE': 0.9926298063877358, 'MAE': 0.7768854416569236, 'MI': 0.040884571524434955, 'CORRELATION': 0.2900818700028841}, 'times': {'contamination': 0.054079294204711914, 'optimization': 48.99785590171814, 'imputation': 1.5881314277648926}}, '0.8': {'scores': {'RMSE': 1.1125302701236894, 'MAE': 0.8727960243823621, 'MI': 0.013958439365190834, 'CORRELATION': 0.16455618813674522}, 'times': {'contamination': 0.12213516235351562, 'optimization': 48.99785590171814, 'imputation': 
2.0539379119873047}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.6263705795260325, 'MAE': 0.4548865753229437, 'MI': 0.781959674837021, 'CORRELATION': 0.7986062368219096}, 'times': {'contamination': 0.0018770694732666016, 'optimization': 2117.437706708908, 'imputation': 0.22168946266174316}}, '0.1': {'scores': {'RMSE': 0.6899987721177722, 'MAE': 0.5259878926891887, 'MI': 0.395810445074613, 'CORRELATION': 0.7477771679714831}, 'times': {'contamination': 0.0028429031372070312, 'optimization': 2117.437706708908, 'imputation': 1.3405146598815918}}, '0.2': {'scores': {'RMSE': 0.7621016037924634, 'MAE': 0.5758589580651329, 'MI': 0.24919261959916233, 'CORRELATION': 0.658146326506337}, 'times': {'contamination': 0.0066187381744384766, 'optimization': 2117.437706708908, 'imputation': 7.121232271194458}}, '0.4': {'scores': {'RMSE': 0.7902203838415963, 'MAE': 0.5922773198020501, 'MI': 0.19381374823819753, 'CORRELATION': 0.6157623089917651}, 'times': {'contamination': 0.023590564727783203, 'optimization': 2117.437706708908, 'imputation': 45.16994309425354}}, '0.6': {'scores': {'RMSE': 0.8606721167494161, 'MAE': 0.6509795391102093, 'MI': 0.14703461141268756, 'CORRELATION': 0.5349197031621258}, 'times': {'contamination': 0.05335497856140137, 'optimization': 2117.437706708908, 'imputation': 138.5317099094391}}, '0.8': {'scores': {'RMSE': 0.9473077321399332, 'MAE': 0.721873093140729, 'MI': 0.09210269321275755, 'CORRELATION': 0.41686255415646745}, 'times': {'contamination': 0.12050509452819824, 'optimization': 2117.437706708908, 'imputation': 309.835489988327}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.5720458604683403, 'MAE': 1.2772582907839167, 'MI': 0.42568306113018717, 'CORRELATION': -0.14831612460275406}, 'times': {'contamination': 0.0014629364013671875, 'optimization': 4468.6066954135895, 'imputation': 352.0414505004883}}, '0.1': {'scores': {'RMSE': 1.4907337776914031, 'MAE': 1.1877772537536995, 'MI': 0.14090901585882215, 'CORRELATION': 0.10008244436430952}, 'times': {'contamination': 0.002844095230102539, 'optimization': 4468.6066954135895, 'imputation': 361.1672565937042}}, '0.2': {'scores': {'RMSE': 1.402763495604196, 'MAE': 1.1258676418974762, 'MI': 0.025496459386318313, 'CORRELATION': -0.02806308194006537}, 'times': {'contamination': 0.006960391998291016, 'optimization': 4468.6066954135895, 'imputation': 359.7817280292511}}, '0.4': {'scores': {'RMSE': 1.3340271011920504, 'MAE': 1.0653643637835586, 'MI': 0.006026542304077175, 'CORRELATION': -0.018817794328124735}, 'times': {'contamination': 0.022516489028930664, 'optimization': 4468.6066954135895, 'imputation': 363.93244767189026}}, '0.6': {'scores': {'RMSE': 1.3622649743673116, 'MAE': 1.0917232146475109, 'MI': 0.003570086902367028, 'CORRELATION': -0.026435073387852663}, 'times': {'contamination': 0.053468942642211914, 'optimization': 4468.6066954135895, 'imputation': 359.8436996936798}}, '0.8': {'scores': {'RMSE': 1.3476859394569145, 'MAE': 1.077975225740552, 'MI': 0.002668480785992482, 'CORRELATION': -0.02083562688022463}, 'times': {'contamination': 0.12250685691833496, 'optimization': 4468.6066954135895, 'imputation': 360.8773157596588}}}}}}} + run_3_fmri_o ={'fmriobjectviewing': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0389734217605486, 'MAE': 0.8058577685345816, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0013332366943359375, 'optimization': 0, 'imputation': 0.0010461807250976562}}, '0.1': {'scores': {'RMSE': 1.039599691211445, 'MAE': 0.8190561891835487, 'MI': 0.0, 'CORRELATION': 0}, 
'times': {'contamination': 0.0028600692749023438, 'optimization': 0, 'imputation': 0.0005738735198974609}}, '0.2': {'scores': {'RMSE': 1.0062387656710172, 'MAE': 0.7979296742837627, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.006735324859619141, 'optimization': 0, 'imputation': 0.0006284713745117188}}, '0.4': {'scores': {'RMSE': 0.9883754343533185, 'MAE': 0.7862876896101476, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.025367259979248047, 'optimization': 0, 'imputation': 0.0009474754333496094}}, '0.6': {'scores': {'RMSE': 0.987097660571777, 'MAE': 0.7834652940902236, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.054114341735839844, 'optimization': 0, 'imputation': 0.0008347034454345703}}, '0.8': {'scores': {'RMSE': 0.9871215644673538, 'MAE': 0.783016411575714, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.1226034164428711, 'optimization': 0, 'imputation': 0.0008599758148193359}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.7921010903466756, 'MAE': 0.544583599318027, 'MI': 0.6452684348756488, 'CORRELATION': 0.6564536961355489}, 'times': {'contamination': 0.0011966228485107422, 'optimization': 157.82238936424255, 'imputation': 0.017590761184692383}}, '0.1': {'scores': {'RMSE': 0.8734423329225157, 'MAE': 0.6770893621008395, 'MI': 0.17404003258531509, 'CORRELATION': 0.5463883586396225}, 'times': {'contamination': 0.006558895111083984, 'optimization': 157.82238936424255, 'imputation': 0.023885011672973633}}, '0.2': {'scores': {'RMSE': 0.8860045404559919, 'MAE': 0.6822309993559906, 'MI': 0.13114386403484066, 'CORRELATION': 0.4879034991275287}, 'times': {'contamination': 0.015262842178344727, 'optimization': 157.82238936424255, 'imputation': 0.029367446899414062}}, '0.4': {'scores': {'RMSE': 0.85668086245811, 'MAE': 0.6554946643451944, 'MI': 0.13136521095105114, 'CORRELATION': 0.512333042486155}, 'times': {'contamination': 0.04946470260620117, 'optimization': 157.82238936424255, 'imputation': 0.025765419006347656}}, '0.6': {'scores': {'RMSE': 0.8734554290811476, 'MAE': 0.668388555663456, 'MI': 0.12207632358330225, 'CORRELATION': 0.4901910978698973}, 'times': {'contamination': 0.11716961860656738, 'optimization': 157.82238936424255, 'imputation': 0.029912710189819336}}, '0.8': {'scores': {'RMSE': 0.9101165604682941, 'MAE': 0.7007441393931623, 'MI': 0.08721517971477473, 'CORRELATION': 0.424583398592384}, 'times': {'contamination': 0.18310046195983887, 'optimization': 157.82238936424255, 'imputation': 0.034597158432006836}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.8717933443365717, 'MAE': 0.6555873875520205, 'MI': 0.5468713896761781, 'CORRELATION': 0.5360081770612317}, 'times': {'contamination': 0.0025048255920410156, 'optimization': 49.13337993621826, 'imputation': 1.7170403003692627}}, '0.1': {'scores': {'RMSE': 0.8625085002829386, 'MAE': 0.6689945733093743, 'MI': 0.2158458111360233, 'CORRELATION': 0.5598406577746278}, 'times': {'contamination': 0.002758026123046875, 'optimization': 49.13337993621826, 'imputation': 1.782259225845337}}, '0.2': {'scores': {'RMSE': 0.8974016981576581, 'MAE': 0.693506918834922, 'MI': 0.12539695399359563, 'CORRELATION': 0.4696294419377184}, 'times': {'contamination': 0.006672382354736328, 'optimization': 49.13337993621826, 'imputation': 1.8251664638519287}}, '0.4': {'scores': {'RMSE': 0.9058118302006622, 'MAE': 0.7072376811266821, 'MI': 0.09089971471437183, 'CORRELATION': 0.4307173907497016}, 'times': {'contamination': 0.02189946174621582, 'optimization': 49.13337993621826, 'imputation': 
2.061220169067383}}, '0.6': {'scores': {'RMSE': 0.9926298063877358, 'MAE': 0.7768854416569236, 'MI': 0.040884571524434955, 'CORRELATION': 0.2900818700028841}, 'times': {'contamination': 0.05326199531555176, 'optimization': 49.13337993621826, 'imputation': 1.6660246849060059}}, '0.8': {'scores': {'RMSE': 1.1125302701236894, 'MAE': 0.8727960243823621, 'MI': 0.013958439365190834, 'CORRELATION': 0.16455618813674522}, 'times': {'contamination': 0.1214754581451416, 'optimization': 49.13337993621826, 'imputation': 2.0595123767852783}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.6263705795260325, 'MAE': 0.4548865753229437, 'MI': 0.781959674837021, 'CORRELATION': 0.7986062368219096}, 'times': {'contamination': 0.0012001991271972656, 'optimization': 2115.569543361664, 'imputation': 0.22057271003723145}}, '0.1': {'scores': {'RMSE': 0.6899987721177722, 'MAE': 0.5259878926891887, 'MI': 0.395810445074613, 'CORRELATION': 0.7477771679714831}, 'times': {'contamination': 0.0028693675994873047, 'optimization': 2115.569543361664, 'imputation': 1.332841396331787}}, '0.2': {'scores': {'RMSE': 0.7621016037924634, 'MAE': 0.5758589580651329, 'MI': 0.24919261959916233, 'CORRELATION': 0.658146326506337}, 'times': {'contamination': 0.0066070556640625, 'optimization': 2115.569543361664, 'imputation': 6.977942943572998}}, '0.4': {'scores': {'RMSE': 0.7902203838415963, 'MAE': 0.5922773198020501, 'MI': 0.19381374823819753, 'CORRELATION': 0.6157623089917651}, 'times': {'contamination': 0.023341894149780273, 'optimization': 2115.569543361664, 'imputation': 45.052905321121216}}, '0.6': {'scores': {'RMSE': 0.8606721167494161, 'MAE': 0.6509795391102093, 'MI': 0.14703461141268756, 'CORRELATION': 0.5349197031621258}, 'times': {'contamination': 0.053314924240112305, 'optimization': 2115.569543361664, 'imputation': 137.877295255661}}, '0.8': {'scores': {'RMSE': 0.9473077321399332, 'MAE': 0.721873093140729, 'MI': 0.09210269321275755, 'CORRELATION': 0.41686255415646745}, 'times': {'contamination': 0.12127208709716797, 'optimization': 2115.569543361664, 'imputation': 309.8284556865692}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.6414396019640038, 'MAE': 1.3240559958757634, 'MI': 0.5559452374102188, 'CORRELATION': -0.019190710334023774}, 'times': {'contamination': 0.001463174819946289, 'optimization': 4286.787290811539, 'imputation': 146.20701241493225}}, '0.1': {'scores': {'RMSE': 1.4931325738251233, 'MAE': 1.2291481963023954, 'MI': 0.10612382874060908, 'CORRELATION': 0.08822883294793381}, 'times': {'contamination': 0.003063201904296875, 'optimization': 4286.787290811539, 'imputation': 145.1298749446869}}, '0.2': {'scores': {'RMSE': 1.3592271642125449, 'MAE': 1.1023068858542104, 'MI': 0.031374496439453406, 'CORRELATION': 0.04531586048012379}, 'times': {'contamination': 0.00700068473815918, 'optimization': 4286.787290811539, 'imputation': 145.86979150772095}}, '0.4': {'scores': {'RMSE': 1.5155884162145739, 'MAE': 1.2095557823362952, 'MI': 0.007762134072031226, 'CORRELATION': -0.01994479803059748}, 'times': {'contamination': 0.022418737411499023, 'optimization': 4286.787290811539, 'imputation': 142.07973980903625}}, '0.6': {'scores': {'RMSE': 1.4205010123384363, 'MAE': 1.140500261582132, 'MI': 0.004244506579222641, 'CORRELATION': -0.017115141060066015}, 'times': {'contamination': 0.05402565002441406, 'optimization': 4286.787290811539, 'imputation': 139.75832986831665}}, '0.8': {'scores': {'RMSE': 1.4393703997870884, 'MAE': 1.1419154482992642, 'MI': 0.0026830949612693445, 'CORRELATION': -0.012083949814718867}, 
'times': {'contamination': 0.12264132499694824, 'optimization': 4286.787290811539, 'imputation': 144.70407223701477}}}}}}} + + run_1_fmri_s = {'fmristoptask': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0591754233439183, 'MAE': 0.8811507908679529, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0015919208526611328, 'optimization': 0, 'imputation': 0.0009393692016601562}}, '0.1': {'scores': {'RMSE': 0.9651108444122715, 'MAE': 0.784231196318496, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0035066604614257812, 'optimization': 0, 'imputation': 0.000621795654296875}}, '0.2': {'scores': {'RMSE': 0.9932773680676918, 'MAE': 0.8034395750738844, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.009276866912841797, 'optimization': 0, 'imputation': 0.0006399154663085938}}, '0.4': {'scores': {'RMSE': 1.0058748440484344, 'MAE': 0.8113341021149199, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.03150796890258789, 'optimization': 0, 'imputation': 0.0008380413055419922}}, '0.6': {'scores': {'RMSE': 0.9944066185522102, 'MAE': 0.8023296982336051, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.07896685600280762, 'optimization': 0, 'imputation': 0.0009694099426269531}}, '0.8': {'scores': {'RMSE': 0.9979990505486313, 'MAE': 0.8062359186814159, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.18951916694641113, 'optimization': 0, 'imputation': 0.0010123252868652344}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0815739858856455, 'MAE': 0.8947163048898044, 'MI': 0.23576973507164212, 'CORRELATION': -0.12274682282048005}, 'times': {'contamination': 0.0014772415161132812, 'optimization': 218.59592175483704, 'imputation': 0.0071277618408203125}}, '0.1': {'scores': {'RMSE': 0.9695699729418912, 'MAE': 0.7898385707592198, 'MI': 0.06571976951128125, 'CORRELATION': 0.016476991654415008}, 'times': {'contamination': 0.008227348327636719, 'optimization': 218.59592175483704, 'imputation': 0.0062563419342041016}}, '0.2': {'scores': {'RMSE': 1.0023712131611957, 'MAE': 0.8108602788128816, 'MI': 0.02538765630290373, 'CORRELATION': -0.016656543511887868}, 'times': {'contamination': 0.0209505558013916, 'optimization': 218.59592175483704, 'imputation': 0.006833791732788086}}, '0.4': {'scores': {'RMSE': 1.0138537110215022, 'MAE': 0.8167419153197173, 'MI': 0.0038274804707874484, 'CORRELATION': 0.002717578068034049}, 'times': {'contamination': 0.07195234298706055, 'optimization': 218.59592175483704, 'imputation': 0.006715297698974609}}, '0.6': {'scores': {'RMSE': 1.0022937958385385, 'MAE': 0.807293318305244, 'MI': 0.0018376453669024168, 'CORRELATION': 0.004596695453371254}, 'times': {'contamination': 0.14271855354309082, 'optimization': 218.59592175483704, 'imputation': 0.0066835880279541016}}, '0.8': {'scores': {'RMSE': 1.0104537937047533, 'MAE': 0.8149091851781165, 'MI': 0.0008945376054130945, 'CORRELATION': -0.0013082054469119196}, 'times': {'contamination': 0.2467949390411377, 'optimization': 218.59592175483704, 'imputation': 0.005556821823120117}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 1.1715750158207363, 'MAE': 0.9389573934580852, 'MI': 0.30612963701823526, 'CORRELATION': -0.22056411372111834}, 'times': {'contamination': 0.0029337406158447266, 'optimization': 109.35183715820312, 'imputation': 10.097216844558716}}, '0.1': {'scores': {'RMSE': 1.0588476372168147, 'MAE': 0.8437403156914149, 'MI': 0.08955991417984446, 'CORRELATION': -0.1963089605999627}, 'times': {'contamination': 0.003466367721557617, 'optimization': 
109.35183715820312, 'imputation': 10.141520977020264}}, '0.2': {'scores': {'RMSE': 1.0391969620815695, 'MAE': 0.8364861943065512, 'MI': 0.02582105408815175, 'CORRELATION': -0.09232453336176588}, 'times': {'contamination': 0.009216070175170898, 'optimization': 109.35183715820312, 'imputation': 10.349437952041626}}, '0.4': {'scores': {'RMSE': 1.0340455393837413, 'MAE': 0.832400199311948, 'MI': 0.00520789381175344, 'CORRELATION': -0.04499260926820861}, 'times': {'contamination': 0.031234025955200195, 'optimization': 109.35183715820312, 'imputation': 11.021637439727783}}, '0.6': {'scores': {'RMSE': 4.011139383889788, 'MAE': 3.152797499531786, 'MI': 0.003672509477371519, 'CORRELATION': -0.05413975121078511}, 'times': {'contamination': 0.07903313636779785, 'optimization': 109.35183715820312, 'imputation': 8.597065448760986}}, '0.8': {'scores': {'RMSE': 2.97893158705676, 'MAE': 1.0602936132635719, 'MI': 0.00079094933311715, 'CORRELATION': 0.006947773983399647}, 'times': {'contamination': 0.18648958206176758, 'optimization': 109.35183715820312, 'imputation': 8.484493017196655}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0692148314478316, 'MAE': 0.873400733402723, 'MI': 0.2787388945371119, 'CORRELATION': -0.02021145481191946}, 'times': {'contamination': 0.0014984607696533203, 'optimization': 5072.065117120743, 'imputation': 10.695194959640503}}, '0.1': {'scores': {'RMSE': 0.9719895445677292, 'MAE': 0.7851843420896756, 'MI': 0.0830808565046283, 'CORRELATION': 0.003268635254181307}, 'times': {'contamination': 0.0036919116973876953, 'optimization': 5072.065117120743, 'imputation': 49.85465955734253}}, '0.2': {'scores': {'RMSE': 0.99753636840165, 'MAE': 0.8012616128674659, 'MI': 0.019093143495502334, 'CORRELATION': 0.02540361203010324}, 'times': {'contamination': 0.009428024291992188, 'optimization': 5072.065117120743, 'imputation': 256.5751883983612}}, '0.4': {'scores': {'RMSE': 1.0155975152475738, 'MAE': 0.8140496119700683, 'MI': 0.004260439955627443, 'CORRELATION': 0.0006423716677864647}, 'times': {'contamination': 0.03140830993652344, 'optimization': 5072.065117120743, 'imputation': 1478.6712100505829}}, '0.6': {'scores': {'RMSE': 1.0040752264526889, 'MAE': 0.8052914143043017, 'MI': 0.0018099723977603893, 'CORRELATION': -0.006621752869444718}, 'times': {'contamination': 0.078643798828125, 'optimization': 5072.065117120743, 'imputation': 4524.759085655212}}, '0.8': {'scores': {'RMSE': 1.0078811833781343, 'MAE': 0.8090736592195691, 'MI': 0.001033941419470956, 'CORRELATION': -0.003099173821807945}, 'times': {'contamination': 0.18721938133239746, 'optimization': 5072.065117120743, 'imputation': 9412.6311917305}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.1338764305073745, 'MAE': 0.9621062739244053, 'MI': 0.3598215610903952, 'CORRELATION': 0.025601496823399808}, 'times': {'contamination': 0.0015861988067626953, 'optimization': 4126.59906744957, 'imputation': 342.70579957962036}}, '0.1': {'scores': {'RMSE': 1.0482569597259581, 'MAE': 0.8581623399073744, 'MI': 0.06844413129644446, 'CORRELATION': -0.0014806171782817523}, 'times': {'contamination': 0.0036308765411376953, 'optimization': 4126.59906744957, 'imputation': 343.8899919986725}}, '0.2': {'scores': {'RMSE': 1.108104493013085, 'MAE': 0.8889218664347283, 'MI': 0.01428662029369019, 'CORRELATION': -0.01599849799033619}, 'times': {'contamination': 0.009608030319213867, 'optimization': 4126.59906744957, 'imputation': 337.21221137046814}}, '0.4': {'scores': {'RMSE': 1.0874206799897894, 'MAE': 0.8779506482944299, 'MI': 
0.0033213857091975076, 'CORRELATION': -0.020652285279186847}, 'times': {'contamination': 0.031928300857543945, 'optimization': 4126.59906744957, 'imputation': 342.1561577320099}}, '0.6': {'scores': {'RMSE': 1.0761284785747784, 'MAE': 0.8685233944278966, 'MI': 0.0023801932977624415, 'CORRELATION': -0.018156792999867576}, 'times': {'contamination': 0.07982683181762695, 'optimization': 4126.59906744957, 'imputation': 337.27074241638184}}, '0.8': {'scores': {'RMSE': 1.0801585962955265, 'MAE': 0.8717671912922593, 'MI': 0.0011299889668764137, 'CORRELATION': -0.019436732822524068}, 'times': {'contamination': 0.1887671947479248, 'optimization': 4126.59906744957, 'imputation': 350.4762156009674}}}}}}} + run_2_fmri_s = {'fmristoptask': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0591754233439183, 'MAE': 0.8811507908679529, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0017461776733398438, 'optimization': 0, 'imputation': 0.001100778579711914}}, '0.1': {'scores': {'RMSE': 0.9651108444122715, 'MAE': 0.784231196318496, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0038170814514160156, 'optimization': 0, 'imputation': 0.0006277561187744141}}, '0.2': {'scores': {'RMSE': 0.9932773680676918, 'MAE': 0.8034395750738844, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.009348630905151367, 'optimization': 0, 'imputation': 0.0006661415100097656}}, '0.4': {'scores': {'RMSE': 1.0058748440484344, 'MAE': 0.8113341021149199, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.03160667419433594, 'optimization': 0, 'imputation': 0.0009412765502929688}}, '0.6': {'scores': {'RMSE': 0.9944066185522102, 'MAE': 0.8023296982336051, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.07952380180358887, 'optimization': 0, 'imputation': 0.001110076904296875}}, '0.8': {'scores': {'RMSE': 0.9979990505486313, 'MAE': 0.8062359186814159, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.18988037109375, 'optimization': 0, 'imputation': 0.0012199878692626953}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0815739858856455, 'MAE': 0.8947163048898044, 'MI': 0.23576973507164212, 'CORRELATION': -0.12274682282048005}, 'times': {'contamination': 0.00153350830078125, 'optimization': 222.17338752746582, 'imputation': 0.007252931594848633}}, '0.1': {'scores': {'RMSE': 0.9695699729418912, 'MAE': 0.7898385707592198, 'MI': 0.06571976951128125, 'CORRELATION': 0.016476991654415008}, 'times': {'contamination': 0.00762939453125, 'optimization': 222.17338752746582, 'imputation': 0.006178379058837891}}, '0.2': {'scores': {'RMSE': 1.0023712131611957, 'MAE': 0.8108602788128816, 'MI': 0.02538765630290373, 'CORRELATION': -0.016656543511887868}, 'times': {'contamination': 0.020302534103393555, 'optimization': 222.17338752746582, 'imputation': 0.006856203079223633}}, '0.4': {'scores': {'RMSE': 1.0138537110215022, 'MAE': 0.8167419153197173, 'MI': 0.0038274804707874484, 'CORRELATION': 0.002717578068034049}, 'times': {'contamination': 0.07085132598876953, 'optimization': 222.17338752746582, 'imputation': 0.006796836853027344}}, '0.6': {'scores': {'RMSE': 1.0022937958385385, 'MAE': 0.807293318305244, 'MI': 0.0018376453669024168, 'CORRELATION': 0.004596695453371254}, 'times': {'contamination': 0.14228367805480957, 'optimization': 222.17338752746582, 'imputation': 0.006799221038818359}}, '0.8': {'scores': {'RMSE': 1.0104537937047533, 'MAE': 0.8149091851781165, 'MI': 0.0008945376054130945, 'CORRELATION': -0.0013082054469119196}, 'times': {'contamination': 0.24705862998962402, 
'optimization': 222.17338752746582, 'imputation': 0.005573272705078125}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 1.1715750158207363, 'MAE': 0.9389573934580852, 'MI': 0.30612963701823526, 'CORRELATION': -0.22056411372111834}, 'times': {'contamination': 0.002927064895629883, 'optimization': 109.24887442588806, 'imputation': 10.111968994140625}}, '0.1': {'scores': {'RMSE': 1.0588476372168147, 'MAE': 0.8437403156914149, 'MI': 0.08955991417984446, 'CORRELATION': -0.1963089605999627}, 'times': {'contamination': 0.0034782886505126953, 'optimization': 109.24887442588806, 'imputation': 10.12447738647461}}, '0.2': {'scores': {'RMSE': 1.0391969620815695, 'MAE': 0.8364861943065512, 'MI': 0.02582105408815175, 'CORRELATION': -0.09232453336176588}, 'times': {'contamination': 0.009154081344604492, 'optimization': 109.24887442588806, 'imputation': 10.325854778289795}}, '0.4': {'scores': {'RMSE': 1.0340455393837413, 'MAE': 0.832400199311948, 'MI': 0.00520789381175344, 'CORRELATION': -0.04499260926820861}, 'times': {'contamination': 0.031117677688598633, 'optimization': 109.24887442588806, 'imputation': 11.087183237075806}}, '0.6': {'scores': {'RMSE': 4.011139383889788, 'MAE': 3.152797499531786, 'MI': 0.003672509477371519, 'CORRELATION': -0.05413975121078511}, 'times': {'contamination': 0.07905244827270508, 'optimization': 109.24887442588806, 'imputation': 8.649941444396973}}, '0.8': {'scores': {'RMSE': 2.97893158705676, 'MAE': 1.0602936132635719, 'MI': 0.00079094933311715, 'CORRELATION': 0.006947773983399647}, 'times': {'contamination': 0.18860864639282227, 'optimization': 109.24887442588806, 'imputation': 8.43183708190918}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0692148314478316, 'MAE': 0.873400733402723, 'MI': 0.2787388945371119, 'CORRELATION': -0.02021145481191946}, 'times': {'contamination': 0.0014863014221191406, 'optimization': 5088.581882238388, 'imputation': 10.688252687454224}}, '0.1': {'scores': {'RMSE': 0.9719895445677292, 'MAE': 0.7851843420896756, 'MI': 0.0830808565046283, 'CORRELATION': 0.003268635254181307}, 'times': {'contamination': 0.0037031173706054688, 'optimization': 5088.581882238388, 'imputation': 50.06313109397888}}, '0.2': {'scores': {'RMSE': 0.99753636840165, 'MAE': 0.8012616128674659, 'MI': 0.019093143495502334, 'CORRELATION': 0.02540361203010324}, 'times': {'contamination': 0.00922083854675293, 'optimization': 5088.581882238388, 'imputation': 257.213321685791}}, '0.4': {'scores': {'RMSE': 1.0155975152475738, 'MAE': 0.8140496119700683, 'MI': 0.004260439955627443, 'CORRELATION': 0.0006423716677864647}, 'times': {'contamination': 0.03141498565673828, 'optimization': 5088.581882238388, 'imputation': 1488.7819337844849}}, '0.6': {'scores': {'RMSE': 1.0040752264526889, 'MAE': 0.8052914143043017, 'MI': 0.0018099723977603893, 'CORRELATION': -0.006621752869444718}, 'times': {'contamination': 0.07847213745117188, 'optimization': 5088.581882238388, 'imputation': 4525.959330558777}}, '0.8': {'scores': {'RMSE': 1.0078811833781343, 'MAE': 0.8090736592195691, 'MI': 0.001033941419470956, 'CORRELATION': -0.003099173821807945}, 'times': {'contamination': 0.18671298027038574, 'optimization': 5088.581882238388, 'imputation': 9460.7878510952}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.122220535003296, 'MAE': 0.9644508995813553, 'MI': 0.2759436355942961, 'CORRELATION': 0.09245761750327637}, 'times': {'contamination': 0.0015985965728759766, 'optimization': 4112.733412027359, 'imputation': 338.0099182128906}}, '0.1': {'scores': {'RMSE': 1.0832970108643896, 
'MAE': 0.8823888940960694, 'MI': 0.0722893609050923, 'CORRELATION': -0.019930274489311815}, 'times': {'contamination': 0.0035643577575683594, 'optimization': 4112.733412027359, 'imputation': 337.6157658100128}}, '0.2': {'scores': {'RMSE': 1.0767155565632924, 'MAE': 0.8684991669552922, 'MI': 0.009245255133377466, 'CORRELATION': 0.0027516812337193518}, 'times': {'contamination': 0.009638309478759766, 'optimization': 4112.733412027359, 'imputation': 328.19135212898254}}, '0.4': {'scores': {'RMSE': 1.0934522863869605, 'MAE': 0.8840570779852788, 'MI': 0.003369568798431563, 'CORRELATION': -0.021061682051014274}, 'times': {'contamination': 0.03281116485595703, 'optimization': 4112.733412027359, 'imputation': 346.8224673271179}}, '0.6': {'scores': {'RMSE': 1.0783671319985777, 'MAE': 0.8704278560665365, 'MI': 0.00169355769499049, 'CORRELATION': -0.019325646685601}, 'times': {'contamination': 0.08042550086975098, 'optimization': 4112.733412027359, 'imputation': 340.2620213031769}}, '0.8': {'scores': {'RMSE': 1.081513280302422, 'MAE': 0.8746519908670293, 'MI': 0.0011728245783709944, 'CORRELATION': -0.016826349565356294}, 'times': {'contamination': 0.18836283683776855, 'optimization': 4112.733412027359, 'imputation': 341.4021186828613}}}}}}} + run_3_fmri_s = {'fmristoptask': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0591754233439183, 'MAE': 0.8811507908679529, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.001561880111694336, 'optimization': 0, 'imputation': 0.0010650157928466797}}, '0.1': {'scores': {'RMSE': 0.9651108444122715, 'MAE': 0.784231196318496, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0035762786865234375, 'optimization': 0, 'imputation': 0.0006108283996582031}}, '0.2': {'scores': {'RMSE': 0.9932773680676918, 'MAE': 0.8034395750738844, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.009912252426147461, 'optimization': 0, 'imputation': 0.000682830810546875}}, '0.4': {'scores': {'RMSE': 1.0058748440484344, 'MAE': 0.8113341021149199, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.033663034439086914, 'optimization': 0, 'imputation': 0.0008401870727539062}}, '0.6': {'scores': {'RMSE': 0.9944066185522102, 'MAE': 0.8023296982336051, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.08425664901733398, 'optimization': 0, 'imputation': 0.0010020732879638672}}, '0.8': {'scores': {'RMSE': 0.9979990505486313, 'MAE': 0.8062359186814159, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.1884922981262207, 'optimization': 0, 'imputation': 0.0009903907775878906}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0815739858856455, 'MAE': 0.8947163048898044, 'MI': 0.23576973507164212, 'CORRELATION': -0.12274682282048005}, 'times': {'contamination': 0.0014843940734863281, 'optimization': 216.33094692230225, 'imputation': 0.006989240646362305}}, '0.1': {'scores': {'RMSE': 0.9695699729418912, 'MAE': 0.7898385707592198, 'MI': 0.06571976951128125, 'CORRELATION': 0.016476991654415008}, 'times': {'contamination': 0.008179664611816406, 'optimization': 216.33094692230225, 'imputation': 0.0062677860260009766}}, '0.2': {'scores': {'RMSE': 1.0023712131611957, 'MAE': 0.8108602788128816, 'MI': 0.02538765630290373, 'CORRELATION': -0.016656543511887868}, 'times': {'contamination': 0.02096843719482422, 'optimization': 216.33094692230225, 'imputation': 0.006853580474853516}}, '0.4': {'scores': {'RMSE': 1.0138537110215022, 'MAE': 0.8167419153197173, 'MI': 0.0038274804707874484, 'CORRELATION': 0.002717578068034049}, 'times': 
{'contamination': 0.07195258140563965, 'optimization': 216.33094692230225, 'imputation': 0.00666499137878418}}, '0.6': {'scores': {'RMSE': 1.0022937958385385, 'MAE': 0.807293318305244, 'MI': 0.0018376453669024168, 'CORRELATION': 0.004596695453371254}, 'times': {'contamination': 0.14317655563354492, 'optimization': 216.33094692230225, 'imputation': 0.006315708160400391}}, '0.8': {'scores': {'RMSE': 1.0104537937047533, 'MAE': 0.8149091851781165, 'MI': 0.0008945376054130945, 'CORRELATION': -0.0013082054469119196}, 'times': {'contamination': 0.2480306625366211, 'optimization': 216.33094692230225, 'imputation': 0.005487203598022461}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 1.1715750158207363, 'MAE': 0.9389573934580852, 'MI': 0.30612963701823526, 'CORRELATION': -0.22056411372111834}, 'times': {'contamination': 0.0031473636627197266, 'optimization': 110.04800581932068, 'imputation': 10.122947692871094}}, '0.1': {'scores': {'RMSE': 1.0588476372168147, 'MAE': 0.8437403156914149, 'MI': 0.08955991417984446, 'CORRELATION': -0.1963089605999627}, 'times': {'contamination': 0.003419160842895508, 'optimization': 110.04800581932068, 'imputation': 10.181205034255981}}, '0.2': {'scores': {'RMSE': 1.0391969620815695, 'MAE': 0.8364861943065512, 'MI': 0.02582105408815175, 'CORRELATION': -0.09232453336176588}, 'times': {'contamination': 0.009185314178466797, 'optimization': 110.04800581932068, 'imputation': 10.448293685913086}}, '0.4': {'scores': {'RMSE': 1.0340455393837413, 'MAE': 0.832400199311948, 'MI': 0.00520789381175344, 'CORRELATION': -0.04499260926820861}, 'times': {'contamination': 0.030958890914916992, 'optimization': 110.04800581932068, 'imputation': 11.198593139648438}}, '0.6': {'scores': {'RMSE': 4.011139383889788, 'MAE': 3.152797499531786, 'MI': 0.003672509477371519, 'CORRELATION': -0.05413975121078511}, 'times': {'contamination': 0.07897067070007324, 'optimization': 110.04800581932068, 'imputation': 8.581665992736816}}, '0.8': {'scores': {'RMSE': 2.97893158705676, 'MAE': 1.0602936132635719, 'MI': 0.00079094933311715, 'CORRELATION': 0.006947773983399647}, 'times': {'contamination': 0.18915915489196777, 'optimization': 110.04800581932068, 'imputation': 8.440712690353394}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 1.0692148314478316, 'MAE': 0.873400733402723, 'MI': 0.2787388945371119, 'CORRELATION': -0.02021145481191946}, 'times': {'contamination': 0.0015034675598144531, 'optimization': 5124.588714838028, 'imputation': 10.759928226470947}}, '0.1': {'scores': {'RMSE': 0.9719895445677292, 'MAE': 0.7851843420896756, 'MI': 0.0830808565046283, 'CORRELATION': 0.003268635254181307}, 'times': {'contamination': 0.003936767578125, 'optimization': 5124.588714838028, 'imputation': 50.354418992996216}}, '0.2': {'scores': {'RMSE': 0.99753636840165, 'MAE': 0.8012616128674659, 'MI': 0.019093143495502334, 'CORRELATION': 0.02540361203010324}, 'times': {'contamination': 0.009255409240722656, 'optimization': 5124.588714838028, 'imputation': 259.3400568962097}}, '0.4': {'scores': {'RMSE': 1.0155975152475738, 'MAE': 0.8140496119700683, 'MI': 0.004260439955627443, 'CORRELATION': 0.0006423716677864647}, 'times': {'contamination': 0.0312647819519043, 'optimization': 5124.588714838028, 'imputation': 1500.3178548812866}}, '0.6': {'scores': {'RMSE': 1.0040752264526889, 'MAE': 0.8052914143043017, 'MI': 0.0018099723977603893, 'CORRELATION': -0.006621752869444718}, 'times': {'contamination': 0.07852554321289062, 'optimization': 5124.588714838028, 'imputation': 4581.28284406662}}, '0.8': {'scores': {'RMSE': 
1.0078811833781343, 'MAE': 0.8090736592195691, 'MI': 0.001033941419470956, 'CORRELATION': -0.003099173821807945}, 'times': {'contamination': 0.18776154518127441, 'optimization': 5124.588714838028, 'imputation': 9590.927385091782}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.146433389804167, 'MAE': 0.9770400477715633, 'MI': 0.3372765709259859, 'CORRELATION': 0.0330859633180261}, 'times': {'contamination': 0.001608133316040039, 'optimization': 4109.78501701355, 'imputation': 347.9514887332916}}, '0.1': {'scores': {'RMSE': 1.0805589422598818, 'MAE': 0.8789487774083494, 'MI': 0.06450452519706741, 'CORRELATION': 0.0050948685955938995}, 'times': {'contamination': 0.0037963390350341797, 'optimization': 4109.78501701355, 'imputation': 342.1326117515564}}, '0.2': {'scores': {'RMSE': 1.113302451577659, 'MAE': 0.8972310309254206, 'MI': 0.013539230335286593, 'CORRELATION': -0.010746184336502297}, 'times': {'contamination': 0.010583162307739258, 'optimization': 4109.78501701355, 'imputation': 347.8061354160309}}, '0.4': {'scores': {'RMSE': 1.1059062825212693, 'MAE': 0.8920096539260874, 'MI': 0.0039427922204060845, 'CORRELATION': -0.021280076256874978}, 'times': {'contamination': 0.03199410438537598, 'optimization': 4109.78501701355, 'imputation': 351.9458327293396}}, '0.6': {'scores': {'RMSE': 1.0740866766668984, 'MAE': 0.8664850080628724, 'MI': 0.0015316126887234942, 'CORRELATION': -0.021487493774034198}, 'times': {'contamination': 0.08084416389465332, 'optimization': 4109.78501701355, 'imputation': 349.9893400669098}}, '0.8': {'scores': {'RMSE': 1.075891210325233, 'MAE': 0.8695393935351904, 'MI': 0.0011319165672490211, 'CORRELATION': -0.017885852991857847}, 'times': {'contamination': 0.19720029830932617, 'optimization': 4109.78501701355, 'imputation': 349.96222448349}}}}}}} + + scores_list, algos, sets = Benchmark().avg_results(run_1_chlorine, run_2_chlorine, run_3_chlorine, run_1_drift, run_2_drift, run_3_drift, run_1_eeg_a, run_2_eeg_a, run_3_eeg_a, run_1_eeg_r, run_2_eeg_r, run_3_eeg_r, run_1_fmri_o, run_2_fmri_o, run_3_fmri_o, run_1_fmri_s, run_2_fmri_s, run_3_fmri_s) + + result = Benchmark().generate_heatmap(scores_list, algos, sets) + + + diff --git a/imputegap/runner_contamination.py b/imputegap/runner_contamination.py index ed426f9..eb29862 100644 --- a/imputegap/runner_contamination.py +++ b/imputegap/runner_contamination.py @@ -8,9 +8,9 @@ ts_1.load_timeseries(utils.search_path("eeg-alcohol")) ts_1.normalize(normalizer="min_max") -# 3. contamination of the data with MCAR scenario -infected_data = ts_1.Contaminate.mcar(ts_1.data, series_impacted=0.4, missing_rate=0.2, use_seed=True) +# 3. 
contamination of the data with MCAR pattern
+incomp_data = ts_1.Contamination.mcar(ts_1.data, series_rate=0.4, missing_rate=0.2, seed=True)
# [OPTIONAL] you can plot your raw data / print the contamination
-ts_1.print(limit=10)
-ts_1.plot(ts_1.data, infected_data, title="contamination", max_series=1, save_path="./assets")
\ No newline at end of file
+ts_1.print(limit_timestamps=12, limit_series=7)
+ts_1.plot(ts_1.data, incomp_data, max_series=4, save_path="./assets")
diff --git a/imputegap/runner_datasets.py b/imputegap/runner_datasets.py
index 4d03375..2302e22 100644
--- a/imputegap/runner_datasets.py
+++ b/imputegap/runner_datasets.py
@@ -8,10 +8,10 @@
# small one
data_n = TimeSeries()
data_n.load_timeseries(data=utils.search_path(dataset), max_series=20, max_values=400, header=False)
- data_n.plot(raw_data=data_n.data, title="02_"+dataset + " - raw data 20x400", max_series=20, save_path="./dataset/docs/" + dataset + "", display=False)
- data_n.plot(raw_data=data_n.data, title="03_"+dataset + " - raw data 01x400", max_series=1, save_path="./dataset/docs/" + dataset + "", display=False)
+ data_n.plot(input_data=data_n.data, max_series=20, save_path="./dataset/docs/" + dataset + "", display=False)
+ data_n.plot(input_data=data_n.data, max_series=1, save_path="./dataset/docs/" + dataset + "", display=False)
data_n.normalize(normalizer="min_max")
- data_n.plot(raw_data=data_n.data, title="04_"+dataset + " - norm min_max data 01x400", max_series=20, save_path="./dataset/docs/" + dataset + "", display=False)
+ data_n.plot(input_data=data_n.data, max_series=20, save_path="./dataset/docs/" + dataset + "", display=False)
# 5x one
data_n = TimeSeries()
@@ -29,14 +29,14 @@
max_value = 400
data_n.load_timeseries(data=utils.search_path(dataset), max_series=max_series, max_values=max_value, header=False)
- data_n.plot(raw_data=data_n.data, title="1_"+dataset + " - raw data - "+str(max_series)+"x"+str(max_value), save_path="./dataset/docs/" + dataset + "", display=False)
+ data_n.plot(input_data=data_n.data, save_path="./dataset/docs/" + dataset + "", display=False)
data_n.normalize(normalizer="min_max")
- data_n.plot(raw_data=data_n.data, title="2_" + dataset + " - norm min max "+str(max_series)+"x"+str(max_value), save_path="./dataset/docs/" + dataset + "", display=False)
+ data_n.plot(input_data=data_n.data, save_path="./dataset/docs/" + dataset + "", display=False)
# full one
data_n = TimeSeries()
data_n.load_timeseries(data=utils.search_path(dataset), header=False)
- data_n.plot(raw_data=data_n.data, title="01_"+dataset + " - raw data - NxM", save_path="./dataset/docs/" + dataset + "", display=False)
+ data_n.plot(input_data=data_n.data, save_path="./dataset/docs/" + dataset + "", display=False)
categories, features = Explainer.load_configuration()
characteristics, descriptions = Explainer.extract_features(data=data_n.data, features_categories=categories, features_list=features, do_catch24=False)
diff --git a/imputegap/runner_explainer.py b/imputegap/runner_explainer.py
index 302d64c..f99c0d7 100644
--- a/imputegap/runner_explainer.py
+++ b/imputegap/runner_explainer.py
@@ -9,7 +9,12 @@
ts_1.load_timeseries(utils.search_path("chlorine"))
# 3. call the explanation of your dataset with a specific algorithm to gain insight on the Imputation results
-shap_values, shap_details = Explainer.shap_explainer(raw_data=ts_1.data, missing_rate=0.25, limitation=50, splitter=35, file_name="chlorine", algorithm="cdrec")
+shap_values, shap_details = Explainer.shap_explainer(input_data=ts_1.data,
+ missing_rate=0.25,
+ limit_ratio=1,
+ split_ratio=0.7,
+ file_name="chlorine",
+ algorithm="cdrec")
# [OPTIONAL] print the results with the impact of each feature.
Explainer.print(shap_values, shap_details)
\ No newline at end of file
diff --git a/imputegap/runner_imputation.py b/imputegap/runner_imputation.py
index fbc0e21..ffd4b39 100644
--- a/imputegap/runner_imputation.py
+++ b/imputegap/runner_imputation.py
@@ -10,10 +10,10 @@
ts_1.normalize(normalizer="min_max")
# 3. contamination of the data
-infected_data = ts_1.Contaminate.mcar(ts_1.data)
+incomp_data = ts_1.Contamination.mcar(ts_1.data)
# [OPTIONAL] save your results in a new Time Series object
-ts_2 = TimeSeries().import_matrix(infected_data)
+ts_2 = TimeSeries().import_matrix(incomp_data)
# 4. imputation of the contaminated data
# choice of the algorithm, and their parameters (default, automl, or defined by the user)
@@ -25,11 +25,11 @@
# >>> cdrec.impute(params={"rank": 5, "epsilon": 0.01, "iterations": 100})
# [OPTIONAL] save your results in a new Time Series object
-ts_3 = TimeSeries().import_matrix(cdrec.imputed_matrix)
+ts_3 = TimeSeries().import_matrix(cdrec.recov_data)
# 5. score the imputation with the raw_data
cdrec.score(ts_1.data, ts_3.data)
# 6. display the results
ts_3.print_results(cdrec.metrics, algorithm="cdrec")
-ts_3.plot(raw_data=ts_1.data, infected_data=ts_2.data, imputed_data=ts_3.data, title="imputation", max_series=1, save_path="./assets", display=True)
+ts_3.plot(input_data=ts_1.data, incomp_data=ts_2.data, recov_data=ts_3.data, max_series=1, save_path="./assets")
diff --git a/imputegap/runner_loading.py b/imputegap/runner_loading.py
index b94edb3..9df0ee9 100644
--- a/imputegap/runner_loading.py
+++ b/imputegap/runner_loading.py
@@ -9,5 +9,5 @@
ts_1.normalize(normalizer="z_score")
# [OPTIONAL] you can plot your raw data / print the information
-ts_1.plot(raw_data=ts_1.data, title="raw data", max_series=10, max_values=100, save_path="./imputegap/assets")
-ts_1.print(limit=10)
\ No newline at end of file
+ts_1.plot(input_data=ts_1.data, max_series=10, max_values=100, save_path="./imputegap/assets")
+ts_1.print(limit_series=10)
\ No newline at end of file
diff --git a/imputegap/runner_optimization.py b/imputegap/runner_optimization.py
index e3eaa2f..e0f25c4 100644
--- a/imputegap/runner_optimization.py
+++ b/imputegap/runner_optimization.py
@@ -10,18 +10,18 @@
ts_1.normalize(normalizer="min_max")
# 3. contamination of the data
-infected_data = ts_1.Contaminate.mcar(ts_1.data)
+miss_matrix = ts_1.Contamination.mcar(ts_1.data)
# 4. imputation of the contaminated data
# imputation with AutoML which will discover the optimal hyperparameters for your dataset and your algorithm
-cdrec = Imputation.MatrixCompletion.CDRec(infected_data).impute(user_defined=False, params={"ground_truth": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 3}})
+cdrec = Imputation.MatrixCompletion.CDRec(miss_matrix).impute(user_def=False, params={"input_data": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 3}})
# 5. score the imputation with the raw_data
-cdrec.score(ts_1.data, cdrec.imputed_matrix)
+cdrec.score(ts_1.data, cdrec.recov_data)
# 6. display the results
ts_1.print_results(cdrec.metrics)
-ts_1.plot(raw_data=ts_1.data, infected_data=infected_data, imputed_data=cdrec.imputed_matrix, title="imputation", max_series=1, save_path="./assets", display=True)
+ts_1.plot(input_data=ts_1.data, incomp_data=miss_matrix, recov_data=cdrec.recov_data, max_series=1, save_path="./assets", display=True)
# 7. save hyperparameters
utils.save_optimization(optimal_params=cdrec.parameters, algorithm="cdrec", dataset="eeg", optimizer="t")
\ No newline at end of file
diff --git a/imputegap/tools/__pycache__/utils.cpython-312.pyc b/imputegap/tools/__pycache__/utils.cpython-312.pyc
index 354cf48..31921fe 100644
Binary files a/imputegap/tools/__pycache__/utils.cpython-312.pyc and b/imputegap/tools/__pycache__/utils.cpython-312.pyc differ
diff --git a/imputegap/tools/utils.py b/imputegap/tools/utils.py
index 09eb8a2..8b2e8f9 100644
--- a/imputegap/tools/utils.py
+++ b/imputegap/tools/utils.py
@@ -140,14 +140,14 @@ def load_parameters(query: str = "default", algorithm: str = "cdrec", dataset: s
return (hidden_dim, learning_rate, iterations)
elif algorithm == "greedy":
n_calls = int(config['greedy']['n_calls'])
- selected_metrics = config['greedy']['selected_metrics']
- return (n_calls, [selected_metrics])
+ metrics = config['greedy']['metrics']
+ return (n_calls, [metrics])
elif algorithm == "bayesian":
n_calls = int(config['bayesian']['n_calls'])
n_random_starts = int(config['bayesian']['n_random_starts'])
acq_func = str(config['bayesian']['acq_func'])
- selected_metrics = config['bayesian']['selected_metrics']
- return (n_calls, n_random_starts, acq_func, [selected_metrics])
+ metrics = config['bayesian']['metrics']
+ return (n_calls, n_random_starts, acq_func, [metrics])
elif algorithm == "pso":
n_particles = int(config['pso']['n_particles'])
c1 = float(config['pso']['c1'])
@@ -155,14 +155,14 @@ def load_parameters(query: str = "default", algorithm: str = "cdrec", dataset: s
w = float(config['pso']['w'])
iterations = int(config['pso']['iterations'])
n_processes = int(config['pso']['n_processes'])
- selected_metrics = config['pso']['selected_metrics']
- return (n_particles, c1, c2, w, iterations, n_processes, [selected_metrics])
+ metrics = config['pso']['metrics']
+ return (n_particles, c1, c2, w, iterations, n_processes, [metrics])
elif algorithm == "sh":
num_configs = int(config['sh']['num_configs'])
num_iterations = int(config['sh']['num_iterations'])
reduction_factor = int(config['sh']['reduction_factor'])
- selected_metrics = config['sh']['selected_metrics']
- return (num_configs, num_iterations, reduction_factor, [selected_metrics])
+ metrics = config['sh']['metrics']
+ return (num_configs, num_iterations, reduction_factor, [metrics])
elif algorithm == "colors":
colors = config['colors']['plot']
return colors
diff --git a/params/optimal_parameters_e_eeg-alcohol_cdrec.toml b/params/optimal_parameters_e_eeg-alcohol_cdrec.toml
index b5c579c..78e041c 100644
--- a/params/optimal_parameters_e_eeg-alcohol_cdrec.toml
+++ b/params/optimal_parameters_e_eeg-alcohol_cdrec.toml
@@ -1,4 +1,4 @@
[cdrec]
-rank = 2
-epsilon = 2.9103006766169248e-5
-iteration = 976
+rank = 9
+epsilon = 0.0010055574182674504
+iteration = 489
diff --git a/params/optimal_parameters_e_eeg-alcohol_stmvl.toml b/params/optimal_parameters_e_eeg-alcohol_stmvl.toml
index baa27de..2bf510f 100644
--- a/params/optimal_parameters_e_eeg-alcohol_stmvl.toml
+++ b/params/optimal_parameters_e_eeg-alcohol_stmvl.toml
@@ -1,4 +1,4 @@
[stmvl]
-window_size = 40
-gamma = 0.22504996184885026
-alpha = 6
+window_size = 70 +gamma = 1.0621347452712664e-6 +alpha = 2 diff --git a/requirements.txt b/requirements.txt index b7521bd..9a713ec 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,6 +10,7 @@ pycatch22==0.4.5 scikit-optimize==0.10.2 pyswarms==1.3.0 pytest==8.3.3 +xlsxwriter==3.2.0 types-toml types-setuptools wheel \ No newline at end of file diff --git a/setup.py b/setup.py index 7576956..69761b5 100644 --- a/setup.py +++ b/setup.py @@ -29,6 +29,7 @@ include_package_data=True, package_data={ 'imputegap': [ + 'assets/*.png', # Include logo 'env/*.toml', # Include TOML files from env 'params/*.toml', # Include TOML files from params 'dataset/*.txt', # Include TXT files from dataset diff --git a/tests/__pycache__/test_benchmarking.cpython-312-pytest-8.3.3.pyc b/tests/__pycache__/test_benchmarking.cpython-312-pytest-8.3.3.pyc index 7649c4b..65c55e9 100644 Binary files a/tests/__pycache__/test_benchmarking.cpython-312-pytest-8.3.3.pyc and b/tests/__pycache__/test_benchmarking.cpython-312-pytest-8.3.3.pyc differ diff --git a/tests/__pycache__/test_contamination_blackout.cpython-312-pytest-8.3.3.pyc b/tests/__pycache__/test_contamination_blackout.cpython-312-pytest-8.3.3.pyc index 93a3e7b..f2c4e64 100644 Binary files a/tests/__pycache__/test_contamination_blackout.cpython-312-pytest-8.3.3.pyc and b/tests/__pycache__/test_contamination_blackout.cpython-312-pytest-8.3.3.pyc differ diff --git a/tests/__pycache__/test_contamination_mcar.cpython-312-pytest-8.3.3.pyc b/tests/__pycache__/test_contamination_mcar.cpython-312-pytest-8.3.3.pyc index 7257d8e..819ef17 100644 Binary files a/tests/__pycache__/test_contamination_mcar.cpython-312-pytest-8.3.3.pyc and b/tests/__pycache__/test_contamination_mcar.cpython-312-pytest-8.3.3.pyc differ diff --git a/tests/__pycache__/test_contamination_mp.cpython-312-pytest-8.3.3.pyc b/tests/__pycache__/test_contamination_mp.cpython-312-pytest-8.3.3.pyc index 01730a9..54f7f0b 100644 Binary files a/tests/__pycache__/test_contamination_mp.cpython-312-pytest-8.3.3.pyc and b/tests/__pycache__/test_contamination_mp.cpython-312-pytest-8.3.3.pyc differ diff --git a/tests/__pycache__/test_exception.cpython-312-pytest-8.3.3.pyc b/tests/__pycache__/test_exception.cpython-312-pytest-8.3.3.pyc index 06b48fe..74356a3 100644 Binary files a/tests/__pycache__/test_exception.cpython-312-pytest-8.3.3.pyc and b/tests/__pycache__/test_exception.cpython-312-pytest-8.3.3.pyc differ diff --git a/tests/__pycache__/test_explainer.cpython-312-pytest-8.3.3.pyc b/tests/__pycache__/test_explainer.cpython-312-pytest-8.3.3.pyc index 9bb3468..de56a24 100644 Binary files a/tests/__pycache__/test_explainer.cpython-312-pytest-8.3.3.pyc and b/tests/__pycache__/test_explainer.cpython-312-pytest-8.3.3.pyc differ diff --git a/tests/__pycache__/test_imputation.cpython-312-pytest-8.3.3.pyc b/tests/__pycache__/test_imputation.cpython-312-pytest-8.3.3.pyc index 15435cf..d14a711 100644 Binary files a/tests/__pycache__/test_imputation.cpython-312-pytest-8.3.3.pyc and b/tests/__pycache__/test_imputation.cpython-312-pytest-8.3.3.pyc differ diff --git a/tests/__pycache__/test_imputation_cdrec.cpython-312-pytest-8.3.3.pyc b/tests/__pycache__/test_imputation_cdrec.cpython-312-pytest-8.3.3.pyc index 6c62056..8393375 100644 Binary files a/tests/__pycache__/test_imputation_cdrec.cpython-312-pytest-8.3.3.pyc and b/tests/__pycache__/test_imputation_cdrec.cpython-312-pytest-8.3.3.pyc differ diff --git a/tests/__pycache__/test_imputation_iim.cpython-312-pytest-8.3.3.pyc 
b/tests/__pycache__/test_imputation_iim.cpython-312-pytest-8.3.3.pyc index c08863b..4fa8990 100644 Binary files a/tests/__pycache__/test_imputation_iim.cpython-312-pytest-8.3.3.pyc and b/tests/__pycache__/test_imputation_iim.cpython-312-pytest-8.3.3.pyc differ diff --git a/tests/__pycache__/test_imputation_mrnn.cpython-312-pytest-8.3.3.pyc b/tests/__pycache__/test_imputation_mrnn.cpython-312-pytest-8.3.3.pyc index ca48107..98ce14a 100644 Binary files a/tests/__pycache__/test_imputation_mrnn.cpython-312-pytest-8.3.3.pyc and b/tests/__pycache__/test_imputation_mrnn.cpython-312-pytest-8.3.3.pyc differ diff --git a/tests/__pycache__/test_imputation_stmvl.cpython-312-pytest-8.3.3.pyc b/tests/__pycache__/test_imputation_stmvl.cpython-312-pytest-8.3.3.pyc index ce811ba..996a857 100644 Binary files a/tests/__pycache__/test_imputation_stmvl.cpython-312-pytest-8.3.3.pyc and b/tests/__pycache__/test_imputation_stmvl.cpython-312-pytest-8.3.3.pyc differ diff --git a/tests/__pycache__/test_loading.cpython-312-pytest-8.3.3.pyc b/tests/__pycache__/test_loading.cpython-312-pytest-8.3.3.pyc index a2427be..a394062 100644 Binary files a/tests/__pycache__/test_loading.cpython-312-pytest-8.3.3.pyc and b/tests/__pycache__/test_loading.cpython-312-pytest-8.3.3.pyc differ diff --git a/tests/__pycache__/test_opti_bayesian_cdrec.cpython-312-pytest-8.3.3.pyc b/tests/__pycache__/test_opti_bayesian_cdrec.cpython-312-pytest-8.3.3.pyc index 8accbf8..7e9b0ce 100644 Binary files a/tests/__pycache__/test_opti_bayesian_cdrec.cpython-312-pytest-8.3.3.pyc and b/tests/__pycache__/test_opti_bayesian_cdrec.cpython-312-pytest-8.3.3.pyc differ diff --git a/tests/__pycache__/test_opti_bayesian_cdrec_eeg.cpython-312-pytest-8.3.3.pyc b/tests/__pycache__/test_opti_bayesian_cdrec_eeg.cpython-312-pytest-8.3.3.pyc index 315c0bf..7b6d6b6 100644 Binary files a/tests/__pycache__/test_opti_bayesian_cdrec_eeg.cpython-312-pytest-8.3.3.pyc and b/tests/__pycache__/test_opti_bayesian_cdrec_eeg.cpython-312-pytest-8.3.3.pyc differ diff --git a/tests/__pycache__/test_opti_bayesian_iim.cpython-312-pytest-8.3.3.pyc b/tests/__pycache__/test_opti_bayesian_iim.cpython-312-pytest-8.3.3.pyc index beb2877..df07886 100644 Binary files a/tests/__pycache__/test_opti_bayesian_iim.cpython-312-pytest-8.3.3.pyc and b/tests/__pycache__/test_opti_bayesian_iim.cpython-312-pytest-8.3.3.pyc differ diff --git a/tests/__pycache__/test_opti_bayesian_mrnn.cpython-312-pytest-8.3.3.pyc b/tests/__pycache__/test_opti_bayesian_mrnn.cpython-312-pytest-8.3.3.pyc index 61386eb..0a8b0dc 100644 Binary files a/tests/__pycache__/test_opti_bayesian_mrnn.cpython-312-pytest-8.3.3.pyc and b/tests/__pycache__/test_opti_bayesian_mrnn.cpython-312-pytest-8.3.3.pyc differ diff --git a/tests/__pycache__/test_opti_bayesian_stmvl.cpython-312-pytest-8.3.3.pyc b/tests/__pycache__/test_opti_bayesian_stmvl.cpython-312-pytest-8.3.3.pyc index 6388b48..e112783 100644 Binary files a/tests/__pycache__/test_opti_bayesian_stmvl.cpython-312-pytest-8.3.3.pyc and b/tests/__pycache__/test_opti_bayesian_stmvl.cpython-312-pytest-8.3.3.pyc differ diff --git a/tests/__pycache__/test_opti_greedy_cdrec.cpython-312-pytest-8.3.3.pyc b/tests/__pycache__/test_opti_greedy_cdrec.cpython-312-pytest-8.3.3.pyc index 07a6100..9b616f2 100644 Binary files a/tests/__pycache__/test_opti_greedy_cdrec.cpython-312-pytest-8.3.3.pyc and b/tests/__pycache__/test_opti_greedy_cdrec.cpython-312-pytest-8.3.3.pyc differ diff --git a/tests/__pycache__/test_opti_pso_cdrec.cpython-312-pytest-8.3.3.pyc 
b/tests/__pycache__/test_opti_pso_cdrec.cpython-312-pytest-8.3.3.pyc index 198b03c..c2e88e5 100644 Binary files a/tests/__pycache__/test_opti_pso_cdrec.cpython-312-pytest-8.3.3.pyc and b/tests/__pycache__/test_opti_pso_cdrec.cpython-312-pytest-8.3.3.pyc differ diff --git a/tests/__pycache__/test_opti_sh_cdrec.cpython-312-pytest-8.3.3.pyc b/tests/__pycache__/test_opti_sh_cdrec.cpython-312-pytest-8.3.3.pyc index e4b4406..63a4148 100644 Binary files a/tests/__pycache__/test_opti_sh_cdrec.cpython-312-pytest-8.3.3.pyc and b/tests/__pycache__/test_opti_sh_cdrec.cpython-312-pytest-8.3.3.pyc differ diff --git a/tests/__pycache__/test_pipeline.cpython-312-pytest-8.3.3.pyc b/tests/__pycache__/test_pipeline.cpython-312-pytest-8.3.3.pyc new file mode 100644 index 0000000..31e009e Binary files /dev/null and b/tests/__pycache__/test_pipeline.cpython-312-pytest-8.3.3.pyc differ diff --git a/tests/assets/25_01_07_16_44_36_plot.jpg b/tests/assets/25_01_07_16_44_36_plot.jpg new file mode 100644 index 0000000..25bbbdd Binary files /dev/null and b/tests/assets/25_01_07_16_44_36_plot.jpg differ diff --git a/tests/assets/25_01_07_16_52_29_plot.jpg b/tests/assets/25_01_07_16_52_29_plot.jpg new file mode 100644 index 0000000..6636b9b Binary files /dev/null and b/tests/assets/25_01_07_16_52_29_plot.jpg differ diff --git a/tests/assets/shap/chlorine_cdrec_DTL_Beeswarm.png b/tests/assets/shap/chlorine_cdrec_DTL_Beeswarm.png index 1b94847..8390caf 100644 Binary files a/tests/assets/shap/chlorine_cdrec_DTL_Beeswarm.png and b/tests/assets/shap/chlorine_cdrec_DTL_Beeswarm.png differ diff --git a/tests/assets/shap/chlorine_cdrec_DTL_Waterfall.png b/tests/assets/shap/chlorine_cdrec_DTL_Waterfall.png index 826dda3..078055d 100644 Binary files a/tests/assets/shap/chlorine_cdrec_DTL_Waterfall.png and b/tests/assets/shap/chlorine_cdrec_DTL_Waterfall.png differ diff --git a/tests/assets/shap/chlorine_cdrec_results.txt b/tests/assets/shap/chlorine_cdrec_results.txt index 2265b48..bccd693 100644 --- a/tests/assets/shap/chlorine_cdrec_results.txt +++ b/tests/assets/shap/chlorine_cdrec_results.txt @@ -1,22 +1,22 @@ -Feature : 1 cdrec with a score of 90.54 Geometry 10-bin histogram mode DN_HistogramMode_10 -Feature : 12 cdrec with a score of 3.99 Correlation Change in autocorrelation timescale after incremental differencing FC_LocalSimple_mean1_tauresrat -Feature : 5 cdrec with a score of 3.83 Correlation Time reversibility CO_trev_1_num -Feature : 18 cdrec with a score of 0.57 Geometry Rescaled range fluctuation analysis (low-scale scaling) SC_FluctAnal_2_rsrangefit_50_1_logi_prop_r1 -Feature : 13 cdrec with a score of 0.37 Geometry Positive outlier timing DN_OutlierInclude_p_001_mdrmd -Feature : 3 cdrec with a score of 0.33 Correlation First minimum of the ACF CO_FirstMin_ac -Feature : 14 cdrec with a score of 0.29 Geometry Negative outlier timing DN_OutlierInclude_n_001_mdrmd -Feature : 6 cdrec with a score of 0.09 Geometry Proportion of high incremental changes in the series MD_hrv_classic_pnn40 -Feature : 0 cdrec with a score of 0.0 Geometry 5-bin histogram mode DN_HistogramMode_5 -Feature : 2 cdrec with a score of 0.0 Correlation First 1/e crossing of the ACF CO_f1ecac -Feature : 4 cdrec with a score of 0.0 Correlation Histogram-based automutual information (lag 2, 5 bins) CO_HistogramAMI_even_2_5 +Feature : 1 cdrec with a score of 68.3 Geometry 10-bin histogram mode DN_HistogramMode_10 +Feature : 12 cdrec with a score of 9.34 Correlation Change in autocorrelation timescale after incremental differencing 
FC_LocalSimple_mean1_tauresrat +Feature : 5 cdrec with a score of 4.68 Correlation Time reversibility CO_trev_1_num +Feature : 6 cdrec with a score of 4.63 Geometry Proportion of high incremental changes in the series MD_hrv_classic_pnn40 +Feature : 13 cdrec with a score of 3.38 Geometry Positive outlier timing DN_OutlierInclude_p_001_mdrmd +Feature : 4 cdrec with a score of 2.68 Correlation Histogram-based automutual information (lag 2, 5 bins) CO_HistogramAMI_even_2_5 +Feature : 2 cdrec with a score of 2.28 Correlation First 1/e crossing of the ACF CO_f1ecac +Feature : 14 cdrec with a score of 1.37 Geometry Negative outlier timing DN_OutlierInclude_n_001_mdrmd +Feature : 17 cdrec with a score of 0.66 Trend Entropy of successive pairs in symbolized series SB_MotifThree_quantile_hh +Feature : 0 cdrec with a score of 0.65 Geometry 5-bin histogram mode DN_HistogramMode_5 +Feature : 21 cdrec with a score of 0.62 Trend Error of 3-point rolling mean forecast FC_LocalSimple_mean3_stderr +Feature : 8 cdrec with a score of 0.5 Geometry Transition matrix column variance SB_TransitionMatrix_3ac_sumdiagcov +Feature : 10 cdrec with a score of 0.5 Geometry Goodness of exponential fit to embedding distance distribution CO_Embed2_Dist_tau_d_expfit_meandiff +Feature : 15 cdrec with a score of 0.42 Transformation Power in the lowest 20% of frequencies SP_Summaries_welch_rect_area_5_1 +Feature : 3 cdrec with a score of 0.0 Correlation First minimum of the ACF CO_FirstMin_ac Feature : 7 cdrec with a score of 0.0 Geometry Longest stretch of above-mean values SB_BinaryStats_mean_longstretch1 -Feature : 8 cdrec with a score of 0.0 Geometry Transition matrix column variance SB_TransitionMatrix_3ac_sumdiagcov Feature : 9 cdrec with a score of 0.0 Trend Wangs periodicity metric PD_PeriodicityWang_th0_01 -Feature : 10 cdrec with a score of 0.0 Geometry Goodness of exponential fit to embedding distance distribution CO_Embed2_Dist_tau_d_expfit_meandiff Feature : 11 cdrec with a score of 0.0 Correlation First minimum of the AMI function IN_AutoMutualInfoStats_40_gaussian_fmmi -Feature : 15 cdrec with a score of 0.0 Transformation Power in the lowest 20% of frequencies SP_Summaries_welch_rect_area_5_1 Feature : 16 cdrec with a score of 0.0 Geometry Longest stretch of decreasing values SB_BinaryStats_diff_longstretch0 -Feature : 17 cdrec with a score of 0.0 Trend Entropy of successive pairs in symbolized series SB_MotifThree_quantile_hh +Feature : 18 cdrec with a score of 0.0 Geometry Rescaled range fluctuation analysis (low-scale scaling) SC_FluctAnal_2_rsrangefit_50_1_logi_prop_r1 Feature : 19 cdrec with a score of 0.0 Geometry Detrended fluctuation analysis (low-scale scaling) SC_FluctAnal_2_dfa_50_1_2_logi_prop_r1 Feature : 20 cdrec with a score of 0.0 Transformation Centroid frequency SP_Summaries_welch_rect_centroid -Feature : 21 cdrec with a score of 0.0 Trend Error of 3-point rolling mean forecast FC_LocalSimple_mean3_stderr diff --git a/tests/assets/shap/chlorine_cdrec_shap_aggregate_plot.png b/tests/assets/shap/chlorine_cdrec_shap_aggregate_plot.png index 136ae00..67e9d33 100644 Binary files a/tests/assets/shap/chlorine_cdrec_shap_aggregate_plot.png and b/tests/assets/shap/chlorine_cdrec_shap_aggregate_plot.png differ diff --git a/tests/assets/shap/chlorine_cdrec_shap_aggregate_reverse_plot.png b/tests/assets/shap/chlorine_cdrec_shap_aggregate_reverse_plot.png index 842ad10..e7996b2 100644 Binary files a/tests/assets/shap/chlorine_cdrec_shap_aggregate_reverse_plot.png and 
b/tests/assets/shap/chlorine_cdrec_shap_aggregate_reverse_plot.png differ diff --git a/tests/assets/shap/chlorine_cdrec_shap_correlation_plot.png b/tests/assets/shap/chlorine_cdrec_shap_correlation_plot.png index e83e663..52b72b1 100644 Binary files a/tests/assets/shap/chlorine_cdrec_shap_correlation_plot.png and b/tests/assets/shap/chlorine_cdrec_shap_correlation_plot.png differ diff --git a/tests/assets/shap/chlorine_cdrec_shap_geometry_plot.png b/tests/assets/shap/chlorine_cdrec_shap_geometry_plot.png index f0eca2d..032a72f 100644 Binary files a/tests/assets/shap/chlorine_cdrec_shap_geometry_plot.png and b/tests/assets/shap/chlorine_cdrec_shap_geometry_plot.png differ diff --git a/tests/assets/shap/chlorine_cdrec_shap_plot.png b/tests/assets/shap/chlorine_cdrec_shap_plot.png index b64bb11..ea1d7ef 100644 Binary files a/tests/assets/shap/chlorine_cdrec_shap_plot.png and b/tests/assets/shap/chlorine_cdrec_shap_plot.png differ diff --git a/tests/assets/shap/chlorine_cdrec_shap_reverse_plot.png b/tests/assets/shap/chlorine_cdrec_shap_reverse_plot.png index 434a293..6928111 100644 Binary files a/tests/assets/shap/chlorine_cdrec_shap_reverse_plot.png and b/tests/assets/shap/chlorine_cdrec_shap_reverse_plot.png differ diff --git a/tests/assets/shap/chlorine_cdrec_shap_transformation_plot.png b/tests/assets/shap/chlorine_cdrec_shap_transformation_plot.png index 9e1c31a..7ba52da 100644 Binary files a/tests/assets/shap/chlorine_cdrec_shap_transformation_plot.png and b/tests/assets/shap/chlorine_cdrec_shap_transformation_plot.png differ diff --git a/tests/assets/shap/chlorine_cdrec_shap_trend_plot.png b/tests/assets/shap/chlorine_cdrec_shap_trend_plot.png index 75a03f7..238b2e1 100644 Binary files a/tests/assets/shap/chlorine_cdrec_shap_trend_plot.png and b/tests/assets/shap/chlorine_cdrec_shap_trend_plot.png differ diff --git a/tests/params/optimal_parameters_t_eeg_cdrec.toml b/tests/params/optimal_parameters_t_eeg_cdrec.toml new file mode 100644 index 0000000..e47a8de --- /dev/null +++ b/tests/params/optimal_parameters_t_eeg_cdrec.toml @@ -0,0 +1,4 @@ +[cdrec] +rank = 6 +epsilon = 7.662399122383144e-5 +iteration = 329 diff --git a/tests/report.log b/tests/report.log index 22a03f8..a2fbf2c 100644 --- a/tests/report.log +++ b/tests/report.log @@ -1024,3 +1024,1527 @@ 0.00000000e+00, 0.00000000e+00, 5.30719090e-05, 0.00000000e+00]) 2024-11-19 14:01:18,837 - pyswarms.single.global_best - INFO - Optimize for 10 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9} 2024-11-19 14:01:19,620 - pyswarms.single.global_best - INFO - Optimization finished | best cost: 2.919337618172575, best pos: [3.00104154e+00 2.62311705e-01 5.20573580e+02] +2024-12-03 14:47:57,917 - shap - INFO - num_full_subsets = 2 +2024-12-03 14:47:57,919 - shap - INFO - remaining_weight_vector = array([0.22727203, 0.18465852, 0.16115653, 0.14772682, 0.14069221, + 0.13849389]) +2024-12-03 14:47:57,921 - shap - INFO - num_paired_subset_sizes = 7 +2024-12-03 14:47:57,937 - shap - INFO - weight_left = 0.5063344810024111 +2024-12-03 14:47:57,970 - shap - INFO - np.sum(w_aug) = 15.999999999999998 +2024-12-03 14:47:57,972 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2024-12-03 14:47:58,029 - shap - INFO - phi = array([ 0.00000000e+00, -1.50023544e-02, 0.00000000e+00, -2.02580848e-05, + 0.00000000e+00, -5.09701368e-04, 0.00000000e+00, 0.00000000e+00, + 0.00000000e+00, 3.61519831e-04, -5.85833154e-05, -1.95096124e-05, + 0.00000000e+00, 0.00000000e+00, 4.17635045e-05, 0.00000000e+00]) +2024-12-03 14:47:58,057 - shap - INFO - 
num_full_subsets = 2 +2024-12-03 14:47:58,058 - shap - INFO - remaining_weight_vector = array([0.22727203, 0.18465852, 0.16115653, 0.14772682, 0.14069221, + 0.13849389]) +2024-12-03 14:47:58,059 - shap - INFO - num_paired_subset_sizes = 7 +2024-12-03 14:47:58,103 - shap - INFO - weight_left = 0.5063344810024111 +2024-12-03 14:47:58,136 - shap - INFO - np.sum(w_aug) = 16.0 +2024-12-03 14:47:58,138 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2024-12-03 14:47:58,170 - shap - INFO - phi = array([ 0.00000000e+00, 6.98755365e-03, 0.00000000e+00, 5.50026649e-05, + 0.00000000e+00, 3.23477891e-04, 0.00000000e+00, 0.00000000e+00, + 0.00000000e+00, 2.79513972e-04, 2.95546563e-05, -1.96320751e-05, + 0.00000000e+00, 0.00000000e+00, 3.64300667e-05, 0.00000000e+00]) +2024-12-03 14:47:58,189 - shap - INFO - num_full_subsets = 2 +2024-12-03 14:47:58,190 - shap - INFO - remaining_weight_vector = array([0.22727203, 0.18465852, 0.16115653, 0.14772682, 0.14069221, + 0.13849389]) +2024-12-03 14:47:58,191 - shap - INFO - num_paired_subset_sizes = 7 +2024-12-03 14:47:58,238 - shap - INFO - weight_left = 0.5063344810024111 +2024-12-03 14:47:58,280 - shap - INFO - np.sum(w_aug) = 15.999999999999998 +2024-12-03 14:47:58,282 - shap - INFO - np.sum(self.kernelWeights) = 1.0 +2024-12-03 14:47:58,310 - shap - INFO - phi = array([ 0.00000000e+00, 7.83514370e-03, 0.00000000e+00, 5.74602732e-05, + 0.00000000e+00, 3.20042444e-04, 0.00000000e+00, 0.00000000e+00, + 0.00000000e+00, 2.79854610e-04, 3.35067254e-05, -2.12039317e-05, + 0.00000000e+00, 0.00000000e+00, 3.39326044e-05, 0.00000000e+00]) +2024-12-03 14:47:58,328 - shap - INFO - num_full_subsets = 2 +2024-12-03 14:47:58,330 - shap - INFO - remaining_weight_vector = array([0.22727203, 0.18465852, 0.16115653, 0.14772682, 0.14069221, + 0.13849389]) +2024-12-03 14:47:58,331 - shap - INFO - num_paired_subset_sizes = 7 +2024-12-03 14:47:58,381 - shap - INFO - weight_left = 0.5063344810024111 +2024-12-03 14:47:58,426 - shap - INFO - np.sum(w_aug) = 16.0 +2024-12-03 14:47:58,428 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2024-12-03 14:47:58,460 - shap - INFO - phi = array([ 0.00000000e+00, 7.83300758e-03, 0.00000000e+00, -3.77635568e-05, + 0.00000000e+00, 3.22798631e-04, 0.00000000e+00, 0.00000000e+00, + 0.00000000e+00, 2.06331673e-04, 3.42744327e-05, -2.10990653e-05, + 0.00000000e+00, 0.00000000e+00, 3.66266024e-05, 0.00000000e+00]) +2024-12-03 14:47:58,515 - shap - INFO - num_full_subsets = 2 +2024-12-03 14:47:58,519 - shap - INFO - remaining_weight_vector = array([0.22727203, 0.18465852, 0.16115653, 0.14772682, 0.14069221, + 0.13849389]) +2024-12-03 14:47:58,522 - shap - INFO - num_paired_subset_sizes = 7 +2024-12-03 14:47:58,581 - shap - INFO - weight_left = 0.5063344810024111 +2024-12-03 14:47:58,642 - shap - INFO - np.sum(w_aug) = 15.999999999999996 +2024-12-03 14:47:58,645 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2024-12-03 14:47:58,775 - shap - INFO - phi = array([ 0.00000000e+00, 7.85856112e-03, 0.00000000e+00, -3.68080669e-05, + 0.00000000e+00, 3.21980994e-04, -3.16954647e-05, 0.00000000e+00, + 0.00000000e+00, 2.10353156e-04, 3.22765781e-05, -1.67189690e-05, + 0.00000000e+00, 0.00000000e+00, 3.62269527e-05, 0.00000000e+00]) +2024-12-03 14:47:58,807 - shap - INFO - num_full_subsets = 2 +2024-12-03 14:47:58,817 - shap - INFO - remaining_weight_vector = array([0.22727203, 0.18465852, 0.16115653, 0.14772682, 0.14069221, + 0.13849389]) +2024-12-03 14:47:58,819 - shap - INFO - num_paired_subset_sizes = 7 
+2024-12-03 14:47:58,883 - shap - INFO - weight_left = 0.5063344810024111 +2024-12-03 14:47:58,943 - shap - INFO - np.sum(w_aug) = 15.999999999999996 +2024-12-03 14:47:58,946 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2024-12-03 14:47:59,136 - shap - INFO - phi = array([ 0.00000000e+00, -1.54999197e-02, 0.00000000e+00, -1.76832343e-05, + 0.00000000e+00, -7.80192307e-04, 3.02281224e-05, 0.00000000e+00, + 0.00000000e+00, -1.35021310e-03, -5.77968265e-05, 9.95378354e-05, + 0.00000000e+00, 0.00000000e+00, -1.95827178e-04, 0.00000000e+00]) +2024-12-03 14:47:59,180 - shap - INFO - num_full_subsets = 2 +2024-12-03 14:47:59,187 - shap - INFO - remaining_weight_vector = array([0.20732477, 0.16660026, 0.14353254, 0.12957798, 0.12116383, + 0.11662019, 0.11518043]) +2024-12-03 14:47:59,195 - shap - INFO - num_paired_subset_sizes = 8 +2024-12-03 14:47:59,240 - shap - INFO - weight_left = 0.528623703595323 +2024-12-03 14:47:59,300 - shap - INFO - np.sum(w_aug) = 17.999999999999996 +2024-12-03 14:47:59,303 - shap - INFO - np.sum(self.kernelWeights) = 0.9999999999999999 +2024-12-03 14:47:59,496 - shap - INFO - phi = array([ 0.00000000e+00, 8.73719721e-03, 8.70989873e-05, 0.00000000e+00, + 3.82326113e-03, 1.31848484e-02, 5.20288498e-03, 9.31428142e-03, + 4.69081262e-03, 2.61527800e-03, 1.35926816e-02, 6.77844836e-04, + 1.74856500e-02, -1.47938398e-03, 6.19260167e-03, 2.13404411e-05, + -8.58253570e-05, 2.94089462e-03]) +2024-12-03 14:47:59,534 - shap - INFO - num_full_subsets = 2 +2024-12-03 14:47:59,537 - shap - INFO - remaining_weight_vector = array([0.21046803, 0.16837443, 0.14432094, 0.12951879, 0.12026745, + 0.11480075, 0.11224962]) +2024-12-03 14:47:59,537 - shap - INFO - num_paired_subset_sizes = 9 +2024-12-03 14:47:59,582 - shap - INFO - weight_left = 0.5381032434909889 +2024-12-03 14:47:59,618 - shap - INFO - np.sum(w_aug) = 19.0 +2024-12-03 14:47:59,620 - shap - INFO - np.sum(self.kernelWeights) = 0.9999999999999998 +2024-12-03 14:47:59,683 - shap - INFO - phi = array([-4.27027606e-04, 6.05087701e-03, 1.50737404e-03, -6.98188770e-05, + 4.15252868e-03, -1.19120284e-04, 4.61029650e-03, 2.89296701e-03, + 3.16197712e-03, 1.08629331e-02, 4.47721753e-04, 1.13389074e-02, + -3.31697616e-04, 3.70992275e-03, -3.04629404e-03, -1.59238173e-03, + -1.83253053e-03, 3.71123323e-04, 1.31799752e-03]) +2024-12-03 14:47:59,709 - shap - INFO - num_full_subsets = 2 +2024-12-03 14:47:59,713 - shap - INFO - remaining_weight_vector = array([0.20732477, 0.16660026, 0.14353254, 0.12957798, 0.12116383, + 0.11662019, 0.11518043]) +2024-12-03 14:47:59,715 - shap - INFO - num_paired_subset_sizes = 8 +2024-12-03 14:47:59,767 - shap - INFO - weight_left = 0.528623703595323 +2024-12-03 14:47:59,817 - shap - INFO - np.sum(w_aug) = 18.0 +2024-12-03 14:47:59,819 - shap - INFO - np.sum(self.kernelWeights) = 0.9999999999999998 +2024-12-03 14:47:59,860 - shap - INFO - phi = array([-0.00999016, -0.01238472, 0.00114272, 0.00017878, 0.00491261, + 0.00068625, 0.00612538, 0. , 0.00258732, 0.01295154, + 0.00075907, 0.01299131, -0.00110445, 0.00643902, -0.00222172, + 0. 
, 0.00056204, 0.00261269]) +2024-12-03 14:47:59,889 - shap - INFO - num_full_subsets = 2 +2024-12-03 14:47:59,891 - shap - INFO - remaining_weight_vector = array([0.21046803, 0.16837443, 0.14432094, 0.12951879, 0.12026745, + 0.11480075, 0.11224962]) +2024-12-03 14:47:59,892 - shap - INFO - num_paired_subset_sizes = 9 +2024-12-03 14:47:59,948 - shap - INFO - weight_left = 0.5381032434909889 +2024-12-03 14:47:59,990 - shap - INFO - np.sum(w_aug) = 19.0 +2024-12-03 14:47:59,991 - shap - INFO - np.sum(self.kernelWeights) = 0.9999999999999996 +2024-12-03 14:48:00,022 - shap - INFO - phi = array([-9.31286008e-03, -1.31748326e-02, 1.29378700e-03, 1.69462623e-04, + 4.55101767e-03, 3.09714247e-04, 6.49898379e-03, 0.00000000e+00, + 2.49537394e-03, 5.44153541e-03, 9.05871572e-04, 4.47591939e-05, + -1.78593088e-04, 6.48523956e-03, -1.85529336e-03, -4.81050964e-05, + -4.71326274e-04, 1.19859543e-03, 2.93209247e-03]) +2024-12-03 14:48:00,044 - shap - INFO - num_full_subsets = 2 +2024-12-03 14:48:00,046 - shap - INFO - remaining_weight_vector = array([0.23108621, 0.18664656, 0.16176035, 0.14705486, 0.13865173, + 0.13480029]) +2024-12-03 14:48:00,047 - shap - INFO - num_paired_subset_sizes = 8 +2024-12-03 14:48:00,094 - shap - INFO - weight_left = 0.5181019626448611 +2024-12-03 14:48:00,144 - shap - INFO - np.sum(w_aug) = 17.000000000000007 +2024-12-03 14:48:00,146 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2024-12-03 14:48:00,180 - shap - INFO - phi = array([-0.01516889, -0.02375469, 0.00212133, -0.00010974, 0.00498366, + 0.00019539, 0.00562799, -0.00049359, 0.00024028, 0.00213718, + -0.00126649, -0.00473163, -0.00189329, 0.00249173, -0.00193585, + -0.00016726, 0.00179716]) +2024-12-03 14:48:00,202 - shap - INFO - num_full_subsets = 2 +2024-12-03 14:48:00,203 - shap - INFO - remaining_weight_vector = array([0.22727203, 0.18465852, 0.16115653, 0.14772682, 0.14069221, + 0.13849389]) +2024-12-03 14:48:00,204 - shap - INFO - num_paired_subset_sizes = 7 +2024-12-03 14:48:00,264 - shap - INFO - weight_left = 0.5063344810024111 +2024-12-03 14:48:00,317 - shap - INFO - np.sum(w_aug) = 15.999999999999998 +2024-12-03 14:48:00,319 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2024-12-03 14:48:00,351 - shap - INFO - phi = array([-0.01549215, -0.0247453 , 0.00192913, -0.00018629, 0.00321626, + -0.00060339, 0.0020008 , -0.000509 , 0.00221582, -0.00186174, + -0.00520739, -0.00220376, 0.0011295 , -0.00219331, -0.00024098, + 0.00131353]) +2024-12-03 14:48:00,379 - shap - INFO - num_full_subsets = 2 +2024-12-03 14:48:00,381 - shap - INFO - remaining_weight_vector = array([0.22727203, 0.18465852, 0.16115653, 0.14772682, 0.14069221, + 0.13849389]) +2024-12-03 14:48:00,383 - shap - INFO - num_paired_subset_sizes = 7 +2024-12-03 14:48:00,460 - shap - INFO - weight_left = 0.5063344810024111 +2024-12-03 14:48:00,526 - shap - INFO - np.sum(w_aug) = 15.999999999999996 +2024-12-03 14:48:00,531 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2024-12-03 14:48:00,618 - shap - INFO - phi = array([-1.80105641e-02, -3.21919336e-02, 3.95965453e-04, -1.83903487e-05, + 1.29916230e-03, -7.92199895e-04, 4.29598682e-04, -4.15250765e-04, + 9.51280340e-04, -3.07515091e-03, -5.30119057e-03, -9.01749893e-03, + 8.06046464e-05, -2.29043339e-03, -9.87074497e-05, -1.69110457e-04]) +2024-12-03 14:48:00,714 - shap - INFO - num_full_subsets = 2 +2024-12-03 14:48:00,720 - shap - INFO - remaining_weight_vector = array([0.22727203, 0.18465852, 0.16115653, 0.14772682, 0.14069221, + 0.13849389]) 
+2024-12-03 14:48:00,724 - shap - INFO - num_paired_subset_sizes = 7 +2024-12-03 14:48:00,752 - shap - INFO - weight_left = 0.5063344810024111 +2024-12-03 14:48:00,818 - shap - INFO - np.sum(w_aug) = 16.0 +2024-12-03 14:48:00,869 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2024-12-03 14:48:01,122 - shap - INFO - phi = array([-1.72785041e-02, -3.45600067e-02, 0.00000000e+00, -9.32098977e-05, + 1.65728458e-04, -6.49205028e-04, -8.24981328e-05, 4.10920449e-03, + -1.10003219e-04, 0.00000000e+00, -5.15045042e-03, -7.98900699e-03, + 0.00000000e+00, 0.00000000e+00, -8.28004351e-05, -1.51021332e-04]) +2024-12-03 14:48:01,200 - shap - INFO - num_full_subsets = 2 +2024-12-03 14:48:01,204 - shap - INFO - remaining_weight_vector = array([0.22727203, 0.18465852, 0.16115653, 0.14772682, 0.14069221, + 0.13849389]) +2024-12-03 14:48:01,207 - shap - INFO - num_paired_subset_sizes = 7 +2024-12-03 14:48:01,234 - shap - INFO - weight_left = 0.5063344810024111 +2024-12-03 14:48:01,304 - shap - INFO - np.sum(w_aug) = 15.999999999999998 +2024-12-03 14:48:01,306 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2024-12-03 14:48:01,483 - shap - INFO - phi = array([ 0.00000000e+00, -1.58921592e-02, 0.00000000e+00, -2.01331423e-05, + -5.04716525e-06, -5.07116946e-04, 2.85889491e-05, -5.10054262e-04, + 0.00000000e+00, 3.59597734e-04, -5.66061833e-05, -2.68997150e-05, + 0.00000000e+00, 0.00000000e+00, 5.30719090e-05, 0.00000000e+00]) +2024-12-03 14:52:09,112 - pyswarms.single.global_best - INFO - Optimize for 10 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9} +2024-12-03 14:52:11,562 - pyswarms.single.global_best - INFO - Optimization finished | best cost: 2.919337618172575, best pos: [3.00104154e+00 2.62311705e-01 5.20573580e+02] +2025-01-06 13:06:32,043 - shap - INFO - num_full_subsets = 2 +2025-01-06 13:06:32,047 - shap - INFO - remaining_weight_vector = array([0.22727203, 0.18465852, 0.16115653, 0.14772682, 0.14069221, + 0.13849389]) +2025-01-06 13:06:32,051 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-06 13:06:32,081 - shap - INFO - weight_left = 0.5063344810024111 +2025-01-06 13:06:32,138 - shap - INFO - np.sum(w_aug) = 15.999999999999998 +2025-01-06 13:06:32,140 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-06 13:06:32,217 - shap - INFO - phi = array([ 0.00000000e+00, -1.50023544e-02, 0.00000000e+00, -2.02580848e-05, + 0.00000000e+00, -5.09701368e-04, 0.00000000e+00, 0.00000000e+00, + 0.00000000e+00, 3.61519831e-04, -5.85833154e-05, -1.95096124e-05, + 0.00000000e+00, 0.00000000e+00, 4.17635045e-05, 0.00000000e+00]) +2025-01-06 13:06:32,249 - shap - INFO - num_full_subsets = 2 +2025-01-06 13:06:32,262 - shap - INFO - remaining_weight_vector = array([0.22727203, 0.18465852, 0.16115653, 0.14772682, 0.14069221, + 0.13849389]) +2025-01-06 13:06:32,267 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-06 13:06:32,323 - shap - INFO - weight_left = 0.5063344810024111 +2025-01-06 13:06:32,397 - shap - INFO - np.sum(w_aug) = 16.0 +2025-01-06 13:06:32,409 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-06 13:06:32,510 - shap - INFO - phi = array([ 0.00000000e+00, 6.98755365e-03, 0.00000000e+00, 5.50026649e-05, + 0.00000000e+00, 3.23477891e-04, 0.00000000e+00, 0.00000000e+00, + 0.00000000e+00, 2.79513972e-04, 2.95546563e-05, -1.96320751e-05, + 0.00000000e+00, 0.00000000e+00, 3.64300667e-05, 0.00000000e+00]) +2025-01-06 13:06:32,557 - shap - INFO - num_full_subsets = 2 +2025-01-06 13:06:32,568 - shap - INFO - 
[... further shap / pyswarms log entries added to tests/report.log from the 2025-01-06 runs omitted: they duplicate the 2024-12-03 block above (same weight_left, kernel-weight sums, and phi arrays), differing only in timestamps ...]
+2025-01-06 13:43:31,427 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-06 13:43:31,491 - shap - INFO - weight_left = 0.5063344810024111 +2025-01-06 13:43:31,572 - shap - INFO - np.sum(w_aug) = 16.0 +2025-01-06 13:43:31,578 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-06 13:43:31,741 - shap - INFO - phi = array([-1.72785041e-02, -3.45600067e-02, 0.00000000e+00, -9.32098977e-05, + 1.65728458e-04, -6.49205028e-04, -8.24981328e-05, 4.10920449e-03, + -1.10003219e-04, 0.00000000e+00, -5.15045042e-03, -7.98900699e-03, + 0.00000000e+00, 0.00000000e+00, -8.28004351e-05, -1.51021332e-04]) +2025-01-06 13:43:31,786 - shap - INFO - num_full_subsets = 2 +2025-01-06 13:43:31,792 - shap - INFO - remaining_weight_vector = array([0.22727203, 0.18465852, 0.16115653, 0.14772682, 0.14069221, + 0.13849389]) +2025-01-06 13:43:31,800 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-06 13:43:31,856 - shap - INFO - weight_left = 0.5063344810024111 +2025-01-06 13:43:31,924 - shap - INFO - np.sum(w_aug) = 15.999999999999998 +2025-01-06 13:43:31,929 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-06 13:43:32,026 - shap - INFO - phi = array([ 0.00000000e+00, -1.58921592e-02, 0.00000000e+00, -2.01331423e-05, + -5.04716525e-06, -5.07116946e-04, 2.85889491e-05, -5.10054262e-04, + 0.00000000e+00, 3.59597734e-04, -5.66061833e-05, -2.68997150e-05, + 0.00000000e+00, 0.00000000e+00, 5.30719090e-05, 0.00000000e+00]) +2025-01-06 13:54:44,750 - pyswarms.single.global_best - INFO - Optimize for 10 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9} +2025-01-06 13:54:46,443 - pyswarms.single.global_best - INFO - Optimization finished | best cost: 2.919337618172575, best pos: [3.00104154e+00 2.62311705e-01 5.20573580e+02] +2025-01-07 16:46:06,540 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:06,543 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:06,545 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:06,562 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:06,658 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:06,661 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:06,726 - shap - INFO - phi = array([-2.67584583e-04, 4.54874204e-04, -2.12921474e-04, -8.80036637e-05, + -6.77190920e-04, -1.01968170e-02, 7.89056666e-05, 8.12669484e-04, + 6.72676528e-04, -9.12744445e-05, -7.02297524e-06, -7.03814786e-04, + 0.00000000e+00, 2.82385329e-04, -2.86428087e-04]) +2025-01-07 16:46:06,797 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:06,800 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:06,801 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:06,821 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:06,915 - shap - INFO - np.sum(w_aug) = 14.999999999999995 +2025-01-07 16:46:06,918 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-07 16:46:07,054 - shap - INFO - phi = array([-7.81146701e-04, -1.31145732e-03, -1.51103623e-03, -4.84196414e-04, + -2.66267018e-03, -1.31033397e-02, 5.59950068e-05, -1.98881930e-03, + 6.61894340e-04, -7.64641856e-06, 0.00000000e+00, -1.71512578e-03, + 0.00000000e+00, -7.82886420e-04, -6.49018935e-04]) +2025-01-07 16:46:07,128 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:07,132 - shap - INFO - remaining_weight_vector = array([0.25989514, 
0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:07,136 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:07,175 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:07,349 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:07,352 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:07,445 - shap - INFO - phi = array([-6.99326374e-04, -1.21491526e-03, -1.96642702e-03, -4.87996698e-04, + -2.62566274e-03, -1.32211779e-02, 5.10021798e-05, 1.05955228e-03, + 8.23562723e-04, 0.00000000e+00, 0.00000000e+00, -1.48566819e-03, + 0.00000000e+00, -8.05584034e-04, -6.31004662e-04]) +2025-01-07 16:46:07,544 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:07,547 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:07,550 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:07,584 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:07,774 - shap - INFO - np.sum(w_aug) = 15.0 +2025-01-07 16:46:07,782 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-07 16:46:07,990 - shap - INFO - phi = array([ 0.00055132, 0.00080249, 0.00482684, 0.00044417, 0.00162313, + 0.03106691, 0.00012181, 0.0005926 , -0.00173065, 0. , + 0. , 0.00155849, 0. , 0.00059313, 0.0006236 ]) +2025-01-07 16:46:08,059 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:08,072 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:08,076 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:08,119 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:08,381 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:08,390 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:08,504 - shap - INFO - phi = array([ 4.75096160e-04, 5.89401496e-04, 4.54338766e-03, 3.17691759e-04, + 1.47268505e-02, 3.05520843e-02, 0.00000000e+00, 2.01433853e-03, + -1.52104662e-03, 5.75041152e-05, 2.73763343e-05, 1.60161546e-03, + 1.80258234e-05, -1.10016189e-04, 1.17176421e-03]) +2025-01-07 16:46:08,558 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:08,566 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:08,574 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:08,618 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:08,807 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:08,810 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:08,975 - shap - INFO - phi = array([ 0.00072165, 0.00089001, -0.00242069, 0.00036468, 0.00101318, + 0.02944315, -0.00020673, -0.00136733, -0.00148088, 0.00022258, + 0. , 0.00193096, 0. 
, 0.00039217, 0.00052343]) +2025-01-07 16:46:09,002 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:09,006 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:09,010 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:09,080 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:09,259 - shap - INFO - np.sum(w_aug) = 14.999999999999998 +2025-01-07 16:46:09,262 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:09,373 - shap - INFO - phi = array([ 5.24253271e-04, 7.74517123e-04, -9.43274847e-04, 4.99708253e-04, + 1.48167093e-03, 2.88502236e-02, -4.12466882e-04, 5.90159308e-04, + -2.27713341e-03, -7.83334398e-05, 0.00000000e+00, 1.50932990e-03, + 0.00000000e+00, 4.05326242e-04, 3.88933698e-04]) +2025-01-07 16:46:09,465 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:09,471 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:09,474 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:09,503 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:09,732 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:09,737 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:09,897 - shap - INFO - phi = array([ 1.41136270e-03, 1.64726760e-03, 7.74331577e-05, 9.04204543e-04, + -1.15219383e-04, -1.68307608e-03, 1.05395439e-04, -1.56737949e-04, + 3.99712252e-04, 4.81142720e-04, 0.00000000e+00, 2.86798879e-03, + 0.00000000e+00, 5.72497294e-04, 1.75483691e-03]) +2025-01-07 16:46:09,991 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:09,994 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:09,997 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:10,027 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:10,230 - shap - INFO - np.sum(w_aug) = 14.999999999999998 +2025-01-07 16:46:10,233 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:10,407 - shap - INFO - phi = array([-2.19349768e-04, 4.47083679e-04, -1.07707236e-03, -1.29324929e-04, + -6.02386244e-04, -1.08497333e-02, 7.15872222e-05, 1.39378834e-03, + 3.92632345e-04, -9.29091226e-05, 0.00000000e+00, -2.80252174e-04, + 0.00000000e+00, 1.02802145e-03, -2.76748349e-04]) +2025-01-07 16:46:10,518 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:10,521 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:10,524 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:10,552 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:10,803 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:10,806 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:11,013 - shap - INFO - phi = array([ 6.31387327e-04, 1.02698816e-03, -5.17737793e-04, -1.28133406e-04, + -7.19067964e-04, -9.56600974e-03, 8.54016075e-05, 2.03132040e-03, + 6.77195787e-04, -8.18816809e-05, 0.00000000e+00, 4.82530674e-04, + 0.00000000e+00, 2.64820120e-04, 5.66745227e-05]) +2025-01-07 16:46:11,065 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:11,076 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:11,077 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 
16:46:11,100 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:11,196 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:11,198 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-07 16:46:11,243 - shap - INFO - phi = array([-7.29408047e-04, -1.30745821e-03, -5.56707052e-04, -1.28656411e-04, + -2.67092125e-03, -1.28026514e-02, 5.54486275e-05, -1.41730004e-03, + 6.65268341e-04, -8.33328074e-05, 0.00000000e+00, -1.70382546e-03, + 0.00000000e+00, -7.38609749e-04, -7.01473022e-04]) +2025-01-07 16:46:11,302 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:11,312 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:11,316 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:11,349 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:11,489 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:11,492 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-07 16:46:11,619 - shap - INFO - phi = array([ 6.32943619e-04, 1.14151815e-03, 3.37997365e-03, 4.23979772e-04, + -7.67435167e-04, -9.43113805e-03, -1.65225513e-04, 1.71612148e-03, + 6.95601545e-04, -1.01328065e-04, 4.67772037e-06, 1.07010580e-03, + 0.00000000e+00, 1.22637208e-03, 9.30465646e-05]) +2025-01-07 16:46:11,696 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:11,699 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:11,703 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:11,754 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:11,940 - shap - INFO - np.sum(w_aug) = 14.999999999999998 +2025-01-07 16:46:11,946 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:12,174 - shap - INFO - phi = array([-7.42976397e-04, -1.31229743e-03, -5.52550324e-04, -5.08621145e-04, + -2.66624786e-03, -1.28713458e-02, 5.15261445e-05, -1.70256329e-03, + 6.61860993e-04, -8.08700983e-05, -5.97118203e-06, -1.73228167e-03, + 0.00000000e+00, -7.87982921e-04, -7.25072198e-04]) +2025-01-07 16:46:12,250 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:12,255 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:12,262 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:12,293 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:12,512 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:12,522 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-07 16:46:12,700 - shap - INFO - phi = array([-7.71435600e-04, -1.30902728e-03, -1.52737676e-03, -4.73369688e-04, + -2.66726097e-03, -1.31954082e-02, 6.14895126e-05, -1.89138962e-03, + 6.62431526e-04, -7.74333809e-06, 0.00000000e+00, -1.70830085e-03, + 0.00000000e+00, -7.92383318e-04, -6.59679441e-04]) +2025-01-07 16:46:12,804 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:12,807 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:12,811 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:12,839 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:13,062 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:13,066 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:13,213 - shap - INFO - phi = 
array([-7.74336769e-04, -1.31333249e-03, -1.49779736e-03, -4.96678741e-04, + -2.67604436e-03, -1.30017009e-02, 5.38821581e-05, -1.71188709e-03, + 6.59728513e-04, -8.54898534e-05, 0.00000000e+00, -1.69437197e-03, + 0.00000000e+00, -7.86263898e-04, -7.00462556e-04]) +2025-01-07 16:46:13,303 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:13,307 - shap - INFO - remaining_weight_vector = array([0.20732477, 0.16660026, 0.14353254, 0.12957798, 0.12116383, + 0.11662019, 0.11518043]) +2025-01-07 16:46:13,310 - shap - INFO - num_paired_subset_sizes = 8 +2025-01-07 16:46:13,340 - shap - INFO - weight_left = 0.528623703595323 +2025-01-07 16:46:13,515 - shap - INFO - np.sum(w_aug) = 17.999999999999996 +2025-01-07 16:46:13,526 - shap - INFO - np.sum(self.kernelWeights) = 0.9999999999999997 +2025-01-07 16:46:13,768 - shap - INFO - phi = array([ 0.00058523, 0.00114286, -0.00016186, 0.00835774, 0.02281079, + 0.05576885, 0.00251814, 0. , 0.00490764, 0.01128858, + 0. , 0.04539661, 0.00037131, 0.01593381, 0. , + -0.0003411 , 0.00028357, 0.01180148]) +2025-01-07 16:46:13,871 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:13,874 - shap - INFO - remaining_weight_vector = array([0.23108621, 0.18664656, 0.16176035, 0.14705486, 0.13865173, + 0.13480029]) +2025-01-07 16:46:13,878 - shap - INFO - num_paired_subset_sizes = 8 +2025-01-07 16:46:13,908 - shap - INFO - weight_left = 0.5181019626448611 +2025-01-07 16:46:14,079 - shap - INFO - np.sum(w_aug) = 17.000000000000004 +2025-01-07 16:46:14,098 - shap - INFO - np.sum(self.kernelWeights) = 1.0 +2025-01-07 16:46:14,255 - shap - INFO - phi = array([-8.80441018e-04, 8.34078258e-04, 2.50981724e-03, 6.43332488e-03, + 0.00000000e+00, 5.52259213e-02, 0.00000000e+00, 9.80162127e-03, + 0.00000000e+00, 3.94723829e-02, -8.28958192e-04, 1.39390029e-02, + 0.00000000e+00, -6.28670501e-04, 8.72332968e-05, -1.31967978e-03, + 1.04304985e-02]) +2025-01-07 16:46:14,361 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:14,365 - shap - INFO - remaining_weight_vector = array([0.23108621, 0.18664656, 0.16176035, 0.14705486, 0.13865173, + 0.13480029]) +2025-01-07 16:46:14,368 - shap - INFO - num_paired_subset_sizes = 8 +2025-01-07 16:46:14,396 - shap - INFO - weight_left = 0.5181019626448611 +2025-01-07 16:46:14,584 - shap - INFO - np.sum(w_aug) = 17.000000000000004 +2025-01-07 16:46:14,602 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-07 16:46:14,819 - shap - INFO - phi = array([-0.00151493, 0.00043641, 0.00164074, 0.00484211, 0.01907955, + 0.03626199, -0.0002205 , 0.01029992, -0.00108018, 0.03618885, + -0.00280896, 0.01285191, 0. 
, -0.00082259, 0.00017408, + -0.0013437 , 0.01042786]) +2025-01-07 16:46:14,923 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:14,926 - shap - INFO - remaining_weight_vector = array([0.23108621, 0.18664656, 0.16176035, 0.14705486, 0.13865173, + 0.13480029]) +2025-01-07 16:46:14,929 - shap - INFO - num_paired_subset_sizes = 8 +2025-01-07 16:46:14,958 - shap - INFO - weight_left = 0.5181019626448611 +2025-01-07 16:46:15,061 - shap - INFO - np.sum(w_aug) = 17.000000000000004 +2025-01-07 16:46:15,064 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-07 16:46:15,175 - shap - INFO - phi = array([-1.04759772e-03, 3.36506994e-04, 1.00221759e-03, 5.89689087e-03, + 2.29384578e-02, 2.15240930e-02, -1.62121323e-04, 9.73701591e-03, + -1.43576790e-03, 1.34812935e-02, -2.52488629e-03, 1.29724524e-02, + 2.84652337e-05, 1.88401418e-04, 3.44578750e-04, -3.92335793e-04, + 1.04702809e-02]) +2025-01-07 16:46:15,239 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:15,242 - shap - INFO - remaining_weight_vector = array([0.23108621, 0.18664656, 0.16176035, 0.14705486, 0.13865173, + 0.13480029]) +2025-01-07 16:46:15,243 - shap - INFO - num_paired_subset_sizes = 8 +2025-01-07 16:46:15,293 - shap - INFO - weight_left = 0.5181019626448611 +2025-01-07 16:46:15,432 - shap - INFO - np.sum(w_aug) = 17.000000000000004 +2025-01-07 16:46:15,436 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-07 16:46:15,593 - shap - INFO - phi = array([ 1.56858468e-03, 1.16215469e-04, 3.35573814e-03, 5.82715907e-03, + 2.00005836e-02, 1.55622657e-03, -6.42524171e-05, 4.30040193e-03, + -2.08116947e-04, 2.61126689e-03, 3.13973346e-04, 6.02749987e-03, + 0.00000000e+00, 1.47158113e-03, 3.95421805e-04, 7.38624970e-04, + 2.42717643e-03]) +2025-01-07 16:46:15,683 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:15,685 - shap - INFO - remaining_weight_vector = array([0.22727203, 0.18465852, 0.16115653, 0.14772682, 0.14069221, + 0.13849389]) +2025-01-07 16:46:15,688 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:15,721 - shap - INFO - weight_left = 0.5063344810024111 +2025-01-07 16:46:15,907 - shap - INFO - np.sum(w_aug) = 16.0 +2025-01-07 16:46:15,911 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-07 16:46:15,987 - shap - INFO - phi = array([ 2.05550234e-03, -9.40782985e-05, 5.95180461e-04, 3.44831878e-03, + 1.93302587e-02, -3.72493233e-03, 8.14890018e-05, 4.97228392e-03, + -5.18402965e-04, 2.03518062e-03, 1.48783257e-05, 5.20450617e-03, + 0.00000000e+00, 1.37855977e-03, 3.85573709e-04, 2.48419317e-03]) +2025-01-07 16:46:16,075 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:16,078 - shap - INFO - remaining_weight_vector = array([0.22727203, 0.18465852, 0.16115653, 0.14772682, 0.14069221, + 0.13849389]) +2025-01-07 16:46:16,080 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:16,115 - shap - INFO - weight_left = 0.5063344810024111 +2025-01-07 16:46:16,459 - shap - INFO - np.sum(w_aug) = 15.999999999999996 +2025-01-07 16:46:16,468 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-07 16:46:16,722 - shap - INFO - phi = array([ 0.00263821, -0.00597123, 0.0005245 , 0.00131621, 0.01322747, + -0.00838257, 0. , 0.00110078, 0. , 0.00027454, + 0.00085713, 0.00356037, 0. 
, 0.00151045, 0.00073949, + 0.00162693]) +2025-01-07 16:46:16,789 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:16,796 - shap - INFO - remaining_weight_vector = array([0.22727203, 0.18465852, 0.16115653, 0.14772682, 0.14069221, + 0.13849389]) +2025-01-07 16:46:16,802 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:16,835 - shap - INFO - weight_left = 0.5063344810024111 +2025-01-07 16:46:17,013 - shap - INFO - np.sum(w_aug) = 15.999999999999996 +2025-01-07 16:46:17,019 - shap - INFO - np.sum(self.kernelWeights) = 1.0 +2025-01-07 16:46:17,214 - shap - INFO - phi = array([ 1.26978613e-03, -7.24247688e-03, -6.72591919e-04, 1.00525208e-03, + 1.10386788e-02, -9.97036628e-03, 0.00000000e+00, 8.55935099e-04, + 1.31027236e-04, 6.28550899e-05, 5.96476616e-04, 2.97840684e-03, + 4.13216324e-05, -4.90728484e-04, 7.80751334e-04, 1.08966538e-03]) +2025-01-07 16:46:17,315 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:17,321 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:17,324 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:17,351 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:17,536 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:17,541 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:17,663 - shap - INFO - phi = array([ 7.08454673e-04, 6.74374781e-04, -1.87054644e-03, 2.41733884e-04, + 1.79882049e-02, 3.15417075e-02, -1.17581345e-04, 3.01295703e-03, + 3.37466389e-03, 6.33266508e-04, 1.17689653e-03, 1.58750903e-03, + 2.22546608e-05, -4.83027863e-04, 1.08990830e-03]) +2025-01-07 16:46:17,762 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:17,767 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:17,770 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:17,795 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:18,003 - shap - INFO - np.sum(w_aug) = 14.999999999999998 +2025-01-07 16:46:18,019 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:18,181 - shap - INFO - phi = array([ 5.24232626e-04, 8.41674429e-04, -1.98603465e-03, 3.95026720e-04, + 1.69185988e-02, 3.22301667e-02, -6.74270281e-05, 3.07702688e-03, + 3.15749107e-03, 1.09544275e-04, 8.99500964e-04, 1.60298897e-03, + 0.00000000e+00, -3.78205423e-04, 1.19815298e-03]) +2025-01-07 16:46:18,270 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:18,274 - shap - INFO - remaining_weight_vector = array([0.22727203, 0.18465852, 0.16115653, 0.14772682, 0.14069221, + 0.13849389]) +2025-01-07 16:46:18,277 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:18,308 - shap - INFO - weight_left = 0.5063344810024111 +2025-01-07 16:46:18,492 - shap - INFO - np.sum(w_aug) = 15.999999999999996 +2025-01-07 16:46:18,497 - shap - INFO - np.sum(self.kernelWeights) = 1.0 +2025-01-07 16:46:18,094 - shap - INFO - phi = array([ 6.01684677e-04, 8.06723921e-04, 2.79739803e-03, -1.19263580e-04, + 5.68008632e-04, 1.60551271e-02, 3.01166516e-02, -1.29915749e-04, + 2.76349229e-03, -1.47444382e-03, 5.00779372e-05, 1.12590822e-03, + 1.68806559e-03, 0.00000000e+00, 4.82849136e-04, 9.20232290e-04]) +2025-01-07 16:46:18,190 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:18,193 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:18,196 - shap - INFO - 
num_paired_subset_sizes = 7 +2025-01-07 16:46:18,226 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:18,440 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:18,446 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:18,648 - shap - INFO - phi = array([6.92934344e-04, 9.14510932e-04, 5.40340308e-03, 5.57427457e-04, + 1.36677191e-02, 3.39283790e-02, 1.03330763e-04, 1.56227899e-03, + 3.36974875e-03, 2.80825192e-05, 1.43348474e-03, 1.65342687e-03, + 4.60460449e-06, 1.48810556e-04, 6.68387560e-04]) +2025-01-07 16:46:18,736 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:18,739 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:18,742 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:18,774 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:18,966 - shap - INFO - np.sum(w_aug) = 14.999999999999998 +2025-01-07 16:46:18,969 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-07 16:46:19,152 - shap - INFO - phi = array([8.48988301e-04, 9.24829692e-04, 5.41181864e-03, 6.41989531e-04, + 1.70977207e-03, 3.37204682e-02, 1.33174179e-04, 9.99003606e-04, + 1.64109555e-03, 3.02050972e-04, 1.34154369e-03, 1.68837622e-03, + 3.80094066e-06, 6.58194968e-05, 9.71321677e-04]) +2025-01-07 16:46:19,246 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:19,250 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:19,252 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:19,284 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:19,515 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:19,518 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:19,543 - shap - INFO - phi = array([ 2.38562223e-05, 6.85736749e-04, -1.40365664e-03, 3.50310863e-04, + 9.10406979e-04, 2.91572237e-02, 1.80826653e-04, -1.67468739e-03, + 1.11352436e-03, -2.76395053e-04, 1.06744077e-03, 2.12641276e-03, + 0.00000000e+00, 1.64898696e-04, 7.99892900e-04]) +2025-01-07 16:46:19,597 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:19,601 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:19,605 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:19,654 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:19,768 - shap - INFO - np.sum(w_aug) = 14.999999999999998 +2025-01-07 16:46:19,771 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:19,872 - shap - INFO - phi = array([0.00035843, 0.00064271, 0.00341537, 0.00018593, 0.01303335, + 0.03027784, 0. , 0.00207235, 0.00011374, 0. , + 0.00015063, 0.00161144, 0. , 0. 
, 0.00102207]) +2025-01-07 16:46:19,923 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:19,928 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:19,931 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:19,978 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:20,144 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:20,148 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:20,286 - shap - INFO - phi = array([ 5.12666266e-04, 4.93000322e-04, 2.27563031e-03, 2.78883520e-04, + 1.34153419e-02, 2.85507233e-02, 0.00000000e+00, 2.24894674e-03, + -5.11521691e-04, 9.63176321e-05, -2.16366527e-04, 1.61366572e-03, + 0.00000000e+00, -1.06513185e-03, 8.97098295e-04]) +2025-01-07 16:46:20,343 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:20,347 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:20,349 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:20,403 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:20,594 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:20,599 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:20,744 - shap - INFO - phi = array([ 0.00061325, 0.00061139, -0.00206844, 0.00054907, 0.00324298, + 0.03063067, 0.00014719, 0.00116491, -0.00078919, -0.00010907, + 0.0011646 , 0.00150647, 0. , -0.00047805, 0.00075058]) +2025-01-07 16:46:20,782 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:20,787 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:20,792 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:20,855 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:21,057 - shap - INFO - np.sum(w_aug) = 14.999999999999998 +2025-01-07 16:46:21,062 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-07 16:46:21,119 - shap - INFO - phi = array([ 0.00055676, 0.00075274, -0.00242449, 0.0004759 , 0.00115078, + 0.03011287, 0.00012763, 0.00063498, -0.00166507, -0.0003334 , + 0.00125513, 0.00150328, 0. , 0.00012847, 0.00057387]) +2025-01-07 16:46:21,159 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:21,168 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:21,171 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:21,222 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:21,412 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:21,415 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-07 16:46:21,464 - shap - INFO - phi = array([ 0.0007157 , 0.00078563, -0.00284349, 0.00067339, 0.00111311, + 0.02387617, -0.00051464, -0.00138165, -0.00103554, 0.00015928, + 0.00098792, 0.00217813, 0. 
, 0.00024185, 0.0005511 ]) +2025-01-07 16:46:21,522 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:21,544 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:21,547 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:21,584 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:21,810 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:21,826 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:21,983 - shap - INFO - phi = array([ 0.00075332, 0.00090142, -0.00202252, 0.00089488, -0.00021755, + 0.02163008, -0.00022287, -0.00098124, -0.00061778, 0.00030565, + 0.00086374, 0.0023383 , 0. , 0.00010184, 0.0008781 ]) +2025-01-07 16:46:22,056 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:22,067 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:22,071 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:22,108 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:22,345 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:22,349 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:22,446 - shap - INFO - phi = array([ 0.00085019, 0.00103511, -0.00169358, 0.000478 , -0.00063297, + 0.02684247, 0.00017795, -0.00100344, 0.00132131, 0.00028741, + 0.0010349 , 0.00230292, 0. , 0.0005691 , 0.00078173]) +2025-01-07 16:46:22,520 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:22,548 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:22,550 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:22,581 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:22,705 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:22,708 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-07 16:46:22,853 - shap - INFO - phi = array([ 0.00103398, 0.00123532, -0.00120489, 0.00078706, -0.0001817 , + 0.02584646, 0.00019187, -0.00063998, 0.00138624, 0.00024654, + 0.00101845, 0.00257917, 0. 
, 0.00030764, 0.00111062]) +2025-01-07 16:46:22,892 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:22,899 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:22,901 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:22,967 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:23,144 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:23,147 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:23,173 - shap - INFO - phi = array([ 1.04602352e-03, 1.25370410e-03, -5.20936501e-04, 8.87711779e-04, + -7.75666941e-04, 2.28888036e-02, 2.05859324e-04, -3.86487550e-04, + 1.30738002e-03, 4.82141521e-04, 9.80477434e-04, 2.48460906e-03, + 7.56191546e-06, 8.99984151e-04, 1.37209708e-03]) +2025-01-07 16:46:23,207 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:23,216 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:23,222 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:23,279 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:23,459 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:23,462 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-07 16:46:23,496 - shap - INFO - phi = array([ 0.00155413, 0.00166495, 0.00348249, 0.00097028, -0.0004765 , + -0.00093779, 0.00012703, 0.00234448, 0.00109567, -0.00011276, + 0.0009623 , 0.00313182, 0. , 0.00071959, 0.00174558]) +2025-01-07 16:46:23,534 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:23,538 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:23,540 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:23,596 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:23,804 - shap - INFO - np.sum(w_aug) = 14.999999999999998 +2025-01-07 16:46:23,820 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-07 16:46:23,979 - shap - INFO - phi = array([-4.49042544e-04, -8.16441591e-04, -2.47606185e-03, -9.48404790e-05, + -5.51237048e-04, -1.16606288e-02, 6.93312610e-05, -9.47177184e-04, + 5.92477410e-04, -1.26237683e-04, -2.39953010e-04, -7.81815805e-04, + 0.00000000e+00, 2.67884022e-04, -2.80564245e-04]) +2025-01-07 16:46:24,036 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:24,058 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:24,066 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:24,105 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:24,289 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:24,308 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-07 16:46:24,412 - shap - INFO - phi = array([-7.72149589e-04, -1.33056259e-03, -2.50182155e-03, -5.35935946e-04, + -2.63015620e-03, -1.31557817e-02, 6.47707169e-05, -2.01598683e-03, + 5.71966317e-04, -1.29544992e-04, -4.61615574e-04, -1.78972153e-03, + 0.00000000e+00, -7.81147400e-04, -6.31758939e-04]) +2025-01-07 16:46:24,469 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:24,498 - shap - INFO - remaining_weight_vector = array([0.22727203, 0.18465852, 0.16115653, 0.14772682, 0.14069221, + 0.13849389]) +2025-01-07 16:46:24,499 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:24,543 - shap - INFO - weight_left = 
0.5063344810024111 +2025-01-07 16:46:24,740 - shap - INFO - np.sum(w_aug) = 15.999999999999996 +2025-01-07 16:46:24,757 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:25,055 - shap - INFO - phi = array([ 1.08758776e-03, -8.58361579e-03, -1.86529558e-03, 7.35562996e-04, + 9.69949647e-03, -1.04378747e-02, -3.24691935e-04, 5.56482163e-04, + -3.24339049e-03, -1.65205167e-05, 2.04900162e-04, 1.99317383e-03, + 0.00000000e+00, -8.54871933e-04, 3.33015937e-04, 8.74770725e-04]) +2025-01-07 16:46:25,121 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:25,125 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:25,129 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:25,160 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:25,360 - shap - INFO - np.sum(w_aug) = 14.999999999999998 +2025-01-07 16:46:25,371 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:25,588 - shap - INFO - phi = array([ 0.00098907, -0.00430859, -0.00208456, 0.00083822, 0.00365748, + -0.0108543 , -0.0002259 , 0.00019327, -0.00216786, 0.00047463, + 0.00055714, 0.00201496, 0. , -0.00052038, 0.00084802]) +2025-01-07 16:46:25,686 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:25,690 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:25,693 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:25,722 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:25,826 - shap - INFO - np.sum(w_aug) = 14.999999999999998 +2025-01-07 16:46:25,828 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-07 16:46:25,863 - shap - INFO - phi = array([ 0.00114334, -0.00298866, -0.00124281, 0.00089809, 0.00090131, + -0.00999124, -0.00024532, 0.00028165, -0.00205957, 0.00051476, + 0.0006784 , 0.00217682, 0. , -0.00035336, 0.0008456 ]) +2025-01-07 16:46:25,918 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:25,929 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:25,930 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:25,975 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:26,118 - shap - INFO - np.sum(w_aug) = 14.999999999999998 +2025-01-07 16:46:26,120 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-07 16:46:26,163 - shap - INFO - phi = array([ 0.00060043, 0.00081749, -0.00238412, 0.0002488 , -0.00016808, + -0.00062034, -0.00036086, 0.00053212, 0.00172659, -0.00020204, + 0.00018867, 0.00174667, 0. , -0.00216652, 0.00062255]) +2025-01-07 16:46:26,243 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:26,246 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:26,247 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:26,284 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:26,456 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:26,461 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:26,582 - shap - INFO - phi = array([ 0.00097436, 0.00098575, -0.0018485 , 0.00064937, -0.00131058, + 0.00377701, -0.00043033, -0.0010453 , 0.00024982, 0.00053551, + 0.00065658, 0.00224947, 0. 
, -0.00148118, 0.00091286]) +2025-01-07 16:46:26,672 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:26,675 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:26,677 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:26,711 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:26,895 - shap - INFO - np.sum(w_aug) = 14.999999999999998 +2025-01-07 16:46:26,898 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-07 16:46:26,986 - shap - INFO - phi = array([ 1.24488966e-03, 1.03355206e-03, -1.73097198e-03, 8.67145131e-04, + -1.69368538e-03, 4.79217240e-03, 9.68935725e-05, 2.28241687e-03, + 4.02670528e-04, 5.80315106e-04, 8.61935397e-04, 2.67477262e-03, + 0.00000000e+00, -8.49100110e-04, 1.36471393e-03]) +2025-01-07 16:46:27,073 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:27,076 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:27,078 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:27,115 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:27,329 - shap - INFO - np.sum(w_aug) = 14.999999999999998 +2025-01-07 16:46:27,333 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:27,453 - shap - INFO - phi = array([ 0.00134722, 0.00105426, -0.00104048, 0.00105066, -0.00045908, + 0.00272654, -0.0003769 , 0.00275243, 0.00109325, 0.00061308, + 0.00082598, 0.00255197, 0. , -0.00120071, 0.00148715]) +2025-01-07 16:46:27,502 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:27,513 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:27,518 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:27,575 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:27,774 - shap - INFO - np.sum(w_aug) = 14.999999999999998 +2025-01-07 16:46:27,779 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:28,026 - shap - INFO - phi = array([ 1.36251521e-03, 1.41039709e-03, -5.88150196e-04, 1.30609355e-03, + -1.47383555e-03, 1.61571406e-03, -3.80200545e-04, 3.21223772e-03, + 1.20654201e-03, -5.36510354e-05, 9.12875666e-04, 2.25812163e-03, + 0.00000000e+00, -1.20223669e-03, 1.13701589e-03]) +2025-01-07 16:46:28,069 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:46:28,089 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:46:28,102 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:46:28,142 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:46:28,424 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:46:28,427 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:46:28,522 - shap - INFO - phi = array([ 8.67765225e-04, 1.13963983e-03, -3.57627144e-04, 4.65868179e-04, + -6.70585430e-04, -2.68268680e-03, 9.63252380e-05, 1.55143836e-03, + 9.70693911e-04, -1.03369131e-04, 0.00000000e+00, 8.84416666e-04, + 0.00000000e+00, -4.99036526e-04, 3.41851276e-05]) +2025-01-07 16:53:31,901 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:53:31,911 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:53:31,918 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:53:31,980 - shap - INFO - weight_left = 0.4930585722173909 
+2025-01-07 16:53:32,232 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:53:32,238 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:53:32,400 - shap - INFO - phi = array([ 8.21709289e-05, 6.19815250e-03, -3.96657488e-04, 1.90152054e-05, + -7.04878074e-04, 3.33915816e-05, 6.97397643e-04, -4.17028144e-04, + -5.50585942e-03, -5.88358099e-03, 2.31810185e-04, 1.53151631e-04, + -7.44747985e-05, -3.15486495e-05, -2.02618552e-04]) +2025-01-07 16:53:32,470 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:53:32,479 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:53:32,485 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:53:32,523 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:53:32,648 - shap - INFO - np.sum(w_aug) = 14.999999999999995 +2025-01-07 16:53:32,652 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-07 16:53:32,796 - shap - INFO - phi = array([ 3.58421538e-05, 6.14245063e-03, -3.48296127e-04, 2.38641050e-05, + -3.44052634e-04, -6.73965274e-05, -9.07254734e-05, -4.42483087e-04, + -5.92248048e-03, 1.08428267e-03, 1.10376978e-04, -6.16886059e-05, + -1.41625864e-05, -3.57576868e-05, -2.18517304e-04]) +2025-01-07 16:53:32,833 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:53:32,861 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:53:32,865 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:53:32,916 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:53:33,073 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:53:33,077 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:53:33,257 - shap - INFO - phi = array([ 7.96284040e-05, 7.05078685e-03, 2.21795559e-04, 2.24818570e-05, + -2.89758244e-04, -5.43659398e-05, -5.91356965e-04, -2.84296189e-04, + 4.38317037e-03, 1.33288061e-03, 3.59167456e-04, -2.10117107e-05, + -7.36676345e-05, -2.54449092e-05, -5.65320782e-05]) +2025-01-07 16:53:33,327 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:53:33,339 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:53:33,345 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:53:33,383 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:53:33,532 - shap - INFO - np.sum(w_aug) = 15.0 +2025-01-07 16:53:33,540 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000002 +2025-01-07 16:53:33,723 - shap - INFO - phi = array([ 9.81385144e-05, 7.02831451e-03, 2.65109614e-04, 2.31060111e-05, + -2.90778860e-04, -5.72686267e-05, -5.98882839e-04, -2.52005058e-04, + 4.40231643e-03, 1.11052927e-03, 6.45488498e-04, -1.86545779e-05, + 3.81953735e-05, -2.25171568e-05, -4.63712548e-05]) +2025-01-07 16:53:33,810 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:53:33,823 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:53:33,832 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:53:33,864 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:53:34,072 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:53:34,076 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:53:34,180 - shap - INFO - phi = array([-9.38987149e-05, 8.63623119e-03, 2.67925185e-04, 2.61966805e-05, + -2.79859326e-04, 
-6.84198123e-05, -5.86831531e-04, -2.45974630e-04, + 4.31169550e-03, 1.05516589e-03, 6.59717954e-04, -2.27443723e-05, + -5.55009042e-05, -2.84007425e-05, -4.79566745e-05]) +2025-01-07 16:53:34,228 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:53:34,243 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:53:34,248 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:53:34,301 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:53:34,417 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:53:34,420 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:53:34,510 - shap - INFO - phi = array([-6.54676865e-05, 8.65765660e-03, 2.11496797e-03, 4.48321453e-05, + -2.72672060e-04, 3.46228642e-05, -5.79831810e-04, -2.34652474e-04, + 3.87313982e-03, 1.03002846e-03, 2.18245155e-04, -2.37258585e-05, + -8.35195049e-05, -1.70436638e-05, -4.52641122e-05]) +2025-01-07 16:53:34,611 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:53:34,614 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:53:34,616 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:53:34,638 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:53:34,805 - shap - INFO - np.sum(w_aug) = 14.999999999999998 +2025-01-07 16:53:34,810 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:53:35,005 - shap - INFO - phi = array([-3.27849964e-04, 8.17738734e-03, -3.72041027e-04, -2.20545635e-04, + -2.54660520e-04, 3.62060520e-05, -6.52847191e-04, -2.77141760e-04, + -6.95589833e-04, 1.01607343e-03, 1.28654635e-04, -1.98621079e-05, + 7.60224516e-05, -3.21388776e-05, -7.48411074e-05]) +2025-01-07 16:53:35,096 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:53:35,102 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:53:35,107 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:53:35,136 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:53:35,320 - shap - INFO - np.sum(w_aug) = 14.999999999999996 +2025-01-07 16:53:35,324 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:53:35,503 - shap - INFO - phi = array([-3.22366512e-04, 8.17936579e-03, -3.85625669e-04, -2.17705295e-04, + -2.61726297e-04, 3.14778609e-05, -6.50524996e-04, -2.81204826e-04, + -6.92264985e-04, 1.03040572e-03, 1.29088411e-04, -1.58035741e-05, + -4.89486926e-05, -3.42597599e-05, -7.65485193e-05]) +2025-01-07 16:53:35,583 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:53:35,590 - shap - INFO - remaining_weight_vector = array([0.25989514, 0.21264148, 0.1871245 , 0.17326343, 0.16707545]) +2025-01-07 16:53:35,597 - shap - INFO - num_paired_subset_sizes = 7 +2025-01-07 16:53:35,637 - shap - INFO - weight_left = 0.4930585722173909 +2025-01-07 16:53:35,801 - shap - INFO - np.sum(w_aug) = 14.999999999999998 +2025-01-07 16:53:35,807 - shap - INFO - np.sum(self.kernelWeights) = 1.0000000000000004 +2025-01-07 16:53:35,963 - shap - INFO - phi = array([-9.43994356e-04, -2.32453438e-02, -1.99225231e-04, 0.00000000e+00, + 1.79414602e-03, 2.72681180e-04, 4.54975863e-04, 8.72889454e-04, + -2.85837474e-03, -8.21606653e-03, -5.41373113e-05, -1.69508567e-04, + 1.80444946e-05, 2.86607078e-04, 2.69677496e-04]) +2025-01-07 16:53:36,025 - shap - INFO - num_full_subsets = 2 +2025-01-07 16:53:36,031 - shap - INFO - 
[shap KernelExplainer INFO output, 2025-01-07 16:53:36 to 16:59:23: repeated blocks logging num_full_subsets, remaining_weight_vector, num_paired_subset_sizes, weight_left, np.sum(w_aug), np.sum(self.kernelWeights), and the resulting phi array]
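For context, log lines of this shape are emitted by SHAP's KernelExplainer when Python logging is enabled at INFO level. The sketch below shows how such output is typically produced; the model, background data, and feature count are illustrative assumptions, not taken from this run:

import logging
import numpy as np
import shap
from sklearn.ensemble import RandomForestRegressor

logging.basicConfig(level=logging.INFO)  # surfaces shap's INFO messages (num_full_subsets, weight_left, phi, ...)

# toy regression problem, purely illustrative
X = np.random.rand(50, 15)
y = X @ np.random.rand(15)
model = RandomForestRegressor(n_estimators=10).fit(X, y)

# Kernel SHAP: the background set defines the baseline for "missing" features,
# and shap_values returns one phi (Shapley value) per feature of the explained sample
explainer = shap.KernelExplainer(model.predict, X[:10])
phi = explainer.shap_values(X[0], nsamples=100)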
diff --git a/tests/reports/report_0/eegalcohol_mcar_CORRELATION.jpg b/tests/reports/report_0/eegalcohol_mcar_CORRELATION.jpg
index fee156e..1ffc652 100644
Binary files a/tests/reports/report_0/eegalcohol_mcar_CORRELATION.jpg and b/tests/reports/report_0/eegalcohol_mcar_CORRELATION.jpg differ
diff --git a/tests/reports/report_0/eegalcohol_mcar_MAE.jpg b/tests/reports/report_0/eegalcohol_mcar_MAE.jpg
index 667348a..0480f42 100644
Binary files a/tests/reports/report_0/eegalcohol_mcar_MAE.jpg and b/tests/reports/report_0/eegalcohol_mcar_MAE.jpg differ
diff --git a/tests/reports/report_0/eegalcohol_mcar_MI.jpg b/tests/reports/report_0/eegalcohol_mcar_MI.jpg
index 0e6c9c4..78f7913 100644
Binary files a/tests/reports/report_0/eegalcohol_mcar_MI.jpg and b/tests/reports/report_0/eegalcohol_mcar_MI.jpg differ
diff --git a/tests/reports/report_0/eegalcohol_mcar_RMSE.jpg b/tests/reports/report_0/eegalcohol_mcar_RMSE.jpg
index dfa5076..db22d98 100644
Binary files a/tests/reports/report_0/eegalcohol_mcar_RMSE.jpg and b/tests/reports/report_0/eegalcohol_mcar_RMSE.jpg differ
diff --git a/tests/reports/report_0/eegalcohol_mcar_contamination_time.jpg b/tests/reports/report_0/eegalcohol_mcar_contamination_time.jpg
index 000be56..4181873 100644
Binary files a/tests/reports/report_0/eegalcohol_mcar_contamination_time.jpg and b/tests/reports/report_0/eegalcohol_mcar_contamination_time.jpg differ
diff --git a/tests/reports/report_0/eegalcohol_mcar_imputation_time.jpg b/tests/reports/report_0/eegalcohol_mcar_imputation_time.jpg
index f1a2bbb..29d9cb4 100644
Binary files a/tests/reports/report_0/eegalcohol_mcar_imputation_time.jpg and b/tests/reports/report_0/eegalcohol_mcar_imputation_time.jpg differ
diff --git a/tests/reports/report_0/eegalcohol_mcar_optimization_time.jpg b/tests/reports/report_0/eegalcohol_mcar_optimization_time.jpg
index da4cb7c..4bcdd44 100644
Binary files a/tests/reports/report_0/eegalcohol_mcar_optimization_time.jpg and b/tests/reports/report_0/eegalcohol_mcar_optimization_time.jpg differ
diff --git a/tests/reports/report_0/report_eeg-alcohol.txt b/tests/reports/report_0/report_eeg-alcohol.txt
index 1666e28..bd111d6 100644
--- a/tests/reports/report_0/report_eeg-alcohol.txt
+++ b/tests/reports/report_0/report_eeg-alcohol.txt
@@ -1,33 +1,33 @@
-dictionary of results : {'eegalcohol': {'mcar': {'mean': {'bayesian': 
{'0.05': {'scores': {'RMSE': 0.5197922283008971, 'MAE': 0.4543356516868202, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0005006790161132812, 'optimization': 0, 'imputation': 0.0003077983856201172}}, '0.1': {'scores': {'RMSE': 1.0659202645786816, 'MAE': 0.9085417731383956, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.00016045570373535156, 'optimization': 0, 'imputation': 8.916854858398438e-05}}, '0.2': {'scores': {'RMSE': 1.1400385999631493, 'MAE': 0.9394950730289477, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.00021266937255859375, 'optimization': 0, 'imputation': 8.797645568847656e-05}}, '0.4': {'scores': {'RMSE': 1.0333061850175014, 'MAE': 0.817160720129779, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0005984306335449219, 'optimization': 0, 'imputation': 8.702278137207031e-05}}, '0.6': {'scores': {'RMSE': 1.0938413270459857, 'MAE': 0.8545290213993658, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0017769336700439453, 'optimization': 0, 'imputation': 0.00021004676818847656}}, '0.8': {'scores': {'RMSE': 1.07436956341757, 'MAE': 0.8291370178635111, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0017156600952148438, 'optimization': 0, 'imputation': 0.0001709461212158203}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.37483452324301586, 'MAE': 0.3375262694281006, 'MI': 1.0397207708399179, 'CORRELATION': 0.7365655689896633}, 'times': {'contamination': 0.0002193450927734375, 'optimization': 0.38846516609191895, 'imputation': 0.004354953765869141}}, '0.1': {'scores': {'RMSE': 1.3799678230195285, 'MAE': 1.1003322284844623, 'MI': 1.732867951399863, 'CORRELATION': -0.500100644242659}, 'times': {'contamination': 0.0001366138458251953, 'optimization': 0.38846516609191895, 'imputation': 0.0026137828826904297}}, '0.2': {'scores': {'RMSE': 0.5279485898506157, 'MAE': 0.42431581904234256, 'MI': 1.342409426595628, 'CORRELATION': 0.9071070625126642}, 'times': {'contamination': 0.00019049644470214844, 'optimization': 0.38846516609191895, 'imputation': 0.011534452438354492}}, '0.4': {'scores': {'RMSE': 0.6529812630837011, 'MAE': 0.42858056477338186, 'MI': 0.8905639332827393, 'CORRELATION': 0.7722811146383882}, 'times': {'contamination': 0.000324249267578125, 'optimization': 0.38846516609191895, 'imputation': 0.004487276077270508}}, '0.6': {'scores': {'RMSE': 0.6798826891423311, 'MAE': 0.47135122101632737, 'MI': 0.6001676421795947, 'CORRELATION': 0.7742382236368857}, 'times': {'contamination': 0.0006392002105712891, 'optimization': 0.38846516609191895, 'imputation': 0.007712364196777344}}, '0.8': {'scores': {'RMSE': 0.7608485588056992, 'MAE': 0.5479154581689161, 'MI': 0.42721564894947844, 'CORRELATION': 0.7017141157422242}, 'times': {'contamination': 0.001476287841796875, 'optimization': 0.38846516609191895, 'imputation': 0.007761716842651367}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.3251125774837754, 'MAE': 0.26797673641099284, 'MI': 1.0397207708399179, 'CORRELATION': 0.6142581896031455}, 'times': {'contamination': 0.0003612041473388672, 'optimization': 0.20192217826843262, 'imputation': 0.038892269134521484}}, '0.1': {'scores': {'RMSE': 0.299492451492057, 'MAE': 0.26432871720074347, 'MI': 1.9061547465398494, 'CORRELATION': 0.967896575643492}, 'times': {'contamination': 0.0001418590545654297, 'optimization': 0.20192217826843262, 'imputation': 0.04026174545288086}}, '0.2': {'scores': {'RMSE': 0.32852543256899075, 'MAE': 0.27202573018354975, 'MI': 1.5996631161656454, 'CORRELATION': 0.9558373872353643}, 
'times': {'contamination': 0.00017714500427246094, 'optimization': 0.20192217826843262, 'imputation': 0.036742210388183594}}, '0.4': {'scores': {'RMSE': 0.4508488005700101, 'MAE': 0.34941433537269606, 'MI': 0.8543113555966528, 'CORRELATION': 0.8959297471926679}, 'times': {'contamination': 0.0003261566162109375, 'optimization': 0.20192217826843262, 'imputation': 0.05383801460266113}}, '0.6': {'scores': {'RMSE': 18.797539991079297, 'MAE': 7.812583796335101, 'MI': 0.36244773022350796, 'CORRELATION': 0.6210142190959098}, 'times': {'contamination': 0.0006361007690429688, 'optimization': 0.20192217826843262, 'imputation': 0.03060746192932129}}, '0.8': {'scores': {'RMSE': 3.1451455567216193, 'MAE': 1.1637520656636082, 'MI': 0.0643204354315137, 'CORRELATION': 0.22737088719870605}, 'times': {'contamination': 0.001417398452758789, 'optimization': 0.20192217826843262, 'imputation': 0.03860116004943848}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.2311363556202525, 'MAE': 0.22809317150257158, 'MI': 0.6931471805599452, 'CORRELATION': 0.8754093900930757}, 'times': {'contamination': 0.0001418590545654297, 'optimization': 0.8676083087921143, 'imputation': 0.008327007293701172}}, '0.1': {'scores': {'RMSE': 0.21734571962767568, 'MAE': 0.20142183555276616, 'MI': 1.4941751382893083, 'CORRELATION': 0.9836625389334559}, 'times': {'contamination': 0.0005736351013183594, 'optimization': 0.8676083087921143, 'imputation': 0.017409324645996094}}, '0.2': {'scores': {'RMSE': 0.2763681623559098, 'MAE': 0.21205899863451294, 'MI': 1.692828654044598, 'CORRELATION': 0.9663556239228223}, 'times': {'contamination': 0.0006301403045654297, 'optimization': 0.8676083087921143, 'imputation': 0.05528140068054199}}, '0.4': {'scores': {'RMSE': 0.32470532661816204, 'MAE': 0.24836184775095202, 'MI': 1.0631520030142667, 'CORRELATION': 0.9435024215665483}, 'times': {'contamination': 0.00033092498779296875, 'optimization': 0.8676083087921143, 'imputation': 0.4662141799926758}}, '0.6': {'scores': {'RMSE': 0.45693859713260937, 'MAE': 0.3350566242376081, 'MI': 0.836724518636222, 'CORRELATION': 0.9015668975756113}, 'times': {'contamination': 0.0018763542175292969, 'optimization': 0.8676083087921143, 'imputation': 1.346935749053955}}, '0.8': {'scores': {'RMSE': 0.7301676007328138, 'MAE': 0.5391664379693699, 'MI': 0.43198783605819785, 'CORRELATION': 0.7329833767632488}, 'times': {'contamination': 0.0029129981994628906, 'optimization': 0.8676083087921143, 'imputation': 1.884641408920288}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 1.8888571092045499, 'MAE': 1.8500318680460206, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.00013375282287597656, 'optimization': 41.83157467842102, 'imputation': 11.03073787689209}}, '0.1': {'scores': {'RMSE': 1.5313532904397844, 'MAE': 1.3237393808067868, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0007200241088867188, 'optimization': 41.83157467842102, 'imputation': 10.905119180679321}}, '0.2': {'scores': {'RMSE': 1.3755299921307305, 'MAE': 1.1483280216501393, 'MI': 0.397640786553919, 'CORRELATION': 0.02308952990216339}, 'times': {'contamination': 0.0004470348358154297, 'optimization': 41.83157467842102, 'imputation': 11.329344034194946}}, '0.4': {'scores': {'RMSE': 1.2745248166990786, 'MAE': 1.0870430599973084, 'MI': 0.1572442415087993, 'CORRELATION': -0.13151214992987914}, 'times': {'contamination': 0.0009887218475341797, 'optimization': 41.83157467842102, 'imputation': 14.02935528755188}}, '0.6': {'scores': {'RMSE': 1.0836713249663261, 'MAE': 0.8821860011769823, 
'MI': 0.11389826782453707, 'CORRELATION': 0.14049086543856532}, 'times': {'contamination': 0.0020742416381835938, 'optimization': 41.83157467842102, 'imputation': 13.940861463546753}}, '0.8': {'scores': {'RMSE': 1.3072027837327744, 'MAE': 1.0359769879313836, 'MI': 0.07343223617106094, 'CORRELATION': 0.020957442976883907}, 'times': {'contamination': 0.0034284591674804688, 'optimization': 41.83157467842102, 'imputation': 13.018044233322144}}}}}}} +dictionary of results : {'eegalcohol': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 0.5197922283008971, 'MAE': 0.4543356516868202, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.00595545768737793, 'optimization': 0, 'imputation': 0.00043392181396484375}}, '0.1': {'scores': {'RMSE': 1.0659202645786816, 'MAE': 0.9085417731383956, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0005295276641845703, 'optimization': 0, 'imputation': 0.0002522468566894531}}, '0.2': {'scores': {'RMSE': 1.1400385999631493, 'MAE': 0.9394950730289477, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0006201267242431641, 'optimization': 0, 'imputation': 0.0008230209350585938}}, '0.4': {'scores': {'RMSE': 1.0333061850175014, 'MAE': 0.817160720129779, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.001007080078125, 'optimization': 0, 'imputation': 0.00022459030151367188}}, '0.6': {'scores': {'RMSE': 1.0938413270459857, 'MAE': 0.8545290213993658, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0009987354278564453, 'optimization': 0, 'imputation': 0.00011801719665527344}}, '0.8': {'scores': {'RMSE': 1.07436956341757, 'MAE': 0.8291370178635111, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.003017902374267578, 'optimization': 0, 'imputation': 0.00011444091796875}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.37483452324301586, 'MAE': 0.3375262694281006, 'MI': 1.0397207708399179, 'CORRELATION': 0.7365655689896633}, 'times': {'contamination': 0.00025963783264160156, 'optimization': 0.7803182601928711, 'imputation': 0.0062770843505859375}}, '0.1': {'scores': {'RMSE': 1.3799678230195285, 'MAE': 1.1003322284844623, 'MI': 1.732867951399863, 'CORRELATION': -0.500100644242659}, 'times': {'contamination': 0.0001938343048095703, 'optimization': 0.7803182601928711, 'imputation': 0.003858327865600586}}, '0.2': {'scores': {'RMSE': 0.5279485898506157, 'MAE': 0.42431581904234256, 'MI': 1.342409426595628, 'CORRELATION': 0.9071070625126642}, 'times': {'contamination': 0.0002734661102294922, 'optimization': 0.7803182601928711, 'imputation': 0.0160219669342041}}, '0.4': {'scores': {'RMSE': 0.6529812630837011, 'MAE': 0.42858056477338186, 'MI': 0.8905639332827393, 'CORRELATION': 0.7722811146383882}, 'times': {'contamination': 0.00046372413635253906, 'optimization': 0.7803182601928711, 'imputation': 0.0058972835540771484}}, '0.6': {'scores': {'RMSE': 0.6798826891423311, 'MAE': 0.47135122101632737, 'MI': 0.6001676421795947, 'CORRELATION': 0.7742382236368857}, 'times': {'contamination': 0.0010447502136230469, 'optimization': 0.7803182601928711, 'imputation': 0.004141092300415039}}, '0.8': {'scores': {'RMSE': 0.7608485588056992, 'MAE': 0.5479154581689161, 'MI': 0.42721564894947844, 'CORRELATION': 0.7017141157422242}, 'times': {'contamination': 0.0019161701202392578, 'optimization': 0.7803182601928711, 'imputation': 0.003822803497314453}}}}, 'stmvl': {'bayesian': {'0.05': {'scores': {'RMSE': 0.3251125774837754, 'MAE': 0.26797673641099284, 'MI': 1.0397207708399179, 'CORRELATION': 0.6142581896031455}, 'times': 
{'contamination': 0.00019931793212890625, 'optimization': 0.2204442024230957, 'imputation': 0.07355213165283203}}, '0.1': {'scores': {'RMSE': 0.299492451492057, 'MAE': 0.26432871720074347, 'MI': 1.9061547465398494, 'CORRELATION': 0.967896575643492}, 'times': {'contamination': 0.00019979476928710938, 'optimization': 0.2204442024230957, 'imputation': 0.04834103584289551}}, '0.2': {'scores': {'RMSE': 0.32852543256899075, 'MAE': 0.27202573018354975, 'MI': 1.5996631161656454, 'CORRELATION': 0.9558373872353643}, 'times': {'contamination': 0.0002765655517578125, 'optimization': 0.2204442024230957, 'imputation': 0.04274296760559082}}, '0.4': {'scores': {'RMSE': 0.4508488005700101, 'MAE': 0.34941433537269606, 'MI': 0.8543113555966528, 'CORRELATION': 0.8959297471926679}, 'times': {'contamination': 0.0004570484161376953, 'optimization': 0.2204442024230957, 'imputation': 0.04534268379211426}}, '0.6': {'scores': {'RMSE': 18.797539991079297, 'MAE': 7.812583796335101, 'MI': 0.36244773022350796, 'CORRELATION': 0.6210142190959098}, 'times': {'contamination': 0.0008726119995117188, 'optimization': 0.2204442024230957, 'imputation': 0.06519889831542969}}, '0.8': {'scores': {'RMSE': 3.1451455567216193, 'MAE': 1.1637520656636082, 'MI': 0.0643204354315137, 'CORRELATION': 0.22737088719870605}, 'times': {'contamination': 0.0024149417877197266, 'optimization': 0.2204442024230957, 'imputation': 0.04286837577819824}}}}, 'iim': {'bayesian': {'0.05': {'scores': {'RMSE': 0.2311363556202525, 'MAE': 0.22809317150257158, 'MI': 0.6931471805599452, 'CORRELATION': 0.8754093900930757}, 'times': {'contamination': 0.00020170211791992188, 'optimization': 1.6157011985778809, 'imputation': 0.026927947998046875}}, '0.1': {'scores': {'RMSE': 0.21734571962767568, 'MAE': 0.20142183555276616, 'MI': 1.4941751382893083, 'CORRELATION': 0.9836625389334559}, 'times': {'contamination': 0.0008146762847900391, 'optimization': 1.6157011985778809, 'imputation': 0.01802802085876465}}, '0.2': {'scores': {'RMSE': 0.2763681623559098, 'MAE': 0.21205899863451294, 'MI': 1.692828654044598, 'CORRELATION': 0.9663556239228223}, 'times': {'contamination': 0.0003628730773925781, 'optimization': 1.6157011985778809, 'imputation': 0.0547788143157959}}, '0.4': {'scores': {'RMSE': 0.32470532661816204, 'MAE': 0.24836184775095202, 'MI': 1.0631520030142667, 'CORRELATION': 0.9435024215665483}, 'times': {'contamination': 0.0004475116729736328, 'optimization': 1.6157011985778809, 'imputation': 0.35022640228271484}}, '0.6': {'scores': {'RMSE': 0.45693859713260937, 'MAE': 0.3350566242376081, 'MI': 0.836724518636222, 'CORRELATION': 0.9015668975756113}, 'times': {'contamination': 0.001636505126953125, 'optimization': 1.6157011985778809, 'imputation': 1.4350597858428955}}, '0.8': {'scores': {'RMSE': 0.7301676007328138, 'MAE': 0.5391664379693699, 'MI': 0.43198783605819785, 'CORRELATION': 0.7329833767632488}, 'times': {'contamination': 0.0025839805603027344, 'optimization': 1.6157011985778809, 'imputation': 2.5229721069335938}}}}, 'mrnn': {'bayesian': {'0.05': {'scores': {'RMSE': 0.7954004036752882, 'MAE': 0.698210953766905, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.00024056434631347656, 'optimization': 55.21971392631531, 'imputation': 16.342230558395386}}, '0.1': {'scores': {'RMSE': 2.6936568895656348, 'MAE': 2.489424521179574, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0004248619079589844, 'optimization': 55.21971392631531, 'imputation': 15.168364524841309}}, '0.2': {'scores': {'RMSE': 1.857852634005059, 'MAE': 1.5224319279942484, 'MI': 
0.397640786553919, 'CORRELATION': -0.21223871496262464}, 'times': {'contamination': 0.0010728836059570312, 'optimization': 55.21971392631531, 'imputation': 16.22831082344055}}, '0.4': {'scores': {'RMSE': 1.0181578738028223, 'MAE': 0.8619744787865197, 'MI': 0.18313742175518144, 'CORRELATION': 0.18834546499652363}, 'times': {'contamination': 0.0010895729064941406, 'optimization': 55.21971392631531, 'imputation': 11.819524765014648}}, '0.6': {'scores': {'RMSE': 1.2300573444617369, 'MAE': 1.0028692814560194, 'MI': 0.1459222830540006, 'CORRELATION': 0.07062587700867294}, 'times': {'contamination': 0.0009198188781738281, 'optimization': 55.21971392631531, 'imputation': 11.623413324356079}}, '0.8': {'scores': {'RMSE': 1.2392952729487146, 'MAE': 0.9582228155928016, 'MI': 0.06542651966529484, 'CORRELATION': 0.015442327855018706}, 'times': {'contamination': 0.0027763843536376953, 'optimization': 55.21971392631531, 'imputation': 17.062708854675293}}}}}}} -| dataset_value | algorithm_value | optimizer_value | scenario_value | x_value | RMSE | MAE | MI | CORRELATION | time_contamination | time_optimization | time_imputation | -| eegalcohol | mcar | mean | bayesian | 0.05 | 0.5197922283008971 | 0.4543356516868202 | 0.0 | 0 | 0.0005006790161132812 sec | 0 sec| 0.0003077983856201172 sec | -| eegalcohol | mcar | mean | bayesian | 0.1 | 1.0659202645786816 | 0.9085417731383956 | 0.0 | 0 | 0.00016045570373535156 sec | 0 sec| 8.916854858398438e-05 sec | -| eegalcohol | mcar | mean | bayesian | 0.2 | 1.1400385999631493 | 0.9394950730289477 | 0.0 | 0 | 0.00021266937255859375 sec | 0 sec| 8.797645568847656e-05 sec | -| eegalcohol | mcar | mean | bayesian | 0.4 | 1.0333061850175014 | 0.817160720129779 | 0.0 | 0 | 0.0005984306335449219 sec | 0 sec| 8.702278137207031e-05 sec | -| eegalcohol | mcar | mean | bayesian | 0.6 | 1.0938413270459857 | 0.8545290213993658 | 0.0 | 0 | 0.0017769336700439453 sec | 0 sec| 0.00021004676818847656 sec | -| eegalcohol | mcar | mean | bayesian | 0.8 | 1.07436956341757 | 0.8291370178635111 | 0.0 | 0 | 0.0017156600952148438 sec | 0 sec| 0.0001709461212158203 sec | -| eegalcohol | mcar | cdrec | bayesian | 0.05 | 0.37483452324301586 | 0.3375262694281006 | 1.0397207708399179 | 0.7365655689896633 | 0.0002193450927734375 sec | 0.38846516609191895 sec| 0.004354953765869141 sec | -| eegalcohol | mcar | cdrec | bayesian | 0.1 | 1.3799678230195285 | 1.1003322284844623 | 1.732867951399863 | -0.500100644242659 | 0.0001366138458251953 sec | 0.38846516609191895 sec| 0.0026137828826904297 sec | -| eegalcohol | mcar | cdrec | bayesian | 0.2 | 0.5279485898506157 | 0.42431581904234256 | 1.342409426595628 | 0.9071070625126642 | 0.00019049644470214844 sec | 0.38846516609191895 sec| 0.011534452438354492 sec | -| eegalcohol | mcar | cdrec | bayesian | 0.4 | 0.6529812630837011 | 0.42858056477338186 | 0.8905639332827393 | 0.7722811146383882 | 0.000324249267578125 sec | 0.38846516609191895 sec| 0.004487276077270508 sec | -| eegalcohol | mcar | cdrec | bayesian | 0.6 | 0.6798826891423311 | 0.47135122101632737 | 0.6001676421795947 | 0.7742382236368857 | 0.0006392002105712891 sec | 0.38846516609191895 sec| 0.007712364196777344 sec | -| eegalcohol | mcar | cdrec | bayesian | 0.8 | 0.7608485588056992 | 0.5479154581689161 | 0.42721564894947844 | 0.7017141157422242 | 0.001476287841796875 sec | 0.38846516609191895 sec| 0.007761716842651367 sec | -| eegalcohol | mcar | stmvl | bayesian | 0.05 | 0.3251125774837754 | 0.26797673641099284 | 1.0397207708399179 | 0.6142581896031455 | 0.0003612041473388672 sec | 
0.20192217826843262 sec| 0.038892269134521484 sec | -| eegalcohol | mcar | stmvl | bayesian | 0.1 | 0.299492451492057 | 0.26432871720074347 | 1.9061547465398494 | 0.967896575643492 | 0.0001418590545654297 sec | 0.20192217826843262 sec| 0.04026174545288086 sec | -| eegalcohol | mcar | stmvl | bayesian | 0.2 | 0.32852543256899075 | 0.27202573018354975 | 1.5996631161656454 | 0.9558373872353643 | 0.00017714500427246094 sec | 0.20192217826843262 sec| 0.036742210388183594 sec | -| eegalcohol | mcar | stmvl | bayesian | 0.4 | 0.4508488005700101 | 0.34941433537269606 | 0.8543113555966528 | 0.8959297471926679 | 0.0003261566162109375 sec | 0.20192217826843262 sec| 0.05383801460266113 sec | -| eegalcohol | mcar | stmvl | bayesian | 0.6 | 18.797539991079297 | 7.812583796335101 | 0.36244773022350796 | 0.6210142190959098 | 0.0006361007690429688 sec | 0.20192217826843262 sec| 0.03060746192932129 sec | -| eegalcohol | mcar | stmvl | bayesian | 0.8 | 3.1451455567216193 | 1.1637520656636082 | 0.0643204354315137 | 0.22737088719870605 | 0.001417398452758789 sec | 0.20192217826843262 sec| 0.03860116004943848 sec | -| eegalcohol | mcar | iim | bayesian | 0.05 | 0.2311363556202525 | 0.22809317150257158 | 0.6931471805599452 | 0.8754093900930757 | 0.0001418590545654297 sec | 0.8676083087921143 sec| 0.008327007293701172 sec | -| eegalcohol | mcar | iim | bayesian | 0.1 | 0.21734571962767568 | 0.20142183555276616 | 1.4941751382893083 | 0.9836625389334559 | 0.0005736351013183594 sec | 0.8676083087921143 sec| 0.017409324645996094 sec | -| eegalcohol | mcar | iim | bayesian | 0.2 | 0.2763681623559098 | 0.21205899863451294 | 1.692828654044598 | 0.9663556239228223 | 0.0006301403045654297 sec | 0.8676083087921143 sec| 0.05528140068054199 sec | -| eegalcohol | mcar | iim | bayesian | 0.4 | 0.32470532661816204 | 0.24836184775095202 | 1.0631520030142667 | 0.9435024215665483 | 0.00033092498779296875 sec | 0.8676083087921143 sec| 0.4662141799926758 sec | -| eegalcohol | mcar | iim | bayesian | 0.6 | 0.45693859713260937 | 0.3350566242376081 | 0.836724518636222 | 0.9015668975756113 | 0.0018763542175292969 sec | 0.8676083087921143 sec| 1.346935749053955 sec | -| eegalcohol | mcar | iim | bayesian | 0.8 | 0.7301676007328138 | 0.5391664379693699 | 0.43198783605819785 | 0.7329833767632488 | 0.0029129981994628906 sec | 0.8676083087921143 sec| 1.884641408920288 sec | -| eegalcohol | mcar | mrnn | bayesian | 0.05 | 1.8888571092045499 | 1.8500318680460206 | 0.0 | 0 | 0.00013375282287597656 sec | 41.83157467842102 sec| 11.03073787689209 sec | -| eegalcohol | mcar | mrnn | bayesian | 0.1 | 1.5313532904397844 | 1.3237393808067868 | 0.0 | 0 | 0.0007200241088867188 sec | 41.83157467842102 sec| 10.905119180679321 sec | -| eegalcohol | mcar | mrnn | bayesian | 0.2 | 1.3755299921307305 | 1.1483280216501393 | 0.397640786553919 | 0.02308952990216339 | 0.0004470348358154297 sec | 41.83157467842102 sec| 11.329344034194946 sec | -| eegalcohol | mcar | mrnn | bayesian | 0.4 | 1.2745248166990786 | 1.0870430599973084 | 0.1572442415087993 | -0.13151214992987914 | 0.0009887218475341797 sec | 41.83157467842102 sec| 14.02935528755188 sec | -| eegalcohol | mcar | mrnn | bayesian | 0.6 | 1.0836713249663261 | 0.8821860011769823 | 0.11389826782453707 | 0.14049086543856532 | 0.0020742416381835938 sec | 41.83157467842102 sec| 13.940861463546753 sec | -| eegalcohol | mcar | mrnn | bayesian | 0.8 | 1.3072027837327744 | 1.0359769879313836 | 0.07343223617106094 | 0.020957442976883907 | 0.0034284591674804688 sec | 41.83157467842102 sec| 13.018044233322144 sec | +| 
dataset_value | algorithm_value | optimizer_value | pattern_value | x_value | RMSE | MAE | MI | CORRELATION | time_contamination | time_optimization | time_imputation | +| eegalcohol | mcar | mean | bayesian | 0.05 | 0.5197922283008971 | 0.4543356516868202 | 0.0 | 0 | 0.00595545768737793 sec | 0 sec| 0.00043392181396484375 sec | +| eegalcohol | mcar | mean | bayesian | 0.1 | 1.0659202645786816 | 0.9085417731383956 | 0.0 | 0 | 0.0005295276641845703 sec | 0 sec| 0.0002522468566894531 sec | +| eegalcohol | mcar | mean | bayesian | 0.2 | 1.1400385999631493 | 0.9394950730289477 | 0.0 | 0 | 0.0006201267242431641 sec | 0 sec| 0.0008230209350585938 sec | +| eegalcohol | mcar | mean | bayesian | 0.4 | 1.0333061850175014 | 0.817160720129779 | 0.0 | 0 | 0.001007080078125 sec | 0 sec| 0.00022459030151367188 sec | +| eegalcohol | mcar | mean | bayesian | 0.6 | 1.0938413270459857 | 0.8545290213993658 | 0.0 | 0 | 0.0009987354278564453 sec | 0 sec| 0.00011801719665527344 sec | +| eegalcohol | mcar | mean | bayesian | 0.8 | 1.07436956341757 | 0.8291370178635111 | 0.0 | 0 | 0.003017902374267578 sec | 0 sec| 0.00011444091796875 sec | +| eegalcohol | mcar | cdrec | bayesian | 0.05 | 0.37483452324301586 | 0.3375262694281006 | 1.0397207708399179 | 0.7365655689896633 | 0.00025963783264160156 sec | 0.7803182601928711 sec| 0.0062770843505859375 sec | +| eegalcohol | mcar | cdrec | bayesian | 0.1 | 1.3799678230195285 | 1.1003322284844623 | 1.732867951399863 | -0.500100644242659 | 0.0001938343048095703 sec | 0.7803182601928711 sec| 0.003858327865600586 sec | +| eegalcohol | mcar | cdrec | bayesian | 0.2 | 0.5279485898506157 | 0.42431581904234256 | 1.342409426595628 | 0.9071070625126642 | 0.0002734661102294922 sec | 0.7803182601928711 sec| 0.0160219669342041 sec | +| eegalcohol | mcar | cdrec | bayesian | 0.4 | 0.6529812630837011 | 0.42858056477338186 | 0.8905639332827393 | 0.7722811146383882 | 0.00046372413635253906 sec | 0.7803182601928711 sec| 0.0058972835540771484 sec | +| eegalcohol | mcar | cdrec | bayesian | 0.6 | 0.6798826891423311 | 0.47135122101632737 | 0.6001676421795947 | 0.7742382236368857 | 0.0010447502136230469 sec | 0.7803182601928711 sec| 0.004141092300415039 sec | +| eegalcohol | mcar | cdrec | bayesian | 0.8 | 0.7608485588056992 | 0.5479154581689161 | 0.42721564894947844 | 0.7017141157422242 | 0.0019161701202392578 sec | 0.7803182601928711 sec| 0.003822803497314453 sec | +| eegalcohol | mcar | stmvl | bayesian | 0.05 | 0.3251125774837754 | 0.26797673641099284 | 1.0397207708399179 | 0.6142581896031455 | 0.00019931793212890625 sec | 0.2204442024230957 sec| 0.07355213165283203 sec | +| eegalcohol | mcar | stmvl | bayesian | 0.1 | 0.299492451492057 | 0.26432871720074347 | 1.9061547465398494 | 0.967896575643492 | 0.00019979476928710938 sec | 0.2204442024230957 sec| 0.04834103584289551 sec | +| eegalcohol | mcar | stmvl | bayesian | 0.2 | 0.32852543256899075 | 0.27202573018354975 | 1.5996631161656454 | 0.9558373872353643 | 0.0002765655517578125 sec | 0.2204442024230957 sec| 0.04274296760559082 sec | +| eegalcohol | mcar | stmvl | bayesian | 0.4 | 0.4508488005700101 | 0.34941433537269606 | 0.8543113555966528 | 0.8959297471926679 | 0.0004570484161376953 sec | 0.2204442024230957 sec| 0.04534268379211426 sec | +| eegalcohol | mcar | stmvl | bayesian | 0.6 | 18.797539991079297 | 7.812583796335101 | 0.36244773022350796 | 0.6210142190959098 | 0.0008726119995117188 sec | 0.2204442024230957 sec| 0.06519889831542969 sec | +| eegalcohol | mcar | stmvl | bayesian | 0.8 | 3.1451455567216193 | 1.1637520656636082 | 
0.0643204354315137 | 0.22737088719870605 | 0.0024149417877197266 sec | 0.2204442024230957 sec| 0.04286837577819824 sec | +| eegalcohol | mcar | iim | bayesian | 0.05 | 0.2311363556202525 | 0.22809317150257158 | 0.6931471805599452 | 0.8754093900930757 | 0.00020170211791992188 sec | 1.6157011985778809 sec| 0.026927947998046875 sec | +| eegalcohol | mcar | iim | bayesian | 0.1 | 0.21734571962767568 | 0.20142183555276616 | 1.4941751382893083 | 0.9836625389334559 | 0.0008146762847900391 sec | 1.6157011985778809 sec| 0.01802802085876465 sec | +| eegalcohol | mcar | iim | bayesian | 0.2 | 0.2763681623559098 | 0.21205899863451294 | 1.692828654044598 | 0.9663556239228223 | 0.0003628730773925781 sec | 1.6157011985778809 sec| 0.0547788143157959 sec | +| eegalcohol | mcar | iim | bayesian | 0.4 | 0.32470532661816204 | 0.24836184775095202 | 1.0631520030142667 | 0.9435024215665483 | 0.0004475116729736328 sec | 1.6157011985778809 sec| 0.35022640228271484 sec | +| eegalcohol | mcar | iim | bayesian | 0.6 | 0.45693859713260937 | 0.3350566242376081 | 0.836724518636222 | 0.9015668975756113 | 0.001636505126953125 sec | 1.6157011985778809 sec| 1.4350597858428955 sec | +| eegalcohol | mcar | iim | bayesian | 0.8 | 0.7301676007328138 | 0.5391664379693699 | 0.43198783605819785 | 0.7329833767632488 | 0.0025839805603027344 sec | 1.6157011985778809 sec| 2.5229721069335938 sec | +| eegalcohol | mcar | mrnn | bayesian | 0.05 | 0.7954004036752882 | 0.698210953766905 | 0.0 | 0 | 0.00024056434631347656 sec | 55.21971392631531 sec| 16.342230558395386 sec | +| eegalcohol | mcar | mrnn | bayesian | 0.1 | 2.6936568895656348 | 2.489424521179574 | 0.0 | 0 | 0.0004248619079589844 sec | 55.21971392631531 sec| 15.168364524841309 sec | +| eegalcohol | mcar | mrnn | bayesian | 0.2 | 1.857852634005059 | 1.5224319279942484 | 0.397640786553919 | -0.21223871496262464 | 0.0010728836059570312 sec | 55.21971392631531 sec| 16.22831082344055 sec | +| eegalcohol | mcar | mrnn | bayesian | 0.4 | 1.0181578738028223 | 0.8619744787865197 | 0.18313742175518144 | 0.18834546499652363 | 0.0010895729064941406 sec | 55.21971392631531 sec| 11.819524765014648 sec | +| eegalcohol | mcar | mrnn | bayesian | 0.6 | 1.2300573444617369 | 1.0028692814560194 | 0.1459222830540006 | 0.07062587700867294 | 0.0009198188781738281 sec | 55.21971392631531 sec| 11.623413324356079 sec | +| eegalcohol | mcar | mrnn | bayesian | 0.8 | 1.2392952729487146 | 0.9582228155928016 | 0.06542651966529484 | 0.015442327855018706 | 0.0027763843536376953 sec | 55.21971392631531 sec| 17.062708854675293 sec | diff --git a/tests/test_benchmarking.py b/tests/test_benchmarking.py index 565af3a..9225256 100644 --- a/tests/test_benchmarking.py +++ b/tests/test_benchmarking.py @@ -1,5 +1,5 @@ import unittest -from imputegap.recovery.benchmarking import Benchmarking +from imputegap.recovery.benchmark import Benchmark class TestBenchmarking(unittest.TestCase): @@ -9,16 +9,16 @@ def test_benchmarking(self): """ expected_datasets = ["eeg-alcohol"] - opti_bayesian = {"optimizer": "bayesian", "options": {"n_calls": 2, "n_random_starts": 50, "acq_func": "gp_hedge", "selected_metrics": "RMSE"}} + opti_bayesian = {"optimizer": "bayesian", "options": {"n_calls": 2, "n_random_starts": 50, "acq_func": "gp_hedge", "metrics": "RMSE"}} optimizers = [opti_bayesian] algorithms_full = ["mean", "cdrec", "stmvl", "iim", "mrnn"] - scenarios_small = ["mcar"] + patterns_small = ["mcar"] x_axis = [0.05, 0.1, 0.2, 0.4, 0.6, 0.8] - results_benchmarking = 
Benchmarking().comprehensive_evaluation(datasets=expected_datasets, optimizers=optimizers, algorithms=algorithms_full, scenarios=scenarios_small, x_axis=x_axis, already_optimized=False, reports=-1) + results_benchmarking = Benchmark().eval(datasets=expected_datasets, optimizers=optimizers, algorithms=algorithms_full, patterns=patterns_small, x_axis=x_axis, already_optimized=False, runs=-1) expected_datasets = ["eegalcohol"] @@ -34,7 +34,7 @@ def test_benchmarking(self): if not dataset_data: # If dataset is empty, skip validation continue - # Check that scenarios exist (e.g., 'mcar') + # Check that patterns exist (e.g., 'mcar') self.assertIn( "mcar", dataset_data, f"Dataset '{dataset}' is missing 'mcar' scenario." @@ -156,7 +156,7 @@ def test_benchmarking_matrix(self): "iim": {"bayesian": { "0.05": {"scores": {"RMSE": -100, "MAE": 1, "MI": 2, "CORRELATION": 3}, "times": {"contamination": 4, "optimization": 5, "imputation": 6}}, "0.2": {"scores": {"RMSE": 1, "MAE": 2, "MI": 3, "CORRELATION": 4}, "times": {"contamination": 5, "optimization": 6, "imputation": 7}}, "0.4": {"scores": {"RMSE": 0.5, "MAE": 1.5, "MI": 2.5, "CORRELATION": 3.5}, "times": {"contamination": 4.5, "optimization": 5.5, "imputation": 6.5}}, "0.6": {"scores": {"RMSE": 0.5, "MAE": 1.5, "MI": 2.5, "CORRELATION": 3.5}, "times": {"contamination": 4.5, "optimization": 5.5, "imputation": 6.5}}, "0.8": {"scores": {"RMSE": 0.5, "MAE": 1.5, "MI": 2.5, "CORRELATION": 3.5}, "times": {"contamination": 4.5, "optimization": 5.5, "imputation": 6.5}}}}, }}} - scores_list, algos, sets = Benchmarking().avg_results(alpha_1, alpha_2, beta_1, beta_2, delta_1, delta_2, epsilon_1, epsilon_2, gamma_1, gamma_2) + scores_list, algos, sets = Benchmark().avg_results(alpha_1, alpha_2, beta_1, beta_2, delta_1, delta_2, epsilon_1, epsilon_2, gamma_1, gamma_2) print(scores_list) @@ -195,6 +195,6 @@ def test_benchmarking_matrix(self): f"Unexpected RMSE for algorithm '{algo}' at dataset index {i}." 
) - validation = Benchmarking().generate_matrix(scores_list, algos, sets, "./reports", False) + validation = Benchmark().generate_heatmap(scores_list, algos, sets, "./reports", False) self.assertTrue(validation) diff --git a/tests/test_contamination_blackout.py b/tests/test_contamination_blackout.py index 7a1602f..6125a29 100644 --- a/tests/test_contamination_blackout.py +++ b/tests/test_contamination_blackout.py @@ -14,15 +14,15 @@ def test_blackout_selection(self): ts_1.load_timeseries(utils.search_path("test")) missing_rates = [0.4, 1] - protection = 0.1 + offset = 0.1 M, N = ts_1.data.shape for missing_rate in missing_rates: - ts_contaminate = ts_1.Contaminate.blackout(ts=ts_1.data, missing_rate=missing_rate, protection=protection) + ts_contaminate = ts_1.Contamination.blackout(input_data=ts_1.data, missing_rate=missing_rate, offset=offset) n_nan = np.isnan(ts_contaminate).sum() expected_nan_series = M - expected_nan_values = int((N - int(N * protection)) * missing_rate) + expected_nan_values = int((N - int(N * offset)) * missing_rate) expected = expected_nan_series * expected_nan_values self.assertEqual(n_nan, expected, f"Expected {expected} contaminated series but found {n_nan}") @@ -39,7 +39,7 @@ def test_blackout_position(self): for missing_rate in missing_rates: - ts_contaminate = ts_1.Contaminate.blackout(ts=ts_1.data, missing_rate=missing_rate, protection=0.1) + ts_contaminate = ts_1.Contamination.blackout(input_data=ts_1.data, missing_rate=missing_rate, offset=0.1) if np.isnan(ts_contaminate[:, :ten_percent_index]).any(): check_position = False diff --git a/tests/test_contamination_mcar.py b/tests/test_contamination_mcar.py index e9fbd17..929ac48 100644 --- a/tests/test_contamination_mcar.py +++ b/tests/test_contamination_mcar.py @@ -16,35 +16,33 @@ def test_mcar_selection(self): series_impacted = [0.4] missing_rates = [40] - seeds_start, seeds_end = 42, 43 series_check = ["8", "1", "5", "0"] - protection = 0.1 + offset = 0.1 block_size = 2 - for seed_value in range(seeds_start, seeds_end): - for series_sel in series_impacted: - for missing_rate in missing_rates: - ts_contaminate = ts_1.Contaminate.mcar(ts=ts_1.data, - series_impacted=series_sel, - missing_rate=missing_rate, block_size=block_size, - protection=protection, use_seed=True, - seed=seed_value) + for series_sel in series_impacted: + for missing_rate in missing_rates: - check_nan_series = False + ts_contaminate = ts_1.Contamination.mcar(input_data=ts_1.data, + series_rate=series_sel, + missing_rate=missing_rate, block_size=block_size, + offset=offset, seed=True) - for series, data in enumerate(ts_contaminate): - if str(series) in series_check: - if np.isnan(data).any(): - check_nan_series = True + check_nan_series = False + + for series, data in enumerate(ts_contaminate): + if str(series) in series_check: + if np.isnan(data).any(): + check_nan_series = True + else: + if np.isnan(data).any(): + check_nan_series = False + break else: - if np.isnan(data).any(): - check_nan_series = False - break - else: - check_nan_series = True + check_nan_series = True - self.assertTrue(check_nan_series, True) + self.assertTrue(check_nan_series, True) def test_mcar_position(self): """ @@ -56,24 +54,22 @@ def test_mcar_position(self): series_impacted = [0.4, 1] missing_rates = [0.1, 0.4, 0.6] ten_percent_index = int(ts_1.data.shape[1] * 0.1) - seeds_start, seeds_end = 42, 43 - for seed_value in range(seeds_start, seeds_end): - for series_sel in series_impacted: - for missing_rate in missing_rates: + for series_sel in series_impacted: + 
for missing_rate in missing_rates: - ts_contaminate = ts_1.Contaminate.mcar(ts=ts_1.data, - series_impacted=series_sel, - missing_rate=missing_rate, - block_size=2, protection=0.1, - use_seed=True, seed=seed_value) + ts_contaminate = ts_1.Contamination.mcar(input_data=ts_1.data, + series_rate=series_sel, + missing_rate=missing_rate, + block_size=2, offset=0.1, + seed=True) - if np.isnan(ts_contaminate[:, :ten_percent_index]).any(): - check_position = False - else: - check_position = True + if np.isnan(ts_contaminate[:, :ten_percent_index]).any(): + check_position = False + else: + check_position = True - self.assertTrue(check_position, True) + self.assertTrue(check_position, True) def test_mcar_selection_datasets(self): """ @@ -82,37 +78,36 @@ def test_mcar_selection_datasets(self): datasets = ["bafu", "chlorine", "climate", "drift", "meteo"] series_impacted = [0.4, 1] missing_rates = [0.2, 0.6] - seeds_start, seeds_end = 42, 43 - protection = 0.1 + offset = 0.1 block_size = 10 for dataset in datasets: ts_1 = TimeSeries() ts_1.load_timeseries(utils.search_path(dataset)) - for seed_value in range(seeds_start, seeds_end): - for series_sel in series_impacted: - for missing_rate in missing_rates: - ts_contaminate = ts_1.Contaminate.mcar(ts=ts_1.data, - missing_rate=missing_rate, - series_impacted=series_sel, - block_size=block_size, protection=protection, - use_seed=True, seed=seed_value) - - # 1) Check if the number of NaN values is correct - M, N = ts_contaminate.shape - P = int(N * protection) - W = int((N - P) * missing_rate) - expected_contaminated_series = int(np.ceil(M * series_sel)) - B = int(W / block_size) - total_expected = (B * block_size) * expected_contaminated_series - total_nan = np.isnan(ts_contaminate).sum() - - self.assertEqual(total_nan, total_expected) - - # 2) Check if the correct percentage of series are contaminated - contaminated_series = np.isnan(ts_contaminate).any(axis=1).sum() - self.assertEqual(contaminated_series, expected_contaminated_series, f"Expected {expected_contaminated_series} contaminated series but found {contaminated_series}") + + for series_sel in series_impacted: + for missing_rate in missing_rates: + ts_contaminate = ts_1.Contamination.mcar(input_data=ts_1.data, + missing_rate=missing_rate, + series_rate=series_sel, + block_size=block_size, offset=offset, + seed=True) + + # 1) Check if the number of NaN values is correct + M, N = ts_contaminate.shape + P = int(N * offset) + W = int((N - P) * missing_rate) + expected_contaminated_series = int(np.ceil(M * series_sel)) + B = int(W / block_size) + total_expected = (B * block_size) * expected_contaminated_series + total_nan = np.isnan(ts_contaminate).sum() + + self.assertEqual(total_nan, total_expected) + + # 2) Check if the correct percentage of series are contaminated + contaminated_series = np.isnan(ts_contaminate).any(axis=1).sum() + self.assertEqual(contaminated_series, expected_contaminated_series, f"Expected {expected_contaminated_series} contaminated series but found {contaminated_series}") def test_mcar_position_datasets(self): """ @@ -121,8 +116,7 @@ def test_mcar_position_datasets(self): datasets = ["bafu", "chlorine", "climate", "drift", "meteo"] series_impacted = [0.4, 1] missing_rates = [0.2, 0.6] - seeds_start, seeds_end = 42, 43 - protection = 0.1 + offset = 0.1 block_size = 10 for dataset in datasets: @@ -130,22 +124,21 @@ def test_mcar_position_datasets(self): ts_1.load_timeseries(utils.search_path(dataset)) ten_percent_index = int(ts_1.data.shape[1] * 0.1) - for seed_value in 
range(seeds_start, seeds_end): - for series_sel in series_impacted: - for missing_rate in missing_rates: + for series_sel in series_impacted: + for missing_rate in missing_rates: - ts_contaminate = ts_1.Contaminate.mcar(ts=ts_1.data, - series_impacted=series_sel, - missing_rate=missing_rate, - block_size=block_size, protection=protection, - use_seed=True, seed=seed_value) + ts_contaminate = ts_1.Contamination.mcar(input_data=ts_1.data, + series_rate=series_sel, + missing_rate=missing_rate, + block_size=block_size, offset=offset, + seed=True) - if np.isnan(ts_contaminate[:, :ten_percent_index]).any(): - check_position = False - else: - check_position = True + if np.isnan(ts_contaminate[:, :ten_percent_index]).any(): + check_position = False + else: + check_position = True - self.assertTrue(check_position, True) + self.assertTrue(check_position, True) def test_contaminate_plot(self): """ @@ -155,11 +148,11 @@ def test_contaminate_plot(self): ts_1.load_timeseries(utils.search_path("chlorine")) ts_2 = TimeSeries() - ts_2.import_matrix(ts_1.Contaminate.mcar(ts=ts_1.data, series_impacted=0.4, missing_rate=0.1, - block_size=10, protection=0.1, use_seed=True, seed=42)) + ts_2.import_matrix(ts_1.Contamination.mcar(input_data=ts_1.data, series_rate=0.4, missing_rate=0.1, + block_size=10, offset=0.1, seed=True)) ts_1.print() - filepath = ts_1.plot(raw_data=ts_1.data, infected_data=ts_2.data, max_series=10, max_values=100, save_path="./assets/", display=False) + filepath = ts_1.plot(input_data=ts_1.data, incomp_data=ts_2.data, max_series=10, max_values=100, save_path="./assets/", display=False) self.assertTrue(os.path.exists(filepath)) def test_mcar_size_of_block(self): @@ -169,46 +162,44 @@ def test_mcar_size_of_block(self): datasets = ["drift", "chlorine", "eeg-reading", "eeg-alcohol", "fmri-objectviewing", "fmri-stoptask"] series_impacted = [0.4, 1] missing_rates = [0.2, 0.6] - seeds_start, seeds_end = 42, 43 - protection = 0.1 + offset = 0.1 block_size = 10 for dataset in datasets: ts_1 = TimeSeries() ts_1.load_timeseries(utils.search_path(dataset)) - for seed_value in range(seeds_start, seeds_end): - for series_sel in series_impacted: - for missing_rate in missing_rates: - ts_contaminate = ts_1.Contaminate.mcar(ts=ts_1.data, - missing_rate=missing_rate, - series_impacted=series_sel, - block_size=block_size, protection=protection, - use_seed=True, seed=seed_value) - - for i, series in enumerate(ts_contaminate): - nan_blocks = [] - block_indices = [] - current_block_size = 0 - series_size = len(series) - lower_bound = int(protection * series_size) + block_size - upper_bound = series_size - lower_bound - block_size - protected_indices = set(range(0, lower_bound)) | set(range(upper_bound, series_size)) - - # Find NaN blocks and their indices - for index, value in enumerate(series): - if np.isnan(value): - current_block_size += 1 - block_indices.append(index) - else: - if current_block_size > 0: - if not any(i in protected_indices for i in block_indices): - nan_blocks.append(current_block_size) - current_block_size = 0 - block_indices = [] - - for block in nan_blocks: - assert block >= block_size, ( - f"Dataset: {dataset}, Series: {i}, Seed: {seed_value}, " - f"Block size {block} found, expected at least {block_size}." 
- ) \ No newline at end of file + for series_sel in series_impacted: + for missing_rate in missing_rates: + ts_contaminate = ts_1.Contamination.mcar(input_data=ts_1.data, + missing_rate=missing_rate, + series_rate=series_sel, + block_size=block_size, offset=offset, + seed=True) + + for i, series in enumerate(ts_contaminate): + nan_blocks = [] + block_indices = [] + current_block_size = 0 + series_size = len(series) + lower_bound = int(offset * series_size) + block_size + upper_bound = series_size - lower_bound - block_size + protected_indices = set(range(0, lower_bound)) | set(range(upper_bound, series_size)) + + # Find NaN blocks and their indices + for index, value in enumerate(series): + if np.isnan(value): + current_block_size += 1 + block_indices.append(index) + else: + if current_block_size > 0: + if not any(i in protected_indices for i in block_indices): + nan_blocks.append(current_block_size) + current_block_size = 0 + block_indices = [] + + for block in nan_blocks: + assert block >= block_size, ( + f"Dataset: {dataset}, Series: {i}, " + f"Block size {block} found, expected at least {block_size}." + ) \ No newline at end of file diff --git a/tests/test_contamination_mp.py b/tests/test_contamination_mp.py index 2dee4f2..c8c39ce 100644 --- a/tests/test_contamination_mp.py +++ b/tests/test_contamination_mp.py @@ -15,7 +15,7 @@ def test_mp_selection(self): datasets = ["drift", "chlorine", "eeg-alcohol", "fmri-objectviewing", "fmri-stoptask"] series_impacted = [0.1, 0.5, 1] # percentage of series impacted missing_rates = [0.1, 0.5, 1] # percentage of missing values with NaN - P = 0.1 # protection zone + P = 0.1 # offset zone for dataset in datasets: ts = TimeSeries() @@ -24,9 +24,9 @@ def test_mp_selection(self): for S in series_impacted: for R in missing_rates: - contamination = ts.Contaminate.missing_percentage(ts=ts.data, series_impacted=S, missing_rate=R, protection=P) + incomp_data = ts.Contamination.missing_percentage(input_data=ts.data, series_rate=S, missing_rate=R, offset=P) - n_nan = np.isnan(contamination).sum() + n_nan = np.isnan(incomp_data).sum() expected_nan_series = math.ceil(S * M) expected_nan_values = int((N - int(N * P)) * R) expected_nan = expected_nan_series * expected_nan_values @@ -47,9 +47,9 @@ def test_mp_position(self): for series_sel in series_impacted: for missing_rate in missing_rates: - ts_contaminate = ts_1.Contaminate.missing_percentage(ts=ts_1.data, - series_impacted=series_sel, - missing_rate=missing_rate, protection=0.1) + ts_contaminate = ts_1.Contamination.missing_percentage(input_data=ts_1.data, + series_rate=series_sel, + missing_rate=missing_rate, offset=0.1) if np.isnan(ts_contaminate[:, :ten_percent_index]).any(): check_position = False diff --git a/tests/test_exception.py b/tests/test_exception.py index 5fdbf4b..c5e03fe 100644 --- a/tests/test_exception.py +++ b/tests/test_exception.py @@ -16,15 +16,15 @@ def test_algorithm_exc(self): """ algorithm = "invalid_algo" with pytest.raises(ValueError, match=f"Invalid algorithm: {algorithm}"): - Imputation.evaluate_params(ground_truth=None, contamination=None, configuration=tuple(), algorithm=algorithm) + Imputation.evaluate_params(input_data=None, incomp_data=None, configuration=tuple(), algorithm=algorithm) def test_data_exc(self): """ - The goal is to test the exception raised when ground_truth (raw_data) is None + The goal is to test the exception raised when input_data (raw_data) is None """ - raw_data = None # Simulate a scenario where raw_data is None - with
pytest.raises(ValueError, match=f"Need ground_truth to be able to adapt the hyper-parameters: {raw_data}"): - _ = Imputation.MatrixCompletion.CDRec(None).impute(user_defined=False, params={"ground_truth":raw_data, "optimizer": "bayesian", "options":{"n_calls": 2}}) + input_data = None # Simulate a scenario where raw_data is None + with pytest.raises(ValueError, match=f"Need input_data to be able to adapt the hyper-parameters: {input_data}"): + _ = Imputation.MatrixCompletion.CDRec(None).impute(user_def=False, params={"input_data":input_data, "optimizer": "bayesian", "options":{"n_calls": 2}}) def test_import_exc(self): @@ -49,7 +49,7 @@ def test_mcar_exc(self): with pytest.raises(ValueError, match="The number of block to remove must be greater than 0. " "The dataset or the number of blocks may not be appropriate."): # Call the function or method that raises the ValueError - ts_01.Contaminate.mcar(ts=np.array([[1, 1, 1, 1, 1], [1, 1, 1, 1, 1]]), block_size=5) + ts_01.Contamination.mcar(input_data=np.array([[1, 1, 1, 1, 1], [1, 1, 1, 1, 1]]), block_size=5) def test_percentage_exc(self): """ @@ -59,7 +59,7 @@ def test_percentage_exc(self): percentage = 120 with pytest.raises(ValueError, match=f"The percentage is out of the acceptable range."): - ts_01.Contaminate.mcar(ts=np.array([[1, 1, 1, 1, 1], [1, 1, 1, 1, 1]]), missing_rate=percentage) + ts_01.Contamination.mcar(input_data=np.array([[1, 1, 1, 1, 1], [1, 1, 1, 1, 1]]), missing_rate=percentage) def test_load_exc(self): diff --git a/tests/test_explainer.py b/tests/test_explainer.py index 4af7072..cfdc041 100644 --- a/tests/test_explainer.py +++ b/tests/test_explainer.py @@ -24,7 +24,7 @@ def test_explainer_shap(self): ts_1 = TimeSeries() ts_1.load_timeseries(utils.search_path(filename)) - shap_values, shap_details = Explainer.shap_explainer(raw_data=ts_1.data, file_name=filename, use_seed=True, seed=42, verbose=True) + shap_values, shap_details = Explainer.shap_explainer(input_data=ts_1.data, file_name=filename, seed=True, verbose=True) self.assertTrue(shap_values is not None) self.assertTrue(shap_details is not None) diff --git a/tests/test_imputation.py b/tests/test_imputation.py index 9894c68..50fa6dc 100644 --- a/tests/test_imputation.py +++ b/tests/test_imputation.py @@ -13,17 +13,17 @@ def test_imputation_min(self): ts_1 = TimeSeries() ts_1.import_matrix(np.array([[1, 2, 1], [4, 2, 6]])) - contamination = np.array([[1, 2, np.nan], [4, np.nan, 6]]) + incomp_data = np.array([[1, 2, np.nan], [4, np.nan, 6]]) - algo = Imputation.Statistics.MinImpute(contamination) + algo = Imputation.Statistics.MinImpute(incomp_data) algo.impute() algo.score(ts_1.data) result = np.array([[1, 2, 1], [4, 1, 6]]) - imputation, _ = algo.imputed_matrix, algo.metrics + recov_data, _ = algo.recov_data, algo.metrics - assert np.all(np.isclose(imputation, result)), f"imputation: expected {result}, got {imputation}" + assert np.all(np.isclose(recov_data, result)), f"imputation: expected {result}, got {recov_data}" def test_imputation_zero(self): """ @@ -32,17 +32,17 @@ def test_imputation_zero(self): ts_1 = TimeSeries() ts_1.import_matrix(np.array([[1, 2, 1], [4, 2, 6]])) - contamination = np.array([[1, 2, np.nan], [4, np.nan, 6]]) + incomp_data = np.array([[1, 2, np.nan], [4, np.nan, 6]]) - algo = Imputation.Statistics.ZeroImpute(contamination) + algo = Imputation.Statistics.ZeroImpute(incomp_data) algo.impute() algo.score(ts_1.data) result = np.array([[1, 2, 0], [4, 0, 6]]) - imputation, _ = algo.imputed_matrix, algo.metrics + recov_data, _ = algo.recov_data, 
algo.metrics - assert np.all(np.isclose(imputation, result)), f"imputation: expected {result}, got {imputation}" + assert np.all(np.isclose(recov_data, result)), f"imputation: expected {result}, got {recov_data}" def test_imputation_mean(self): """ @@ -51,14 +51,14 @@ def test_imputation_mean(self): ts_1 = TimeSeries() ts_1.import_matrix(np.array([[4, 2, 1], [4, 2, 6]])) - contamination = np.array([[4, 2, np.nan], [4, np.nan, 6]]) + incomp_data = np.array([[4, 2, np.nan], [4, np.nan, 6]]) - algo = Imputation.Statistics.MeanImpute(contamination) + algo = Imputation.Statistics.MeanImpute(incomp_data) algo.impute() algo.score(ts_1.data) result = np.array([[4, 2, 4], [4, 4, 6]]) - imputation, _ = algo.imputed_matrix, algo.metrics + recov_data, _ = algo.recov_data, algo.metrics - assert np.all(np.isclose(imputation, result)), f"imputation: expected {result}, got {imputation}" + assert np.all(np.isclose(recov_data, result)), f"imputation: expected {result}, got {recov_data}" diff --git a/tests/test_imputation_cdrec.py b/tests/test_imputation_cdrec.py index 100055f..6cc4fbd 100644 --- a/tests/test_imputation_cdrec.py +++ b/tests/test_imputation_cdrec.py @@ -13,13 +13,13 @@ def test_imputation_cdrec(self): ts_1 = TimeSeries() ts_1.load_timeseries(utils.search_path("test")) - infected_matrix = ts_1.Contaminate.mcar(ts=ts_1.data, series_impacted=0.4, missing_rate=0.4, block_size=2, protection=0.1, use_seed=True, seed=42) + incomp_data = ts_1.Contamination.mcar(input_data=ts_1.data, series_rate=0.4, missing_rate=0.4, block_size=2, offset=0.1, seed=True) - algo = Imputation.MatrixCompletion.CDRec(infected_matrix) + algo = Imputation.MatrixCompletion.CDRec(incomp_data) algo.impute() algo.score(ts_1.data) - _, metrics = algo.imputed_matrix, algo.metrics + _, metrics = algo.recov_data, algo.metrics expected_metrics = { "RMSE": 0.5993259196563864, @@ -42,13 +42,13 @@ def test_imputation_cdrec_chlorine(self): ts_1 = TimeSeries() ts_1.load_timeseries(utils.search_path("chlorine"), max_values=200) - infected_matrix = ts_1.Contaminate.mcar(ts=ts_1.data, series_impacted=0.4, missing_rate=0.4, block_size=10, protection=0.1, use_seed=True, seed=42) + incomp_data = ts_1.Contamination.mcar(input_data=ts_1.data, series_rate=0.4, missing_rate=0.4, block_size=10, offset=0.1, seed=True) - algo = Imputation.MatrixCompletion.CDRec(infected_matrix) + algo = Imputation.MatrixCompletion.CDRec(incomp_data) algo.impute() algo.score(ts_1.data) - _, metrics = algo.imputed_matrix, algo.metrics + _, metrics = algo.recov_data, algo.metrics expected_metrics = { "RMSE": 0.10329523970909142, diff --git a/tests/test_imputation_iim.py b/tests/test_imputation_iim.py index 2529627..b8979a2 100644 --- a/tests/test_imputation_iim.py +++ b/tests/test_imputation_iim.py @@ -15,14 +15,14 @@ def test_imputation_iim_chlorine(self): ts_1 = TimeSeries() ts_1.load_timeseries(utils.search_path("chlorine"), max_values=200) - infected_matrix = ts_1.Contaminate.mcar(ts=ts_1.data, series_impacted=0.4, missing_rate=0.4, block_size=10, - protection=0.1, use_seed=True, seed=42) + incomp_data = ts_1.Contamination.mcar(input_data=ts_1.data, series_rate=0.4, missing_rate=0.4, block_size=10, + offset=0.1, seed=True) - algo = Imputation.Statistics.IIM(infected_matrix) + algo = Imputation.Statistics.IIM(incomp_data) algo.impute() algo.score(ts_1.data) - _, metrics = algo.imputed_matrix, algo.metrics + _, metrics = algo.recov_data, algo.metrics expected_metrics = { "RMSE": 0.18572496326764323, diff --git a/tests/test_imputation_mrnn.py 
b/tests/test_imputation_mrnn.py index 3d01fa9..6513ff4 100644 --- a/tests/test_imputation_mrnn.py +++ b/tests/test_imputation_mrnn.py @@ -13,14 +13,14 @@ def test_imputation_mrnn_chlorine(self): ts_1 = TimeSeries() ts_1.load_timeseries(utils.search_path("chlorine"), max_values=200) - infected_matrix = ts_1.Contaminate.mcar(ts=ts_1.data, series_impacted=0.4, missing_rate=0.4, block_size=10, - protection=0.1, use_seed=True, seed=42) + incomp_data = ts_1.Contamination.mcar(input_data=ts_1.data, series_rate=0.4, missing_rate=0.4, block_size=10, + offset=0.1, seed=True) - algo = Imputation.DeepLearning.MRNN(infected_matrix) + algo = Imputation.DeepLearning.MRNN(incomp_data) algo.impute() algo.score(ts_1.data) - _, metrics = algo.imputed_matrix, algo.metrics + _, metrics = algo.recov_data, algo.metrics expected_metrics = { "RMSE": 0.24304439492433505, diff --git a/tests/test_imputation_stmvl.py b/tests/test_imputation_stmvl.py index dd2779d..4af2366 100644 --- a/tests/test_imputation_stmvl.py +++ b/tests/test_imputation_stmvl.py @@ -12,13 +12,13 @@ def test_imputation_stmvl_chlorine(self): ts_1 = TimeSeries() ts_1.load_timeseries(utils.search_path("chlorine"), max_values=200) - infected_matrix = ts_1.Contaminate.mcar(ts=ts_1.data, series_impacted=0.4, missing_rate=0.4, block_size=10, - protection=0.1, use_seed=True, seed=42) + incomp_data = ts_1.Contamination.mcar(input_data=ts_1.data, series_rate=0.4, missing_rate=0.4, block_size=10, + offset=0.1, seed=True) - algo = Imputation.PatternSearch.STMVL(infected_matrix) + algo = Imputation.PatternSearch.STMVL(incomp_data) algo.impute() algo.score(ts_1.data) - _, metrics = algo.imputed_matrix, algo.metrics + _, metrics = algo.recov_data, algo.metrics expected_metrics = { "RMSE": 0.05795429338869703, diff --git a/tests/test_loading.py b/tests/test_loading.py index cc2deaf..edaf9c3 100644 --- a/tests/test_loading.py +++ b/tests/test_loading.py @@ -38,7 +38,7 @@ def test_loading_plot(self): ts_1.load_timeseries(utils.search_path("test")) to_save = "./assets" - file_path = ts_1.plot(raw_data=ts_1.data, title="test", max_series=5, max_values=100, size=(16, 8), save_path=to_save, display=False) + file_path = ts_1.plot(input_data=ts_1.data, max_series=5, max_values=100, size=(16, 8), save_path=to_save, display=False) self.assertTrue(os.path.exists(file_path)) diff --git a/tests/test_naterq/run_0/eegalcohol_mcar_CORRELATION.jpg b/tests/test_naterq/run_0/eegalcohol_mcar_CORRELATION.jpg new file mode 100644 index 0000000..6508c92 Binary files /dev/null and b/tests/test_naterq/run_0/eegalcohol_mcar_CORRELATION.jpg differ diff --git a/tests/test_naterq/run_0/eegalcohol_mcar_MAE.jpg b/tests/test_naterq/run_0/eegalcohol_mcar_MAE.jpg new file mode 100644 index 0000000..7b03c0c Binary files /dev/null and b/tests/test_naterq/run_0/eegalcohol_mcar_MAE.jpg differ diff --git a/tests/test_naterq/run_0/eegalcohol_mcar_MI.jpg b/tests/test_naterq/run_0/eegalcohol_mcar_MI.jpg new file mode 100644 index 0000000..186f6d6 Binary files /dev/null and b/tests/test_naterq/run_0/eegalcohol_mcar_MI.jpg differ diff --git a/tests/test_naterq/run_0/eegalcohol_mcar_RMSE.jpg b/tests/test_naterq/run_0/eegalcohol_mcar_RMSE.jpg new file mode 100644 index 0000000..3c015c6 Binary files /dev/null and b/tests/test_naterq/run_0/eegalcohol_mcar_RMSE.jpg differ diff --git a/tests/test_naterq/run_0/eegalcohol_mcar_imputation_time.jpg b/tests/test_naterq/run_0/eegalcohol_mcar_imputation_time.jpg new file mode 100644 index 0000000..9d857c4 Binary files /dev/null and 
b/tests/test_naterq/run_0/eegalcohol_mcar_imputation_time.jpg differ diff --git a/tests/test_naterq/run_0/eegalcohol_mcar_log_imputation.jpg b/tests/test_naterq/run_0/eegalcohol_mcar_log_imputation.jpg new file mode 100644 index 0000000..423f26b Binary files /dev/null and b/tests/test_naterq/run_0/eegalcohol_mcar_log_imputation.jpg differ diff --git a/tests/test_naterq/run_0/eegalcohol_mcar_metrics_subplot.jpg b/tests/test_naterq/run_0/eegalcohol_mcar_metrics_subplot.jpg new file mode 100644 index 0000000..c5db5d7 Binary files /dev/null and b/tests/test_naterq/run_0/eegalcohol_mcar_metrics_subplot.jpg differ diff --git a/tests/test_naterq/run_0/report_eeg-alcohol.txt b/tests/test_naterq/run_0/report_eeg-alcohol.txt new file mode 100644 index 0000000..98a81eb --- /dev/null +++ b/tests/test_naterq/run_0/report_eeg-alcohol.txt @@ -0,0 +1,83 @@ +Report for Dataset: eeg-alcohol +Generated on: 2025-01-07 16:56:09 +Run number: 0 +======================================================================================================================== + +RMSE: Root Mean Square Error - Measures the average magnitude of error. + ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| Dataset | Algorithm | Optimizer | Pattern | X Value | RMSE | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| eegalcohol | mcar | mean | bayesian | 0.05 | 1.1073947986 | +| eegalcohol | mcar | mean | bayesian | 0.1 | 0.8569349077 | +| eegalcohol | mcar | mean | bayesian | 0.2 | 0.9609255265 | +| eegalcohol | mcar | mean | bayesian | 0.4 | 1.0184989121 | +| eegalcohol | mcar | mean | bayesian | 0.6 | 0.9997401940 | +| eegalcohol | mcar | mean | bayesian | 0.8 | 0.9895691678 | +| eegalcohol | mcar | cdrec | bayesian | 0.05 | 0.2765860051 | +| eegalcohol | mcar | cdrec | bayesian | 0.1 | 0.2322153312 | +| eegalcohol | mcar | cdrec | bayesian | 0.2 | 0.2179628330 | +| eegalcohol | mcar | cdrec | bayesian | 0.4 | 0.2852656711 | +| eegalcohol | mcar | cdrec | bayesian | 0.6 | 0.3360171448 | +| eegalcohol | mcar | cdrec | bayesian | 0.8 | 0.5558362531 | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ + +MAE: Mean Absolute Error - Measures the average absolute error. 
+ ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| Dataset | Algorithm | Optimizer | Pattern | X Value | MAE | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| eegalcohol | mcar | mean | bayesian | 0.05 | 0.9036474830 | +| eegalcohol | mcar | mean | bayesian | 0.1 | 0.6416542360 | +| eegalcohol | mcar | mean | bayesian | 0.2 | 0.7560138355 | +| eegalcohol | mcar | mean | bayesian | 0.4 | 0.8150966718 | +| eegalcohol | mcar | mean | bayesian | 0.6 | 0.7985721719 | +| eegalcohol | mcar | mean | bayesian | 0.8 | 0.7901674118 | +| eegalcohol | mcar | cdrec | bayesian | 0.05 | 0.2020444480 | +| eegalcohol | mcar | cdrec | bayesian | 0.1 | 0.1729082341 | +| eegalcohol | mcar | cdrec | bayesian | 0.2 | 0.1625581157 | +| eegalcohol | mcar | cdrec | bayesian | 0.4 | 0.1957738066 | +| eegalcohol | mcar | cdrec | bayesian | 0.6 | 0.2318468642 | +| eegalcohol | mcar | cdrec | bayesian | 0.8 | 0.3744634603 | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ + +MI: Mutual Information - Indicates dependency between variables. + ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| Dataset | Algorithm | Optimizer | Pattern | X Value | MI | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| eegalcohol | mcar | mean | bayesian | 0.05 | 0.0000000000 | +| eegalcohol | mcar | mean | bayesian | 0.1 | 0.0000000000 | +| eegalcohol | mcar | mean | bayesian | 0.2 | 0.0000000000 | +| eegalcohol | mcar | mean | bayesian | 0.4 | 0.0000000000 | +| eegalcohol | mcar | mean | bayesian | 0.6 | 0.0000000000 | +| eegalcohol | mcar | mean | bayesian | 0.8 | 0.0000000000 | +| eegalcohol | mcar | cdrec | bayesian | 0.05 | 1.6287285826 | +| eegalcohol | mcar | cdrec | bayesian | 0.1 | 1.1990748752 | +| eegalcohol | mcar | cdrec | bayesian | 0.2 | 1.1847242800 | +| eegalcohol | mcar | cdrec | bayesian | 0.4 | 1.0148282079 | +| eegalcohol | mcar | cdrec | bayesian | 0.6 | 0.8789374924 | +| eegalcohol | mcar | cdrec | bayesian | 0.8 | 0.5772409317 | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ + +CORRELATION: Correlation Coefficient - Indicates linear relationship between variables. 
+ ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| Dataset | Algorithm | Optimizer | Pattern | X Value | CORRELATION | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| eegalcohol | mcar | mean | bayesian | 0.05 | 0.0000000000 | +| eegalcohol | mcar | mean | bayesian | 0.1 | 0.0000000000 | +| eegalcohol | mcar | mean | bayesian | 0.2 | 0.0000000000 | +| eegalcohol | mcar | mean | bayesian | 0.4 | 0.0000000000 | +| eegalcohol | mcar | mean | bayesian | 0.6 | 0.0000000000 | +| eegalcohol | mcar | mean | bayesian | 0.8 | 0.0000000000 | +| eegalcohol | mcar | cdrec | bayesian | 0.05 | 0.9837210172 | +| eegalcohol | mcar | cdrec | bayesian | 0.1 | 0.9640732994 | +| eegalcohol | mcar | cdrec | bayesian | 0.2 | 0.9737521039 | +| eegalcohol | mcar | cdrec | bayesian | 0.4 | 0.9594852424 | +| eegalcohol | mcar | cdrec | bayesian | 0.6 | 0.9418882414 | +| eegalcohol | mcar | cdrec | bayesian | 0.8 | 0.8478935496 | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ + +Dictionary of Results: +{'eegalcohol': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 1.107394798606378, 'MAE': 0.9036474830477748, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.00043654441833496094, 'optimization': 0, 'imputation': 0.0001857280731201172, 'log_imputation': -8.591226926448048}}, '0.1': {'scores': {'RMSE': 0.8569349076796438, 'MAE': 0.6416542359734557, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0012257099151611328, 'optimization': 0, 'imputation': 0.00018358230590820312, 'log_imputation': -8.602847457471068}}, '0.2': {'scores': {'RMSE': 0.9609255264919324, 'MAE': 0.756013835497571, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0014116764068603516, 'optimization': 0, 'imputation': 0.0003268718719482422, 'log_imputation': -8.025942292756483}}, '0.4': {'scores': {'RMSE': 1.0184989120725458, 'MAE': 0.8150966718352457, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.007466316223144531, 'optimization': 0, 'imputation': 0.00021529197692871094, 'log_imputation': -8.44351541890181}}, '0.6': {'scores': {'RMSE': 0.9997401940199045, 'MAE': 0.7985721718600829, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.021721839904785156, 'optimization': 0, 'imputation': 0.0002415180206298828, 'log_imputation': -8.328566468070113}}, '0.8': {'scores': {'RMSE': 0.9895691678332014, 'MAE': 0.7901674118013952, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.07830667495727539, 'optimization': 0, 'imputation': 0.0003383159637451172, 'log_imputation': -7.991530295158754}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.27658600512073456, 'MAE': 0.20204444801773774, 'MI': 1.6287285825717355, 'CORRELATION': 0.9837210171556283}, 'times': {'contamination': 0.00037598609924316406, 'optimization': 41.457383155822754, 'imputation': 1.5736122131347656, 'log_imputation': 0.45337374933278035}}, '0.1': {'scores': {'RMSE': 0.2322153312143858, 'MAE': 0.1729082341483471, 'MI': 1.1990748751673153, 'CORRELATION': 0.9640732993793864}, 'times': {'contamination': 0.013141393661499023, 'optimization': 41.457383155822754, 'imputation': 1.9010391235351562, 'log_imputation': 0.6424006437969658}}, '0.2': {'scores': {'RMSE': 0.21796283300762773, 'MAE': 0.16255811567403466, 'MI': 1.184724280002774, 'CORRELATION': 0.9737521039022545}, 'times': {'contamination': 
0.024869680404663086, 'optimization': 41.457383155822754, 'imputation': 3.6323060989379883, 'log_imputation': 1.289867735529784}}, '0.4': {'scores': {'RMSE': 0.2852656711446442, 'MAE': 0.19577380664036, 'MI': 1.014828207927502, 'CORRELATION': 0.959485242427464}, 'times': {'contamination': 0.011472463607788086, 'optimization': 41.457383155822754, 'imputation': 5.265213966369629, 'log_imputation': 1.6611217841466255}}, '0.6': {'scores': {'RMSE': 0.3360171448119046, 'MAE': 0.23184686418998596, 'MI': 0.8789374924043876, 'CORRELATION': 0.9418882413737133}, 'times': {'contamination': 0.03936338424682617, 'optimization': 41.457383155822754, 'imputation': 12.350928544998169, 'log_imputation': 2.5137312460774845}}, '0.8': {'scores': {'RMSE': 0.5558362531202891, 'MAE': 0.37446346030237454, 'MI': 0.5772409317426037, 'CORRELATION': 0.8478935496183876}, 'times': {'contamination': 0.17351055145263672, 'optimization': 41.457383155822754, 'imputation': 69.20163869857788, 'log_imputation': 4.23702454295853}}}}}}} diff --git a/tests/test_naterq/run_0/report_eeg-alcohol.xlsx b/tests/test_naterq/run_0/report_eeg-alcohol.xlsx new file mode 100644 index 0000000..aa67db2 Binary files /dev/null and b/tests/test_naterq/run_0/report_eeg-alcohol.xlsx differ diff --git a/tests/test_naterq/run_1/eegalcohol_mcar_CORRELATION.jpg b/tests/test_naterq/run_1/eegalcohol_mcar_CORRELATION.jpg new file mode 100644 index 0000000..6508c92 Binary files /dev/null and b/tests/test_naterq/run_1/eegalcohol_mcar_CORRELATION.jpg differ diff --git a/tests/test_naterq/run_1/eegalcohol_mcar_MAE.jpg b/tests/test_naterq/run_1/eegalcohol_mcar_MAE.jpg new file mode 100644 index 0000000..7b03c0c Binary files /dev/null and b/tests/test_naterq/run_1/eegalcohol_mcar_MAE.jpg differ diff --git a/tests/test_naterq/run_1/eegalcohol_mcar_MI.jpg b/tests/test_naterq/run_1/eegalcohol_mcar_MI.jpg new file mode 100644 index 0000000..186f6d6 Binary files /dev/null and b/tests/test_naterq/run_1/eegalcohol_mcar_MI.jpg differ diff --git a/tests/test_naterq/run_1/eegalcohol_mcar_RMSE.jpg b/tests/test_naterq/run_1/eegalcohol_mcar_RMSE.jpg new file mode 100644 index 0000000..3c015c6 Binary files /dev/null and b/tests/test_naterq/run_1/eegalcohol_mcar_RMSE.jpg differ diff --git a/tests/test_naterq/run_1/eegalcohol_mcar_imputation_time.jpg b/tests/test_naterq/run_1/eegalcohol_mcar_imputation_time.jpg new file mode 100644 index 0000000..cde1774 Binary files /dev/null and b/tests/test_naterq/run_1/eegalcohol_mcar_imputation_time.jpg differ diff --git a/tests/test_naterq/run_1/eegalcohol_mcar_log_imputation.jpg b/tests/test_naterq/run_1/eegalcohol_mcar_log_imputation.jpg new file mode 100644 index 0000000..a4130bb Binary files /dev/null and b/tests/test_naterq/run_1/eegalcohol_mcar_log_imputation.jpg differ diff --git a/tests/test_naterq/run_1/eegalcohol_mcar_metrics_subplot.jpg b/tests/test_naterq/run_1/eegalcohol_mcar_metrics_subplot.jpg new file mode 100644 index 0000000..24d4604 Binary files /dev/null and b/tests/test_naterq/run_1/eegalcohol_mcar_metrics_subplot.jpg differ diff --git a/tests/test_naterq/run_1/report_eeg-alcohol.txt b/tests/test_naterq/run_1/report_eeg-alcohol.txt new file mode 100644 index 0000000..47d1719 --- /dev/null +++ b/tests/test_naterq/run_1/report_eeg-alcohol.txt @@ -0,0 +1,83 @@ +Report for Dataset: eeg-alcohol +Generated on: 2025-01-07 16:57:37 +Run number: 1 +======================================================================================================================== + +RMSE: Root Mean Square Error - Measures the average 
magnitude of error. + ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| Dataset | Algorithm | Optimizer | Pattern | X Value | RMSE | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| eegalcohol | mcar | mean | bayesian | 0.05 | 1.1073947986 | +| eegalcohol | mcar | mean | bayesian | 0.1 | 0.8569349077 | +| eegalcohol | mcar | mean | bayesian | 0.2 | 0.9609255265 | +| eegalcohol | mcar | mean | bayesian | 0.4 | 1.0184989121 | +| eegalcohol | mcar | mean | bayesian | 0.6 | 0.9997401940 | +| eegalcohol | mcar | mean | bayesian | 0.8 | 0.9895691678 | +| eegalcohol | mcar | cdrec | bayesian | 0.05 | 0.2765860051 | +| eegalcohol | mcar | cdrec | bayesian | 0.1 | 0.2322153312 | +| eegalcohol | mcar | cdrec | bayesian | 0.2 | 0.2179628330 | +| eegalcohol | mcar | cdrec | bayesian | 0.4 | 0.2852656711 | +| eegalcohol | mcar | cdrec | bayesian | 0.6 | 0.3360171448 | +| eegalcohol | mcar | cdrec | bayesian | 0.8 | 0.5558362531 | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ + +MAE: Mean Absolute Error - Measures the average absolute error. + ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| Dataset | Algorithm | Optimizer | Pattern | X Value | MAE | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| eegalcohol | mcar | mean | bayesian | 0.05 | 0.9036474830 | +| eegalcohol | mcar | mean | bayesian | 0.1 | 0.6416542360 | +| eegalcohol | mcar | mean | bayesian | 0.2 | 0.7560138355 | +| eegalcohol | mcar | mean | bayesian | 0.4 | 0.8150966718 | +| eegalcohol | mcar | mean | bayesian | 0.6 | 0.7985721719 | +| eegalcohol | mcar | mean | bayesian | 0.8 | 0.7901674118 | +| eegalcohol | mcar | cdrec | bayesian | 0.05 | 0.2020444480 | +| eegalcohol | mcar | cdrec | bayesian | 0.1 | 0.1729082341 | +| eegalcohol | mcar | cdrec | bayesian | 0.2 | 0.1625581157 | +| eegalcohol | mcar | cdrec | bayesian | 0.4 | 0.1957738066 | +| eegalcohol | mcar | cdrec | bayesian | 0.6 | 0.2318468642 | +| eegalcohol | mcar | cdrec | bayesian | 0.8 | 0.3744634603 | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ + +MI: Mutual Information - Indicates dependency between variables. 
+ ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| Dataset | Algorithm | Optimizer | Pattern | X Value | MI | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| eegalcohol | mcar | mean | bayesian | 0.05 | 0.0000000000 | +| eegalcohol | mcar | mean | bayesian | 0.1 | 0.0000000000 | +| eegalcohol | mcar | mean | bayesian | 0.2 | 0.0000000000 | +| eegalcohol | mcar | mean | bayesian | 0.4 | 0.0000000000 | +| eegalcohol | mcar | mean | bayesian | 0.6 | 0.0000000000 | +| eegalcohol | mcar | mean | bayesian | 0.8 | 0.0000000000 | +| eegalcohol | mcar | cdrec | bayesian | 0.05 | 1.6287285826 | +| eegalcohol | mcar | cdrec | bayesian | 0.1 | 1.1990748752 | +| eegalcohol | mcar | cdrec | bayesian | 0.2 | 1.1847242800 | +| eegalcohol | mcar | cdrec | bayesian | 0.4 | 1.0148282079 | +| eegalcohol | mcar | cdrec | bayesian | 0.6 | 0.8789374924 | +| eegalcohol | mcar | cdrec | bayesian | 0.8 | 0.5772409317 | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ + +CORRELATION: Correlation Coefficient - Indicates linear relationship between variables. + ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| Dataset | Algorithm | Optimizer | Pattern | X Value | CORRELATION | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ +| eegalcohol | mcar | mean | bayesian | 0.05 | 0.0000000000 | +| eegalcohol | mcar | mean | bayesian | 0.1 | 0.0000000000 | +| eegalcohol | mcar | mean | bayesian | 0.2 | 0.0000000000 | +| eegalcohol | mcar | mean | bayesian | 0.4 | 0.0000000000 | +| eegalcohol | mcar | mean | bayesian | 0.6 | 0.0000000000 | +| eegalcohol | mcar | mean | bayesian | 0.8 | 0.0000000000 | +| eegalcohol | mcar | cdrec | bayesian | 0.05 | 0.9837210172 | +| eegalcohol | mcar | cdrec | bayesian | 0.1 | 0.9640732994 | +| eegalcohol | mcar | cdrec | bayesian | 0.2 | 0.9737521039 | +| eegalcohol | mcar | cdrec | bayesian | 0.4 | 0.9594852424 | +| eegalcohol | mcar | cdrec | bayesian | 0.6 | 0.9418882414 | +| eegalcohol | mcar | cdrec | bayesian | 0.8 | 0.8478935496 | ++-----------------+-----------------+-----------------+-----------------+--------------+---------------------------+ + +Dictionary of Results: +{'eegalcohol': {'mcar': {'mean': {'bayesian': {'0.05': {'scores': {'RMSE': 1.107394798606378, 'MAE': 0.9036474830477748, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0007011890411376953, 'optimization': 0, 'imputation': 0.0003256797790527344, 'log_imputation': -8.02959593218806}}, '0.1': {'scores': {'RMSE': 0.8569349076796438, 'MAE': 0.6416542359734557, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0017731189727783203, 'optimization': 0, 'imputation': 0.000640869140625, 'log_imputation': -7.352685270675757}}, '0.2': {'scores': {'RMSE': 0.9609255264919324, 'MAE': 0.756013835497571, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0017459392547607422, 'optimization': 0, 'imputation': 0.0002167224884033203, 'log_imputation': -8.436892878141318}}, '0.4': {'scores': {'RMSE': 1.0184989120725458, 'MAE': 0.8150966718352457, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.011270761489868164, 'optimization': 0, 'imputation': 0.00033211708068847656, 'log_imputation': -8.010022998538991}}, '0.6': {'scores': {'RMSE': 0.9997401940199045, 
'MAE': 0.7985721718600829, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.0378873348236084, 'optimization': 0, 'imputation': 0.00038123130798339844, 'log_imputation': -7.872104259484843}}, '0.8': {'scores': {'RMSE': 0.9895691678332014, 'MAE': 0.7901674118013952, 'MI': 0.0, 'CORRELATION': 0}, 'times': {'contamination': 0.14658021926879883, 'optimization': 0, 'imputation': 0.00038242340087890625, 'log_imputation': -7.868982183892337}}}}, 'cdrec': {'bayesian': {'0.05': {'scores': {'RMSE': 0.27658600512073456, 'MAE': 0.20204444801773774, 'MI': 1.6287285825717355, 'CORRELATION': 0.9837210171556283}, 'times': {'contamination': 0.00045228004455566406, 'optimization': 23.663841724395752, 'imputation': 0.6373500823974609, 'log_imputation': -0.4504361944718769}}, '0.1': {'scores': {'RMSE': 0.2322153312143858, 'MAE': 0.1729082341483471, 'MI': 1.1990748751673153, 'CORRELATION': 0.9640732993793864}, 'times': {'contamination': 0.0013141632080078125, 'optimization': 23.663841724395752, 'imputation': 1.499657392501831, 'log_imputation': 0.4052366770209908}}, '0.2': {'scores': {'RMSE': 0.21796283300762773, 'MAE': 0.16255811567403466, 'MI': 1.184724280002774, 'CORRELATION': 0.9737521039022545}, 'times': {'contamination': 0.0035202503204345703, 'optimization': 23.663841724395752, 'imputation': 0.7658035755157471, 'log_imputation': -0.2668295709556574}}, '0.4': {'scores': {'RMSE': 0.2852656711446442, 'MAE': 0.19577380664036, 'MI': 1.014828207927502, 'CORRELATION': 0.959485242427464}, 'times': {'contamination': 0.022913455963134766, 'optimization': 23.663841724395752, 'imputation': 6.1727776527404785, 'log_imputation': 1.8201489234435073}}, '0.6': {'scores': {'RMSE': 0.3360171448119046, 'MAE': 0.23184686418998596, 'MI': 0.8789374924043876, 'CORRELATION': 0.9418882413737133}, 'times': {'contamination': 0.06402039527893066, 'optimization': 23.663841724395752, 'imputation': 9.204896926879883, 'log_imputation': 2.2197356171079416}}, '0.8': {'scores': {'RMSE': 0.5558362531202891, 'MAE': 0.37446346030237454, 'MI': 0.5772409317426037, 'CORRELATION': 0.8478935496183876}, 'times': {'contamination': 0.1733553409576416, 'optimization': 23.663841724395752, 'imputation': 41.960779905319214, 'log_imputation': 3.736735370231416}}}}}}} diff --git a/tests/test_naterq/run_1/report_eeg-alcohol.xlsx b/tests/test_naterq/run_1/report_eeg-alcohol.xlsx new file mode 100644 index 0000000..352bd35 Binary files /dev/null and b/tests/test_naterq/run_1/report_eeg-alcohol.xlsx differ diff --git a/tests/test_opti_bayesian_cdrec.py b/tests/test_opti_bayesian_cdrec.py index 9fb1e76..9341cf5 100644 --- a/tests/test_opti_bayesian_cdrec.py +++ b/tests/test_opti_bayesian_cdrec.py @@ -17,24 +17,24 @@ def test_optimization_bayesian_cdrec(self): ts_1 = TimeSeries() ts_1.load_timeseries(utils.search_path(dataset)) - infected_matrix = ts_1.Contaminate.mcar(ts=ts_1.data, series_impacted=0.4, missing_rate=0.4, block_size=2, protection=0.1, use_seed=True, seed=42) + incomp_data = ts_1.Contamination.mcar(input_data=ts_1.data, series_rate=0.4, missing_rate=0.4, block_size=2, offset=0.1, seed=True) params = utils.load_parameters(query="default", algorithm=algorithm) params_optimal_load = utils.load_parameters(query="optimal", algorithm=algorithm, dataset=dataset, optimizer="b") - algo_opti = Imputation.MatrixCompletion.CDRec(infected_matrix) - algo_opti.impute(user_defined=False, params={"ground_truth":ts_1.data, "optimizer":"bayesian", "options":{"n_calls": 2}}) - algo_opti.score(raw_matrix=ts_1.data) + algo_opti = 
Imputation.MatrixCompletion.CDRec(incomp_data) + algo_opti.impute(user_def=False, params={"input_data":ts_1.data, "optimizer": "bayesian", "options":{"n_calls": 2}}) + algo_opti.score(input_data=ts_1.data) metrics_optimal = algo_opti.metrics - algo_default = Imputation.MatrixCompletion.CDRec(infected_matrix) + algo_default = Imputation.MatrixCompletion.CDRec(incomp_data) algo_default.impute(params=params) - algo_default.score(raw_matrix=ts_1.data) + algo_default.score(input_data=ts_1.data) metrics_default = algo_default.metrics - algo_load = Imputation.MatrixCompletion.CDRec(infected_matrix) + algo_load = Imputation.MatrixCompletion.CDRec(incomp_data) algo_load.impute(params=params_optimal_load) - algo_load.score(raw_matrix=ts_1.data) + algo_load.score(input_data=ts_1.data) metrics_optimal_load = algo_load.metrics self.assertTrue(metrics_optimal["RMSE"] < metrics_default["RMSE"], f"Expected {metrics_optimal['RMSE']} < {metrics_default['RMSE']} ") diff --git a/tests/test_opti_bayesian_cdrec_eeg.py b/tests/test_opti_bayesian_cdrec_eeg.py index 353810d..1cfd33d 100644 --- a/tests/test_opti_bayesian_cdrec_eeg.py +++ b/tests/test_opti_bayesian_cdrec_eeg.py @@ -17,18 +17,18 @@ def test_optimization_bayesian_cdrec_eeg(self): ts_1 = TimeSeries() ts_1.load_timeseries(utils.search_path(dataset), header=True) - infected_matrix = ts_1.Contaminate.mcar(ts=ts_1.data, series_impacted=0.4, missing_rate=0.4, block_size=2, protection=0.1, use_seed=True, seed=42) + incomp_data = ts_1.Contamination.mcar(input_data=ts_1.data, series_rate=0.4, missing_rate=0.4, block_size=2, offset=0.1, seed=True) params = utils.load_parameters(query="default", algorithm=algorithm) - algo_opti = Imputation.MatrixCompletion.CDRec(infected_matrix) - algo_opti.impute(user_defined=False, params={"ground_truth":ts_1.data, "optimizer":"bayesian", "options":{"n_calls": 8}}) - algo_opti.score(raw_matrix=ts_1.data) + algo_opti = Imputation.MatrixCompletion.CDRec(incomp_data) + algo_opti.impute(user_def=False, params={"input_data":ts_1.data, "optimizer": "bayesian", "options":{"n_calls": 8}}) + algo_opti.score(input_data=ts_1.data) metrics_optimal = algo_opti.metrics - algo_default = Imputation.MatrixCompletion.CDRec(infected_matrix) + algo_default = Imputation.MatrixCompletion.CDRec(incomp_data) algo_default.impute(params=params) - algo_default.score(raw_matrix=ts_1.data) + algo_default.score(input_data=ts_1.data) metrics_default = algo_default.metrics self.assertTrue(metrics_optimal["RMSE"] < metrics_default["RMSE"], f"Expected {metrics_optimal['RMSE']} < {metrics_default['RMSE']} ") diff --git a/tests/test_opti_bayesian_iim.py b/tests/test_opti_bayesian_iim.py index f80d1ac..d873828 100644 --- a/tests/test_opti_bayesian_iim.py +++ b/tests/test_opti_bayesian_iim.py @@ -18,26 +18,26 @@ def test_optimization_bayesian_iim(self): ts_1.load_timeseries(data=utils.search_path(), max_values=100) - infected_matrix = ts_1.Contaminate.mcar(ts=ts_1.data, series_impacted=0.4, missing_rate=0.4, block_size=2, - protection=0.1, use_seed=True, seed=42) + incomp_data = ts_1.Contamination.mcar(input_data=ts_1.data, series_rate=0.4, missing_rate=0.4, block_size=2, + offset=0.1, seed=True) params = utils.load_parameters(query="default", algorithm=algorithm) params_optimal_load = utils.load_parameters(query="optimal", algorithm=algorithm, dataset=dataset, optimizer="b") - algo_opti = Imputation.Statistics.IIM(infected_matrix) - algo_opti.impute(user_defined=False, params={"ground_truth": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 2}}) - 
algo_opti.score(raw_matrix=ts_1.data) + algo_opti = Imputation.Statistics.IIM(incomp_data) + algo_opti.impute(user_def=False, params={"input_data": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 2}}) + algo_opti.score(input_data=ts_1.data) metrics_optimal = algo_opti.metrics - algo_default = Imputation.Statistics.IIM(infected_matrix) + algo_default = Imputation.Statistics.IIM(incomp_data) algo_default.impute(params=params) - algo_default.score(raw_matrix=ts_1.data) + algo_default.score(input_data=ts_1.data) metrics_default = algo_default.metrics - algo_load = Imputation.Statistics.IIM(infected_matrix) + algo_load = Imputation.Statistics.IIM(incomp_data) algo_load.impute(params=params_optimal_load) - algo_load.score(raw_matrix=ts_1.data) + algo_load.score(input_data=ts_1.data) metrics_optimal_load = algo_load.metrics self.assertTrue(abs(metrics_optimal["RMSE"] - metrics_default["RMSE"]) < 0.1, f"Expected {metrics_optimal['RMSE']} - {metrics_default['RMSE']} < 0.1") diff --git a/tests/test_opti_bayesian_mrnn.py b/tests/test_opti_bayesian_mrnn.py index 593bd91..025aacb 100644 --- a/tests/test_opti_bayesian_mrnn.py +++ b/tests/test_opti_bayesian_mrnn.py @@ -17,24 +17,24 @@ def test_optimization_bayesian_mrnn(self): ts_1.load_timeseries(data=utils.search_path(dataset), max_values=200) - infected_matrix = ts_1.Contaminate.mcar(ts=ts_1.data, series_impacted=0.4, missing_rate=0.4, block_size=2, protection=0.1, use_seed=True, seed=42) + incomp_data = ts_1.Contamination.mcar(input_data=ts_1.data, series_rate=0.4, missing_rate=0.4, block_size=2, offset=0.1, seed=True) params = utils.load_parameters(query="default", algorithm=algorithm) params_optimal_load = utils.load_parameters(query="optimal", algorithm=algorithm, dataset=dataset, optimizer="b") - algo_opti = Imputation.DeepLearning.MRNN(infected_matrix) - algo_opti.impute(user_defined=False, params={"ground_truth": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 2}}) + algo_opti = Imputation.DeepLearning.MRNN(incomp_data) + algo_opti.impute(user_def=False, params={"input_data": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 2}}) - algo_opti.score(raw_matrix=ts_1.data) + algo_opti.score(input_data=ts_1.data) metrics_optimal = algo_opti.metrics - algo_default = Imputation.DeepLearning.MRNN(infected_matrix) + algo_default = Imputation.DeepLearning.MRNN(incomp_data) algo_default.impute(params=params) - algo_default.score(raw_matrix=ts_1.data) + algo_default.score(input_data=ts_1.data) metrics_default = algo_default.metrics - algo_load = Imputation.PatternSearch.STMVL(infected_matrix) + algo_load = Imputation.PatternSearch.STMVL(incomp_data) algo_load.impute(params=params_optimal_load) - algo_load.score(raw_matrix=ts_1.data) + algo_load.score(input_data=ts_1.data) self.assertTrue(abs(metrics_optimal["RMSE"] - metrics_default["RMSE"]) < 0.1, f"Expected {metrics_optimal['RMSE']} > {metrics_default['RMSE']}") \ No newline at end of file diff --git a/tests/test_opti_bayesian_stmvl.py b/tests/test_opti_bayesian_stmvl.py index 1f4b8a7..0a9e14b 100644 --- a/tests/test_opti_bayesian_stmvl.py +++ b/tests/test_opti_bayesian_stmvl.py @@ -19,25 +19,25 @@ def test_optimization_bayesian_stmvl(self): ts_1.load_timeseries(utils.search_path(dataset), max_values=200) - infected_matrix = ts_1.Contaminate.mcar(ts=ts_1.data, series_impacted=0.4, missing_rate=0.4, block_size=2, - protection=0.1, use_seed=True, seed=42) + incomp_data = ts_1.Contamination.mcar(input_data=ts_1.data, series_rate=0.4, missing_rate=0.4, block_size=2, + 
offset=0.1, seed=True) params = utils.load_parameters(query="default", algorithm=algorithm) params_optimal_load = utils.load_parameters(query="optimal", algorithm=algorithm, dataset=dataset, optimizer="b") - algo_opti = Imputation.PatternSearch.STMVL(infected_matrix) - algo_opti.impute(user_defined=False, params={"ground_truth": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 2}}) - algo_opti.score(raw_matrix=ts_1.data) + algo_opti = Imputation.PatternSearch.STMVL(incomp_data) + algo_opti.impute(user_def=False, params={"input_data": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 2}}) + algo_opti.score(input_data=ts_1.data) metrics_optimal = algo_opti.metrics - algo_default = Imputation.PatternSearch.STMVL(infected_matrix) + algo_default = Imputation.PatternSearch.STMVL(incomp_data) algo_default.impute(params=params) - algo_default.score(raw_matrix=ts_1.data) + algo_default.score(input_data=ts_1.data) metrics_default = algo_default.metrics - algo_load = Imputation.PatternSearch.STMVL(infected_matrix) + algo_load = Imputation.PatternSearch.STMVL(incomp_data) algo_load.impute(params=params_optimal_load) - algo_load.score(raw_matrix=ts_1.data) + algo_load.score(input_data=ts_1.data) metrics_optimal_load = algo_load.metrics self.assertTrue(abs(metrics_optimal["RMSE"] - metrics_default["RMSE"]) < 0.1, f"Expected {metrics_optimal['RMSE']} - {metrics_default['RMSE']} < 0.1") diff --git a/tests/test_opti_greedy_cdrec.py b/tests/test_opti_greedy_cdrec.py index db274bc..a1a9030 100644 --- a/tests/test_opti_greedy_cdrec.py +++ b/tests/test_opti_greedy_cdrec.py @@ -17,24 +17,24 @@ def test_optimization_greedy_cdrec(self): ts_1 = TimeSeries() ts_1.load_timeseries(utils.search_path(dataset), max_series=50, max_values=100) - infected_matrix = ts_1.Contaminate.mcar(ts=ts_1.data, series_impacted=0.4, missing_rate=0.4, block_size=2, protection=0.1, use_seed=True, seed=42) + incomp_data = ts_1.Contamination.mcar(input_data=ts_1.data, series_rate=0.4, missing_rate=0.4, block_size=2, offset=0.1, seed=True) params = utils.load_parameters(query="default", algorithm=algorithm) params_optimal_load = utils.load_parameters(query="optimal", algorithm=algorithm, dataset=dataset, optimizer="g") - algo_opti = Imputation.MatrixCompletion.CDRec(infected_matrix) - algo_opti.impute(user_defined=False, params={"ground_truth": ts_1.data, "optimizer": "greedy"}) - algo_opti.score(raw_matrix=ts_1.data) + algo_opti = Imputation.MatrixCompletion.CDRec(incomp_data) + algo_opti.impute(user_def=False, params={"input_data": ts_1.data, "optimizer": "greedy"}) + algo_opti.score(input_data=ts_1.data) metrics_optimal = algo_opti.metrics - algo_default = Imputation.MatrixCompletion.CDRec(infected_matrix) + algo_default = Imputation.MatrixCompletion.CDRec(incomp_data) algo_default.impute(params=params) - algo_default.score(raw_matrix=ts_1.data) + algo_default.score(input_data=ts_1.data) metrics_default = algo_default.metrics - algo_load = Imputation.MatrixCompletion.CDRec(infected_matrix) + algo_load = Imputation.MatrixCompletion.CDRec(incomp_data) algo_load.impute(params=params_optimal_load) - algo_load.score(raw_matrix=ts_1.data) + algo_load.score(input_data=ts_1.data) metrics_optimal_load = algo_load.metrics self.assertTrue(metrics_optimal["RMSE"] < metrics_default["RMSE"], f"Expected {metrics_optimal['RMSE']} < {metrics_default['RMSE']} ") diff --git a/tests/test_opti_pso_cdrec.py b/tests/test_opti_pso_cdrec.py index cecd3ed..582459f 100644 --- a/tests/test_opti_pso_cdrec.py +++ b/tests/test_opti_pso_cdrec.py @@ 
-17,18 +17,18 @@ def test_optimization_pso_cdrec(self): ts_1 = TimeSeries() ts_1.load_timeseries(utils.search_path(dataset), max_series=50, max_values=100) - infected_matrix = ts_1.Contaminate.mcar(ts=ts_1.data, series_impacted=0.4, missing_rate=0.4, block_size=2, protection=0.1, use_seed=True, seed=42) + incomp_data = ts_1.Contamination.mcar(input_data=ts_1.data, series_rate=0.4, missing_rate=0.4, block_size=2, offset=0.1, seed=True) params = utils.load_parameters(query="default", algorithm=algorithm) - algo_opti = Imputation.MatrixCompletion.CDRec(infected_matrix) - algo_opti.impute(user_defined=False, params={"ground_truth": ts_1.data, "optimizer": "pso", "options": {"n_particles": 2}}) - algo_opti.score(raw_matrix=ts_1.data) + algo_opti = Imputation.MatrixCompletion.CDRec(incomp_data) + algo_opti.impute(user_def=False, params={"input_data": ts_1.data, "optimizer": "pso", "options": {"n_particles": 2}}) + algo_opti.score(input_data=ts_1.data) metrics_optimal = algo_opti.metrics - algo_default = Imputation.MatrixCompletion.CDRec(infected_matrix) + algo_default = Imputation.MatrixCompletion.CDRec(incomp_data) algo_default.impute(params=params) - algo_default.score(raw_matrix=ts_1.data) + algo_default.score(input_data=ts_1.data) metrics_default = algo_default.metrics diff --git a/tests/test_opti_sh_cdrec.py b/tests/test_opti_sh_cdrec.py index d8a78fc..d23f766 100644 --- a/tests/test_opti_sh_cdrec.py +++ b/tests/test_opti_sh_cdrec.py @@ -17,18 +17,18 @@ def test_optimization_sh_cdrec(self): ts_1 = TimeSeries() ts_1.load_timeseries(utils.search_path(dataset), max_series=50, max_values=100) - infected_matrix = ts_1.Contaminate.mcar(ts=ts_1.data, series_impacted=0.4, missing_rate=0.4, block_size=2, protection=0.1, use_seed=True, seed=42) + incomp_data = ts_1.Contamination.mcar(input_data=ts_1.data, series_rate=0.4, missing_rate=0.4, block_size=2, offset=0.1, seed=True) params = utils.load_parameters(query="default", algorithm=algorithm) - algo_opti = Imputation.MatrixCompletion.CDRec(infected_matrix) - algo_opti.impute(user_defined=False, params={"ground_truth": ts_1.data, "optimizer": "sh", "options": {"num_configs": 2}}) - algo_opti.score(raw_matrix=ts_1.data) + algo_opti = Imputation.MatrixCompletion.CDRec(incomp_data) + algo_opti.impute(user_def=False, params={"input_data": ts_1.data, "optimizer": "sh", "options": {"num_configs": 2}}) + algo_opti.score(input_data=ts_1.data) metrics_optimal = algo_opti.metrics - algo_default = Imputation.MatrixCompletion.CDRec(infected_matrix) + algo_default = Imputation.MatrixCompletion.CDRec(incomp_data) algo_default.impute(params=params) - algo_default.score(raw_matrix=ts_1.data) + algo_default.score(input_data=ts_1.data) metrics_default = algo_default.metrics diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py new file mode 100644 index 0000000..c0f5719 --- /dev/null +++ b/tests/test_pipeline.py @@ -0,0 +1,52 @@ +import os +import unittest +import numpy as np +from imputegap.tools import utils + +from imputegap.recovery.imputation import Imputation +from imputegap.recovery.manager import TimeSeries +from imputegap.recovery.explainer import Explainer +from imputegap.recovery.benchmark import Benchmark + + +class TestPipeline(unittest.TestCase): + + def test_pipeline(self): + """ + Verify if the manager of a dataset is working + """ + x = False + + # automl + ts_1 = TimeSeries() + ts_1.load_timeseries(utils.search_path("eeg-alcohol")) + ts_1.normalize(normalizer="min_max") + incomp_data = ts_1.Contamination.mcar(ts_1.data) + + cdrec = 
Imputation.MatrixCompletion.CDRec(incomp_data).impute() + cdrec.score(ts_1.data, cdrec.recov_data) + cdrec = Imputation.MatrixCompletion.CDRec(incomp_data).impute(user_def=False, params={"input_data": ts_1.data, "optimizer": "bayesian", "options": { "n_calls": 3}}) + cdrec.score(ts_1.data, cdrec.recov_data) + ts_1.print_results(cdrec.metrics) + utils.save_optimization(optimal_params=cdrec.parameters, algorithm="cdrec", dataset="eeg", optimizer="t") + + # explainer + ts_1 = TimeSeries() + ts_1.load_timeseries(utils.search_path("chlorine")) + shap_values, shap_details = Explainer.shap_explainer(input_data=ts_1.data, missing_rate=0.25, limit_ratio=0.4, split_ratio=0.6, file_name="chlorine", algorithm="cdrec") + Explainer.print(shap_values, shap_details) + + # benchmark + dataset_test = ["eeg-alcohol"] + opti_bayesian = {"optimizer": "bayesian", "options": {"n_calls": 3, "n_random_starts": 50, "acq_func": "gp_hedge", "metrics": "RMSE"}} + optimizers = [opti_bayesian] + algorithms_test = ["mean", "cdrec"] + patterns_small = ["mcar"] + x_axis = [0.05, 0.1, 0.2, 0.4, 0.6, 0.8] + Benchmark().eval(algorithms=algorithms_test, datasets=dataset_test, patterns=patterns_small, x_axis=x_axis, optimizers=optimizers, save_dir="test_naterq", runs=2) + + x = not x + self.assertTrue(x) + + +
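
Usage note: the pipeline test above exercises the renamed API end to end. As a quick reference, the sketch below strings together only calls that already appear in these hunks (TimeSeries loading, MCAR contamination, CDRec imputation and scoring); it is a minimal illustration under that assumption, not an official snippet, and the dataset name and rates are simply reused from the tests.

```python
# Minimal sketch of the renamed API exercised by the tests above (assumes the package
# layout used in this diff, imputegap.recovery.* / imputegap.tools.utils, and the
# bundled "eeg-alcohol" dataset).
from imputegap.recovery.imputation import Imputation
from imputegap.recovery.manager import TimeSeries
from imputegap.tools import utils

ts = TimeSeries()
ts.load_timeseries(utils.search_path("eeg-alcohol"))
ts.normalize(normalizer="min_max")

# Contaminate 40% of the series with 40% missing values (MCAR),
# keeping the first 10% of each series intact (offset).
incomp_data = ts.Contamination.mcar(input_data=ts.data, series_rate=0.4, missing_rate=0.4,
                                    block_size=10, offset=0.1, seed=True)

# Impute with CDRec, then score the recovered matrix against the original data.
algo = Imputation.MatrixCompletion.CDRec(incomp_data)
algo.impute()
algo.score(input_data=ts.data)
print(algo.metrics)  # e.g. {"RMSE": ..., "MAE": ..., "MI": ..., "CORRELATION": ...}
```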