Commit

1a PM - alpha correction

qnater committed Jan 7, 2025
1 parent 4802825 commit e9311ca
Showing 212 changed files with 4,668 additions and 1,406 deletions.
35 changes: 35 additions & 0 deletions .github/workflows/pytest_pipeline.yml
@@ -0,0 +1,35 @@
name: Pytest - ImputeGAP - Pipeline - 12

on:
  push:
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
        with:
          lfs: true

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install libmlpack-dev
          sudo apt-get install libopenblas-dev
          sudo apt-get install python3-dev build-essential
          pip install --upgrade google protobuf
          pip install -r requirements.txt
          pip install mypy
          pip install pytest
      - name: Run pytest
        run: |
          python -m pytest ./tests/test_pipeline.py
35 changes: 35 additions & 0 deletions .github/workflows/pytest_pipeline_10.yml
@@ -0,0 +1,35 @@
name: Pytest - ImputeGAP - Pipeline - 10

on:
  push:
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
        with:
          lfs: true

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install libmlpack-dev
          sudo apt-get install libopenblas-dev
          sudo apt-get install python3-dev build-essential
          pip install --upgrade google protobuf
          pip install -r requirements.txt
          pip install mypy
          pip install pytest
      - name: Run pytest
        run: |
          python -m pytest ./tests/test_pipeline.py
35 changes: 35 additions & 0 deletions .github/workflows/pytest_pipeline_12_6.yml
@@ -0,0 +1,35 @@
name: Pytest - ImputeGAP - Pipeline - 12.6

on:
  push:
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
        with:
          lfs: true

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12.6'

      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install libmlpack-dev
          sudo apt-get install libopenblas-dev
          sudo apt-get install python3-dev build-essential
          pip install --upgrade google protobuf
          pip install -r requirements.txt
          pip install mypy
          pip install pytest
      - name: Run pytest
        run: |
          python -m pytest ./tests/test_pipeline.py
35 changes: 35 additions & 0 deletions .github/workflows/pytest_pipeline_12_8.yml
@@ -0,0 +1,35 @@
name: Pytest - ImputeGAP - Pipeline - 12.8

on:
  push:
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
        with:
          lfs: true

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12.6'

      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install libmlpack-dev
          sudo apt-get install libopenblas-dev
          sudo apt-get install python3-dev build-essential
          pip install --upgrade google protobuf
          pip install -r requirements.txt
          pip install mypy
          pip install pytest
      - name: Run pytest
        run: |
          python -m pytest ./tests/test_pipeline.py
35 changes: 35 additions & 0 deletions .github/workflows/pytest_pipeline_13.yml
@@ -0,0 +1,35 @@
name: Pytest - ImputeGAP - Pipeline - 13

on:
  push:
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
        with:
          lfs: true

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.13'

      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install libmlpack-dev
          sudo apt-get install libopenblas-dev
          sudo apt-get install python3-dev build-essential
          pip install --upgrade google protobuf
          pip install -r requirements.txt
          pip install mypy
          pip install pytest
      - name: Run pytest
        run: |
          python -m pytest ./tests/test_pipeline.py
256 changes: 216 additions & 40 deletions .idea/workspace.xml

Large diffs are not rendered by default.

24 changes: 12 additions & 12 deletions README.md
@@ -89,8 +89,8 @@ ts_1.load_timeseries(utils.search_path("eeg-alcohol"), max_series=5, max_values=
ts_1.normalize(normalizer="z_score")

# [OPTIONAL] you can plot your raw data / print the information
ts_1.plot(raw_data=ts_1.data, title="raw data", max_series=10, max_values=100, save_path="./imputegap/assets")
ts_1.print(limit=10)
ts_1.plot(input_data=ts_1.data, title="raw data", max_series=10, max_values=100, save_path="./imputegap/assets")
ts_1.print(limit_series=10)

```

@@ -116,10 +116,10 @@ ts_1.load_timeseries(utils.search_path("eeg-alcohol"))
ts_1.normalize(normalizer="min_max")

# 3. contamination of the data with MCAR scenario
infected_data = ts_1.Contaminate.mcar(ts_1.data, series_impacted=0.4, missing_rate=0.2, use_seed=True)
infected_data = ts_1.Contamination.mcar(ts_1.data, series_rate=0.4, missing_rate=0.2, use_seed=True)

# [OPTIONAL] you can plot your raw data / print the contamination
ts_1.print(limit=10)
ts_1.print(limit_series=10)
ts_1.plot(ts_1.data, infected_data, title="contamination", max_series=1, save_path="./imputegap/assets")
```

@@ -146,7 +146,7 @@ ts_1.load_timeseries(utils.search_path("eeg-alcohol"))
ts_1.normalize(normalizer="min_max")

# 3. contamination of the data
infected_data = ts_1.Contaminate.mcar(ts_1.data)
infected_data = ts_1.Contamination.mcar(ts_1.data)

# 4. imputation of the contaminated data
# choice of the algorithm, and their parameters (default, automl, or defined by the user)
@@ -190,20 +190,20 @@ ts_1.load_timeseries(utils.search_path("eeg-alcohol"))
ts_1.normalize(normalizer="min_max")

# 3. contamination of the data
infected_data = ts_1.Contaminate.mcar(ts_1.data)
infected_data = ts_1.Contamination.mcar(ts_1.data)

# 4. imputation of the contaminated data
# imputation with AutoML which will discover the optimal hyperparameters for your dataset and your algorithm
cdrec = Imputation.MatrixCompletion.CDRec(infected_data).impute(user_defined=False, params={"ground_truth": ts_1.data,
"optimizer": "bayesian",
"options": {"n_calls": 5}})
cdrec = Imputation.MatrixCompletion.CDRec(infected_data).impute(user_def=False, params={"ground_truth": ts_1.data,
"optimizer": "bayesian",
"options": {"n_calls": 5}})

# 5. score the imputation with the raw_data
cdrec.score(ts_1.data, cdrec.imputed_matrix)
cdrec.score(ts_1.data, cdrec.recov_data)

# 6. [OPTIONAL] display the results
ts_1.print_results(cdrec.metrics)
ts_1.plot(raw_data=ts_1.data, infected_data=infected_data, imputed_data=cdrec.imputed_matrix, title="imputation",
ts_1.plot(input_data=ts_1.data, incomp_data=infected_data, imputed_data=cdrec.recov_data, title="imputation",
max_series=1, save_path="./imputegap/assets", display=True)

# 7. [OPTIONAL] save hyperparameters
@@ -234,7 +234,7 @@ ts_1 = TimeSeries()
ts_1.load_timeseries(utils.search_path("eeg-alcohol"))

# 3. call the explanation of your dataset with a specific algorithm to gain insight on the Imputation results
shap_values, shap_details = Explainer.shap_explainer(raw_data=ts_1.data, file_name="eeg-alcohol", algorithm="cdrec")
shap_values, shap_details = Explainer.shap_explainer(input_data=ts_1.data, file_name="eeg-alcohol", algorithm="cdrec")

# [OPTIONAL] print the results with the impact of each feature.
Explainer.print(shap_values, shap_details)
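To see the renamed README API in one place (Contaminate → Contamination, series_impacted → series_rate, raw_data → input_data, infected_data → incomp_data, imputed_matrix → recov_data, user_defined → user_def, limit → limit_series), here is a consolidated sketch of the updated flow using only names that appear in the added lines above. The import paths are not visible in these hunks and are assumed from the package layout, so treat this as an illustration rather than the canonical README example.

```python
# Assumed import paths (not shown in the hunks above).
from imputegap.recovery.manager import TimeSeries
from imputegap.recovery.imputation import Imputation
from imputegap.tools import utils

# 1. load and normalize the time series, as in the README
ts_1 = TimeSeries()
ts_1.load_timeseries(utils.search_path("eeg-alcohol"))
ts_1.normalize(normalizer="min_max")

# 2. contaminate with MCAR using the renamed Contamination class and series_rate parameter
infected_data = ts_1.Contamination.mcar(ts_1.data, series_rate=0.4, missing_rate=0.2, use_seed=True)

# 3. impute with CDRec and the renamed user_def flag
cdrec = Imputation.MatrixCompletion.CDRec(infected_data).impute(
    user_def=False,
    params={"ground_truth": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 5}})

# 4. score, print, and plot with the renamed recov_data / input_data / incomp_data names
cdrec.score(ts_1.data, cdrec.recov_data)
ts_1.print_results(cdrec.metrics)
ts_1.plot(input_data=ts_1.data, incomp_data=infected_data, imputed_data=cdrec.recov_data,
          title="imputation", max_series=1, save_path="./imputegap/assets", display=True)
```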
12 changes: 6 additions & 6 deletions build/lib/imputegap/algorithms/cdrec.py
@@ -101,13 +101,13 @@ def native_cdrec(__py_matrix, __py_rank, __py_epsilon, __py_iterations):
    return __py_imputed_matrix;


def cdrec(contamination, truncation_rank, iterations, epsilon, logs=True, lib_path=None):
def cdrec(incomp_data, truncation_rank, iterations, epsilon, logs=True, lib_path=None):
    """
    CDRec algorithm for matrix imputation of missing values using Centroid Decomposition.
    Parameters
    ----------
    contamination : numpy.ndarray
    incomp_data : numpy.ndarray
        The input matrix with contamination (missing values represented as NaNs).
    truncation_rank : int
        The truncation rank for matrix decomposition (must be greater than 1 and smaller than the number of series).
@@ -127,18 +127,18 @@ def cdrec(contamination, truncation_rank, iterations, epsilon, logs=True, lib_pa
    Example
    -------
    >>> imputed_data = cdrec(contamination=contamination_matrix, truncation_rank=1, iterations=100, epsilon=0.000001, logs=True)
    >>> print(imputed_data)
    >>> recov_data = cdrec(incomp_data=incomp_data, truncation_rank=1, iterations=100, epsilon=0.000001, logs=True)
    >>> print(recov_data)
    """
    start_time = time.time()  # Record start time

    # Call the C++ function to perform recovery
    imputed_matrix = native_cdrec(contamination, truncation_rank, epsilon, iterations)
    recov_data = native_cdrec(incomp_data, truncation_rank, epsilon, iterations)

    end_time = time.time()

    if logs:
        print(f"\n\t\t> logs, imputation cdrec - Execution Time: {(end_time - start_time):.4f} seconds\n")

    return imputed_matrix
    return recov_data
4 changes: 2 additions & 2 deletions build/lib/imputegap/algorithms/cpp_integration.py
@@ -119,11 +119,11 @@ def your_algo(contamination, param, logs=True):
    start_time = time.time()  # Record start time

    # Call the C++ function to perform recovery
    imputed_matrix = native_algo(contamination, param)
    recov_data = native_algo(contamination, param)

    end_time = time.time()

    if logs:
        print(f"\n\t\t> logs, imputation algo - Execution Time: {(end_time - start_time):.4f} seconds\n")

    return imputed_matrix
    return recov_data
12 changes: 6 additions & 6 deletions build/lib/imputegap/algorithms/iim.py
@@ -2,13 +2,13 @@
from imputegap.wrapper.AlgoPython.IIM.testerIIM import impute_with_algorithm


def iim(contamination, number_neighbor, algo_code, logs=True):
def iim(incomp_data, number_neighbor, algo_code, logs=True):
    """
    Perform imputation using the Iterative Imputation Method (IIM) algorithm.
    Parameters
    ----------
    contamination : numpy.ndarray
    incomp_data : numpy.ndarray
        The input matrix with contamination (missing values represented as NaNs).
    number_neighbor : int
        The number of neighbors to use for the K-Nearest Neighbors (KNN) classifier (default is 10).
@@ -31,8 +31,8 @@ def iim(contamination, number_neighbor, algo_code, logs=True):
    Example
    -------
    >>> imputed_data = iim(contamination_matrix, number_neighbor=10, algo_code="iim 2")
    >>> print(imputed_data)
    >>> recov_data = iim(incomp_data, number_neighbor=10, algo_code="iim 2")
    >>> print(recov_data)
    References
    ----------
@@ -41,10 +41,10 @@
    """
    start_time = time.time()  # Record start time

    imputed_matrix = impute_with_algorithm(algo_code, contamination.copy(), number_neighbor)
    recov_data = impute_with_algorithm(algo_code, incomp_data.copy(), number_neighbor)

    end_time = time.time()
    if logs:
        print(f"\n\t\t> logs, imputation iim - Execution Time: {(end_time - start_time):.4f} seconds\n")

    return imputed_matrix
    return recov_data
16 changes: 8 additions & 8 deletions build/lib/imputegap/algorithms/mean_impute.py
@@ -1,13 +1,13 @@
import numpy as np


def mean_impute(contamination, params=None):
def mean_impute(incomp_data, params=None):
    """
    Impute NaN values with the mean value of the time series.
    Parameters
    ----------
    contamination : numpy.ndarray
    incomp_data : numpy.ndarray
        The input time series with contamination (missing values represented as NaNs).
    params : dict, optional
        Optional parameters for the algorithm. If None, the minimum value from the contamination is used (default is None).
@@ -24,18 +24,18 @@ def mean_impute(contamination, params=None):
    Example
    -------
    >>> contamination = np.array([[5, 2, np.nan], [3, np.nan, 6]])
    >>> imputed_matrix = mean_impute(contamination)
    >>> print(imputed_matrix)
    >>> incomp_data = np.array([[5, 2, np.nan], [3, np.nan, 6]])
    >>> recov_data = mean_impute(incomp_data)
    >>> print(recov_data)
    array([[5., 2., 4.],
           [3., 4., 6.]])
    """

    # logic
    mean_value = np.nanmean(contamination)
    mean_value = np.nanmean(incomp_data)

    # Imputation
    imputed_matrix = np.nan_to_num(contamination, nan=mean_value)
    recov_data = np.nan_to_num(incomp_data, nan=mean_value)

    return imputed_matrix
    return recov_data
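As a quick sanity check of the renamed mean-imputation helper, the following standalone sketch mirrors the two numpy calls shown above (np.nanmean and np.nan_to_num) and reproduces the doctest values; it does not depend on ImputeGAP itself.

```python
import numpy as np

# Same example matrix as the docstring above: two series with one NaN each.
incomp_data = np.array([[5, 2, np.nan], [3, np.nan, 6]])

# Mirror of the renamed function body: the global mean of the observed values,
# (5 + 2 + 3 + 6) / 4 = 4.0, replaces every NaN.
mean_value = np.nanmean(incomp_data)
recov_data = np.nan_to_num(incomp_data, nan=mean_value)

print(recov_data)  # [[5. 2. 4.]
                   #  [3. 4. 6.]]
```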
