Commit

1a PM - alpha correction

qnater committed Jan 7, 2025
1 parent 4802825 commit e9311ca
Showing 212 changed files with 4,668 additions and 1,406 deletions.
35 changes: 35 additions & 0 deletions .github/workflows/pytest_pipeline.yml
@@ -0,0 +1,35 @@
name: Pytest - ImputeGAP - Pipeline - 12

on:
  push:
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
        with:
          lfs: true

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install libmlpack-dev
          sudo apt-get install libopenblas-dev
          sudo apt-get install python3-dev build-essential
          pip install --upgrade google protobuf
          pip install -r requirements.txt
          pip install mypy
          pip install pytest
      - name: Run pytest
        run: |
          python -m pytest ./tests/test_pipeline.py
35 changes: 35 additions & 0 deletions .github/workflows/pytest_pipeline_10.yml
@@ -0,0 +1,35 @@
name: Pytest - ImputeGAP - Pipeline - 10

on:
  push:
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
        with:
          lfs: true

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install libmlpack-dev
          sudo apt-get install libopenblas-dev
          sudo apt-get install python3-dev build-essential
          pip install --upgrade google protobuf
          pip install -r requirements.txt
          pip install mypy
          pip install pytest
      - name: Run pytest
        run: |
          python -m pytest ./tests/test_pipeline.py
35 changes: 35 additions & 0 deletions .github/workflows/pytest_pipeline_12_6.yml
@@ -0,0 +1,35 @@
name: Pytest - ImputeGAP - Pipeline - 12.6

on:
  push:
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
        with:
          lfs: true

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12.6'

      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install libmlpack-dev
          sudo apt-get install libopenblas-dev
          sudo apt-get install python3-dev build-essential
          pip install --upgrade google protobuf
          pip install -r requirements.txt
          pip install mypy
          pip install pytest
      - name: Run pytest
        run: |
          python -m pytest ./tests/test_pipeline.py
35 changes: 35 additions & 0 deletions .github/workflows/pytest_pipeline_12_8.yml
@@ -0,0 +1,35 @@
name: Pytest - ImputeGAP - Pipeline - 12.8

on:
  push:
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
        with:
          lfs: true

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12.6'

      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install libmlpack-dev
          sudo apt-get install libopenblas-dev
          sudo apt-get install python3-dev build-essential
          pip install --upgrade google protobuf
          pip install -r requirements.txt
          pip install mypy
          pip install pytest
      - name: Run pytest
        run: |
          python -m pytest ./tests/test_pipeline.py
35 changes: 35 additions & 0 deletions .github/workflows/pytest_pipeline_13.yml
@@ -0,0 +1,35 @@
name: Pytest - ImputeGAP - Pipeline - 13

on:
  push:
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
        with:
          lfs: true

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.13'

      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install libmlpack-dev
          sudo apt-get install libopenblas-dev
          sudo apt-get install python3-dev build-essential
          pip install --upgrade google protobuf
          pip install -r requirements.txt
          pip install mypy
          pip install pytest
      - name: Run pytest
        run: |
          python -m pytest ./tests/test_pipeline.py
256 changes: 216 additions & 40 deletions .idea/workspace.xml

Large diffs are not rendered by default.

24 changes: 12 additions & 12 deletions README.md
@@ -89,8 +89,8 @@ ts_1.load_timeseries(utils.search_path("eeg-alcohol"), max_series=5, max_values=
ts_1.normalize(normalizer="z_score")

# [OPTIONAL] you can plot your raw data / print the information
ts_1.plot(raw_data=ts_1.data, title="raw data", max_series=10, max_values=100, save_path="./imputegap/assets")
ts_1.print(limit=10)
ts_1.plot(input_data=ts_1.data, title="raw data", max_series=10, max_values=100, save_path="./imputegap/assets")
ts_1.print(limit_series=10)

```

@@ -116,10 +116,10 @@ ts_1.load_timeseries(utils.search_path("eeg-alcohol"))
ts_1.normalize(normalizer="min_max")

# 3. contamination of the data with MCAR scenario
infected_data = ts_1.Contaminate.mcar(ts_1.data, series_impacted=0.4, missing_rate=0.2, use_seed=True)
infected_data = ts_1.Contamination.mcar(ts_1.data, series_rate=0.4, missing_rate=0.2, use_seed=True)

# [OPTIONAL] you can plot your raw data / print the contamination
ts_1.print(limit=10)
ts_1.print(limit_series=10)
ts_1.plot(ts_1.data, infected_data, title="contamination", max_series=1, save_path="./imputegap/assets")
```

@@ -146,7 +146,7 @@ ts_1.load_timeseries(utils.search_path("eeg-alcohol"))
ts_1.normalize(normalizer="min_max")

# 3. contamination of the data
infected_data = ts_1.Contaminate.mcar(ts_1.data)
infected_data = ts_1.Contamination.mcar(ts_1.data)

# 4. imputation of the contaminated data
# choice of the algorithm, and their parameters (default, automl, or defined by the user)
@@ -190,20 +190,20 @@ ts_1.load_timeseries(utils.search_path("eeg-alcohol"))
ts_1.normalize(normalizer="min_max")

# 3. contamination of the data
infected_data = ts_1.Contaminate.mcar(ts_1.data)
infected_data = ts_1.Contamination.mcar(ts_1.data)

# 4. imputation of the contaminated data
# imputation with AutoML which will discover the optimal hyperparameters for your dataset and your algorithm
cdrec = Imputation.MatrixCompletion.CDRec(infected_data).impute(user_defined=False, params={"ground_truth": ts_1.data,
"optimizer": "bayesian",
"options": {"n_calls": 5}})
cdrec = Imputation.MatrixCompletion.CDRec(infected_data).impute(user_def=False, params={"ground_truth": ts_1.data,
"optimizer": "bayesian",
"options": {"n_calls": 5}})

# 5. score the imputation with the raw_data
cdrec.score(ts_1.data, cdrec.imputed_matrix)
cdrec.score(ts_1.data, cdrec.recov_data)

# 6. [OPTIONAL] display the results
ts_1.print_results(cdrec.metrics)
ts_1.plot(raw_data=ts_1.data, infected_data=infected_data, imputed_data=cdrec.imputed_matrix, title="imputation",
ts_1.plot(input_data=ts_1.data, incomp_data=infected_data, imputed_data=cdrec.recov_data, title="imputation",
max_series=1, save_path="./imputegap/assets", display=True)

# 7. [OPTIONAL] save hyperparameters
@@ -234,7 +234,7 @@ ts_1 = TimeSeries()
ts_1.load_timeseries(utils.search_path("eeg-alcohol"))

# 3. call the explanation of your dataset with a specific algorithm to gain insight on the Imputation results
shap_values, shap_details = Explainer.shap_explainer(raw_data=ts_1.data, file_name="eeg-alcohol", algorithm="cdrec")
shap_values, shap_details = Explainer.shap_explainer(input_data=ts_1.data, file_name="eeg-alcohol", algorithm="cdrec")

# [OPTIONAL] print the results with the impact of each feature.
Explainer.print(shap_values, shap_details)
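To see the renamed README API in one place (Contaminate → Contamination, series_impacted → series_rate, raw_data → input_data, infected_data → incomp_data, imputed_matrix → recov_data, user_defined → user_def, limit → limit_series), here is a consolidated sketch of the updated flow using only names that appear in the added lines above. The import paths are not visible in these hunks and are assumed from the package layout, so treat this as an illustration rather than the canonical README example.

```python
# Assumed import paths (not shown in the hunks above).
from imputegap.recovery.manager import TimeSeries
from imputegap.recovery.imputation import Imputation
from imputegap.tools import utils

# 1. load and normalize the time series, as in the README
ts_1 = TimeSeries()
ts_1.load_timeseries(utils.search_path("eeg-alcohol"))
ts_1.normalize(normalizer="min_max")

# 2. contaminate with MCAR using the renamed Contamination class and series_rate parameter
infected_data = ts_1.Contamination.mcar(ts_1.data, series_rate=0.4, missing_rate=0.2, use_seed=True)

# 3. impute with CDRec and the renamed user_def flag
cdrec = Imputation.MatrixCompletion.CDRec(infected_data).impute(
    user_def=False,
    params={"ground_truth": ts_1.data, "optimizer": "bayesian", "options": {"n_calls": 5}})

# 4. score, print, and plot with the renamed recov_data / input_data / incomp_data names
cdrec.score(ts_1.data, cdrec.recov_data)
ts_1.print_results(cdrec.metrics)
ts_1.plot(input_data=ts_1.data, incomp_data=infected_data, imputed_data=cdrec.recov_data,
          title="imputation", max_series=1, save_path="./imputegap/assets", display=True)
```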
12 changes: 6 additions & 6 deletions build/lib/imputegap/algorithms/cdrec.py
@@ -101,13 +101,13 @@ def native_cdrec(__py_matrix, __py_rank, __py_epsilon, __py_iterations):
    return __py_imputed_matrix;


def cdrec(contamination, truncation_rank, iterations, epsilon, logs=True, lib_path=None):
def cdrec(incomp_data, truncation_rank, iterations, epsilon, logs=True, lib_path=None):
    """
    CDRec algorithm for matrix imputation of missing values using Centroid Decomposition.
    Parameters
    ----------
    contamination : numpy.ndarray
    incomp_data : numpy.ndarray
        The input matrix with contamination (missing values represented as NaNs).
    truncation_rank : int
        The truncation rank for matrix decomposition (must be greater than 1 and smaller than the number of series).
@@ -127,18 +127,18 @@ def cdrec(contamination, truncation_rank, iterations, epsilon, logs=True, lib_pa
    Example
    -------
    >>> imputed_data = cdrec(contamination=contamination_matrix, truncation_rank=1, iterations=100, epsilon=0.000001, logs=True)
    >>> print(imputed_data)
    >>> recov_data = cdrec(incomp_data=incomp_data, truncation_rank=1, iterations=100, epsilon=0.000001, logs=True)
    >>> print(recov_data)
    """
    start_time = time.time()  # Record start time

    # Call the C++ function to perform recovery
    imputed_matrix = native_cdrec(contamination, truncation_rank, epsilon, iterations)
    recov_data = native_cdrec(incomp_data, truncation_rank, epsilon, iterations)

    end_time = time.time()

    if logs:
        print(f"\n\t\t> logs, imputation cdrec - Execution Time: {(end_time - start_time):.4f} seconds\n")

    return imputed_matrix
    return recov_data
4 changes: 2 additions & 2 deletions build/lib/imputegap/algorithms/cpp_integration.py
@@ -119,11 +119,11 @@ def your_algo(contamination, param, logs=True):
    start_time = time.time()  # Record start time

    # Call the C++ function to perform recovery
    imputed_matrix = native_algo(contamination, param)
    recov_data = native_algo(contamination, param)

    end_time = time.time()

    if logs:
        print(f"\n\t\t> logs, imputation algo - Execution Time: {(end_time - start_time):.4f} seconds\n")

    return imputed_matrix
    return recov_data
12 changes: 6 additions & 6 deletions build/lib/imputegap/algorithms/iim.py
@@ -2,13 +2,13 @@
from imputegap.wrapper.AlgoPython.IIM.testerIIM import impute_with_algorithm


def iim(contamination, number_neighbor, algo_code, logs=True):
def iim(incomp_data, number_neighbor, algo_code, logs=True):
    """
    Perform imputation using the Iterative Imputation Method (IIM) algorithm.
    Parameters
    ----------
    contamination : numpy.ndarray
    incomp_data : numpy.ndarray
        The input matrix with contamination (missing values represented as NaNs).
    number_neighbor : int
        The number of neighbors to use for the K-Nearest Neighbors (KNN) classifier (default is 10).
@@ -31,8 +31,8 @@ def iim(contamination, number_neighbor, algo_code, logs=True):
    Example
    -------
    >>> imputed_data = iim(contamination_matrix, number_neighbor=10, algo_code="iim 2")
    >>> print(imputed_data)
    >>> recov_data = iim(incomp_data, number_neighbor=10, algo_code="iim 2")
    >>> print(recov_data)
    References
    ----------
@@ -41,10 +41,10 @@
    """
    start_time = time.time()  # Record start time

    imputed_matrix = impute_with_algorithm(algo_code, contamination.copy(), number_neighbor)
    recov_data = impute_with_algorithm(algo_code, incomp_data.copy(), number_neighbor)

    end_time = time.time()
    if logs:
        print(f"\n\t\t> logs, imputation iim - Execution Time: {(end_time - start_time):.4f} seconds\n")

    return imputed_matrix
    return recov_data
16 changes: 8 additions & 8 deletions build/lib/imputegap/algorithms/mean_impute.py
@@ -1,13 +1,13 @@
import numpy as np


def mean_impute(contamination, params=None):
def mean_impute(incomp_data, params=None):
    """
    Impute NaN values with the mean value of the time series.
    Parameters
    ----------
    contamination : numpy.ndarray
    incomp_data : numpy.ndarray
        The input time series with contamination (missing values represented as NaNs).
    params : dict, optional
        Optional parameters for the algorithm. If None, the minimum value from the contamination is used (default is None).
@@ -24,18 +24,18 @@ def mean_impute(contamination, params=None):
    Example
    -------
    >>> contamination = np.array([[5, 2, np.nan], [3, np.nan, 6]])
    >>> imputed_matrix = mean_impute(contamination)
    >>> print(imputed_matrix)
    >>> incomp_data = np.array([[5, 2, np.nan], [3, np.nan, 6]])
    >>> recov_data = mean_impute(incomp_data)
    >>> print(recov_data)
    array([[5., 2., 4.],
           [3., 4., 6.]])
    """

    # logic
    mean_value = np.nanmean(contamination)
    mean_value = np.nanmean(incomp_data)

    # Imputation
    imputed_matrix = np.nan_to_num(contamination, nan=mean_value)
    recov_data = np.nan_to_num(incomp_data, nan=mean_value)

    return imputed_matrix
    return recov_data
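As a quick sanity check of the renamed mean-imputation helper, the following standalone sketch mirrors the two numpy calls shown above (np.nanmean and np.nan_to_num) and reproduces the doctest values; it does not depend on ImputeGAP itself.

```python
import numpy as np

# Same example matrix as the docstring above: two series with one NaN each.
incomp_data = np.array([[5, 2, np.nan], [3, np.nan, 6]])

# Mirror of the renamed function body: the global mean of the observed values,
# (5 + 2 + 3 + 6) / 4 = 4.0, replaces every NaN.
mean_value = np.nanmean(incomp_data)
recov_data = np.nan_to_num(incomp_data, nan=mean_value)

print(recov_data)  # [[5. 2. 4.]
                   #  [3. 4. 6.]]
```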
