Skip to content

Commit

Permalink
correction of optimization and creation of batches
Browse files Browse the repository at this point in the history
  • Loading branch information
qnater committed Sep 30, 2024
1 parent 7937935 commit 9888ea8
Show file tree
Hide file tree
Showing 21 changed files with 46 additions and 31 deletions.
20 changes: 12 additions & 8 deletions .idea/workspace.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion env/default_values.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ algorithm_code = "iim 2"
[mrnn]
hidden_dim = 10
learning_rate = 0.01
iterations = 1000
iterations = 100
sequence_length = 7

[explainer]
Expand Down
Binary file modified imputegap/assets/imputation/test_imputation.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified imputegap/imputation/__pycache__/imputation.cpython-312.pyc
Binary file not shown.
8 changes: 4 additions & 4 deletions imputegap/imputation/imputation.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,16 @@ def cdrec(ground_truth, contamination, params=None):
:param ground_truth: original time series without contamination
:param contamination: time series with contamination
:param params: [Optional] (truncation_rank, epsilon, iterations) : parameters of the algorithm, if None, default ones are loaded
:param params: [Optional] (rank, epsilon, iterations) : parameters of the algorithm, if None, default ones are loaded
:return: imputed_matrix, metrics : all time series with imputation data and their metrics
"""
if params is not None:
truncation_rank, epsilon, iterations = params
rank, epsilon, iterations = params
else:
truncation_rank, epsilon, iterations = utils.load_parameters(query="default", algorithm="cdrec")
rank, epsilon, iterations = utils.load_parameters(query="default", algorithm="cdrec")

imputed_matrix = cdrec(contamination=contamination, truncation_rank=truncation_rank, iterations=iterations, epsilon=epsilon)
imputed_matrix = cdrec(contamination=contamination, truncation_rank=rank, iterations=iterations, epsilon=epsilon)

metrics = Evaluation(ground_truth, imputed_matrix, contamination).metrics_computation()

Expand Down
Binary file modified imputegap/manager/__pycache__/manager.cpython-312.pyc
Binary file not shown.
Binary file modified imputegap/manager/__pycache__/utils.cpython-312.pyc
Binary file not shown.
13 changes: 6 additions & 7 deletions imputegap/manager/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,14 @@

class TimeSeries:

def __init__(self, data=None, normalization=None):
def __init__(self, data=None, normalization=None, limitation_values=None):
"""
:param ts : Original time series without alteration (ground-truth)
:param contaminated_ts : time series after contamination
:param imputation : time series after reconstruction of the missing data
:param optimal_params : optimal parameters found for a specific algorithm and time series dataset
:param explainer : result of the explainer algorithm to explain the imputation of the time series dataset
:param limitation_values : limitation of the maximum number of values by series (computation limitation) | default None
"""
self.ts = self.load_timeseries(data, normalization)
self.ts = self.load_timeseries(data, normalization, limitation_values)
self.ts_contaminate = None
self.ts_imputation = None
self.metrics = []
Expand All @@ -26,14 +25,15 @@ def __init__(self, data=None, normalization=None):



def load_timeseries(self, data=None, normalization=None):
def load_timeseries(self, data=None, normalization=None, limitation=None):
"""
Load timeseries manager from file
FORMAT : (Values,Series), values are separated by space and series by \n
@author Quentin Nater
:param data: path of the time series dataset file (str), or the time series given directly as a list
:param normalization : [OPTIONAL] choice of normalization ("z_score" or "min_max")
:param limitation : [OPTIONAL] maximum number of rows read from the dataset file, forwarded to np.genfromtxt as max_rows (computation limitation) | default None
:return: time series format for imputegap from dataset
"""

Expand All @@ -43,8 +43,7 @@ def load_timeseries(self, data=None, normalization=None):

if isinstance(data, str):
print("\nThe time series has been loaded from " + str(data) + "\n")

ts = np.genfromtxt(data, delimiter=' ')
ts = np.genfromtxt(data, delimiter=' ', max_rows=limitation)

elif isinstance(data, list):
print("\nThe time series has been loaded from code ", *data, "\n")
Expand Down
2 changes: 1 addition & 1 deletion imputegap/manager/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def load_parameters(query: str = "default", algorithm: str = "cdrec", dataset: s
if query == "default":
filepath = "../env/default_values.toml"
elif query == "optimal":
filepath = "../env/optimal_parameters_"+str(optimizer)+"_"+str(dataset)+"_"+str(algorithm)+".toml"
filepath = "../params/optimal_parameters_"+str(optimizer)+"_"+str(dataset)+"_"+str(algorithm)+".toml"
else:
print("Query not found for this function ('optimal' or 'default')")

Expand Down
Binary file not shown.
2 changes: 1 addition & 1 deletion imputegap/optimization/bayesian_optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def save_optimization(optimal_params, algorithm="cdrec", dataset="", optimizer="
:param file_name: name of the TOML file to save the results. If None, the path "../params/optimal_parameters_<optimizer>_<dataset>_<algorithm>.toml" is used.
"""
if file_name is None:
file_name = "../env/optimal_parameters_" + str(optimizer) + "_" + str(dataset) + "_" + str(algorithm) + ".toml"
file_name = "../params/optimal_parameters_" + str(optimizer) + "_" + str(dataset) + "_" + str(algorithm) + ".toml"

if not os.path.exists(file_name):
file_name = file_name[1:]
Expand Down
18 changes: 15 additions & 3 deletions imputegap/runner_imputation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from imputegap.manager.manager import TimeSeries
import os

from imputegap.optimization.bayesian_optimization import Optimization


def display_title(title="Master Thesis", aut="Quentin Nater", lib="ImputeGAP", university="University Fribourg - exascale infolab"):
print("=" * 100)
Expand All @@ -25,6 +27,9 @@ def check_block_size(filename):
display_title()

filename = "eeg"
load = False
algo = "cdrec"

file_path = os.path.join("./dataset/", filename + ".txt")
gap = TimeSeries(data=file_path, normalization="z_score")

Expand All @@ -34,13 +39,20 @@ def check_block_size(filename):
gap.plot(title="test", save_path="assets", limitation=0, display=False)

gap.ts_contaminate = Contamination.scenario_mcar(ts=gap.ts, series_impacted=0.4, missing_rate=0.4, block_size=block_size, protection=0.1, use_seed=True, seed=42)
gap.print()
gap.print(limitation=10)
gap.plot(ts_type="contamination", title="test", save_path="assets", limitation=plot_limit, display=False)

gap.optimal_params = utils.load_parameters(query="optimal", algorithm="cdrec")
if load:
gap.optimal_params = utils.load_parameters(query="optimal", algorithm=algo)
else:
optimal_params, yi = Optimization.Bayesian.bayesian_optimization(ground_truth=gap.ts,
contamination=gap.ts_contaminate,
algorithm=algo, n_calls=3)
gap.optimal_params = tuple(optimal_params.values())
print("\nOptical Params : ", gap.optimal_params)

gap.ts_imputation, gap.metrics = Imputation.MR.cdrec(ground_truth=gap.ts, contamination=gap.ts_contaminate, params=gap.optimal_params)
gap.print()
gap.print(limitation=10)
gap.print_results()

gap.plot(ts_type="imputation", title="test", save_path="assets", limitation=plot_limit, display=False)
Expand Down
4 changes: 2 additions & 2 deletions imputegap/runner_optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ def check_block_size(filename):
gap.plot(title="test", save_path="assets", limitation=6, display=False)

gap.ts_contaminate = Contamination.scenario_mcar(ts=gap.ts, series_impacted=0.4, missing_rate=0.4, block_size=block_size, protection=0.1, use_seed=True, seed=42)
gap.print()
gap.print(limitation=5)
gap.plot(ts_type="contamination", title="test", save_path="assets", limitation=3, display=False)

for algo in ["cdrec", "stmvl", "iim", "mrnn"]:
print("RUN OPTIMIZATION FOR : ", algo, "... with ", filename, "...")
optimal_params, yi = Optimization.Bayesian.bayesian_optimization(ground_truth=gap.ts, contamination=gap.ts_contaminate, algorithm=algo)
optimal_params, yi = Optimization.Bayesian.bayesian_optimization(ground_truth=gap.ts, contamination=gap.ts_contaminate, algorithm=algo, n_calls=100)
print("\nOptical Params : ", optimal_params)
print("\nyi : ", yi, "\n")
Optimization.save_optimization(optimal_params=optimal_params, algorithm=algo, dataset=filename, optimizer="b")
Expand Down
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion tests/test_opti_bayesian_cdrec.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def test_optimization_bayesian_cdrec(self):

ts_contaminated = Contamination.scenario_mcar(ts=gap.ts, series_impacted=0.4, missing_rate=0.4, block_size=2, protection=0.1, use_seed=True, seed=42)

optimal_params, yi = Optimization.Bayesian.bayesian_optimization(ground_truth=gap.ts, contamination=ts_contaminated, algorithm=algorithm)
optimal_params, yi = Optimization.Bayesian.bayesian_optimization(ground_truth=gap.ts, contamination=ts_contaminated, algorithm=algorithm, n_calls=3)

print("\nOptimization done successfully... ")
print("\n", optimal_params, "\n")
Expand Down
2 changes: 1 addition & 1 deletion tests/test_opti_bayesian_iim.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def test_optimization_bayesian_iim(self):

optimal_params, yi = Optimization.Bayesian.bayesian_optimization(ground_truth=gap.ts,
contamination=ts_contaminated,
algorithm=algorithm)
algorithm=algorithm, n_calls=3)

print("\nOptimization done successfully... ")
print("\n", optimal_params, "\n")
Expand Down
2 changes: 1 addition & 1 deletion tests/test_opti_bayesian_mrnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def test_optimization_bayesian_mrnn(self):

optimal_params, yi = Optimization.Bayesian.bayesian_optimization(ground_truth=gap.ts,
contamination=ts_contaminated,
algorithm=algorithm)
algorithm=algorithm, n_calls=3)

print("\nOptimization done successfully... ")
print("\n", optimal_params, "\n")
Expand Down
2 changes: 1 addition & 1 deletion tests/test_opti_bayesian_stmvl.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def test_optimization_bayesian_stmvl(self):

optimal_params, yi = Optimization.Bayesian.bayesian_optimization(ground_truth=gap.ts,
contamination=ts_contaminated,
algorithm=algorithm)
algorithm=algorithm, n_calls=3)

print("\nOptimization done successfully... ")
print("\n", optimal_params, "\n")
Expand Down

0 comments on commit 9888ea8

Please sign in to comment.