From 2ff30a90a15951fdf7383824d6b6d0c7f19adf05 Mon Sep 17 00:00:00 2001 From: mhchia Date: Sun, 15 Sep 2024 18:19:38 +0800 Subject: [PATCH] create_dummy --- tests/helpers.py | 4 ++-- zkstats/core.py | 29 ++++++++++++----------------- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/tests/helpers.py b/tests/helpers.py index 966dc02..03f109c 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -4,8 +4,8 @@ import torch -from zkstats.core import prover_gen_settings, setup, prover_gen_proof, verifier_verify, generate_data_commitment, verifier_define_calculation -from zkstats.computation import computation_to_model, TComputation, State, IModel +from zkstats.core import prover_gen_settings, setup, prover_gen_proof, verifier_verify, generate_data_commitment +from zkstats.computation import IModel DEFAULT_POSSIBLE_SCALES = list(range(20)) diff --git a/zkstats/core.py b/zkstats/core.py index dd24b28..083b0fb 100644 --- a/zkstats/core.py +++ b/zkstats/core.py @@ -38,26 +38,21 @@ def verifier_define_calculation( _export_onnx(verifier_model, dummy_data_tensor_array, verifier_model_path) -# TODO: Should only need the shape of data instead of the real dataset, since -# users (verifiers) call this function and they don't have the real data. -def create_dummy(data_path: str, dummy_data_path: str) -> None: +def create_dummy(shape_info: dict[str, int], dummy_data_path: str) -> None: """ - Create a dummy data file with randomized data based on the shape of the original data. - """ - # Convert data file to json under the same directory but with suffix .json - data_path: Path = Path(data_path) - data_json_path = Path(data_path).with_suffix(DataExtension.JSON.value) + Create a dummy data file with randomized data based on the provided shape information. - data = json.loads(open(data_json_path, "r").read()) - # assume all columns have same number of rows - dummy_data ={} - for col in data: - # not use same value for every column to prevent something weird, like singular matrix - min_col = min(data[col]) - max_col = max(data[col]) - dummy_data[col] = np.round(np.random.uniform(min_col,max_col,len(data[col])),1).tolist() + Parameters: + - shape_info (dict): A dictionary where keys are column names and values are the number of elements (shape). + - dummy_data_path (str): The path to save the dummy data file. + """ + dummy_data = {} + for col, length in shape_info.items(): + # Generate random data for each column + dummy_data[col] = np.round(np.random.uniform(0, 100, length), 1).tolist() - json.dump(dummy_data, open(dummy_data_path, 'w')) + with open(dummy_data_path, 'w') as f: + json.dump(dummy_data, f) # =================================================================================================== # ===================================================================================================