From 2ff30a90a15951fdf7383824d6b6d0c7f19adf05 Mon Sep 17 00:00:00 2001
From: mhchia <kevin.mh.chia@gmail.com>
Date: Sun, 15 Sep 2024 18:19:38 +0800
Subject: [PATCH] create_dummy

---
 tests/helpers.py |  4 ++--
 zkstats/core.py  | 29 ++++++++++++-----------------
 2 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/tests/helpers.py b/tests/helpers.py
index 966dc02..03f109c 100644
--- a/tests/helpers.py
+++ b/tests/helpers.py
@@ -4,8 +4,8 @@
 
 import torch
 
-from zkstats.core import prover_gen_settings, setup, prover_gen_proof, verifier_verify, generate_data_commitment, verifier_define_calculation
-from zkstats.computation import computation_to_model, TComputation, State, IModel
+from zkstats.core import prover_gen_settings, setup, prover_gen_proof, verifier_verify, generate_data_commitment
+from zkstats.computation import IModel
 
 
 DEFAULT_POSSIBLE_SCALES = list(range(20))
diff --git a/zkstats/core.py b/zkstats/core.py
index dd24b28..083b0fb 100644
--- a/zkstats/core.py
+++ b/zkstats/core.py
@@ -38,26 +38,21 @@ def verifier_define_calculation(
   _export_onnx(verifier_model, dummy_data_tensor_array, verifier_model_path)
 
 
-# TODO: Should only need the shape of data instead of the real dataset, since
-# users (verifiers) call this function and they don't have the real data.
-def create_dummy(data_path: str, dummy_data_path: str) -> None:
+def create_dummy(shape_info: dict[str, int], dummy_data_path: str) -> None:
     """
-    Create a dummy data file with randomized data based on the shape of the original data.
-    """
-    # Convert data file to json under the same directory but with suffix .json
-    data_path: Path = Path(data_path)
-    data_json_path = Path(data_path).with_suffix(DataExtension.JSON.value)
+    Write a JSON file of randomized dummy data with one list of values per column in shape_info.
 
-    data = json.loads(open(data_json_path, "r").read())
-    # assume all columns have same number of rows
-    dummy_data ={}
-    for col in data:
-        # not use same value for every column to prevent something weird, like singular matrix
-        min_col = min(data[col])
-        max_col = max(data[col])
-        dummy_data[col] = np.round(np.random.uniform(min_col,max_col,len(data[col])),1).tolist()
+    Parameters:
+    - shape_info (dict): Maps each column name to the number of values to generate for that column.
+    - dummy_data_path (str): The path to save the dummy data file.
+    """
+    dummy_data = {}
+    for col, length in shape_info.items():
+        # Draw `length` values uniformly from [0, 100), then round to one decimal place
+        dummy_data[col] = np.round(np.random.uniform(0, 100, length), 1).tolist()
 
-    json.dump(dummy_data, open(dummy_data_path, 'w'))
+    with open(dummy_data_path, 'w') as f:
+        json.dump(dummy_data, f)
 
 # ===================================================================================================
 # ===================================================================================================