04mar hacking session

gitcoindao · Mar 4, 2021 · 506018e · 506018e
1 parent 77ae127
commit 506018e
Show file tree

Hide file tree

Showing 4 changed files with 3,357 additions and 6,650 deletions.
diff --git a/attack_vector_ab_test.ipynb b/attack_vector_ab_test.ipynb
diff --git a/prepare_data.py b/prepare_data.py
@@ -6,12 +6,15 @@
 from cape_privacy.pandas import transformations as tfms
 
 
-def parse_grants_data(input_path: str, output_csv_path: str=None) -> pd.DataFrame:
+def parse_contributions_data(input_path: str, output_csv_path: str=None) -> pd.DataFrame:
     """
     Clean the Gitcoin Rounds data for privacy and 
     ease of the use in the simulation.
     """
-    raw_df = pd.read_csv(input_path)
+    if '.json' in input_path:
+        raw_df = pd.read_json(input_path)
+    else:
+        raw_df = pd.read_csv(input_path)
 
     # Parse the normalized data strings into dictionaries
     json_data: dict = raw_df.normalized_data.map(json.loads)
@@ -47,7 +50,8 @@ def parse_grants_data(input_path: str, output_csv_path: str=None) -> pd.DataFram
     sorted_df = df.sort_values('created_on')
 
     # Columns which are to keep into the dynamical network
-    event_property_map = {'originated_address': 'contributor',
+    event_property_map = {'created_on': 'created_on',
+                          'originated_address': 'contributor',
                           'title': 'grant',
                           'amount_per_period_usdt': 'amount'}
 
@@ -77,13 +81,13 @@ def main(src, dst):
     if src is None or dst is None:
         print("Paths must be provided in order to continue")
     else:
-        parse_grants_data(src, dst)
+        parse_contributions_data(src, dst)
 
 
 if __name__ == '__main__':
     main()
 # %%
 import json
 path = 'contributions_2021-02-24T16_51_25.595Z.json'
-df = pd.read_json(path)
+parse_contributions_data(path, 'data/2021-02-24-contributions.csv.xz')
 # %%
diff --git a/qf_performance/compare.py b/qf_performance/compare.py
@@ -0,0 +1,85 @@
+
+# Dependencies
+
+import torch
+from opt_einsum import contract
+
+import xarray as xr 
+import numpy as np 
+
+
+def generate_data(N_users=5, N_grants=3) -> xr.Dataset:
+    """
+    Generate random data for testing QF algorithms.
+
+    Arguments
+    N_users: number of users, labelled "u_0", "u_1", ...
+    N_grants: number of grants, labelled "g_0", "g_1", ...
+
+    Output
+    Dataset with {'user', 'grant'} dimensions and
+    {'contribution', 'trust'} variables. 'contribution' has shape
+    (N_users, N_grants) and is non-negative; 'trust' has shape (N_users,).
+    """
+
+    # Generate names for the users and grants
+    users = [f"u_{i}" for i in range(N_users)]
+    grants = [f"g_{i}" for i in range(N_grants)]
+
+    # Generate contributions between users and grants.
+    # Amounts must be non-negative: the matching code takes their square
+    # root, and a raw normal sample would turn every negative entry into NaN.
+    shape = (N_users, N_grants)
+    contrib_data = np.abs(np.random.randn(*shape))
+    contributions = xr.DataArray(contrib_data,
+                                 coords=[users, grants],
+                                 dims=['user', 'grant'],
+                                 name='contribution')
+
+    # Generate user trust vector
+    # NOTE(review): trust is drawn from a standard normal, so it can be
+    # negative -- confirm whether the algorithms expect non-negative trust.
+    trust = xr.DataArray(np.random.randn(N_users),
+                         coords=[users],
+                         dims=['user'],
+                         name='trust')
+
+    # Merge both variables into a single Dataset sharing the 'user' axis
+    return xr.merge([contributions, trust])
+
+
+def pairwise_clr_match(contribs: torch.Tensor,
+                       trust: torch.Tensor,
+                       m: float) -> torch.Tensor:
+    """
+    Compute the pairwise CLR match (subsidy) for each project.
+
+    With s = sqrt(contribs), the result is
+        overlap[u, v] = sum_p s[p, u] * s[p, v]
+        k[u, v]       = m / (m + overlap[u, v]),  with k[u, u] = 0
+        t[u, v]       = max(trust[u], trust[v])
+        subsidy[p]    = sum_{u, v} s[p, u] * k[u, v] * t[u, v] * s[p, v]
+
+    Arguments
+    contribs: tensor of shape (N_proj, N_user); entries must be
+        non-negative, otherwise the square root produces NaN
+    trust: tensor of shape (N_user,)
+    m: number
+
+    Output
+    subsidies: tensor of shape (N_proj,)
+    """
+    # The square root is needed three times below; compute it once.
+    sqrt_contribs = contribs.sqrt()
+
+    participant_overlap = contract('up,pv->uv', sqrt_contribs.t(), sqrt_contribs)
+    k = m / (m + participant_overlap)
+    # No self-subsidy
+    k.fill_diagonal_(0)
+
+    # Pairwise maximum of the two users' trust values. Broadcasting a row
+    # against a column yields the (N_user, N_user) matrix without first
+    # materialising two repeated copies of the trust vector.
+    max_pairwise_trust = torch.max(trust.unsqueeze(0), trust.unsqueeze(1))
+
+    # To use sparse, we're either going to have to use something like pytaco,
+    # which can handle sparse einsum, or else break this down row by row,
+    # take the outer product, and sum.
+    subsidies = contract('pu,uv,uv,pv->p',
+                         sqrt_contribs, k, max_pairwise_trust, sqrt_contribs)
+    return subsidies
+
+# Value passed as the `m` argument of every algorithm
+M = 1
+# Algorithms under comparison; each is called as algo(contribs, trust, M)
+ALGORITHMS = [pairwise_clr_match]
+
+ds = generate_data()
+
+# The algorithms call torch tensor methods and document contributions as
+# (N_proj, N_user), whereas the Dataset holds xarray arrays laid out as
+# (user, grant). Convert once here, reordering the axes explicitly by name.
+contribs = torch.as_tensor(ds.contribution.transpose('grant', 'user').values)
+trust = torch.as_tensor(ds.trust.values)
+
+
+# Map each algorithm's function name to its per-grant match
+results = {}
+for algo in ALGORITHMS:
+    match_per_grant = algo(contribs, trust, M)
+    name = algo.__name__
+    results[name] = match_per_grant
+
diff --git a/qf_performance_diagnosis.ipynb b/qf_performance_diagnosis.ipynb