Merge pull request #23 from kuffmode/Release-1.5
optimised for nd array contributions
works with Python 3.11
takes care of elderlies on weekends.
kuffmode authored Nov 17, 2023
2 parents 0892e87 + 929f272 commit 787f574
Showing 6 changed files with 44 additions and 27 deletions.
2 changes: 1 addition & 1 deletion docs/README.md
@@ -15,7 +15,7 @@ As you probably noticed this won't be feasible to calculate as, for example, it

And our own recent work [Fakhar K, Hilgetag CC. Systematic perturbation of an artificial neural network: A step towards quantifying causal contributions in the brain. PLoS Comput Biol. 2022;18: e1010250. doi:10.1371/journal.pcbi.1010250](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1010250)
## Installation:
- The easiest way is to `pip install msapy`, I wrote this package in Python `3.9` and tried it on `3.8` so, it's safe to say you need at least a Python `3.8` so easy-installing it on older versions is not an option. Alternatively you can clone the repository (`git clone https://github.com/kuffmode/msa.git`, move to the `msa` folder (`cd msa`), and run `pip install .` In case the requirements weren't installed automatically you can also use the command `pip install -r requirements.txt` and then let me know so I can fix it!
+ The easiest way is to `pip install msapy`. This package is tested on Python `3.8` through `3.11`; other versions might not work. Alternatively, you can clone the repository (`git clone https://github.com/kuffmode/msa.git`), move to the `msa` folder (`cd msa`), and run `pip install .`. In case the requirements weren't installed automatically, you can also use `pip install -r requirements.txt`, and then let me know so I can fix it!
## How it works:
Here you can see a schematic representation of how the algorithm works (interested in the math instead? Check the papers above). Briefly, all MSA needs from you is a list of players and a game function. The players can be your nodes, for example, brain regions or indices in a connectivity matrix, or the links between them as tuples. MSA then shuffles them to produce N orderings in which they join the game; with a small set this can yield repeated permutations, but that's fine, don't worry! From these orderings MSA builds a "combination space" of all the coalitions the players need to form, then evaluates your game function to fill in the contribution of each coalition. The last step is a Shapley integration that isolates each player's contribution within a given permutation. Repeating this for all permutations produces a contribution table (the Shapley table), and averaging over permutations gives you the Shapley values, i.e., one value per element/player. To get a better grasp of how this works in code, check the minimal example in the examples folder; a short standalone sketch also follows right after this diff.

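To make the loop described above concrete, here is a tiny standalone sketch of the permutation-based Shapley estimate. The element names and the toy `game` function are made up for illustration; this is the idea behind msapy, not its API:

```python
# Minimal sketch of permutation-based Shapley estimation (illustrative only;
# the element names and the toy `game` function are hypothetical, not msapy's API).
import random

elements = ["A", "B", "C"]
worth = {"A": 1.0, "B": 2.0, "C": 3.0}

def game(coalition):
    # Toy additive game: a coalition is worth the sum of its members' worths.
    return sum(worth[e] for e in coalition)

rng = random.Random(0)
n_permutations = 1_000
running_sum = {e: 0.0 for e in elements}

for _ in range(n_permutations):
    order = rng.sample(elements, len(elements))    # one random joining order
    for i, element in enumerate(order):
        including = game(order[:i + 1])            # coalition with `element`
        excluding = game(order[:i])                # coalition without `element`
        running_sum[element] += including - excluding  # isolated contribution

shapley = {e: s / n_permutations for e, s in running_sum.items()}
print(shapley)  # an additive game recovers each element's own worth exactly
```

Because this toy game is additive, every permutation yields the same marginal contribution, so the estimate is exact here; for non-additive games the average converges as the number of sampled permutations grows.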
2 changes: 1 addition & 1 deletion msapy/__init__.py
@@ -1,2 +1,2 @@
from msapy import msa, utils, plottings,checks
__version__ = "1.4"
__version__ = "1.5"
16 changes: 8 additions & 8 deletions msapy/datastructures.py
@@ -54,24 +54,24 @@ def contribution_type(self):
        return "nd"

    @classmethod
-    def from_dataframe(cls, shapley_table, shape):
-        num_permutation, num_nodes = shapley_table.shape
-        data = np.stack(shapley_table.values.flatten())
-        mode_size = data.shape[-1]
-        data = data.reshape(num_permutation, num_nodes, -1)
+    def from_ndarray(cls, shapley_table, columns):
+        num_permutation, num_nodes = shapley_table.shape[:2]
+        contrib_shape = shapley_table.shape[2:]
+        data = shapley_table.reshape(num_permutation, num_nodes, -1)
+        mode_size = data.shape[2]
        data = data.transpose((0, 2, 1)).reshape((-1, num_nodes))

        shapley_table = pd.DataFrame(data=data,
                                     index=pd.MultiIndex.from_product(
                                         [range(num_permutation), range(mode_size)], names=[None, "mode_size"]),
-                                    columns=shapley_table.columns
+                                    columns=columns
                                     )
        shapley_table.index.names = [None, "ND"]
-        return cls(shapley_table, shape)
+        return cls(shapley_table, contrib_shape)

    @property
    def shapley_modes(self):
-        return ShapleyModeND(self.groupby(level=1).mean(), self.shape)
+        return ShapleyModeND(self.groupby(level=1).mean(), self._shape)


class ShapleyModeND(pd.DataFrame):
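For orientation, here is a standalone walk-through of the reshaping that the new `from_ndarray` performs, with toy sizes, random data, and made-up column names of our own: contributions arrive as an array shaped `(n_permutations, n_elements, *contribution_shape)`, the trailing dimensions are flattened into a "mode" axis, and each DataFrame row ends up holding one (permutation, mode) pair.

```python
# Standalone shape walk-through mirroring from_ndarray's logic above
# (toy sizes, random data, and hypothetical column names for illustration).
import numpy as np
import pandas as pd

num_permutation, num_nodes = 5, 3
contrib_shape = (2, 4)                        # e.g. each contribution is a 2x4 map
shapley_table = np.random.rand(num_permutation, num_nodes, *contrib_shape)

data = shapley_table.reshape(num_permutation, num_nodes, -1)   # (5, 3, 8)
mode_size = data.shape[2]                                      # 8 flattened "modes"
data = data.transpose((0, 2, 1)).reshape((-1, num_nodes))      # (5 * 8, 3)

df = pd.DataFrame(
    data=data,
    index=pd.MultiIndex.from_product(
        [range(num_permutation), range(mode_size)], names=[None, "mode_size"]),
    columns=["A", "B", "C"],                  # one column per element
)
df.index.names = [None, "ND"]                 # renamed, as in the diff

# Averaging over permutations (grouping by the mode level) leaves one row per mode,
# which is what the shapley_modes property computes:
shapley_modes = df.groupby(level=1).mean()    # shape (8, 3)
```

Taking the raw ndarray directly, rather than a DataFrame of array-valued cells as the removed `from_dataframe` did, avoids the `np.stack` over flattened values; presumably this is the "optimised for nd array contributions" mentioned in the commit message.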
35 changes: 19 additions & 16 deletions msapy/msa.py
@@ -396,54 +396,57 @@ def local_efficiency(complements, graph):
    contributions = {tuple(): objective_function(tuple(), **objective_function_params)} if lazy else contributions

    contribution_type, arbitrary_contrib = _get_contribution_type(contributions)
+   contrib_shape = arbitrary_contrib.shape if contribution_type == "nd" else []

    lesioned = set(lesioned) if lesioned else set()
-   shapley_table = 0 if (contribution_type == 'nd' and not save_permutations) else {}
+   sorted_elements = sorted(permutation_space[0])
+   permutation_space = set(permutation_space)

    if not lazy:
-       parent_bar = enumerate(set(permutation_space))
+       parent_bar = enumerate(permutation_space)
    elif (not dual_progress_bars) or mbar:
-       parent_bar = progress_bar(enumerate(set(permutation_space)), total=len(
+       parent_bar = progress_bar(enumerate(permutation_space), total=len(
            permutation_space), leave=False, parent=mbar)
    elif lazy:
        parent_bar = master_bar(
-           enumerate(set(permutation_space)), total=len(permutation_space))
+           enumerate(permutation_space), total=len(permutation_space))

+   shapley_table = 0 if (contribution_type == 'nd' and not save_permutations) else np.zeros((len(permutation_space), len(sorted_elements), *contrib_shape), dtype=float)

    for i, permutation in parent_bar:
-       isolated_contributions = []  # got to be a better way!
+       isolated_contributions = np.zeros((len(permutation), *arbitrary_contrib.shape), dtype=float) if contribution_type == "nd" else ([None] * len(permutation))  # got to be a better way!
        child_bar = enumerate(permutation) if not (dual_progress_bars and lazy) else progress_bar(
            enumerate(permutation), total=len(permutation), leave=False, parent=parent_bar)
        # iterate over all elements in the permutation to calculate their isolated contributions
-       for index, _ in child_bar:
+       for index, element in child_bar:
            including = frozenset(permutation[:index + 1]) - lesioned
            excluding = frozenset(permutation[:index]) - lesioned

            # the isolated contribution of an element is the difference between the contribution with and without that element
            if lazy:
                contributions_including = objective_function(tuple(excluding), **objective_function_params)
                contributions_excluding = objective_function(tuple(including), **objective_function_params)

-               isolated_contributions.append(contributions_including - contributions_excluding)
+               isolated_contributions[sorted_elements.index(element)] = contributions_including - contributions_excluding
            else:
-               isolated_contributions.append(contributions[including] - contributions[excluding])
+               isolated_contributions[sorted_elements.index(element)] = contributions[including] - contributions[excluding]

        if contribution_type == 'nd' and not save_permutations:
-           isolated_contributions = [x for _, x in sorted(zip(permutation, isolated_contributions))]
-           shapley_table += (np.array(isolated_contributions) - shapley_table) / (i + 1)
+           shapley_table += (isolated_contributions - shapley_table) / (i + 1)
        else:
-           shapley_table[permutation] = np.array(isolated_contributions)
+           shapley_table[i] = np.array(isolated_contributions)

    # post-processing of the Shapley values based on the contribution type; the output format
    # will vary depending on whether the values are multi-scores, timeseries, etc.
    if contribution_type == 'nd' and not save_permutations:
        shapley_table = shapley_table.reshape(shapley_table.shape[0], -1).T
        shapley_table = pd.DataFrame(
-           shapley_table, columns=sorted(permutation))
+           shapley_table, columns=sorted_elements)
        return ShapleyModeND(shapley_table, arbitrary_contrib.shape)

-   shapley_table = pd.DataFrame([dict(zip(permutations, shapleys))
-                                 for permutations, shapleys in shapley_table.items()])
-   return ShapleyTableND.from_dataframe(shapley_table, shape=arbitrary_contrib.shape) if (contribution_type == "nd") else ShapleyTable(shapley_table)
+   if contribution_type == "scaler":
+       return ShapleyTable(pd.DataFrame(shapley_table, columns=sorted_elements))
+
+   return ShapleyTableND.from_ndarray(shapley_table, columns=sorted_elements)


@typechecked
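Two things in this diff are worth spelling out. First, isolated contributions are now written into a preallocated array indexed by each element's sorted position, so the columns line up with `sorted_elements` without re-sorting on every permutation. Second, when `save_permutations` is off, nd contributions are folded into a running mean, `m += (x - m) / (i + 1)`, instead of being stored per permutation, which keeps memory constant in the number of permutations. A quick standalone check of that incremental-mean identity, with toy shapes of our own choosing:

```python
# The update m += (x - m) / (i + 1) used above is the standard incremental mean;
# this standalone check (made-up toy shapes) confirms it matches a plain average.
import numpy as np

rng = np.random.default_rng(0)
samples = rng.random((100, 4, 8, 8))   # 100 permutations of (4 elements, 8x8 contributions)

running_mean = 0.0                     # scalar 0 broadcasts on the first update
for i, x in enumerate(samples):
    running_mean += (x - running_mean) / (i + 1)

assert np.allclose(running_mean, samples.mean(axis=0))
```

With `save_permutations=True`, by contrast, the full `(n_permutations, n_elements, *contrib_shape)` table is kept and handed to `ShapleyTableND.from_ndarray`, which the new test below exercises.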
2 changes: 1 addition & 1 deletion setup.py
@@ -10,7 +10,7 @@
test_packages = ["pytest~=6.2.5"]

setup(name="msapy",
version="1.4",
version="1.5",
description=DESCRIPTION,
long_description=LONG_DESCRIPTION,
long_description_content_type='text/markdown',
14 changes: 14 additions & 0 deletions tests/test_ground_truth_nd.py
@@ -48,6 +48,20 @@ def test_contributions(n_parallel_games, lazy):
    assert np.allclose(shapley_mode.get_total_contributions(), image)


+@pytest.mark.parametrize("n_parallel_games, lazy", [[1, True], [-1, True], [1, False], [-1, False]])
+def test_contributions_permutations(n_parallel_games, lazy):
+    shapley_table_nd = msa.interface(
+        elements=list(range(4)),
+        n_permutations=100,
+        objective_function=objective_func,
+        n_parallel_games=n_parallel_games,
+        save_permutations=True,
+        lazy=lazy
+    )
+
+    assert np.allclose(shapley_table_nd.shapley_modes.get_total_contributions(), image)


@pytest.mark.parametrize("n_cores, multiprocessing_method, parallelize_over_games",
[(1, 'joblib', True), (-1, 'joblib', True), (1, 'joblib', False), (-1, 'joblib', False)])
def test_estimate_causal_influence(n_cores, multiprocessing_method, parallelize_over_games):
