Merge pull request #23 from kuffmode/Release-1.5
optimised for nd array contributions
works with Python 3.11
takes care of elderlies on weekends.
kuffmode authored Nov 17, 2023
2 parents 0892e87 + 929f272 commit 787f574
Showing 6 changed files with 44 additions and 27 deletions.
2 changes: 1 addition & 1 deletion docs/README.md
@@ -15,7 +15,7 @@ As you probably noticed this won't be feasible to calculate as, for example, it

And our own recent work [Fakhar K, Hilgetag CC. Systematic perturbation of an artificial neural network: A step towards quantifying causal contributions in the brain. PLoS Comput Biol. 2022;18: e1010250. doi:10.1371/journal.pcbi.1010250](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1010250)
## Installation:
- The easiest way is to `pip install msapy`, I wrote this package in Python `3.9` and tried it on `3.8` so, it's safe to say you need at least a Python `3.8` so easy-installing it on older versions is not an option. Alternatively you can clone the repository (`git clone https://github.com/kuffmode/msa.git`, move to the `msa` folder (`cd msa`), and run `pip install .` In case the requirements weren't installed automatically you can also use the command `pip install -r requirements.txt` and then let me know so I can fix it!
+ The easiest way is to `pip install msapy`. This package is tested on Python `3.8` through `3.11`; other versions might not work. Alternatively, you can clone the repository (`git clone https://github.com/kuffmode/msa.git`), move to the `msa` folder (`cd msa`), and run `pip install .`. In case the requirements weren't installed automatically, you can also use `pip install -r requirements.txt`, and then let me know so I can fix it!
## How it works:
Here you can see a schematic representation of how the algorithm works (interested in the math instead? Check the papers above). Briefly, all MSA needs from you is a list of players and a game function. The players can be your nodes, for example, brain regions or indices in a connectivity matrix, or the links between them as tuples. MSA then shuffles them to produce N orderings in which they join the game; with a small set this can yield repeated permutations, but that's fine, don't worry! From these orderings MSA builds a "combination space" of all the coalitions the players need to form, then evaluates your game function to fill in the contribution of each coalition. The last step is a Shapley integration that isolates each player's contribution within a given permutation. Repeating this for all permutations produces a contribution table (the Shapley table), and averaging over permutations gives you the Shapley values, i.e., one value per element/player. To get a better grasp of how this works in code, check the minimal example in the examples folder; a short standalone sketch also follows right after this diff.

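To make the loop described above concrete, here is a tiny standalone sketch of the permutation-based Shapley estimate. The element names and the toy `game` function are made up for illustration; this is the idea behind msapy, not its API:

```python
# Minimal sketch of permutation-based Shapley estimation (illustrative only;
# the element names and the toy `game` function are hypothetical, not msapy's API).
import random

elements = ["A", "B", "C"]
worth = {"A": 1.0, "B": 2.0, "C": 3.0}

def game(coalition):
    # Toy additive game: a coalition is worth the sum of its members' worths.
    return sum(worth[e] for e in coalition)

rng = random.Random(0)
n_permutations = 1_000
running_sum = {e: 0.0 for e in elements}

for _ in range(n_permutations):
    order = rng.sample(elements, len(elements))    # one random joining order
    for i, element in enumerate(order):
        including = game(order[:i + 1])            # coalition with `element`
        excluding = game(order[:i])                # coalition without `element`
        running_sum[element] += including - excluding  # isolated contribution

shapley = {e: s / n_permutations for e, s in running_sum.items()}
print(shapley)  # an additive game recovers each element's own worth exactly
```

Because this toy game is additive, every permutation yields the same marginal contribution, so the estimate is exact here; for non-additive games the average converges as the number of sampled permutations grows.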
2 changes: 1 addition & 1 deletion msapy/__init__.py
@@ -1,2 +1,2 @@
from msapy import msa, utils, plottings,checks
__version__ = "1.4"
__version__ = "1.5"
16 changes: 8 additions & 8 deletions msapy/datastructures.py
@@ -54,24 +54,24 @@ def contribution_type(self):
        return "nd"

    @classmethod
-    def from_dataframe(cls, shapley_table, shape):
-        num_permutation, num_nodes = shapley_table.shape
-        data = np.stack(shapley_table.values.flatten())
-        mode_size = data.shape[-1]
-        data = data.reshape(num_permutation, num_nodes, -1)
+    def from_ndarray(cls, shapley_table, columns):
+        num_permutation, num_nodes = shapley_table.shape[:2]
+        contrib_shape = shapley_table.shape[2:]
+        data = shapley_table.reshape(num_permutation, num_nodes, -1)
+        mode_size = data.shape[2]
        data = data.transpose((0, 2, 1)).reshape((-1, num_nodes))

        shapley_table = pd.DataFrame(data=data,
                                     index=pd.MultiIndex.from_product(
                                         [range(num_permutation), range(mode_size)], names=[None, "mode_size"]),
-                                    columns=shapley_table.columns
+                                    columns=columns
                                     )
        shapley_table.index.names = [None, "ND"]
-        return cls(shapley_table, shape)
+        return cls(shapley_table, contrib_shape)

    @property
    def shapley_modes(self):
-        return ShapleyModeND(self.groupby(level=1).mean(), self.shape)
+        return ShapleyModeND(self.groupby(level=1).mean(), self._shape)


class ShapleyModeND(pd.DataFrame):
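For orientation, here is a standalone walk-through of the reshaping that the new `from_ndarray` performs, with toy sizes, random data, and made-up column names of our own: contributions arrive as an array shaped `(n_permutations, n_elements, *contribution_shape)`, the trailing dimensions are flattened into a "mode" axis, and each DataFrame row ends up holding one (permutation, mode) pair.

```python
# Standalone shape walk-through mirroring from_ndarray's logic above
# (toy sizes, random data, and hypothetical column names for illustration).
import numpy as np
import pandas as pd

num_permutation, num_nodes = 5, 3
contrib_shape = (2, 4)                        # e.g. each contribution is a 2x4 map
shapley_table = np.random.rand(num_permutation, num_nodes, *contrib_shape)

data = shapley_table.reshape(num_permutation, num_nodes, -1)   # (5, 3, 8)
mode_size = data.shape[2]                                      # 8 flattened "modes"
data = data.transpose((0, 2, 1)).reshape((-1, num_nodes))      # (5 * 8, 3)

df = pd.DataFrame(
    data=data,
    index=pd.MultiIndex.from_product(
        [range(num_permutation), range(mode_size)], names=[None, "mode_size"]),
    columns=["A", "B", "C"],                  # one column per element
)
df.index.names = [None, "ND"]                 # renamed, as in the diff

# Averaging over permutations (grouping by the mode level) leaves one row per mode,
# which is what the shapley_modes property computes:
shapley_modes = df.groupby(level=1).mean()    # shape (8, 3)
```

Taking the raw ndarray directly, rather than a DataFrame of array-valued cells as the removed `from_dataframe` did, avoids the `np.stack` over flattened values; presumably this is the "optimised for nd array contributions" mentioned in the commit message.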
35 changes: 19 additions & 16 deletions msapy/msa.py
@@ -396,54 +396,57 @@ def local_efficiency(complements, graph):
    contributions = {tuple(): objective_function(tuple(), **objective_function_params)} if lazy else contributions

    contribution_type, arbitrary_contrib = _get_contribution_type(contributions)
+   contrib_shape = arbitrary_contrib.shape if contribution_type == "nd" else []

    lesioned = set(lesioned) if lesioned else set()
-   shapley_table = 0 if (contribution_type == 'nd' and not save_permutations) else {}
+   sorted_elements = sorted(permutation_space[0])
+   permutation_space = set(permutation_space)

    if not lazy:
-       parent_bar = enumerate(set(permutation_space))
+       parent_bar = enumerate(permutation_space)
    elif (not dual_progress_bars) or mbar:
-       parent_bar = progress_bar(enumerate(set(permutation_space)), total=len(
+       parent_bar = progress_bar(enumerate(permutation_space), total=len(
            permutation_space), leave=False, parent=mbar)
    elif lazy:
        parent_bar = master_bar(
-           enumerate(set(permutation_space)), total=len(permutation_space))
+           enumerate(permutation_space), total=len(permutation_space))

+   shapley_table = 0 if (contribution_type == 'nd' and not save_permutations) else np.zeros((len(permutation_space), len(sorted_elements), *contrib_shape), dtype=float)

    for i, permutation in parent_bar:
-       isolated_contributions = []  # got to be a better way!
+       isolated_contributions = np.zeros((len(permutation), *arbitrary_contrib.shape), dtype=float) if contribution_type == "nd" else ([None] * len(permutation))  # got to be a better way!
        child_bar = enumerate(permutation) if not (dual_progress_bars and lazy) else progress_bar(
            enumerate(permutation), total=len(permutation), leave=False, parent=parent_bar)
        # iterate over all elements in the permutation to calculate their isolated contributions
-       for index, _ in child_bar:
+       for index, element in child_bar:
            including = frozenset(permutation[:index + 1]) - lesioned
            excluding = frozenset(permutation[:index]) - lesioned

            # the isolated contribution of an element is the difference between the contribution with and without that element
            if lazy:
                contributions_including = objective_function(tuple(excluding), **objective_function_params)
                contributions_excluding = objective_function(tuple(including), **objective_function_params)

-               isolated_contributions.append(contributions_including - contributions_excluding)
+               isolated_contributions[sorted_elements.index(element)] = contributions_including - contributions_excluding
            else:
-               isolated_contributions.append(contributions[including] - contributions[excluding])
+               isolated_contributions[sorted_elements.index(element)] = contributions[including] - contributions[excluding]

        if contribution_type == 'nd' and not save_permutations:
-           isolated_contributions = [x for _, x in sorted(zip(permutation, isolated_contributions))]
-           shapley_table += (np.array(isolated_contributions) - shapley_table) / (i + 1)
+           shapley_table += (isolated_contributions - shapley_table) / (i + 1)
        else:
-           shapley_table[permutation] = np.array(isolated_contributions)
+           shapley_table[i] = np.array(isolated_contributions)

    # post-processing of the Shapley values based on the contribution type; the output format
    # will vary depending on whether the values are multi-scores, timeseries, etc.
    if contribution_type == 'nd' and not save_permutations:
        shapley_table = shapley_table.reshape(shapley_table.shape[0], -1).T
        shapley_table = pd.DataFrame(
-           shapley_table, columns=sorted(permutation))
+           shapley_table, columns=sorted_elements)
        return ShapleyModeND(shapley_table, arbitrary_contrib.shape)

-   shapley_table = pd.DataFrame([dict(zip(permutations, shapleys))
-                                 for permutations, shapleys in shapley_table.items()])
-   return ShapleyTableND.from_dataframe(shapley_table, shape=arbitrary_contrib.shape) if (contribution_type == "nd") else ShapleyTable(shapley_table)
+   if contribution_type == "scaler":
+       return ShapleyTable(pd.DataFrame(shapley_table, columns=sorted_elements))
+
+   return ShapleyTableND.from_ndarray(shapley_table, columns=sorted_elements)


@typechecked
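Two things in this diff are worth spelling out. First, isolated contributions are now written into a preallocated array indexed by each element's sorted position, so the columns line up with `sorted_elements` without re-sorting on every permutation. Second, when `save_permutations` is off, nd contributions are folded into a running mean, `m += (x - m) / (i + 1)`, instead of being stored per permutation, which keeps memory constant in the number of permutations. A quick standalone check of that incremental-mean identity, with toy shapes of our own choosing:

```python
# The update m += (x - m) / (i + 1) used above is the standard incremental mean;
# this standalone check (made-up toy shapes) confirms it matches a plain average.
import numpy as np

rng = np.random.default_rng(0)
samples = rng.random((100, 4, 8, 8))   # 100 permutations of (4 elements, 8x8 contributions)

running_mean = 0.0                     # scalar 0 broadcasts on the first update
for i, x in enumerate(samples):
    running_mean += (x - running_mean) / (i + 1)

assert np.allclose(running_mean, samples.mean(axis=0))
```

With `save_permutations=True`, by contrast, the full `(n_permutations, n_elements, *contrib_shape)` table is kept and handed to `ShapleyTableND.from_ndarray`, which the new test below exercises.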
2 changes: 1 addition & 1 deletion setup.py
@@ -10,7 +10,7 @@
test_packages = ["pytest~=6.2.5"]

setup(name="msapy",
version="1.4",
version="1.5",
description=DESCRIPTION,
long_description=LONG_DESCRIPTION,
long_description_content_type='text/markdown',
14 changes: 14 additions & 0 deletions tests/test_ground_truth_nd.py
@@ -48,6 +48,20 @@ def test_contributions(n_parallel_games, lazy):
    assert np.allclose(shapley_mode.get_total_contributions(), image)


+@pytest.mark.parametrize("n_parallel_games, lazy", [[1, True], [-1, True], [1, False], [-1, False]])
+def test_contributions_permutations(n_parallel_games, lazy):
+    shapley_table_nd = msa.interface(
+        elements=list(range(4)),
+        n_permutations=100,
+        objective_function=objective_func,
+        n_parallel_games=n_parallel_games,
+        save_permutations=True,
+        lazy=lazy
+    )
+
+    assert np.allclose(shapley_table_nd.shapley_modes.get_total_contributions(), image)


@pytest.mark.parametrize("n_cores, multiprocessing_method, parallelize_over_games",
[(1, 'joblib', True), (-1, 'joblib', True), (1, 'joblib', False), (-1, 'joblib', False)])
def test_estimate_causal_influence(n_cores, multiprocessing_method, parallelize_over_games):
