From dea80386d7b51c6c557c38bc5b9bee16a0abe6c5 Mon Sep 17 00:00:00 2001
From: lauraporta <29216006+lauraporta@users.noreply.github.com>
Date: Mon, 6 Feb 2023 17:25:30 -0800
Subject: [PATCH 01/26] Renaming main folder

---
 MANIFEST.in | 2 +-
 README.md | 2 +-
 pyproject.toml | 4 ++--
 {load_suite2p => rsp_vision}/__init__.py | 0
 .../analysis/spatial_freq_temporal_freq.py | 0
 {load_suite2p => rsp_vision}/analysis/utils.py | 0
 {load_suite2p => rsp_vision}/load/__init__.py | 0
 {load_suite2p => rsp_vision}/load/load_data.py | 0
 {load_suite2p => rsp_vision}/load/read_config.py | 0
 {load_suite2p => rsp_vision}/main.py | 0
 {load_suite2p => rsp_vision}/objects/data_raw.py | 0
 {load_suite2p => rsp_vision}/objects/enums.py | 0
 {load_suite2p => rsp_vision}/objects/file.py | 0
 {load_suite2p => rsp_vision}/objects/folder_naming_specs.py | 0
 {load_suite2p => rsp_vision}/objects/options.py | 0
 {load_suite2p => rsp_vision}/objects/parsers2p/__init__.py | 0
 {load_suite2p => rsp_vision}/objects/parsers2p/parser2p.py | 0
 .../objects/parsers2p/parser2pRSP.py | 0
 {load_suite2p => rsp_vision}/objects/photon_data.py | 0
 {load_suite2p => rsp_vision}/objects/specifications.py | 0
 {load_suite2p => rsp_vision}/plots/plotter.py | 0
 {load_suite2p => rsp_vision}/utils.py | 0
 setup_for_demo.py | 6 +++---
 tests/test_integration/test_data_raw.py | 2 +-
 tests/test_integration/test_folder_naming_specs.py | 2 +-
 tests/test_integration/test_photon_data.py | 6 +++---
 tests/test_unit/test_parsers.py | 2 +-
 tests/test_unit/test_utils.py | 2 +-
 tox.ini | 2 +-
 29 files changed, 15 insertions(+), 15 deletions(-)
 rename {load_suite2p => rsp_vision}/__init__.py (100%)
 rename {load_suite2p => rsp_vision}/analysis/spatial_freq_temporal_freq.py (100%)
 rename {load_suite2p => rsp_vision}/analysis/utils.py (100%)
 rename {load_suite2p => rsp_vision}/load/__init__.py (100%)
 rename {load_suite2p => rsp_vision}/load/load_data.py (100%)
 rename {load_suite2p => rsp_vision}/load/read_config.py (100%)
 rename {load_suite2p => rsp_vision}/main.py (100%)
 rename {load_suite2p => rsp_vision}/objects/data_raw.py (100%)
 rename {load_suite2p => rsp_vision}/objects/enums.py (100%)
 rename {load_suite2p => rsp_vision}/objects/file.py (100%)
 rename {load_suite2p => rsp_vision}/objects/folder_naming_specs.py (100%)
 rename {load_suite2p => rsp_vision}/objects/options.py (100%)
 rename {load_suite2p => rsp_vision}/objects/parsers2p/__init__.py (100%)
 rename {load_suite2p => rsp_vision}/objects/parsers2p/parser2p.py (100%)
 rename {load_suite2p => rsp_vision}/objects/parsers2p/parser2pRSP.py (100%)
 rename {load_suite2p => rsp_vision}/objects/photon_data.py (100%)
 rename {load_suite2p => rsp_vision}/objects/specifications.py (100%)
 rename {load_suite2p => rsp_vision}/plots/plotter.py (100%)
 rename {load_suite2p => rsp_vision}/utils.py (100%)

diff --git a/MANIFEST.in b/MANIFEST.in
index 758c132..28d5bf1 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,7 +1,7 @@
 include LICENSE
 include README.md

-recursive-include load_suite2p *.yml
+recursive-include rsp_vision *.yml

 exclude *.yaml
 exclude tox.ini
diff --git a/README.md b/README.md
index e153dd5..b847a66 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ Then edit the config file with the correct paths to the data by overwriting `/pa
 Finally, run the following commands with IPython:

 ```python
-from load_suite2p.main import main
+from rsp_vision.main import main

 main()
 ```
diff --git a/pyproject.toml b/pyproject.toml
index 534035d..3f3518c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -64,11 +64,11 @@
 build-backend = "setuptools.build_meta"
 include-package-data = true

 [tool.setuptools.packages.find]
-include = ["load_suite2p*"]
+include = ["rsp_vision*"]
 exclude = ["tests*"]

 [tool.pytest.ini_options]
-addopts = "-s --cov=load_suite2p"
+addopts = "-s --cov=rsp_vision"

 [tool.black]
 target-version = ['py39']
diff --git a/load_suite2p/__init__.py b/rsp_vision/__init__.py
similarity index 100%
rename from load_suite2p/__init__.py
rename to rsp_vision/__init__.py
diff --git a/load_suite2p/analysis/spatial_freq_temporal_freq.py b/rsp_vision/analysis/spatial_freq_temporal_freq.py
similarity index 100%
rename from load_suite2p/analysis/spatial_freq_temporal_freq.py
rename to rsp_vision/analysis/spatial_freq_temporal_freq.py
diff --git a/load_suite2p/analysis/utils.py b/rsp_vision/analysis/utils.py
similarity index 100%
rename from load_suite2p/analysis/utils.py
rename to rsp_vision/analysis/utils.py
diff --git a/load_suite2p/load/__init__.py b/rsp_vision/load/__init__.py
similarity index 100%
rename from load_suite2p/load/__init__.py
rename to rsp_vision/load/__init__.py
diff --git a/load_suite2p/load/load_data.py b/rsp_vision/load/load_data.py
similarity index 100%
rename from load_suite2p/load/load_data.py
rename to rsp_vision/load/load_data.py
diff --git a/load_suite2p/load/read_config.py b/rsp_vision/load/read_config.py
similarity index 100%
rename from load_suite2p/load/read_config.py
rename to rsp_vision/load/read_config.py
diff --git a/load_suite2p/main.py b/rsp_vision/main.py
similarity index 100%
rename from load_suite2p/main.py
rename to rsp_vision/main.py
diff --git a/load_suite2p/objects/data_raw.py b/rsp_vision/objects/data_raw.py
similarity index 100%
rename from load_suite2p/objects/data_raw.py
rename to rsp_vision/objects/data_raw.py
diff --git a/load_suite2p/objects/enums.py b/rsp_vision/objects/enums.py
similarity index 100%
rename from load_suite2p/objects/enums.py
rename to rsp_vision/objects/enums.py
diff --git a/load_suite2p/objects/file.py b/rsp_vision/objects/file.py
similarity index 100%
rename from load_suite2p/objects/file.py
rename to rsp_vision/objects/file.py
diff --git a/load_suite2p/objects/folder_naming_specs.py b/rsp_vision/objects/folder_naming_specs.py
similarity index 100%
rename from load_suite2p/objects/folder_naming_specs.py
rename to rsp_vision/objects/folder_naming_specs.py
diff --git a/load_suite2p/objects/options.py b/rsp_vision/objects/options.py
similarity index 100%
rename from load_suite2p/objects/options.py
rename to rsp_vision/objects/options.py
diff --git a/load_suite2p/objects/parsers2p/__init__.py b/rsp_vision/objects/parsers2p/__init__.py
similarity index 100%
rename from load_suite2p/objects/parsers2p/__init__.py
rename to rsp_vision/objects/parsers2p/__init__.py
diff --git a/load_suite2p/objects/parsers2p/parser2p.py b/rsp_vision/objects/parsers2p/parser2p.py
similarity index 100%
rename from load_suite2p/objects/parsers2p/parser2p.py
rename to rsp_vision/objects/parsers2p/parser2p.py
diff --git a/load_suite2p/objects/parsers2p/parser2pRSP.py b/rsp_vision/objects/parsers2p/parser2pRSP.py
similarity index 100%
rename from load_suite2p/objects/parsers2p/parser2pRSP.py
rename to rsp_vision/objects/parsers2p/parser2pRSP.py
diff --git a/load_suite2p/objects/photon_data.py b/rsp_vision/objects/photon_data.py
similarity index 100%
rename from load_suite2p/objects/photon_data.py
rename to rsp_vision/objects/photon_data.py
diff --git a/load_suite2p/objects/specifications.py b/rsp_vision/objects/specifications.py
similarity index 100%
rename from load_suite2p/objects/specifications.py
rename to rsp_vision/objects/specifications.py
diff --git a/load_suite2p/plots/plotter.py b/rsp_vision/plots/plotter.py
similarity index 100%
rename from load_suite2p/plots/plotter.py
rename to rsp_vision/plots/plotter.py
diff --git a/load_suite2p/utils.py b/rsp_vision/utils.py
similarity index 100%
rename from load_suite2p/utils.py
rename to rsp_vision/utils.py
diff --git a/setup_for_demo.py b/setup_for_demo.py
index 0f8fb18..e227f00 100644
--- a/setup_for_demo.py
+++ b/setup_for_demo.py
@@ -10,12 +10,12 @@
     f.write('CONFIG_PATH="config-test/config.yml"')

     # create config folder
-    Path("load_suite2p/config-test").mkdir(parents=True, exist_ok=True)
+    Path("rsp_vision/config-test").mkdir(parents=True, exist_ok=True)

     # create config file and store it in config folder
-    f = open("load_suite2p/config-test/config.yml", "x")
+    f = open("rsp_vision/config-test/config.yml", "x")

-    with open("load_suite2p/config-test/config.yml", "w") as f:
+    with open("rsp_vision/config-test/config.yml", "w") as f:
         content = {
             "parser": "Parser2pRSP",
             "paths": {
diff --git a/tests/test_integration/test_data_raw.py b/tests/test_integration/test_data_raw.py
index c74b12c..eadbfe2 100644
--- a/tests/test_integration/test_data_raw.py
+++ b/tests/test_integration/test_data_raw.py
@@ -4,7 +4,7 @@
 import numpy as np
 import pytest

-from load_suite2p.objects.data_raw import DataRaw
+from rsp_vision.objects.data_raw import DataRaw


 @pytest.fixture
diff --git a/tests/test_integration/test_folder_naming_specs.py b/tests/test_integration/test_folder_naming_specs.py
index dfc7e8e..edbecd5 100644
--- a/tests/test_integration/test_folder_naming_specs.py
+++ b/tests/test_integration/test_folder_naming_specs.py
@@ -2,7 +2,7 @@

 import pytest

-from load_suite2p.objects import folder_naming_specs
+from rsp_vision.objects import folder_naming_specs


 # Mocks
diff --git a/tests/test_integration/test_photon_data.py b/tests/test_integration/test_photon_data.py
index 880959d..0bc31df 100644
--- a/tests/test_integration/test_photon_data.py
+++ b/tests/test_integration/test_photon_data.py
@@ -1,9 +1,9 @@
 import numpy as np
 import pytest

-from load_suite2p.objects.data_raw import DataRaw
-from load_suite2p.objects.enums import PhotonType
-from load_suite2p.objects.photon_data import PhotonData
+from rsp_vision.objects.data_raw import DataRaw
+from rsp_vision.objects.enums import PhotonType
+from rsp_vision.objects.photon_data import PhotonData


 @pytest.fixture
diff --git a/tests/test_unit/test_parsers.py b/tests/test_unit/test_parsers.py
index aadc168..f0734f6 100644
--- a/tests/test_unit/test_parsers.py
+++ b/tests/test_unit/test_parsers.py
@@ -1,6 +1,6 @@
 from pathlib import Path

-from load_suite2p.objects.parsers2p.parser2pRSP import Parser2pRSP
+from rsp_vision.objects.parsers2p.parser2pRSP import Parser2pRSP

 config = {
     "parser": "Parser2pRSP",
diff --git a/tests/test_unit/test_utils.py b/tests/test_unit/test_utils.py
index c8d7650..adb4a5c 100644
--- a/tests/test_unit/test_utils.py
+++ b/tests/test_unit/test_utils.py
@@ -1,6 +1,6 @@
 from types import ModuleType

-from load_suite2p import utils
+from rsp_vision import utils


 def test_get_module_for_logging():
diff --git a/tox.ini b/tox.ini
index 6ba7434..4b0b0ba 100644
--- a/tox.ini
+++ b/tox.ini
@@ -15,4 +15,4 @@ deps =
     rich
     PyYAML
 commands =
-    pytest -v --color=yes --cov=load_suite2p --cov-report=xml
+    pytest -v --color=yes --cov=rsp_vision --cov-report=xml
From 7b514a56c9e41da93d3a543a7f0810920368af3c Mon Sep 17 00:00:00 2001
From: lauraporta <29216006+lauraporta@users.noreply.github.com>
Date: Mon, 13 Feb 2023 18:25:08 -0800
Subject: [PATCH 02/26] Refactor and remove redundant objects "options" and
 "specifications"

---
 .../analysis/spatial_freq_temporal_freq.py | 5 +-
 rsp_vision/load/load_data.py | 45 +++++------------
 rsp_vision/objects/options.py | 46 -------------------
 rsp_vision/objects/specifications.py | 15 ------
 4 files changed, 14 insertions(+), 97 deletions(-)
 delete mode 100644 rsp_vision/objects/options.py
 delete mode 100644 rsp_vision/objects/specifications.py

diff --git a/rsp_vision/analysis/spatial_freq_temporal_freq.py b/rsp_vision/analysis/spatial_freq_temporal_freq.py
index 8142a52..0cc6fb0 100644
--- a/rsp_vision/analysis/spatial_freq_temporal_freq.py
+++ b/rsp_vision/analysis/spatial_freq_temporal_freq.py
@@ -1,11 +1,10 @@
 from ..objects.photon_data import PhotonData
-from ..objects.specifications import Specifications


 class SF_TF:
-    def __init__(self, data: PhotonData, specs: Specifications):
+    def __init__(self, data: PhotonData, config: dict):
         self.data = data
-        self.config = specs
+        self.config = config

     def get_fit_parameters(self):
         # calls _fit_two_dimensional_elliptical_gaussian
diff --git a/rsp_vision/load/load_data.py b/rsp_vision/load/load_data.py
index b0d3076..17c4aa3 100644
--- a/rsp_vision/load/load_data.py
+++ b/rsp_vision/load/load_data.py
@@ -7,14 +7,14 @@

 from ..objects.data_raw import DataRaw
 from ..objects.enums import AnalysisType, DataType
-from ..objects.specifications import Specifications
+from ..objects.folder_naming_specs import FolderNamingSpecs
 from .read_config import read

 CONFIG_PATH = config("CONFIG_PATH")
 config_path = Path(__file__).parents[1] / CONFIG_PATH


-def load_data(folder_name: str) -> Tuple[DataRaw, Specifications]:
+def load_data(folder_name: str) -> Tuple[DataRaw, dict]:
     """Creates the configuration object and loads the data.

     Parameters
@@ -24,46 +24,25 @@

     Returns
     -------
-    Tuple(list, Specifications)
-        specs: specs object
-        data_raw: list containing all raw data
+    Tuple(DataRaw, dict)
+        data_raw: object containing all raw data
+        config: dictionary with the configuration
     """

-    specs = get_specifications(folder_name)
-    data_raw = load(specs)
-
-    return data_raw, specs
-
-
-def get_specifications(folder_name: str) -> Specifications:
-    """Create specifications object. It reads the configuration
-    file and adds the folder name and experimental details
-    derived from it (analysis options and file paths).
-
-    Parameters
-    ----------
-    folder_name : string
-        name of the folder containing the experimental data
-
-    Returns
-    -------
-    specs
-        Specifications object
-    """
-    """"""
-
     logging.debug("Reading configurations")
     config = read(config_path)
-
     logging.debug(f"Configurations read: {config}")
-    specs = Specifications(config, folder_name)

-    return specs
+    folder_naming = FolderNamingSpecs(folder_name, config)
+    folder_naming.extract_all_file_names()
+    data_raw = load(folder_naming, config)
+
+    return data_raw, config


-def load(specs: Specifications) -> DataRaw:
-    if specs.config["use-allen-dff"]:
-        if specs.config["analysis-type"] == "sf_tf":
+def load(folder_naming: FolderNamingSpecs, config: dict) -> DataRaw:
+    if config["use-allen-dff"]:
+        if config["analysis-type"] == "sf_tf":
             allen_data_files = [
                 file
-                for file in specs.folder_naming.all_files
+                for file in folder_naming.all_files
                 if file.datatype == DataType.ALLEN_DFF
                 and file.analysistype == AnalysisType.SF_TF
             ]
diff --git a/rsp_vision/objects/options.py b/rsp_vision/objects/options.py
deleted file mode 100644
index 672da0b..0000000
--- a/rsp_vision/objects/options.py
+++ /dev/null
@@ -1,46 +0,0 @@
-class Options:
-    """Class with the options to analyze data from suite2p and registers2p.
-    Based on original Options class in the MATLAB project."""
-
-    def __init__(self, config: dict):
-        self.response = self.ResponseOptions()
-        self.fitting = self.FittingOptions()
-        # Attributes to evaluate:
-        # self.trials
-        # self.days_to_analyze
-        # self.directions
-        # self.colour
-        pass
-
-    class ResponseOptions:
-        def __init__(self):
-            # Attributes to evaluate:
-            # self.baseline_frames
-            # self.frames
-            # self.peak_window
-            # self.peak_enable
-            # self.measure
-            # self.trials
-            # self.average_using
-            # self.subtract_baseline
-            # self.response_frames
-            # self.p_threashold
-            # self.use_magnitude
-            # self.magnitude_threshold
-            pass
-
-    class FittingOptions:
-        def __init__(self):
-            # Attributes to evaluate:
-            # self.lower_bound
-            # self.start_cond
-            # self.upper_bound
-            # self.jitter
-            # self.n_starting_points
-            pass
-
-    class SftfMapOptions:
-        def __init__(self):
-            # Attributes to evaluate:
-            # self.use_pref_dir
-            pass
diff --git a/rsp_vision/objects/specifications.py b/rsp_vision/objects/specifications.py
deleted file mode 100644
index 22c00e4..0000000
--- a/rsp_vision/objects/specifications.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from .folder_naming_specs import FolderNamingSpecs
-from .options import Options
-
-
-class Specifications:
-    """The class :class:`Specifications` holds the configuration of the
-    experiment, the analysis options, and the paths to the files
-    to be loaded."""
-
-    def __init__(self, config: dict, folder_name: str):
-        self.config: dict = config
-        self.folder_name = folder_name
-        self.folder_naming = FolderNamingSpecs(folder_name, config)
-        self.folder_naming.extract_all_file_names()
-        self.options = Options(config)
From 92701f56e853e48acb9e25fef78302b574b4fa53 Mon Sep 17 00:00:00 2001
From: lauraporta <29216006+lauraporta@users.noreply.github.com>
Date: Mon, 13 Feb 2023 19:23:49 -0800
Subject: [PATCH 03/26] Add stim matrix logic and flake8 adjustments

---
 .flake8 | 1 +
 .../analysis/spatial_freq_temporal_freq.py | 38 ++++++++++++++++++-
 rsp_vision/main.py | 6 +--
 3 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/.flake8 b/.flake8
index e9c1c58..585f87b 100644
--- a/.flake8
+++ b/.flake8
@@ -1,3 +1,4 @@
 [flake8]
 max-line-length = 79
 exclude = __init__.py,build,.eggs
+ignore = E203, W503
diff --git a/rsp_vision/analysis/spatial_freq_temporal_freq.py b/rsp_vision/analysis/spatial_freq_temporal_freq.py
index 0cc6fb0..36c6c0b 100644
--- a/rsp_vision/analysis/spatial_freq_temporal_freq.py
+++ b/rsp_vision/analysis/spatial_freq_temporal_freq.py
@@ -1,10 +1,44 @@
+import numpy as np
+from utils import get_fps
+
+from ..objects.enums import PhotonType
 from ..objects.photon_data import PhotonData


 class SF_TF:
-    def __init__(self, data: PhotonData, config: dict):
+    def __init__(
+        self, data: PhotonData, config: dict, photon_type: PhotonType
+    ):
         self.data = data
-        self.config = config
+        self.photon_type = photon_type
+
+        self.fps = get_fps(photon_type)
+
+        self.padding_start = int(config["padding"][0])
+        self.padding_end = int(config["padding"][1])
+
+    def get_response_matrix_from_padding(self, day_idx=0, roi_idx=0):
+        stimulus_idxs = self.data.signal[self.data.signal["stimulus_onset"]]
+        self.n_frames_for_display = (
+            self.data.n_frames_per_trigger * self.data.n_triggers_per_stimulus
+            + self.padding_end
+        )
+        full_matrix = np.zeros(
+            (self.n_frames_for_display, len(stimulus_idxs))
+        )
+
+        filtered_signal = self.data.signal[
+            (self.data.signal["day_idx"] == day_idx)
+            & (self.data.signal["roi_idx"] == roi_idx)
+        ]
+        for i, idx in enumerate(stimulus_idxs):
+            full_matrix[:, i] = filtered_signal[
+                idx - self.padding_start : idx + self.padding_end
+            ]["signal"]
+
+        response_matrix = np.mean(full_matrix[self.fps :, :], axis=0)
+        baseline_matrix = np.mean(full_matrix[-2 * self.fps :, :], axis=0)
+        return response_matrix - baseline_matrix

     def get_fit_parameters(self):
         # calls _fit_two_dimensional_elliptical_gaussian
diff --git a/rsp_vision/main.py b/rsp_vision/main.py
index 06ebe63..18a6663 100644
--- a/rsp_vision/main.py
+++ b/rsp_vision/main.py
@@ -22,15 +22,15 @@ def main():
             Example: AK_1111739_hL_RSPd_monitor_front\n \
             📁"
     )
-
+    photon_type = PhotonType.TWO_PHOTON
     # load data
     data, specs = load_data(folder_name)

     # preprocess and make PhotonData object
-    photon_data = PhotonData(data, PhotonType.TWO_PHOTON)
+    photon_data = PhotonData(data, photon_type)

     # make analysis object
-    analysis = SF_TF(photon_data, specs)
+    analysis = SF_TF(photon_data, specs, photon_type)

     # calculate responsiveness and display it in a nice way
     responsiveness = analysis.responsiveness(photon_data)
     print(responsiveness)  # TODO: nice appearance

     # Plots

From 99a2dd766b214035e44336aa626c160f3686d05a Mon Sep 17 00:00:00 2001
From: lauraporta <29216006+lauraporta@users.noreply.github.com>
Date: Mon, 13 Feb 2023 19:34:12 -0800
Subject: [PATCH 04/26] Add missing tox dependency

---
 tox.ini | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tox.ini b/tox.ini
index 4b0b0ba..8dc8323 100644
--- a/tox.ini
+++ b/tox.ini
@@ -14,5 +14,6 @@ deps =
     pytest-cov
     rich
     PyYAML
+    pandas
 commands =
     pytest -v --color=yes --cov=rsp_vision --cov-report=xml
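A minimal, hedged sketch of the windowing idea that PATCH 03 introduces: collect a fixed window of frames around every stimulus onset into a matrix, then subtract a late-window baseline from the post-onset response. The values below (fps, padding, onsets, trace) are toy stand-ins; the real class reads them from the config and from `PhotonData`:

```python
import numpy as np

# Hypothetical stand-ins for values SF_TF reads from config / PhotonData
fps = 30                                        # two-photon frames per second
padding_start, padding_end = 25, 50
signal = np.random.rand(1000)                   # one ROI's trace, fabricated
stimulus_idxs = np.array([100, 300, 500, 700])  # stimulus-onset frames

# One column per stimulus: the trace from padding_start frames before
# onset to padding_end frames after onset
window = padding_start + padding_end
full_matrix = np.zeros((window, len(stimulus_idxs)))
for i, idx in enumerate(stimulus_idxs):
    full_matrix[:, i] = signal[idx - padding_start : idx + padding_end]

# Mean response after the first second, baseline from the last 2 s of the
# window, as in the patch; subtraction gives a baseline-corrected response
response = np.mean(full_matrix[fps:, :], axis=0)
baseline = np.mean(full_matrix[-2 * fps :, :], axis=0)
corrected = response - baseline
print(corrected.shape)  # (4,) — one value per stimulus
```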
From 3299b4ed567bb78f70dc3ea7e0cfb689fe6f2da3 Mon Sep 17 00:00:00 2001
From: lauraporta <29216006+lauraporta@users.noreply.github.com>
Date: Tue, 14 Feb 2023 18:45:12 -0800
Subject: [PATCH 05/26] Improve calculations of response and baseline given a
 padding

---
 .../analysis/spatial_freq_temporal_freq.py | 76 ++++++++++++++-----
 rsp_vision/main.py | 3 +-
 2 files changed, 59 insertions(+), 20 deletions(-)

diff --git a/rsp_vision/analysis/spatial_freq_temporal_freq.py b/rsp_vision/analysis/spatial_freq_temporal_freq.py
index 36c6c0b..1949d7e 100644
--- a/rsp_vision/analysis/spatial_freq_temporal_freq.py
+++ b/rsp_vision/analysis/spatial_freq_temporal_freq.py
@@ -1,8 +1,10 @@
+import logging
+
 import numpy as np
-from utils import get_fps

 from ..objects.enums import PhotonType
 from ..objects.photon_data import PhotonData
+from .utils import get_fps


 class SF_TF:
@@ -17,37 +19,73 @@ def __init__(
         self.padding_start = int(config["padding"][0])
         self.padding_end = int(config["padding"][1])

-    def get_response_matrix_from_padding(self, day_idx=0, roi_idx=0):
-        stimulus_idxs = self.data.signal[self.data.signal["stimulus_onset"]]
-        self.n_frames_for_display = (
+        self.get_response_matrix_from_padding()
+
+    def get_response_matrix_from_padding(
+        self,
+    ):
+        stimulus_idxs = self.data.signal[
+            self.data.signal["stimulus_onset"]
+        ].index
+        self.n_frames_for_display = int(
             self.data.n_frames_per_trigger * self.data.n_triggers_per_stimulus
+            + self.padding_start
             + self.padding_end
         )
-        full_matrix = np.zeros(
-            (self.n_frames_for_display, len(stimulus_idxs))
+
+        self.adapted_signal = self.data.signal
+        self.adapted_signal["mean_response"] = np.nan
+        self.adapted_signal["mean_baseline"] = np.nan
+
+        logging.info("Starting to edit the signal dataframe...")
+
+        for idx in stimulus_idxs:
+            id = idx - self.padding_start
+
+            self.adapted_signal.loc[
+                idx, "mean_response"
+            ] = self.adapted_signal[
+                (self.adapted_signal.index >= id + self.fps)
+                & (self.adapted_signal.index < id + self.n_frames_for_display)
+            ][
+                "signal"
+            ].mean()
+
+            self.adapted_signal.loc[
+                idx, "mean_baseline"
+            ] = self.adapted_signal[
+                (
+                    self.adapted_signal.index
+                    >= id + self.n_frames_for_display - (2 * self.fps)
                 )
+                & (self.adapted_signal.index < id + self.n_frames_for_display)
+            ][
+                "signal"
+            ].mean()
+
+        self.adapted_signal["subtracted"] = (
+            self.adapted_signal["mean_response"]
+            - self.adapted_signal["mean_baseline"]
         )

-        filtered_signal = self.data.signal[
-            (self.data.signal["day_idx"] == day_idx)
-            & (self.data.signal["roi_idx"] == roi_idx)
-        ]
-        for i, idx in enumerate(stimulus_idxs):
-            full_matrix[:, i] = filtered_signal[
-                idx - self.padding_start : idx + self.padding_end
-            ]["signal"]
+        view = self.adapted_signal[self.adapted_signal["stimulus_onset"]]
+        logging.info(f"Adapted signal dataframe:{view.head()}")

-        response_matrix = np.mean(full_matrix[self.fps :, :], axis=0)
-        baseline_matrix = np.mean(full_matrix[-2 * self.fps :, :], axis=0)
-        return response_matrix - baseline_matrix
+        # in the last trigger baseline is not calculated
+        # because indexes go beyond the length of the dataframe

     def get_fit_parameters(self):
         # calls _fit_two_dimensional_elliptical_gaussian
         raise NotImplementedError("This method is not implemented yet")

-    def responsiveness(self):
+    def responsiveness(self, rois: list):
+
         raise NotImplementedError("This method is not implemented yet")

-    def responsiveness_anova_window(self):
+    def responsiveness_anova(self):
+        # perform non-parametric one way anova and
+        # return the p-value for all combos across sessions
+
         raise NotImplementedError("This method is not implemented yet")

     def get_preferred_direction_all_rois(self):
diff --git a/rsp_vision/main.py b/rsp_vision/main.py
index 18a6663..a1522db 100644
--- a/rsp_vision/main.py
+++ b/rsp_vision/main.py
@@ -33,7 +33,8 @@ def main():
     analysis = SF_TF(photon_data, specs, photon_type)

     # calculate responsiveness and display it in a nice way
-    responsiveness = analysis.responsiveness(photon_data)
+    rois = list(range(11))
+    responsiveness = analysis.responsiveness(rois)
     print(responsiveness)  # TODO: nice appearance

     # Plots

From a28211aa4655e5517138721f4ed227c7a4b67f3a Mon Sep 17 00:00:00 2001
From: lauraporta <29216006+lauraporta@users.noreply.github.com>
Date: Thu, 16 Feb 2023
11:42:39 -0800
Subject: [PATCH 06/26] Fix bugs in photon data and add many checks

---
 rsp_vision/objects/photon_data.py | 103 ++++++++++++++++++++++------
 1 file changed, 83 insertions(+), 20 deletions(-)

diff --git a/rsp_vision/objects/photon_data.py b/rsp_vision/objects/photon_data.py
index 4c964c9..07c3646 100644
--- a/rsp_vision/objects/photon_data.py
+++ b/rsp_vision/objects/photon_data.py
@@ -69,13 +69,13 @@ def set_general_variables(self, data_raw):
             The raw data object from which the data will be extracted
         """
         self.screen_size = data_raw.stim[0]["screen_size"]
-        self.is_cell = data_raw.is_cell
-        self.day_roi = data_raw.day["roi"]
-        self.day_roi_label = data_raw.day["roi_label"]
+        self.is_cell = data_raw.is_cell  # seems useless
+        self.day_roi = data_raw.day["roi"]  # seems useless
+        self.day_roi_label = data_raw.day["roi_label"]  # seems useless
         self.n_sessions = data_raw.frames.shape[0]
         self.n_roi = data_raw.frames[0].shape[0]
         self.n_frames_per_session = data_raw.frames[0].shape[1]
-        self.day_stim = data_raw.day["stimulus"]
+        self.day_stim = data_raw.day["stimulus"]  # seems useless
         self.grey_or_static = ascii_array_to_string(
             data_raw.stim[0]["stimulus"]["grey_or_static"]
         )
@@ -89,16 +89,17 @@
             self.is_gray = False
             self.n_triggers_per_stimulus = 2

-        self.n_trigger = data_raw.stim[0]["n_triggers"]
-        self.n_baseline_trigger = int(
+        self.n_all_triggers = data_raw.stim[0]["n_triggers"]
+        self.n_session_boundary_baseline_triggers = int(
             data_raw.stim[0]["stimulus"]["n_baseline_triggers"]
         )
-        self.total_n_stimulus_triggers = (
-            self.n_trigger - 2 * self.n_baseline_trigger
+        self.n_stimulus_triggers_across_all_sessions = (
+            self.n_all_triggers - 2 * self.n_session_boundary_baseline_triggers
         ) * self.n_sessions
-        self.total_n_of_stimuli = (
-            self.total_n_stimulus_triggers / self.n_triggers_per_stimulus
+        self.n_of_stimuli_across_all_sessions = int(
+            self.n_stimulus_triggers_across_all_sessions
+            / self.n_triggers_per_stimulus
         )

         self.calculations_to_find_start_frames()
@@ -107,15 +108,18 @@ def calculations_to_find_start_frames(self):
         """Calculations to find the start frames of the stimuli,
         as in the matlab codebase.
""" - self.n_frames_per_trigger = self.n_frames_per_session / self.n_trigger + self.n_frames_per_trigger = ( + self.n_frames_per_session / self.n_all_triggers + ) # could also be calculate from fps, would be good to add a check self.n_baseline_frames = ( - self.n_baseline_trigger * self.n_frames_per_trigger + self.n_session_boundary_baseline_triggers + * self.n_frames_per_trigger ) - self.n_stimulus_triggers = int( - self.n_trigger - 2 * self.n_baseline_trigger + self.n_stimulus_triggers_per_session = int( + self.n_all_triggers - 2 * self.n_session_boundary_baseline_triggers ) - self.inner_total_n_of_stimuli = int( - self.n_stimulus_triggers / self.n_triggers_per_stimulus + self.n_of_stimuli_per_session = int( + self.n_stimulus_triggers_per_session / self.n_triggers_per_stimulus ) self.stimulus_start_frames = self.get_stimulus_start_frames() @@ -126,7 +130,7 @@ def get_stimulus_start_frames(self): return np.array( ( self.n_baseline_frames - + np.arange(0, self.total_n_of_stimuli) + + np.arange(0, self.n_of_stimuli_across_all_sessions) * self.n_triggers_per_stimulus * self.n_frames_per_trigger + 1 @@ -186,7 +190,10 @@ def make_signal_dataframe(self, data_raw): self.day_stim[0], self.n_frames_per_session ), "time from beginning": self.get_timing_array(), - "frames_id": np.arange(0, self.n_frames_per_session), + "frames_id": np.arange( + self.n_frames_per_session * session, + self.n_frames_per_session * (session + 1), + ), "signal": data_raw.frames[session][roi, :], "roi_id": np.repeat(roi, self.n_frames_per_session), "session_id": np.repeat( @@ -250,11 +257,41 @@ def get_stimuli(self, data_raw): "directions" ], "session": np.repeat( - signal_idx, self.inner_total_n_of_stimuli + signal_idx, self.n_of_stimuli_per_session ), } ) stimuli = pd.concat([stimuli, df]) + + if len(stimuli) != self.n_of_stimuli_across_all_sessions: + logging.error( + f"Len of stimuli table: {len(stimuli)}, calculated " + + "stimuli lenght: {self.n_of_stimuli_across_all_sessions}" + ) + raise RuntimeError( + "Number of stimuli in raw_data.stim differs from the " + + "calculated amount of stimuli" + ) + + pivot_table = stimuli.pivot_table( + index=["sf", "tf", "direction"], aggfunc="size" + ) + + if len(pivot_table) != 6 * 6 * 8: + logging.error(f"Pivot table: {pivot_table}") + logging.error(f"Pivot table length: {len(pivot_table)}") + raise RuntimeError( + "Number of stimuli is not correct, some combinations are " + + "missing or duplicated" + ) + + if np.any(pivot_table.values != self.n_triggers_per_stimulus): + logging.error(f"Pivot table: {pivot_table}") + raise RuntimeError( + "Number of stimuli is not correct, some combinations are " + + "missing or duplicated" + ) + return stimuli def fill_up_with_stim_info(self, signal, stimuli): @@ -272,17 +309,31 @@ def fill_up_with_stim_info(self, signal, stimuli): signal : pd.DataFrame Final signal dataframe with stimulus information """ + + explored_idxs = set() # register only the stimulus onset for k, start_frame in enumerate(self.stimulus_start_frames): signal_idxs = signal.index[ signal["frames_id"] == start_frame ].tolist() + s_idxs = set(signal_idxs) + if explored_idxs.intersection(set(s_idxs)): + raise RuntimeError("Index is duplicated across signals") + + explored_idxs.union(s_idxs) + # starting frames and stimuli are alligned in source data stimulus = stimuli.iloc[k] + logging.info(f"\nK: {k}, stimulus:\n{stimulus}") + if len(signal_idxs) != self.n_roi: + raise RuntimeError( + f"Number of instances for stimulus {stimulus} is wrong." 
From a31e3108fe7791034676d18aa82423b15229aec Mon Sep 17 00:00:00 2001
From: lauraporta <29216006+lauraporta@users.noreply.github.com>
Date: Thu, 23 Feb 2023 15:37:51 +0000
Subject: [PATCH 07/26] Add tests edits

---
 tests/test_integration/test_photon_data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_integration/test_photon_data.py b/tests/test_integration/test_photon_data.py
index 0bc31df..3657f19 100644
--- a/tests/test_integration/test_photon_data.py
+++ b/tests/test_integration/test_photon_data.py
@@ -84,7 +84,7 @@ def test_set_general_variables(get_data_raw_object, get_variables):
     assert photon_data.n_sessions == n_sessions
     assert photon_data.n_roi == n_roi
     assert photon_data.n_frames_per_session == l_signal
-    assert photon_data.inner_total_n_of_stimuli == n_stim
+    assert photon_data.n_of_stimuli_per_session == n_stim
     assert photon_data.stimulus_start_frames.shape[0] == n_sessions * n_stim

From 4d0fbc2468651e7867dcb49ebb1d282d21495a64 Mon Sep 17 00:00:00 2001
From: lauraporta <29216006+lauraporta@users.noreply.github.com>
Date: Thu, 23 Feb 2023 16:14:05 +0000
Subject: [PATCH 08/26] Add usage of config in photon_data for more controls

---
 rsp_vision/objects/photon_data.py | 25 +++++++++++++++++++------
 setup_for_demo.py | 4 ++++
 2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/rsp_vision/objects/photon_data.py b/rsp_vision/objects/photon_data.py
index 260a3be..88eeea6 100644
--- a/rsp_vision/objects/photon_data.py
+++ b/rsp_vision/objects/photon_data.py
@@ -23,6 +23,7 @@ def __init__(
     ):
         self.photon_type = photon_type
         self.config = config
+        self.fps = get_fps(self.photon_type, self.config)
         self.set_general_variables(data_raw)
         self.signal = self.get_signal_df(data_raw)
         logging.info(
@@ -72,9 +73,6 @@ def set_general_variables(self, data_raw):
             The raw data object from which the data will be extracted
         """
         self.screen_size = data_raw.stim[0]["screen_size"]
-        self.is_cell = data_raw.is_cell  # seems useless
-        self.day_roi = data_raw.day["roi"]  # seems useless
-        self.day_roi_label = data_raw.day["roi_label"]  # seems useless
         self.n_sessions = data_raw.frames.shape[0]
         self.n_roi = data_raw.frames[0].shape[0]
         self.n_frames_per_session = data_raw.frames[0].shape[1]
@@ -113,7 +111,20 @@ def calculations_to_find_start_frames(self):
         """
         self.n_frames_per_trigger = (
             self.n_frames_per_session / self.n_all_triggers
-        )  # could also be calculated from fps, would be good to add a check
+        )
+        if (
+            self.n_frames_per_trigger
+            != self.config["trigger_interval_s"] * self.fps
+        ):
+            logging.error(
+                "Frames per trigger from data: "
+                + f"{self.n_frames_per_trigger} "
+                + "and calculated from fps: "
+                + f"{self.config['trigger_interval_s'] * self.fps}"
+                + " are different"
+            )
+            raise RuntimeError("Number of frames per trigger is wrong")
+
         self.n_baseline_frames = (
             self.n_session_boundary_baseline_triggers
             * self.n_frames_per_trigger
@@ -280,7 +291,10 @@ def get_stimuli(self, data_raw):
             index=["sf", "tf", "direction"], aggfunc="size"
         )

-        if len(pivot_table) != 6 * 6 * 8:
+        if (
+            len(pivot_table)
+            != self.config["n_sf"] * self.config["n_tf"] * self.config["n_dir"]
+        ):
             logging.error(f"Pivot table: {pivot_table}")
             logging.error(f"Pivot table length: {len(pivot_table)}")
             raise RuntimeError(

             # starting frames and stimuli are aligned in source data
             stimulus = stimuli.iloc[k]
-            logging.info(f"\nK: {k}, stimulus:\n{stimulus}")

             if len(signal_idxs) != self.n_roi:
                 raise RuntimeError(
                     f"Number of instances for stimulus {stimulus} is wrong."
diff --git a/setup_for_demo.py b/setup_for_demo.py
index 4595a28..83c2cbd 100644
--- a/setup_for_demo.py
+++ b/setup_for_demo.py
@@ -28,6 +28,10 @@
             "drift_order": 2,
             "fps_two_photon": 30,
             "fps_tree_photon": 15,
+            "n_sf": 6,
+            "n_tf": 6,
+            "n_dir": 8,
+            "trigger_interval_s": 2.5,
         }
         yaml.dump(content, f)
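The consistency check PATCH 08 adds, restated standalone with made-up numbers (the real values come from the config file and the raw data shapes):

```python
fps = 30                  # frames per second for two-photon, per config
trigger_interval_s = 2.5  # seconds between triggers, per config
n_frames_per_session = 22_500  # hypothetical
n_all_triggers = 300           # hypothetical

n_frames_per_trigger = n_frames_per_session / n_all_triggers  # 75.0
# The frame count derived from the data must match the one derived
# from the acquisition rate: 2.5 s * 30 fps = 75 frames per trigger
if n_frames_per_trigger != trigger_interval_s * fps:
    raise RuntimeError("Number of frames per trigger is wrong")
```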
From 62539751d48c5b2772dfb2e388d51286c272636 Mon Sep 17 00:00:00 2001
From: lauraporta <29216006+lauraporta@users.noreply.github.com>
Date: Thu, 23 Feb 2023 18:01:30 +0000
Subject: [PATCH 09/26] Update setup_for_demo

---
 setup_for_demo.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/setup_for_demo.py b/setup_for_demo.py
index 83c2cbd..263bb81 100644
--- a/setup_for_demo.py
+++ b/setup_for_demo.py
@@ -8,12 +8,11 @@
     f.write('CONFIG_PATH="config/config.yml"')

     # create config folder
-    Path("rsp_vision/config").mkdir(parents=True, exist_ok=True)
+    config_folder_path = Path("rsp_vision/config")
+    config_folder_path.mkdir(parents=True, exist_ok=True)
+    config_path = config_folder_path / "config.yml"

-    # create config file and store it in config folder
-    f = open("rsp_vision/config/config.yml", "x")
-
-    with open("rsp_vision/config/config.yml", "w") as f:
+    with config_path.open("w", encoding="utf-8") as f:
         content = {
             "parser": "Parser2pRSP",
             "paths": {

From 830958cae082fc821cde199f343d29b4bd92c4c2 Mon Sep 17 00:00:00 2001
From: lauraporta <29216006+lauraporta@users.noreply.github.com>
Date: Thu, 23 Feb 2023 18:03:11 +0000
Subject: [PATCH 10/26] Add logic to calculate mean response and baseline

---
 .../analysis/spatial_freq_temporal_freq.py | 124 +++++++++++------
 rsp_vision/main.py | 5 +-
 2 files changed, 79 insertions(+), 50 deletions(-)

diff --git a/rsp_vision/analysis/spatial_freq_temporal_freq.py b/rsp_vision/analysis/spatial_freq_temporal_freq.py
index 0854a12..bb55dbd 100644
--- a/rsp_vision/analysis/spatial_freq_temporal_freq.py
+++ b/rsp_vision/analysis/spatial_freq_temporal_freq.py
@@ -8,83 +8,113 @@


 class SF_TF:
-    def __init__(
-        self, data: PhotonData, config: dict, photon_type: PhotonType
-    ):
+    def __init__(self, data: PhotonData, photon_type: PhotonType):
         self.data = data
         self.photon_type = photon_type
+        self.fps = get_fps(photon_type, data.config)

-        self.fps = get_fps(photon_type, config)
-
-        self.padding_start = int(config["padding"][0])
-        self.padding_end = int(config["padding"][1])
+        self.padding_start = int(data.config["padding"][0])
+        self.padding_end = int(data.config["padding"][1])

-        self.get_response_matrix_from_padding()
+        self.calculate_mean_response_and_baseline()

-    def get_response_matrix_from_padding(
+    def calculate_mean_response_and_baseline(
         self,
     ):
         stimulus_idxs = self.data.signal[
             self.data.signal["stimulus_onset"]
         ].index
-        self.n_frames_for_display = int(
-            self.data.n_frames_per_trigger * self.data.n_triggers_per_stimulus
-            + self.padding_start
-            + self.padding_end
-        )

         self.adapted_signal = self.data.signal
         self.adapted_signal["mean_response"] = np.nan
         self.adapted_signal["mean_baseline"] = np.nan

-        logging.info("Starting to edit the signal dataframe...")
-
-        for idx in stimulus_idxs:
-            id = idx - self.padding_start
-
-            self.adapted_signal.loc[
-                idx, "mean_response"
-            ] = self.adapted_signal[
-                (self.adapted_signal.index >= id + self.fps)
-                & (self.adapted_signal.index < id + self.n_frames_for_display)
-            ][
-                "signal"
-            ].mean()
-
-            self.adapted_signal.loc[
-                idx, "mean_baseline"
-            ] = self.adapted_signal[
-                (
-                    self.adapted_signal.index
-                    >= id + self.n_frames_for_display - (2 * self.fps)
-                )
-                & (self.adapted_signal.index < id + self.n_frames_for_display)
-            ][
-                "signal"
-            ].mean()
+        baseline_start = 0
+        if self.data.n_triggers_per_stimulus == 3:
+            response_start = 2
+            if self.data.config["baseline"] == "static":
+                baseline_start = 1
+        if self.data.n_triggers_per_stimulus == 2:
+            response_start = 1
+
+        logging.info("Start to edit the signal dataframe...")
+
+        # Identify the rows in the window of each stimulus onset
+        window_start_response = stimulus_idxs + (
+            self.data.n_frames_per_trigger * response_start
+        )
+        window_end_response = stimulus_idxs + (
+            self.data.n_frames_per_trigger * (response_start + 1)
+        )
+        window_mask_response = np.vstack(
+            [
+                np.arange(start, end)
+                for start, end in zip(
+                    window_start_response, window_end_response
+                )
+            ]
+        )
+
+        window_start_baseline = stimulus_idxs + (
+            self.data.n_frames_per_trigger * baseline_start
+        )
+        window_end_baseline = stimulus_idxs + (
+            self.data.n_frames_per_trigger * (baseline_start + 1)
+        )
+        window_mask_baseline = np.vstack(
+            [
+                np.arange(start, end)
+                for start, end in zip(
+                    window_start_baseline, window_end_baseline
+                )
+            ]
+        )
+
+        mean_response_signal = [
+            np.mean(
+                self.adapted_signal.iloc[
+                    window_mask_response[i]
+                ].signal.values,
+                axis=0,
+            )
+            for i in range(len(window_mask_response))
+        ]
+        mean_baseline_signal = [
+            np.mean(
+                self.adapted_signal.iloc[
+                    window_mask_baseline[i]
+                ].signal.values,
+                axis=0,
+            )
+            for i in range(len(window_mask_baseline))
+        ]
+
+        self.adapted_signal.loc[
+            stimulus_idxs, "mean_response"
+        ] = mean_response_signal
+        self.adapted_signal.loc[
+            stimulus_idxs, "mean_baseline"
+        ] = mean_baseline_signal

         self.adapted_signal["subtracted"] = (
             self.adapted_signal["mean_response"]
             - self.adapted_signal["mean_baseline"]
         )

-        view = self.adapted_signal[self.adapted_signal["stimulus_onset"]]
-        logging.info(f"Adapted signal dataframe:{view.head()}")
-
-        # in the last trigger baseline is not calculated
-        # because indexes go beyond the length of the dataframe
+        self.only_stim_onset = self.adapted_signal[
+            self.adapted_signal["stimulus_onset"]
+        ]
+        logging.info(f"Adapted signal dataframe:{self.only_stim_onset.head()}")

     def get_fit_parameters(self):
         # calls _fit_two_dimensional_elliptical_gaussian
         raise NotImplementedError("This method is not implemented yet")

-    def responsiveness(self, rois: list):
+    def responsiveness(self):
+        self.responsiveness_anova()

         raise NotImplementedError("This method is not implemented yet")

     def responsiveness_anova(self):
-        # perform non-parametric one way anova and
-        # return the p-value for all combos across sessions

         raise NotImplementedError("This method is not implemented yet")

     def get_preferred_direction_all_rois(self):
diff --git a/rsp_vision/main.py b/rsp_vision/main.py
index b8f4cb6..091a185 100644
--- a/rsp_vision/main.py
+++ b/rsp_vision/main.py
@@ -30,11 +30,10 @@ def main():
     photon_data = PhotonData(data, PhotonType.TWO_PHOTON, config)

     # make analysis object
-    analysis = SF_TF(photon_data, config, photon_type)
+    analysis = SF_TF(photon_data, photon_type)

     # calculate responsiveness and display it in a nice way
-    rois = list(range(11))
-    responsiveness = analysis.responsiveness(rois)
+    responsiveness = analysis.responsiveness()
     print(responsiveness)  # TODO: nice appearance

     # Plots
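The vectorised window construction from PATCH 10, as a hedged standalone sketch. The onsets, trigger length, and trace below are fabricated; in the real code they come from `PhotonData` and the signal dataframe:

```python
import numpy as np
import pandas as pd

n_frames_per_trigger = 75
stimulus_idxs = np.array([100, 400, 700])  # hypothetical onset rows
signal = pd.DataFrame({"signal": np.random.rand(1000)})

response_start = 1  # with 2 triggers per stimulus, response is trigger 2
starts = stimulus_idxs + n_frames_per_trigger * response_start
ends = stimulus_idxs + n_frames_per_trigger * (response_start + 1)

# One row of frame indices per stimulus; np.vstack requires equal-length
# rows, which holds because every window spans exactly one trigger
window_mask = np.vstack([np.arange(s, e) for s, e in zip(starts, ends)])

mean_response = [
    signal.iloc[window_mask[i]].signal.mean()
    for i in range(len(window_mask))
]
print(mean_response)  # one mean per stimulus onset
```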
From b07aab13bcf01075e83ad7747b3e5ad02ae1df2f Mon Sep 17 00:00:00 2001
From: lauraporta <29216006+lauraporta@users.noreply.github.com>
Date: Thu, 23 Feb 2023 18:18:04 +0000
Subject: [PATCH 11/26] Fix test error - silence checks that require real data

---
 rsp_vision/objects/photon_data.py | 86 +++++++++++++++++----------
 tests/test_integration/test_photon_data.py | 11 ++-
 2 files changed, 60 insertions(+), 37 deletions(-)

diff --git a/rsp_vision/objects/photon_data.py b/rsp_vision/objects/photon_data.py
index 88eeea6..879b59a 100644
--- a/rsp_vision/objects/photon_data.py
+++ b/rsp_vision/objects/photon_data.py
@@ -19,8 +19,13 @@ class PhotonData:
     """

     def __init__(
-        self, data_raw: DataRaw, photon_type: PhotonType, config: dict
+        self,
+        data_raw: DataRaw,
+        photon_type: PhotonType,
+        config: dict,
+        deactivate_checks=False,
     ):
+        self.deactivate_checks = deactivate_checks
         self.photon_type = photon_type
         self.config = config
         self.fps = get_fps(self.photon_type, self.config)
@@ -277,37 +282,40 @@ def get_stimuli(self, data_raw):
             )
             stimuli = pd.concat([stimuli, df])

-        if len(stimuli) != self.n_of_stimuli_across_all_sessions:
-            logging.error(
-                f"Len of stimuli table: {len(stimuli)}, calculated "
-                + f"stimuli length: {self.n_of_stimuli_across_all_sessions}"
-            )
-            raise RuntimeError(
-                "Number of stimuli in raw_data.stim differs from the "
-                + "calculated amount of stimuli"
-            )
-
-        pivot_table = stimuli.pivot_table(
-            index=["sf", "tf", "direction"], aggfunc="size"
-        )
+        if not self.deactivate_checks:
+            if len(stimuli) != self.n_of_stimuli_across_all_sessions:
+                logging.error(
+                    f"Len of stimuli table: {len(stimuli)}, calculated "
+                    + f"stimuli length: {self.n_of_stimuli_across_all_sessions}"
+                )
+                raise RuntimeError(
+                    "Number of stimuli in raw_data.stim differs from the "
+                    + "calculated amount of stimuli"
+                )

-        if (
-            len(pivot_table)
-            != self.config["n_sf"] * self.config["n_tf"] * self.config["n_dir"]
-        ):
-            logging.error(f"Pivot table: {pivot_table}")
-            logging.error(f"Pivot table length: {len(pivot_table)}")
-            raise RuntimeError(
-                "Number of stimuli is not correct, some combinations are "
-                + "missing or duplicated"
+            pivot_table = stimuli.pivot_table(
+                index=["sf", "tf", "direction"], aggfunc="size"
             )

-        if np.any(pivot_table.values != self.n_triggers_per_stimulus):
-            logging.error(f"Pivot table: {pivot_table}")
-            raise RuntimeError(
-                "Number of stimuli is not correct, some combinations are "
-                + "missing or duplicated"
-            )
+            if (
+                len(pivot_table)
+                != self.config["n_sf"]
+                * self.config["n_tf"]
+                * self.config["n_dir"]
+            ):
+                logging.error(f"Pivot table: {pivot_table}")
+                logging.error(f"Pivot table length: {len(pivot_table)}")
+                raise RuntimeError(
+                    "Number of stimuli is not correct, some combinations are "
+                    + "missing or duplicated"
+                )
+
+            if np.any(pivot_table.values != self.n_triggers_per_stimulus):
+                logging.error(f"Pivot table: {pivot_table}")
+                raise RuntimeError(
+                    "Number of stimuli is not correct, some combinations are "
+                    + "missing or duplicated"
+                )

         return stimuli

 def fill_up_with_stim_info(self, signal, stimuli):
             ].tolist()

             s_idxs = set(signal_idxs)
-            if explored_idxs.intersection(set(s_idxs)):
+            if (
+                explored_idxs.intersection(set(s_idxs))
+                and not self.deactivate_checks
+            ):
                 raise RuntimeError("Index is duplicated across signals")

             explored_idxs = explored_idxs.union(s_idxs)

             # starting frames and stimuli are aligned in source data
             stimulus = stimuli.iloc[k]

-            if len(signal_idxs) != self.n_roi:
+            if len(signal_idxs) != self.n_roi and not self.deactivate_checks:
                 raise RuntimeError(
                     f"Number of instances for stimulus {stimulus} is wrong."
                 )

-        if np.any(
-            signal[signal.stimulus_onset]
-            .pivot_table(index=["sf", "tf", "direction"], aggfunc="size")
-            .values
-            != self.n_triggers_per_stimulus * self.n_roi
+        if (
+            np.any(
+                signal[signal.stimulus_onset]
+                .pivot_table(index=["sf", "tf", "direction"], aggfunc="size")
+                .values
+                != self.n_triggers_per_stimulus * self.n_roi
+            )
+            and not self.deactivate_checks
         ):
             raise RuntimeError(
                 "Signal table was not populated correctly "
                 + "with stimulus information"
             )
diff --git a/tests/test_integration/test_photon_data.py b/tests/test_integration/test_photon_data.py
index d0a7c1e..caea2aa 100644
--- a/tests/test_integration/test_photon_data.py
+++ b/tests/test_integration/test_photon_data.py
@@ -1,6 +1,7 @@
 import numpy as np
 import pytest

+from rsp_vision.analysis.utils import get_fps
 from rsp_vision.objects.data_raw import DataRaw
 from rsp_vision.objects.enums import PhotonType
 from rsp_vision.objects.photon_data import PhotonData
@@ -18,7 +19,13 @@ def get_variables():

 @pytest.fixture
 def get_config():
-    yield {"fps_two_photon": 30}
+    yield {
+        "fps_two_photon": 30,
+        "trigger_interval_s": 2.5,
+        "n_sf": 6,
+        "n_tf": 6,
+        "n_dir": 8,
+    }


 @pytest.fixture
@@ -81,8 +88,10 @@ def get_data_raw_object(get_variables):
 @pytest.fixture
 def get_photon_data(get_data_raw_object, get_config):
     photon_data = PhotonData.__new__(PhotonData)
+    photon_data.deactivate_checks = True
     photon_data.photon_type = PhotonType.TWO_PHOTON
     photon_data.config = get_config
+    photon_data.fps = get_fps(photon_data.photon_type, photon_data.config)
     photon_data.set_general_variables(get_data_raw_object)
     yield photon_data
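The guard pattern PATCH 11 threads through the checks, reduced to a minimal sketch. The `check_counts` helper is hypothetical and only illustrates the shape of the pattern: validation runs by default and is silenced for mock data in tests:

```python
import numpy as np

def check_counts(
    counts: np.ndarray, expected: int, deactivate_checks: bool = False
) -> None:
    """Raise if any combination has the wrong count, unless checks are off."""
    if not deactivate_checks and np.any(counts != expected):
        raise RuntimeError("Signal table was not populated correctly")

check_counts(np.array([2, 2, 2]), expected=2)  # real data: validated
check_counts(np.array([2, 3]), expected=2, deactivate_checks=True)  # test data: skipped
```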
From a6ca4340af121fd5c8d07976a241ec3fa45611bbf Mon Sep 17 00:00:00 2001
From: lauraporta <29216006+lauraporta@users.noreply.github.com>
Date: Tue, 28 Feb 2023 13:57:54 +0000
Subject: [PATCH 12/26] Add unique stimuli dict and fix start frames bug in
 photon_data

---
 rsp_vision/objects/photon_data.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/rsp_vision/objects/photon_data.py b/rsp_vision/objects/photon_data.py
index 879b59a..903d4ef 100644
--- a/rsp_vision/objects/photon_data.py
+++ b/rsp_vision/objects/photon_data.py
@@ -146,10 +146,11 @@ def get_stimulus_start_frames(self):
         # I assume the signal has been cut in the generation of
         # this summary data in order to align perfectly
         # It needs to be checked with trigger information
-        return np.array(
+
+        frames_in_session = np.array(
             (
                 self.n_baseline_frames
-                + np.arange(0, self.n_of_stimuli_across_all_sessions)
+                + np.arange(0, self.n_of_stimuli_per_session)
                 * self.n_triggers_per_stimulus
                 * self.n_frames_per_trigger
                 + 1
@@ -157,6 +158,14 @@
             dtype=int,
         )

+        frames_all_sessions = (
+            frames_in_session.reshape(-1, 1)
+            + np.arange(0, self.n_sessions) * self.n_frames_per_session
+        )
+        frames_all_sessions = np.sort(frames_all_sessions.flatten(), axis=None)
+
+        return frames_all_sessions
+
     def make_signal_dataframe(self, data_raw):
         """Make the signal dataframe, which will be filled up with
         the stimulus information later on.
@@ -316,7 +325,11 @@ def get_stimuli(self, data_raw):
                 "Number of stimuli is not correct, some combinations are "
                 + "missing or duplicated"
             )
-
+        self.uniques = {
+            "sf": stimuli.sf.unique(),
+            "tf": stimuli.tf.unique(),
+            "direction": stimuli.direction.unique(),
+        }
         return stimuli

From b6879efe0c58ddcd9ce1f2ec6076ada213790e8f Mon Sep 17 00:00:00 2001
From: lauraporta <29216006+lauraporta@users.noreply.github.com>
Date: Tue, 28 Feb 2023 13:59:43 +0000
Subject: [PATCH 13/26] Change logging and error messages

---
 rsp_vision/load/load_data.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/rsp_vision/load/load_data.py b/rsp_vision/load/load_data.py
index 17c4aa3..a4d8f0b 100644
--- a/rsp_vision/load/load_data.py
+++ b/rsp_vision/load/load_data.py
@@ -50,15 +50,17 @@ def load(folder_naming: FolderNamingSpecs, config: dict) -> DataRaw:

             with h5py.File(allen_data_files[0].path, "r") as h5py_file:
                 data_raw = DataRaw(h5py_file, is_allen=True)

-            logging.info("Allen data loaded")
+            logging.info("Summary data loaded")
             return data_raw
         else:
             raise ValueError(
-                "There is more than one Allen file for sf_tf analysis"
+                "There is more than one summary file for sf_tf analysis"
             )
     else:
         raise NotImplementedError(
-            "Only sf_tf analysis is implemented for Allen data"
+            "Only sf_tf analysis is implemented for summary data"
         )
 else:
-    raise NotImplementedError("Only loading for Allen data is implemented")
+    raise NotImplementedError(
+        "Only loading for summary data is implemented"
+    )
From 23cf752685d3ec47c6757a94adef53e981722b0e Mon Sep 17 00:00:00 2001
From: lauraporta <29216006+lauraporta@users.noreply.github.com>
Date: Tue, 28 Feb 2023 14:02:11 +0000
Subject: [PATCH 14/26] Add improvements in window estimation and initial
 version of statistical calculations (last one still buggy)

---
 .../analysis/spatial_freq_temporal_freq.py | 122 ++++++++++++++++--
 1 file changed, 108 insertions(+), 14 deletions(-)

diff --git a/rsp_vision/analysis/spatial_freq_temporal_freq.py b/rsp_vision/analysis/spatial_freq_temporal_freq.py
index bb55dbd..d5609aa 100644
--- a/rsp_vision/analysis/spatial_freq_temporal_freq.py
+++ b/rsp_vision/analysis/spatial_freq_temporal_freq.py
@@ -1,6 +1,8 @@
 import logging

 import numpy as np
+import pandas as pd
+import scipy.stats as ss

 from ..objects.enums import PhotonType
 from ..objects.photon_data import PhotonData
@@ -40,11 +42,16 @@ def calculate_mean_response_and_baseline(
         logging.info("Start to edit the signal dataframe...")

         # Identify the rows in the window of each stimulus onset
-        window_start_response = stimulus_idxs + (
-            self.data.n_frames_per_trigger * response_start
+        window_start_response = (
+            stimulus_idxs
+            + (self.data.n_frames_per_trigger * response_start)
+            + (self.fps * 0.5)  # ignore first 0.5s
+            - 1
         )
-        window_end_response = stimulus_idxs + (
-            self.data.n_frames_per_trigger * (response_start + 1)
+        window_end_response = (
+            stimulus_idxs
+            + (self.data.n_frames_per_trigger * (response_start + 1))
+            - 1
         )
         window_mask_response = np.vstack(
             [
@@ -55,11 +62,16 @@
             ]
         )

-        window_start_baseline = stimulus_idxs + (
-            self.data.n_frames_per_trigger * baseline_start
+        window_start_baseline = (
+            stimulus_idxs
+            + (self.data.n_frames_per_trigger * baseline_start)
+            + (self.fps * 1.5)  # ignore first 1.5s
+            - 1
         )
-        window_end_baseline = stimulus_idxs + (
-            self.data.n_frames_per_trigger * (baseline_start + 1)
+        window_end_baseline = (
+            stimulus_idxs
+            + (self.data.n_frames_per_trigger * (baseline_start + 1))
+            - 1
         )
         window_mask_baseline = np.vstack(
             [

         self.adapted_signal["subtracted"] = (
             self.adapted_signal["mean_response"]
             - self.adapted_signal["mean_baseline"]
         )

-        self.only_stim_onset = self.adapted_signal[
+        self.responses = self.adapted_signal[
             self.adapted_signal["stimulus_onset"]
+        ][
+            [
+                "frames_id",
+                "roi_id",
+                "session_id",
+                "sf",
+                "tf",
+                "direction",
+                "mean_response",
+                "mean_baseline",
+                "subtracted",
+            ]
         ]
-        logging.info(f"Adapted signal dataframe:{self.only_stim_onset.head()}")

     def get_fit_parameters(self):
         # calls _fit_two_dimensional_elliptical_gaussian
         raise NotImplementedError("This method is not implemented yet")

     def responsiveness(self):
-        self.responsiveness_anova()
-
-        raise NotImplementedError("This method is not implemented yet")
+        self.calculate_mean_response_and_baseline()
+        logging.info(f"Adapted signal dataframe:{self.responses.head()}")
+
+        self.p_values = pd.DataFrame(
+            columns=[
+                "Kruskal-Wallis test",
+                "Sign test",
+                "Wilcoxon signed rank test",
+            ]
+        )
+
+        self.p_values["Kruskal-Wallis test"] = self.nonparam_anova_over_rois()
+        (
+            self.p_values["Sign test"],
+            self.p_values["Wilcoxon signed rank test"],
+        ) = self.are_responses_significant()
+        logging.info(f"P-values for each roi:\n{self.p_values}")
+
+    def nonparam_anova_over_rois(self) -> dict:
+        # Use Kruskal-Wallis H Test because it is nonparametric.
+        # Compare if more than two independent
+        # samples have a different distribution
+
+        p_values = {}
+        for roi in range(self.data.n_roi):
+            melted = pd.melt(
+                self.responses[self.responses.roi_id == roi],
+                id_vars=["sf", "tf"],
+                value_vars=["subtracted"],
+            )
+
+            _sf = self.data.uniques["sf"]
+            _tf = self.data.uniques["tf"]
+            sf_tf_combinations = np.array(np.meshgrid(_sf, _tf)).T.reshape(
+                -1, 2
+            )
+            _dir = self.data.uniques["direction"]
+
+            samples = np.zeros(
+                (
+                    len(_dir) * self.data.n_triggers_per_stimulus,
+                    len(sf_tf_combinations),
+                )
+            )
+
+            for i, sf_tf in enumerate(sf_tf_combinations):
+                samples[:, i] = melted[
+                    (melted.sf == sf_tf[0]) & (melted.tf == sf_tf[1])
+                ].value
+
+            _, p_val = ss.kruskal(*samples)
+            p_values[roi] = p_val
+
+        return p_values
+
+    def are_responses_significant(self):
+        p_st = {}
+        p_wsrt = {}
+        for roi in range(self.data.n_roi):
+            subset = self.responses[self.responses.roi_id == roi]
+
+            # Sign test (implemented with binomial test)
+            p_st[roi] = ss.binom_test(
+                sum([1 for d in subset.subtracted if d > 0]),
+                n=len(subset.subtracted),
+                alternative="greater",
+            )
+
+            # Wilcoxon signed rank test
+            _, p_wsrt[roi] = ss.wilcoxon(
+                x=subset.mean_response,
+                y=subset.mean_baseline,
+                alternative="less",
+            )
+
+        return p_st, p_wsrt

     def get_preferred_direction_all_rois(self):
         raise NotImplementedError("This method is not implemented yet")

From a606ac0af121fd5c8d07976a241ec3fa45611bbf Mon Sep 17 00:00:00 2001
From: lauraporta <29216006+lauraporta@users.noreply.github.com>
Date: Tue, 28 Feb 2023 14:38:27 +0000
Subject: [PATCH 15/26] Add fixes to anova

---
 rsp_vision/analysis/spatial_freq_temporal_freq.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git
 a/rsp_vision/analysis/spatial_freq_temporal_freq.py b/rsp_vision/analysis/spatial_freq_temporal_freq.py
index d5609aa..4abb19d 100644
--- a/rsp_vision/analysis/spatial_freq_temporal_freq.py
+++ b/rsp_vision/analysis/spatial_freq_temporal_freq.py
@@ -165,8 +165,8 @@ def nonparam_anova_over_rois(self) -> dict:
                 value_vars=["subtracted"],
             )

-            _sf = self.data.uniques["sf"]
-            _tf = self.data.uniques["tf"]
+            _sf = np.sort(self.data.uniques["sf"])
+            _tf = np.sort(self.data.uniques["tf"])
             sf_tf_combinations = np.array(np.meshgrid(_sf, _tf)).T.reshape(
                 -1, 2
             )
@@ -184,7 +184,7 @@
                 (melted.sf == sf_tf[0]) & (melted.tf == sf_tf[1])
                 ].value

-            _, p_val = ss.kruskal(*samples)
+            _, p_val = ss.kruskal(*samples.T)
             p_values[roi] = p_val

         return p_values

From 8b1d791956d3a9cbf694253aa793eca76423e474 Mon Sep 17 00:00:00 2001
From: lauraporta <29216006+lauraporta@users.noreply.github.com>
Date: Tue, 28 Feb 2023 14:59:06 +0000
Subject: [PATCH 16/26] Fix signed rank test

---
 rsp_vision/analysis/spatial_freq_temporal_freq.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/rsp_vision/analysis/spatial_freq_temporal_freq.py b/rsp_vision/analysis/spatial_freq_temporal_freq.py
index 4abb19d..e96a424 100644
--- a/rsp_vision/analysis/spatial_freq_temporal_freq.py
+++ b/rsp_vision/analysis/spatial_freq_temporal_freq.py
@@ -204,9 +204,8 @@ def are_responses_significant(self):

             # Wilcoxon signed rank test
             _, p_wsrt[roi] = ss.wilcoxon(
-                x=subset.mean_response,
-                y=subset.mean_baseline,
-                alternative="less",
+                x=subset.subtracted,
+                alternative="greater",
             )

         return p_st, p_wsrt
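A hedged sketch of the two per-ROI significance tests as they stand after PATCH 16, run on fabricated baseline-subtracted responses. Note the series calls `ss.binom_test`, which was SciPy's API at the time; the sketch below uses the newer `scipy.stats.binomtest`, so treat the exact call as an assumption tied to the SciPy version:

```python
import numpy as np
import scipy.stats as ss

rng = np.random.default_rng(0)
subtracted = rng.normal(loc=0.2, scale=1.0, size=96)  # fake ROI responses

# Sign test via a binomial test on the count of positive responses
p_sign = ss.binomtest(
    int((subtracted > 0).sum()), n=len(subtracted), alternative="greater"
).pvalue

# Wilcoxon signed-rank test against a zero median, one-sided,
# directly on the response-minus-baseline values as in the patch
_, p_wilcoxon = ss.wilcoxon(x=subtracted, alternative="greater")
print(p_sign, p_wilcoxon)
```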
- stimulus_idxs + self.stimulus_idxs + (self.data.n_frames_per_trigger * baseline_start) + (self.fps * 1.5) # ignore first 1.5s - 1 ) window_end_baseline = ( - stimulus_idxs + self.stimulus_idxs + (self.data.n_frames_per_trigger * (baseline_start + 1)) - 1 ) @@ -82,30 +76,40 @@ def calculate_mean_response_and_baseline( ] ) + return window_mask_response, window_mask_baseline + + def calculate_mean_response_and_baseline( + self, + ): + logging.info("Start to edit the signal dataframe...") + + ( + self.window_mask_response, + self.window_mask_baseline, + ) = self.get_response_and_baseline_windows() + mean_response_signal = [ np.mean( self.adapted_signal.iloc[ - window_mask_response[i] - ].signal.values, - axis=0, + self.window_mask_response[i] + ].signal.values ) - for i in range(len(window_mask_response)) + for i in range(len(self.window_mask_response)) ] mean_baseline_signal = [ np.mean( self.adapted_signal.iloc[ - window_mask_baseline[i] - ].signal.values, - axis=0, + self.window_mask_baseline[i] + ].signal.values ) - for i in range(len(window_mask_baseline)) + for i in range(len(self.window_mask_baseline)) ] self.adapted_signal.loc[ - stimulus_idxs, "mean_response" + self.stimulus_idxs, "mean_response" ] = mean_response_signal self.adapted_signal.loc[ - stimulus_idxs, "mean_baseline" + self.stimulus_idxs, "mean_baseline" ] = mean_baseline_signal self.adapted_signal["subtracted"] = ( @@ -128,6 +132,7 @@ def calculate_mean_response_and_baseline( "subtracted", ] ] + self.responses = self.responses.reset_index() def get_fit_parameters(self): # calls _fit_two_dimensional_elliptical_gaussian @@ -152,6 +157,77 @@ def responsiveness(self): ) = self.are_responses_significant() logging.info(f"P-values for each roi:\n{self.p_values}") + self.magintude_over_medians = self.response_magnitude() + + def response_magnitude(self): + # get specific windows for each sf/tf combo + + magintude_over_medians = pd.DataFrame( + columns=[ + "roi", + "sf", + "tf", + "response_mean", + "baseline_mean", + "baseline_std", + "magnitude", + ] + ) + + for roi in range(self.data.n_roi): + for i, sf_tf in enumerate(self.sf_tf_combinations): + sf_tf_idx = self.responses[ + (self.responses.sf == sf_tf[0]) + & (self.responses.tf == sf_tf[1]) + & (self.responses.roi_id == roi) + ].index + r_windows = self.window_mask_response[sf_tf_idx] + b_windows = self.window_mask_baseline[sf_tf_idx] + + responses_dir_and_reps = np.zeros( + (r_windows.shape[0], r_windows.shape[1]) + ) + baseline_dir_and_reps = np.zeros( + (b_windows.shape[0], b_windows.shape[1]) + ) + + for i, w in enumerate(r_windows): + responses_dir_and_reps[i, :] = self.adapted_signal.iloc[ + w + ].signal.values + + for i, w in enumerate(b_windows): + baseline_dir_and_reps[i, :] = self.adapted_signal.iloc[ + w + ].signal.values + + median_response = np.median(responses_dir_and_reps, axis=0) + median_baseline = np.median(baseline_dir_and_reps, axis=0) + + m_r = np.mean(median_response) + m_b = np.mean(median_baseline) + std_b = np.std(median_baseline, ddof=1) + magnitude = (m_r - m_b) / std_b + + df = pd.DataFrame( + { + "roi": roi, + "sf": sf_tf[0], + "tf": sf_tf[1], + "response_mean": m_r, + "baseline_mean": m_b, + "baseline_std": std_b, + "magnitude": magnitude, + }, + index=[0], + ) + + magintude_over_medians = pd.concat( + [magintude_over_medians, df], ignore_index=True + ) + + return magintude_over_medians + def nonparam_anova_over_rois(self) -> dict: # Use Kruskal-Wallis H Test because it is nonparametric. 
# Compare if more than two independent @@ -165,21 +241,21 @@ def nonparam_anova_over_rois(self) -> dict: value_vars=["subtracted"], ) - _sf = np.sort(self.data.uniques["sf"]) - _tf = np.sort(self.data.uniques["tf"]) - sf_tf_combinations = np.array(np.meshgrid(_sf, _tf)).T.reshape( - -1, 2 - ) - _dir = self.data.uniques["direction"] + self._sf = np.sort(self.data.uniques["sf"]) + self._tf = np.sort(self.data.uniques["tf"]) + self.sf_tf_combinations = np.array( + np.meshgrid(self._sf, self._tf) + ).T.reshape(-1, 2) + self._dir = self.data.uniques["direction"] samples = np.zeros( ( - len(_dir) * self.data.n_triggers_per_stimulus, - len(sf_tf_combinations), + len(self._dir) * self.data.n_triggers_per_stimulus, + len(self.sf_tf_combinations), ) ) - for i, sf_tf in enumerate(sf_tf_combinations): + for i, sf_tf in enumerate(self.sf_tf_combinations): samples[:, i] = melted[ (melted.sf == sf_tf[0]) & (melted.tf == sf_tf[1]) ].value From 2ba1527cadc74ac72afe0f716f9ef18132460873 Mon Sep 17 00:00:00 2001 From: lauraporta <29216006+lauraporta@users.noreply.github.com> Date: Tue, 28 Feb 2023 18:05:58 +0000 Subject: [PATCH 18/26] Add calculations for significance --- .../analysis/spatial_freq_temporal_freq.py | 82 +++++++++++++------ setup_for_demo.py | 4 + 2 files changed, 61 insertions(+), 25 deletions(-) diff --git a/rsp_vision/analysis/spatial_freq_temporal_freq.py b/rsp_vision/analysis/spatial_freq_temporal_freq.py index e07429d..a18e2e8 100644 --- a/rsp_vision/analysis/spatial_freq_temporal_freq.py +++ b/rsp_vision/analysis/spatial_freq_temporal_freq.py @@ -26,6 +26,59 @@ def __init__(self, data: PhotonData, photon_type: PhotonType): self.adapted_signal["mean_response"] = np.nan self.adapted_signal["mean_baseline"] = np.nan + def responsiveness(self): + self.calculate_mean_response_and_baseline() + logging.info(f"Adapted signal dataframe:{self.responses.head()}") + + self.p_values = pd.DataFrame( + columns=[ + "Kruskal-Wallis test", + "Sign test", + "Wilcoxon signed rank test", + ] + ) + + self.p_values["Kruskal-Wallis test"] = self.nonparam_anova_over_rois() + ( + self.p_values["Sign test"], + self.p_values["Wilcoxon signed rank test"], + ) = self.are_responses_significant() + logging.info(f"P-values for each roi:\n{self.p_values}") + + self.magintude_over_medians = self.response_magnitude() + logging.info( + "Response magnitude calculated over median:\n" + + f"{self.magintude_over_medians.head()}" + ) + + responsive_rois = self.significantly_responsive_rois() + logging.info(f"Responsive ROIs: {responsive_rois}") + + def significantly_responsive_rois(self): + sig_kw = set( + np.where( + self.p_values["Kruskal-Wallis test"].values + < self.data.config["anova_threshold"] + )[0].tolist() + ) + sig_magnitude = set( + np.where( + self.magintude_over_medians.groupby("roi").magnitude.max() + > self.data.config["response_magnitude_threshold"] + )[0].tolist() + ) + + if self.data.config["consider_only_positive"]: + sig_positive = set( + np.where( + self.p_values["Wilcoxon signed rank test"].values + < self.data.config["only_positive_threshold"] + )[0].tolist() + ) + sig_kw = sig_kw & sig_positive + + return sig_kw & sig_magnitude + def get_response_and_baseline_windows(self): baseline_start = 0 if self.data.n_triggers_per_stimulus == 3: @@ -134,31 +187,6 @@ def calculate_mean_response_and_baseline( ] self.responses = self.responses.reset_index() - def get_fit_parameters(self): - # calls _fit_two_dimensional_elliptical_gaussian - raise NotImplementedError("This method is not implemented yet") - - def 
responsiveness(self): - self.calculate_mean_response_and_baseline() - logging.info(f"Adapted signal dataframe:{self.responses.head()}") - - self.p_values = pd.DataFrame( - columns=[ - "Kruskal-Wallis test", - "Sign test", - "Wilcoxon signed rank test", - ] - ) - - self.p_values["Kruskal-Wallis test"] = self.nonparam_anova_over_rois() - ( - self.p_values["Sign test"], - self.p_values["Wilcoxon signed rank test"], - ) = self.are_responses_significant() - logging.info(f"P-values for each roi:\n{self.p_values}") - - self.magintude_over_medians = self.response_magnitude() - def response_magnitude(self): # get specific windows for each sf/tf combo @@ -286,6 +314,10 @@ def are_responses_significant(self): return p_st, p_wsrt + def get_fit_parameters(self): + # calls _fit_two_dimensional_elliptical_gaussian + raise NotImplementedError("This method is not implemented yet") + def get_preferred_direction_all_rois(self): raise NotImplementedError("This method is not implemented yet") diff --git a/setup_for_demo.py b/setup_for_demo.py index 263bb81..5e02719 100644 --- a/setup_for_demo.py +++ b/setup_for_demo.py @@ -31,6 +31,10 @@ "n_tf": 6, "n_dir": 8, "trigger_interval_s": 2.5, + "anova_threshold": 0.0005, + "response_magnitude_threshold": 2.7, + "consider_only_positive": False, + "only_positive_threshold": 0.05, } yaml.dump(content, f) From a16d12752835eee6fa3aceed264e5090a926027d Mon Sep 17 00:00:00 2001 From: lauraporta <29216006+lauraporta@users.noreply.github.com> Date: Tue, 28 Feb 2023 19:23:59 +0000 Subject: [PATCH 19/26] Reorder methods and add docstrings --- .../analysis/spatial_freq_temporal_freq.py | 395 ++++++++++++------ 1 file changed, 261 insertions(+), 134 deletions(-) diff --git a/rsp_vision/analysis/spatial_freq_temporal_freq.py b/rsp_vision/analysis/spatial_freq_temporal_freq.py index a18e2e8..40e267e 100644 --- a/rsp_vision/analysis/spatial_freq_temporal_freq.py +++ b/rsp_vision/analysis/spatial_freq_temporal_freq.py @@ -1,4 +1,5 @@ import logging +from typing import Dict, Tuple import numpy as np import pandas as pd @@ -10,6 +11,11 @@ class SF_TF: + """ + Class for analyzing responses to stimuli with different + spatial and temporal frequencies. + """ + def __init__(self, data: PhotonData, photon_type: PhotonType): self.data = data self.photon_type = photon_type @@ -22,13 +28,40 @@ def __init__(self, data: PhotonData, photon_type: PhotonType): self.data.signal["stimulus_onset"] ].index - self.adapted_signal = self.data.signal - self.adapted_signal["mean_response"] = np.nan - self.adapted_signal["mean_baseline"] = np.nan + self._sf = np.sort(self.data.uniques["sf"]) + self._tf = np.sort(self.data.uniques["tf"]) + self.sf_tf_combinations = np.array( + np.meshgrid(self._sf, self._tf) + ).T.reshape(-1, 2) + self._dir = self.data.uniques["direction"] + + self.signal = self.data.signal + self.signal["mean_response"] = np.nan + self.signal["mean_baseline"] = np.nan def responsiveness(self): + """ + Calculate the responsiveness of each ROI in the signal dataframe. + + This method calculates the responsiveness of each ROI in the signal + dataframe, based on the mean response and mean baseline signals + calculated in the calculate_mean_response_and_baseline method. + First, it performs three different statistical tests to determine + if the response is significantly different from the baseline: + a Kruskal-Wallis test, a Sign test, and a Wilcoxon signed rank test. + The resulting p-values for each ROI are stored in a pandas DataFrame + and logged for debugging purposes. 
+ + Next, the method calculates the response magnitude for each ROI, by + taking the difference between the mean response and mean baseline + signals, and dividing by standard deviation of the baseline signal. + The response and baseline mean are calculated over the median traces. + + Finally, the method identifies the ROIs that show significant + responsiveness based on the results of the statistical tests. + """ self.calculate_mean_response_and_baseline() - logging.info(f"Adapted signal dataframe:{self.responses.head()}") + logging.info(f"Edited signal dataframe:{self.responses.head()}") self.p_values = pd.DataFrame( columns=[ @@ -42,7 +75,7 @@ def responsiveness(self): ( self.p_values["Sign test"], self.p_values["Wilcoxon signed rank test"], - ) = self.are_responses_significant() + ) = self.perform_sign_tests() logging.info(f"P-values for each roi:\n{self.p_values}") self.magintude_over_medians = self.response_magnitude() @@ -51,35 +84,92 @@ def responsiveness(self): + f"{self.magintude_over_medians.head()}" ) - responsive_rois = self.significantly_responsive_rois() - logging.info(f"Responsive ROIs: {responsive_rois}") + self.responsive_rois = self.find_significant_rois() + logging.info(f"Responsive ROIs: {self.responsive_rois}") - def significantly_responsive_rois(self): - sig_kw = set( - np.where( - self.p_values["Kruskal-Wallis test"].values - < self.data.config["anova_threshold"] - )[0].tolist() - ) - sig_magnitude = set( - np.where( - self.magintude_over_medians.groupby("roi").magnitude.max() - > self.data.config["response_magnitude_threshold"] - )[0].tolist() - ) + def calculate_mean_response_and_baseline( + self, + ): + """ + Calculate the mean response and mean baseline signals for each ROI + in the signal dataframe. + + This method calculates the mean response and mean baseline signals + for each ROI in the signal dataframe, + based on the response and baseline windows defined by the method + `get_response_and_baseline_windows`, which this method calls. + It then subtracts the mean baseline from + the mean response to obtain the mean subtracted signal. The resulting + mean response, mean baseline, and mean subtracted signal values are + stored in the signal dataframe, and a subset of the dataframe + containing only the relevant columns is stored in the responses + attribute. 
+ + Returns: + None + """ + logging.info("Start to edit the signal dataframe...") - if self.data.config["consider_only_positive"]: - sig_positive = set( - np.where( - self.p_values["Wilcoxon signed rank test"].values - < self.data.config["only_positive_threshold"] - )[0].tolist() + ( + self.window_mask_response, + self.window_mask_baseline, + ) = self.get_response_and_baseline_windows() + + # add calculated values in the row corresponding to + # the startframe of every stimulus + self.signal.loc[self.stimulus_idxs, "mean_response"] = [ + np.mean( + self.signal.iloc[self.window_mask_response[i]].signal.values ) - sig_kw = sig_kw & sig_positive + for i in range(len(self.window_mask_response)) + ] - return sig_kw & sig_magnitude + self.signal.loc[self.stimulus_idxs, "mean_baseline"] = [ + np.mean( + self.signal.iloc[self.window_mask_baseline[i]].signal.values + ) + for i in range(len(self.window_mask_baseline)) + ] + + self.signal["subtracted"] = ( + self.signal["mean_response"] - self.signal["mean_baseline"] + ) + + # new summary dataframe, more handy + self.responses = self.signal[self.signal["stimulus_onset"]][ + [ + "frames_id", + "roi_id", + "session_id", + "sf", + "tf", + "direction", + "mean_response", + "mean_baseline", + "subtracted", + ] + ] + self.responses = self.responses.reset_index() def get_response_and_baseline_windows(self): + """ + Get the window of indices corresponding to the response and + baseline periods for each stimulus presentation. + + Returns: + ------- + window_mask_response : numpy.ndarray + A 2D numpy array containing the signal dataframe indices + for the response period for each stimulus presentation. + Each row corresponds to a stimulus presentation and each + column contains the frame indices in that presentation. + window_mask_baseline : numpy.ndarray + A 2D numpy array containing the signal dataframe indices + for the baseline period for each stimulus presentation. + Each row corresponds to a stimulus presentation and each + column contains the frame indices in that presentation. + """ + baseline_start = 0 if self.data.n_triggers_per_stimulus == 3: response_start = 2 @@ -131,64 +221,109 @@ def get_response_and_baseline_windows(self): return window_mask_response, window_mask_baseline - def calculate_mean_response_and_baseline( - self, - ): - logging.info("Start to edit the signal dataframe...") - - ( - self.window_mask_response, - self.window_mask_baseline, - ) = self.get_response_and_baseline_windows() + def nonparam_anova_over_rois(self) -> dict: + """ + Perform a nonparametric ANOVA test over each ROI in the dataset. + This test is based on the Kruskal-Wallis H Test, which compares + whether more than two independent samples have different + distributions. For each ROI, this method creates a table with one row + for each combination of spatial and temporal frequencies, and one + column for each presentation of the stimulus. Then, it applies the + Kruskal-Wallis H Test to determine whether the distribution of + responses across different stimuli is significantly different. The + resulting p-values are returned as a dictionary where the keys are + the ROI IDs and the values are the p-values for each ROI. + + Returns + ------- + dict + A dictionary where the keys are the ROI IDs and the values are + the p-values for each ROI. 
+ + """ - mean_response_signal = [ - np.mean( - self.adapted_signal.iloc[ - self.window_mask_response[i] - ].signal.values + p_values = {} + for roi in range(self.data.n_roi): + roi_responses = pd.melt( + self.responses[self.responses.roi_id == roi], + id_vars=["sf", "tf"], + value_vars=["subtracted"], ) - for i in range(len(self.window_mask_response)) - ] - mean_baseline_signal = [ - np.mean( - self.adapted_signal.iloc[ - self.window_mask_baseline[i] - ].signal.values + + samples = np.zeros( + ( + len(self._dir) * self.data.n_triggers_per_stimulus, + len(self.sf_tf_combinations), + ) ) - for i in range(len(self.window_mask_baseline)) - ] - self.adapted_signal.loc[ - self.stimulus_idxs, "mean_response" - ] = mean_response_signal - self.adapted_signal.loc[ - self.stimulus_idxs, "mean_baseline" - ] = mean_baseline_signal + for i, sf_tf in enumerate(self.sf_tf_combinations): + # samples are each presentation of an sf/tf combination, + # regardless of direction and repetition + samples[:, i] = roi_responses[ + (roi_responses.sf == sf_tf[0]) + & (roi_responses.tf == sf_tf[1]) + ].value - self.adapted_signal["subtracted"] = ( - self.adapted_signal["mean_response"] - - self.adapted_signal["mean_baseline"] - ) + _, p_val = ss.kruskal(*samples.T) + p_values[roi] = p_val - self.responses = self.adapted_signal[ - self.adapted_signal["stimulus_onset"] - ][ - [ - "frames_id", - "roi_id", - "session_id", - "sf", - "tf", - "direction", - "mean_response", - "mean_baseline", - "subtracted", - ] - ] - self.responses = self.responses.reset_index() + return p_values + + def perform_sign_tests(self) -> Tuple[Dict[int, float], Dict[int, float]]: + """ + Perform sign test and Wilcoxon signed rank test on the subtracted + response data for each ROI. + + Returns: + A tuple containing two dictionaries with ROI IDs as keys and + p-values as values. The first dictionary contains the p-values + for the sign test (implemented with a binomial test) for each ROI. + The test checks whether the proportion of positive differences + between the response and baseline periods is greater than 0.5. + The second dictionary contains the p-values for the Wilcoxon + signed rank test for each ROI. + The test checks whether the distribution of differences between + the response and baseline periods is shifted to the right. + """ + + p_st = {} + p_wsrt = {} + for roi in range(self.data.n_roi): + roi_responses = self.responses[self.responses.roi_id == roi] + + # Sign test (implemented with binomial test) + p_st[roi] = ss.binom_test( + sum([1 for d in roi_responses.subtracted if d > 0]), + n=len(roi_responses.subtracted), + alternative="greater", + ) + + # Wilcoxon signed rank test + _, p_wsrt[roi] = ss.wilcoxon( + x=roi_responses.subtracted, + alternative="greater", + ) + + return p_st, p_wsrt def response_magnitude(self): - # get specific windows for each sf/tf combo + """ + Compute the response magnitude for each combination of spatial and + temporal frequency and ROI. 
+ + Returns: + A Pandas DataFrame with the following columns: + - "roi": the ROI index + - "sf": the spatial frequency + - "tf": the temporal frequency + - "response_mean": the mean of the median response signal + - "baseline_mean": the mean of the median baseline signal + - "baseline_std": the standard deviation of the median + baseline signal + - "magnitude": the response magnitude, defined as + (response_mean - baseline_mean) / baseline_std + """ magintude_over_medians = pd.DataFrame( columns=[ @@ -220,12 +355,12 @@ def response_magnitude(self): ) for i, w in enumerate(r_windows): - responses_dir_and_reps[i, :] = self.adapted_signal.iloc[ + responses_dir_and_reps[i, :] = self.signal.iloc[ w ].signal.values for i, w in enumerate(b_windows): - baseline_dir_and_reps[i, :] = self.adapted_signal.iloc[ + baseline_dir_and_reps[i, :] = self.signal.iloc[ w ].signal.values @@ -256,63 +391,55 @@ def response_magnitude(self): return magintude_over_medians - def nonparam_anova_over_rois(self) -> dict: - # Use Kruskal-Wallis H Test because it is nonparametric. - # Compare if more than two independent - # samples have a different distribution - - p_values = {} - for roi in range(self.data.n_roi): - melted = pd.melt( - self.responses[self.responses.roi_id == roi], - id_vars=["sf", "tf"], - value_vars=["subtracted"], - ) - - self._sf = np.sort(self.data.uniques["sf"]) - self._tf = np.sort(self.data.uniques["tf"]) - self.sf_tf_combinations = np.array( - np.meshgrid(self._sf, self._tf) - ).T.reshape(-1, 2) - self._dir = self.data.uniques["direction"] - - samples = np.zeros( - ( - len(self._dir) * self.data.n_triggers_per_stimulus, - len(self.sf_tf_combinations), - ) - ) - - for i, sf_tf in enumerate(self.sf_tf_combinations): - samples[:, i] = melted[ - (melted.sf == sf_tf[0]) & (melted.tf == sf_tf[1]) - ].value - - _, p_val = ss.kruskal(*samples.T) - p_values[roi] = p_val - - return p_values - - def are_responses_significant(self): - p_st = {} - p_wsrt = {} - for roi in range(self.data.n_roi): - subset = self.responses[self.responses.roi_id == roi] - - # Sign test (implemented with binomial test) - p_st[roi] = ss.binom_test( - sum([1 for d in subset.subtracted if d > 0]), - n=len(subset.subtracted), - alternative="greater", - ) + def find_significant_rois(self): + """ + Returns a set of ROIs that are significantly responsive, based on + statistical tests and a response magnitude threshold. + + The method first identifies ROIs that show significant differences + across at least one condition using the Kruskal-Wallis test. + ROIs with p-values below the `anova_threshold` specified in the + `config` attribute are considered significant. + + It then identifies ROIs with a response magnitude above the + `response_magnitude_threshold` specified in the `config` attribute. + The response magnitude is defined as the difference between the + mean response and mean baseline, divided by the standard deviation + of the baseline. + + If the `consider_only_positive` option is set to True in the `config` + attribute, the method also requires ROIs to show a significant + positive response according to the Wilcoxon signed rank test. ROIs + with p-values below the `only_positive_threshold` specified in the + `config` attribute are considered significant. + + Returns: + set: A set of ROIs that are significantly responsive based on the + specified criteria. 
+ """ + sig_kw = set( + np.where( + self.p_values["Kruskal-Wallis test"].values + < self.data.config["anova_threshold"] + )[0].tolist() + ) + sig_magnitude = set( + np.where( + self.magintude_over_medians.groupby("roi").magnitude.max() + > self.data.config["response_magnitude_threshold"] + )[0].tolist() + ) - # Wilcoxon signed rank test - _, p_wsrt[roi] = ss.wilcoxon( - x=subset.subtracted, - alternative="greater", + if self.data.config["consider_only_positive"]: + sig_positive = set( + np.where( + self.p_values["Wilcoxon signed rank test"].values + < self.data.config["only_positive_threshold"] + )[0].tolist() ) + sig_kw = sig_kw & sig_positive - return p_st, p_wsrt + return sig_kw & sig_magnitude def get_fit_parameters(self): # calls _fit_two_dimensional_elliptical_gaussian From face2f412d9b77bdf0d46eead431ec992734e0ca Mon Sep 17 00:00:00 2001 From: lauraporta <29216006+lauraporta@users.noreply.github.com> Date: Thu, 2 Mar 2023 13:37:13 +0000 Subject: [PATCH 20/26] Refactor and imporve PhotonData, imporve tests --- rsp_vision/objects/photon_data.py | 149 +++++++++++++-------- tests/test_integration/test_photon_data.py | 83 ++++++++---- 2 files changed, 150 insertions(+), 82 deletions(-) diff --git a/rsp_vision/objects/photon_data.py b/rsp_vision/objects/photon_data.py index 903d4ef..4aa7d08 100644 --- a/rsp_vision/objects/photon_data.py +++ b/rsp_vision/objects/photon_data.py @@ -121,14 +121,13 @@ def calculations_to_find_start_frames(self): self.n_frames_per_trigger != self.config["trigger_interval_s"] * self.fps ): - logging.error( - "Frames per trigger from data: " - + f"{self.n_frames_per_trigger} " - + "and calculated from fps: " - + f"{self.config['trigger_interval_s'] * self.fps}" - + " are different" + message = f"Frames per trigger from data: \ + {self.n_frames_per_trigger} and calculated from fps: \ + {self.config['trigger_interval_s'] * self.fps} are different" + logging.error(message) + raise RuntimeError( + "Number of frames per trigger is wrong\n" + message ) - raise RuntimeError("Number of frames per trigger is wrong") self.n_baseline_frames = ( self.n_session_boundary_baseline_triggers @@ -291,11 +290,58 @@ def get_stimuli(self, data_raw): ) stimuli = pd.concat([stimuli, df]) + self.check_consistency_of_stimuli_df(stimuli) + + self.uniques = { + "sf": stimuli.sf.unique(), + "tf": stimuli.tf.unique(), + "direction": stimuli.direction.unique(), + } + return stimuli + + def check_consistency_of_stimuli_df(self, stimuli): + """ + Check the consistency of stimuli dataframe with the expected + number of stimuli and their combinations. + + Parameters + ---------- + stimuli : pandas DataFrame + DataFrame containing stimuli information. + + Returns + ------- + None + + Raises + ------ + RuntimeError + If the number of stimuli in the input dataframe is not equal + to the expected number of stimuli or if there are missing or + duplicated combinations of stimuli. + + Notes + ----- + The method first checks if the number of stimuli in the input + dataframe is equal to the expected number of stimuli across all + sessions. If not, it raises a RuntimeError and logs an error message. + + Then, it creates a pivot table of the stimuli dataframe based on + the "sf", "tf", and "direction" columns and checks if the number + of rows in the pivot table is equal to the expected number of stimuli + combinations. If not, it raises a RuntimeError and logs an error + message. 
+ + Finally, it checks if the number of triggers per stimulus is + consistent across all stimuli combinations in the pivot table. + If not, it raises a RuntimeError and logs an error message. + """ if not self.deactivate_checks: if len(stimuli) != self.n_of_stimuli_across_all_sessions: logging.error( f"Len of stimuli table: {len(stimuli)}, calculated " - + "stimuli lenght: {self.n_of_stimuli_across_all_sessions}" + + f"stimuli lenght: \ + {self.n_of_stimuli_across_all_sessions}" ) raise RuntimeError( "Number of stimuli in raw_data.stim differs from the " @@ -325,12 +371,6 @@ def get_stimuli(self, data_raw): "Number of stimuli is not correct, some combinations are " + "missing or duplicated" ) - self.uniques = { - "sf": stimuli.sf.unique(), - "tf": stimuli.tf.unique(), - "direction": stimuli.direction.unique(), - } - return stimuli def fill_up_with_stim_info(self, signal, stimuli): """Complete the signal dataframe with the stimulus information. @@ -339,7 +379,7 @@ def fill_up_with_stim_info(self, signal, stimuli): ---------- signal : pd.DataFrame Signal dataframe to be filled up with stimulus information - stimuli : _type_ + stimuli : pd.DataFrame Dataframe with the stimuli information Returns @@ -348,24 +388,15 @@ def fill_up_with_stim_info(self, signal, stimuli): Final signal dataframe with stimulus information """ - explored_idxs = set() # register only the stimulus onset - for k, start_frame in enumerate(self.stimulus_start_frames): - signal_idxs = signal.index[ - signal["frames_id"] == start_frame - ].tolist() - - s_idxs = set(signal_idxs) - if ( - explored_idxs.intersection(set(s_idxs)) - and not self.deactivate_checks - ): - raise RuntimeError("Index is duplicated across signals") - - explored_idxs.union(s_idxs) + for stimulus_index, start_frame in enumerate( + self.stimulus_start_frames + ): + mask = signal["frames_id"] == start_frame + signal_idxs = signal.index[mask] # starting frames and stimuli are alligned in source data - stimulus = stimuli.iloc[k] + stimulus = stimuli.iloc[stimulus_index] if len(signal_idxs) != self.n_roi and not self.deactivate_checks: raise RuntimeError( @@ -373,34 +404,40 @@ def fill_up_with_stim_info(self, signal, stimuli): ) # there would be the same start frame for each session for each roi - for signal_idx in signal_idxs: - signal.iloc[ - signal_idx, signal.columns.get_loc("sf") - ] = stimulus["sf"] - signal.iloc[ - signal_idx, signal.columns.get_loc("tf") - ] = stimulus["tf"] - signal.iloc[ - signal_idx, signal.columns.get_loc("direction") - ] = stimulus["direction"] - signal.iloc[ - signal_idx, signal.columns.get_loc("stimulus_onset") - ] = True + signal.loc[mask, "sf"] = stimulus["sf"] + signal.loc[mask, "tf"] = stimulus["tf"] + signal.loc[mask, "direction"] = stimulus["direction"] + signal.loc[mask, "stimulus_onset"] = True - if ( - np.any( - signal[signal.stimulus_onset] - .pivot_table(index=["sf", "tf", "direction"], aggfunc="size") - .values - != self.n_triggers_per_stimulus * self.n_roi - ) - and not self.deactivate_checks - ): - raise RuntimeError( - "Signal table was not populated correctly " - + "with stimulus information" - ) + self.check_consistency_of_signal_df(signal) logging.info("Stimulus information added to signal dataframe") return signal + + def check_consistency_of_signal_df(self, signal): + """Check the consistency of the signal dataframe with the + expected number of stimuli. 
+ + Parameters + ---------- + signal : pd.DataFrame + Signal dataframe with stimulus information + + Raises + ------ + ValueError + If the signal table was not populated correctly with + stimulus information. + """ + if not self.deactivate_checks: + pivot_table = signal[signal.stimulus_onset].pivot_table( + index=["sf", "tf", "direction"], aggfunc="size" + ) + expected = self.n_triggers_per_stimulus * self.n_roi + if not np.all(pivot_table == expected): + raise ValueError( + f"Signal table was not populated correctly \ + with stimulus information.\nPivot table:{pivot_table}, \ + expercted:{expected}" + ) diff --git a/tests/test_integration/test_photon_data.py b/tests/test_integration/test_photon_data.py index caea2aa..0385893 100644 --- a/tests/test_integration/test_photon_data.py +++ b/tests/test_integration/test_photon_data.py @@ -9,12 +9,26 @@ @pytest.fixture def get_variables(): - n_sessions = 18 - n_roi = 11 - l_signal = 11400 - n_stim = 48 - - return n_sessions, n_roi, l_signal, n_stim + n_sessions = 2 + n_roi = 5 + n_stim = 8 + n_baseline_triggers = 4 + n_triggers_per_stim = 3 + n_frames_per_trigger = 75 + + len_session = int( + (2 * n_baseline_triggers + n_stim / n_sessions * n_triggers_per_stim) + * n_frames_per_trigger + ) + + return ( + n_sessions, + n_roi, + len_session, + n_stim, + n_baseline_triggers, + n_triggers_per_stim, + ) @pytest.fixture @@ -30,7 +44,14 @@ def get_config(): @pytest.fixture def get_data_raw_object(get_variables): - n_sessions, n_roi, l_signal, n_stim = get_variables + ( + n_sessions, + n_roi, + len_session, + n_stim, + n_baseline_triggers, + n_triggers_per_stim, + ) = get_variables data = { "day": { @@ -39,8 +60,7 @@ def get_data_raw_object(get_variables): "stimulus": "stimulus", }, "imaging": "imaging", - # it should be some kind of range - "f": np.ones((n_sessions, n_roi, l_signal)), + "f": np.random.randint(-200, 200, (n_sessions, n_roi, len_session)), "is_cell": "is_cell", "r_neu": "r_neu", "stim": [ @@ -69,12 +89,19 @@ def get_data_raw_object(get_variables): 116, ] ), - "n_baseline_triggers": 4, - "directions": np.arange(0, n_stim), - "cycles_per_visual_degree": np.arange(0, n_stim), - "cycles_per_second": np.arange(0, n_stim), + "n_baseline_triggers": n_baseline_triggers, + "directions": np.random.randint( + 100, size=int(n_stim / n_sessions) + ), + "cycles_per_visual_degree": np.random.randint( + 100, size=int(n_stim / n_sessions) + ), + "cycles_per_second": np.random.randint( + 100, size=int(n_stim / n_sessions) + ), }, - "n_triggers": 152, + "n_triggers": 2 * n_baseline_triggers + + (n_stim / n_sessions) * n_triggers_per_stim, } ] * n_sessions, @@ -98,28 +125,34 @@ def get_photon_data(get_data_raw_object, get_config): def test_set_general_variables(get_variables, get_photon_data): - n_sessions, n_roi, l_signal, n_stim = get_variables + n_sessions, n_roi, len_session, n_stim, _, _ = get_variables photon_data = get_photon_data assert photon_data.n_sessions == n_sessions assert photon_data.n_roi == n_roi - assert photon_data.n_frames_per_session == l_signal - assert photon_data.n_of_stimuli_per_session == n_stim - assert photon_data.stimulus_start_frames.shape[0] == n_sessions * n_stim + assert photon_data.n_frames_per_session == len_session + assert photon_data.n_of_stimuli_per_session == n_stim / 2 + assert ( + photon_data.stimulus_start_frames.shape[0] == n_sessions * n_stim / 2 + ) -def test_make_signal_dataframe(get_photon_data, get_data_raw_object): +def test_make_signal_dataframe( + get_photon_data, get_data_raw_object, get_variables +): 
photon_data = get_photon_data signal = photon_data.make_signal_dataframe(get_data_raw_object) + n_sessions, n_roi, len_session, _, _, _ = get_variables - assert signal.shape == (2257200, 10) + assert signal.shape == (len_session * n_sessions * n_roi, 10) -def test_get_stimuli(get_photon_data, get_data_raw_object): +def test_get_stimuli(get_photon_data, get_data_raw_object, get_variables): + _, _, _, n_stim, _, _ = get_variables photon_data = get_photon_data stimuli = photon_data.get_stimuli(get_data_raw_object) - assert stimuli.shape == (864, 4) + assert stimuli.shape == (n_stim, 4) def test_fill_up_with_stim_info(get_photon_data, get_data_raw_object): @@ -128,8 +161,6 @@ def test_fill_up_with_stim_info(get_photon_data, get_data_raw_object): stimuli = photon_data.get_stimuli(get_data_raw_object) signal = photon_data.fill_up_with_stim_info(signal, stimuli) - subset = signal[signal["stimulus_onset"]].iloc[7] + frames = set(signal[signal["stimulus_onset"]].frames_id) - assert subset["sf"] == 7 - assert subset["tf"] == 7 - assert subset["direction"] == 7 + assert frames == set(photon_data.stimulus_start_frames) From 396cf067fc335b7ca077a62e3a5ea2c6490c9be9 Mon Sep 17 00:00:00 2001 From: lauraporta <29216006+lauraporta@users.noreply.github.com> Date: Thu, 2 Mar 2023 15:58:42 +0000 Subject: [PATCH 21/26] Change sf_tf class name --- rsp_vision/analysis/spatial_freq_temporal_freq.py | 6 +++--- rsp_vision/main.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/rsp_vision/analysis/spatial_freq_temporal_freq.py b/rsp_vision/analysis/spatial_freq_temporal_freq.py index 40e267e..f3ccbb3 100644 --- a/rsp_vision/analysis/spatial_freq_temporal_freq.py +++ b/rsp_vision/analysis/spatial_freq_temporal_freq.py @@ -10,7 +10,7 @@ from .utils import get_fps -class SF_TF: +class FrequencyAnalysis: """ Class for analyzing responses to stimuli with different spatial and temporal frequencies. 
@@ -293,11 +293,11 @@ def perform_sign_tests(self) -> Tuple[Dict[int, float], Dict[int, float]]: roi_responses = self.responses[self.responses.roi_id == roi] # Sign test (implemented with binomial test) - p_st[roi] = ss.binom_test( + p_st[roi] = ss.binomtest( sum([1 for d in roi_responses.subtracted if d > 0]), n=len(roi_responses.subtracted), alternative="greater", - ) + ).pvalue # Wilcoxon signed rank test _, p_wsrt[roi] = ss.wilcoxon( diff --git a/rsp_vision/main.py b/rsp_vision/main.py index 091a185..bb261dd 100644 --- a/rsp_vision/main.py +++ b/rsp_vision/main.py @@ -1,6 +1,6 @@ from rich.prompt import Prompt -from .analysis.spatial_freq_temporal_freq import SF_TF +from .analysis.spatial_freq_temporal_freq import FrequencyAnalysis from .load.load_data import load_data from .objects.enums import PhotonType from .objects.photon_data import PhotonData @@ -30,7 +30,7 @@ def main(): photon_data = PhotonData(data, PhotonType.TWO_PHOTON, config) # make analysis object - analysis = SF_TF(photon_data, photon_type) + analysis = FrequencyAnalysis(photon_data, photon_type) # calculate responsiveness and display it in a nice way responsiveness = analysis.responsiveness() From c04fa1550fe317cdcede76e0911c1e19e6c8a7e1 Mon Sep 17 00:00:00 2001 From: lauraporta <29216006+lauraporta@users.noreply.github.com> Date: Thu, 2 Mar 2023 16:00:29 +0000 Subject: [PATCH 22/26] Add share pytest.fixture file and sf_tf tests --- tests/test_integration/conftest.py | 199 +++++++++++++++++++++ tests/test_integration/test_photon_data.py | 126 ------------- tests/test_integration/test_sf_tf.py | 110 ++++++++++++ 3 files changed, 309 insertions(+), 126 deletions(-) create mode 100644 tests/test_integration/conftest.py create mode 100644 tests/test_integration/test_sf_tf.py diff --git a/tests/test_integration/conftest.py b/tests/test_integration/conftest.py new file mode 100644 index 0000000..124ee11 --- /dev/null +++ b/tests/test_integration/conftest.py @@ -0,0 +1,199 @@ +import numpy as np +import pytest + +from rsp_vision.analysis.spatial_freq_temporal_freq import FrequencyAnalysis +from rsp_vision.analysis.utils import get_fps +from rsp_vision.objects.data_raw import DataRaw +from rsp_vision.objects.enums import PhotonType +from rsp_vision.objects.photon_data import PhotonData + + +@pytest.fixture +def get_variables(): + n_sessions = 2 # if you change this please adapt "stim" accordingly + n_roi = 5 + n_stim = 24 # 8 stim and 3 rep for all + n_baseline_triggers = 4 + n_triggers_per_stim = 3 + n_frames_per_trigger = 75 + + len_session = int( + (2 * n_baseline_triggers + n_stim / n_sessions * n_triggers_per_stim) + * n_frames_per_trigger + ) + + return ( + n_sessions, + n_roi, + len_session, + n_stim, + n_baseline_triggers, + n_triggers_per_stim, + ) + + +@pytest.fixture +def get_config(): + yield { + "fps_two_photon": 30, + "trigger_interval_s": 2.5, + "n_sf": 6, + "n_tf": 6, + "n_dir": 8, + "padding": [0, 1], + "baseline": "static", + "anova_threshold": 0.1, + "response_magnitude_threshold": 0.1, + "consider_only_positive": True, + "only_positive_threshold": 0.1, + } + + +@pytest.fixture +def get_data_raw_object(get_variables): + ( + n_sessions, + n_roi, + len_session, + n_stim, + n_baseline_triggers, + n_triggers_per_stim, + ) = get_variables + + np.random.seed(101) + + data = { + "day": { + "roi": "roi", + "roi_label": "roi_label", + "stimulus": "stimulus", + }, + "imaging": "imaging", + "f": np.random.randint(-200, 200, (n_sessions, n_roi, len_session)), + "is_cell": "is_cell", + "r_neu": "r_neu", + # number of 
stim == n_session + "stim": [ + { + "screen_size": "screen_size", + "stimulus": { + # ascii string + "grey_or_static": np.array( + [ + 103, + 114, + 101, + 121, + 95, + 115, + 116, + 97, + 116, + 105, + 99, + 95, + 100, + 114, + 105, + 102, + 116, + ] + ), + "n_baseline_triggers": n_baseline_triggers, + "directions": np.array( + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + ), + "cycles_per_visual_degree": np.array( + [3, 3, 4, 4, 3, 3, 4, 4, 3, 3, 4, 4] + ), + "cycles_per_second": np.array( + [6, 5, 6, 5, 6, 5, 6, 5, 6, 5, 6, 5] + ), + }, + "n_triggers": 2 * n_baseline_triggers + + (n_stim / n_sessions) * n_triggers_per_stim, + }, + { + "screen_size": "screen_size", + "stimulus": { + # ascii string + "grey_or_static": np.array( + [ + 103, + 114, + 101, + 121, + 95, + 115, + 116, + 97, + 116, + 105, + 99, + 95, + 100, + 114, + 105, + 102, + 116, + ] + ), + "n_baseline_triggers": n_baseline_triggers, + "directions": np.array( + [ + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + ] + ), + "cycles_per_visual_degree": np.array( + [3, 3, 4, 4, 3, 3, 4, 4, 3, 3, 4, 4] + ), + "cycles_per_second": np.array( + [6, 5, 6, 5, 6, 5, 6, 5, 6, 5, 6, 5] + ), + }, + "n_triggers": 2 * n_baseline_triggers + + (n_stim / n_sessions) * n_triggers_per_stim, + }, + ], + "trig": "trig", + } + data_raw = DataRaw(data, is_allen=False) + + yield data_raw + + +@pytest.fixture +def get_photon_data(get_data_raw_object, get_config): + photon_data = PhotonData.__new__(PhotonData) + photon_data.deactivate_checks = True + photon_data.photon_type = PhotonType.TWO_PHOTON + photon_data.config = get_config + photon_data.fps = get_fps(photon_data.photon_type, photon_data.config) + photon_data.set_general_variables(get_data_raw_object) + + yield photon_data + + +@pytest.fixture +def get_sf_tf_instance(get_config, get_data_raw_object): + pt = PhotonType.TWO_PHOTON + yield FrequencyAnalysis( + PhotonData( + data_raw=get_data_raw_object, + photon_type=pt, + config=get_config, + deactivate_checks=True, + ), + photon_type=pt, + ) diff --git a/tests/test_integration/test_photon_data.py b/tests/test_integration/test_photon_data.py index 0385893..e545758 100644 --- a/tests/test_integration/test_photon_data.py +++ b/tests/test_integration/test_photon_data.py @@ -1,129 +1,3 @@ -import numpy as np -import pytest - -from rsp_vision.analysis.utils import get_fps -from rsp_vision.objects.data_raw import DataRaw -from rsp_vision.objects.enums import PhotonType -from rsp_vision.objects.photon_data import PhotonData - - -@pytest.fixture -def get_variables(): - n_sessions = 2 - n_roi = 5 - n_stim = 8 - n_baseline_triggers = 4 - n_triggers_per_stim = 3 - n_frames_per_trigger = 75 - - len_session = int( - (2 * n_baseline_triggers + n_stim / n_sessions * n_triggers_per_stim) - * n_frames_per_trigger - ) - - return ( - n_sessions, - n_roi, - len_session, - n_stim, - n_baseline_triggers, - n_triggers_per_stim, - ) - - -@pytest.fixture -def get_config(): - yield { - "fps_two_photon": 30, - "trigger_interval_s": 2.5, - "n_sf": 6, - "n_tf": 6, - "n_dir": 8, - } - - -@pytest.fixture -def get_data_raw_object(get_variables): - ( - n_sessions, - n_roi, - len_session, - n_stim, - n_baseline_triggers, - n_triggers_per_stim, - ) = get_variables - - data = { - "day": { - "roi": "roi", - "roi_label": "roi_label", - "stimulus": "stimulus", - }, - "imaging": "imaging", - "f": np.random.randint(-200, 200, (n_sessions, n_roi, len_session)), - "is_cell": "is_cell", - "r_neu": "r_neu", - "stim": [ - { - "screen_size": "screen_size", - "stimulus": { - # ascii string 
- "grey_or_static": np.array( - [ - 103, - 114, - 101, - 121, - 95, - 115, - 116, - 97, - 116, - 105, - 99, - 95, - 100, - 114, - 105, - 102, - 116, - ] - ), - "n_baseline_triggers": n_baseline_triggers, - "directions": np.random.randint( - 100, size=int(n_stim / n_sessions) - ), - "cycles_per_visual_degree": np.random.randint( - 100, size=int(n_stim / n_sessions) - ), - "cycles_per_second": np.random.randint( - 100, size=int(n_stim / n_sessions) - ), - }, - "n_triggers": 2 * n_baseline_triggers - + (n_stim / n_sessions) * n_triggers_per_stim, - } - ] - * n_sessions, - "trig": "trig", - } - data_raw = DataRaw(data, is_allen=False) - - yield data_raw - - -@pytest.fixture -def get_photon_data(get_data_raw_object, get_config): - photon_data = PhotonData.__new__(PhotonData) - photon_data.deactivate_checks = True - photon_data.photon_type = PhotonType.TWO_PHOTON - photon_data.config = get_config - photon_data.fps = get_fps(photon_data.photon_type, photon_data.config) - photon_data.set_general_variables(get_data_raw_object) - - yield photon_data - - def test_set_general_variables(get_variables, get_photon_data): n_sessions, n_roi, len_session, n_stim, _, _ = get_variables photon_data = get_photon_data diff --git a/tests/test_integration/test_sf_tf.py b/tests/test_integration/test_sf_tf.py new file mode 100644 index 0000000..64bd307 --- /dev/null +++ b/tests/test_integration/test_sf_tf.py @@ -0,0 +1,110 @@ +import numpy as np + + +def test_get_response_and_baseline_windows(get_variables, get_sf_tf_instance): + ( + _, + n_roi, + _, + n_stim, + _, + _, + ) = get_variables + + sf_tf = get_sf_tf_instance + ( + window_mask_response, + window_mask_baseline, + ) = sf_tf.get_response_and_baseline_windows() + + assert ( + len(window_mask_baseline) + == len(window_mask_response) + == (n_stim * n_roi) + ) + + +def test_calculate_mean_response_and_baseline(get_sf_tf_instance): + sf_tf = get_sf_tf_instance + sf_tf.calculate_mean_response_and_baseline() + + # based on random seed = 101 + assert int(sf_tf.responses["subtracted"].values[1]) == 34 + + +def test_nonparam_anova_over_rois(get_sf_tf_instance): + sf_tf = get_sf_tf_instance + sf_tf.calculate_mean_response_and_baseline() + p_values = sf_tf.nonparam_anova_over_rois() + + decimal_points = 3 + p_values = np.around( + np.fromiter(p_values.values(), dtype=float), decimal_points + ) + # based on random seed = 101 + p_values_seed_101 = np.array([0.055, 0.473, 0.324, 0.127, 0.653]) + + assert np.all(p_values == p_values_seed_101) + + +def test_perform_sign_tests(get_sf_tf_instance): + sf_tf = get_sf_tf_instance + sf_tf.calculate_mean_response_and_baseline() + p_st, p_wsrt = sf_tf.perform_sign_tests() + + decimal_points = 3 + p_st = np.around(np.fromiter(p_st.values(), dtype=float), decimal_points) + p_wsrt = np.around( + np.fromiter(p_wsrt.values(), dtype=float), decimal_points + ) + + # based on random seed = 101 + p_st_seed_101 = np.array([0.968, 0.924, 0.032, 0.271, 0.846]) + p_wsrt_seed_101 = np.array([0.855, 0.928, 0.18, 0.195, 0.55]) + + assert np.all(p_st == p_st_seed_101) + assert np.all(p_wsrt == p_wsrt_seed_101) + + +def test_response_magnitude(get_sf_tf_instance): + sf_tf = get_sf_tf_instance + sf_tf.calculate_mean_response_and_baseline() + + magnitude = sf_tf.response_magnitude()["magnitude"] + + decimal_points = 3 + magnitude = np.around(np.fromiter(magnitude, dtype=float), decimal_points) + # based on random seed = 101 + magnitude_seed_101 = np.array( + [ + 0.2, + -0.118, + 0.163, + -0.282, + -0.179, + 0.17, + -0.428, + -0.364, + -0.238, + 0.356, 
+            0.028,
+            0.089,
+            0.698,
+            -0.224,
+            0.302,
+            -0.049,
+            0.014,
+            0.346,
+            -0.315,
+            -0.062,
+        ]
+    )
+
+    assert np.all(magnitude == magnitude_seed_101)
+
+
+def test_find_significant_rois(get_sf_tf_instance):
+    sf_tf = get_sf_tf_instance
+    sf_tf.responsiveness()
+
+    assert len(sf_tf.responsive_rois) == 0

From 904f4182277238ea09adfe800c1f78631d9b81b6 Mon Sep 17 00:00:00 2001
From: lauraporta <29216006+lauraporta@users.noreply.github.com>
Date: Thu, 2 Mar 2023 16:03:05 +0000
Subject: [PATCH 23/26] Add dependencies

---
 pyproject.toml | 3 ++-
 tox.ini        | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8127b49..92fc611 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,7 +26,8 @@ dependencies = [
     "types-PyYAML",
     "h5py",
     "python-decouple",
-    "pandas"
+    "pandas",
+    "scipy",
 ]
 
 [project.urls]
diff --git a/tox.ini b/tox.ini
index 8dc8323..b507c54 100644
--- a/tox.ini
+++ b/tox.ini
@@ -15,5 +15,6 @@ deps =
     rich
    PyYAML
    pandas
+    scipy
 
 commands = pytest -v --color=yes --cov=rsp_vision --cov-report=xml

From 4634e0d987505f0fca465950f83a3bf7ee5d667d Mon Sep 17 00:00:00 2001
From: lauraporta <29216006+lauraporta@users.noreply.github.com>
Date: Thu, 2 Mar 2023 16:51:39 +0000
Subject: [PATCH 24/26] Add data processing description

---
 README.md | 41 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b847a66..69c3c3e 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,3 @@
-[![Python Version](https://img.shields.io/pypi/pyversions/cellfinder.svg)](https://pypi.org/project/cellfinder)
 [![Wheel](https://img.shields.io/pypi/wheel/cellfinder.svg)](https://pypi.org/project/cellfinder)
 [![Development Status](https://img.shields.io/pypi/status/cellfinder.svg)](https://github.com/brainglobe/cellfinder)
 [![Tests](https://img.shields.io/github/workflow/status/brainglobe/cellfinder/tests)](
@@ -27,3 +26,43 @@ main()
 ```
 
 This script will call the `main()` method and ask you for the name of the folder containing the data you want to analyse, which corresponds to a portion of the name of the data file.
+
+## Data processing
+
+The original data is stored as a nested dictionary, usually referred to as the `data_raw` attribute. It contains the following keys: `day`, `imaging`, `f`, `is_cell`, `r_neu`, `stim`, `trig`. For our analysis, we focus mainly on `f` and `stim`.
+
+The `f` key holds a 3D array of fluorescence traces for all cells in the recording. These cells are identified as ROIs. The array has dimensions (`n_sessions`, `n_roi`, `len_session`). In each session, there are `len_session` frames, which are subdivided into multiple triggers. Triggers can be part of a stimulus or not, and their distance from each other is constant. At the beginning and end of each session, there are "baseline triggers", while the rest are stimulus triggers. A stimulus can consist of two or three parts, signalled by triggers, in which what is displayed to the animal changes. The last part always consists of drifting gratings. If there are two parts, the drifting gratings are preceded by static gratings. If there are three parts, the static gratings are preceded by a grey screen.
+
+The total length of a session is given by the following formula:
+
+```python
+len_session = int(
+    (2 * n_baseline_triggers + n_stim / n_sessions * n_triggers_per_stim)
+    * n_frames_per_trigger
+)
+```
+where `n_frames_per_trigger` is the number of frames between two consecutive triggers.
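+
+For example, with the values used in the integration-test fixtures of this repository (`n_sessions = 2`, `n_stim = 24`, `n_baseline_triggers = 4`, `n_triggers_per_stim = 3`, `n_frames_per_trigger = 75`), the formula evaluates to:
+
+```python
+n_sessions = 2
+n_stim = 24
+n_baseline_triggers = 4
+n_triggers_per_stim = 3
+n_frames_per_trigger = 75
+
+len_session = int(
+    (2 * n_baseline_triggers + n_stim / n_sessions * n_triggers_per_stim)
+    * n_frames_per_trigger
+)
+
+# 8 baseline triggers + 12 stimuli * 3 triggers = 44 triggers of 75 frames
+assert len_session == 3300
+```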
+
+The `stim` key is a dictionary containing information about the stimuli, with `stim["stimulus"]` being the most important. It contains the sequence of randomized features of the gratings. A given stimulus, composed of `sf`/`tf`/`direction` features, is repeated three times a day and distributed across sessions.
+
+`data_raw` is reorganized into a `pandas.DataFrame` called `signal`, which is stored in the `PhotonData` object. The `pandas.DataFrame` has the following columns:
+
+```
+[
+    "day",
+    "time from beginning",
+    "frames_id",
+    "signal",
+    "roi_id",
+    "session_id",
+    "sf",
+    "tf",
+    "direction",
+    "stimulus_onset"
+]
+```
+`frames_id` corresponds to the frame number in the original data, starting from 0 for every session. `signal` is the fluorescence trace of the cell, taken directly from the `f` matrix of `data_raw`. `roi_id` is the cell's ID, `session_id` is the session ID, and `sf`, `tf`, `direction` are the stimulus features. `stimulus_onset` is a boolean indicating whether the frame is the onset of the stimulus or not. Stimulus feature cells are filled only when `stimulus_onset` is True. The `PhotonData` object performs the intersection between the original `f` matrix and the `stim` dictionary in the `fill_up_with_stim_info` method. The indexes that make this join operation possible are the `stimulus_start_frames`.
+
+Overall, the `PhotonData` object provides a more organized and accessible format for analyzing the raw data.
+
+## Spatial and temporal frequency analysis

From ed86e701491701d13de8c041ad51c1d75da848cd Mon Sep 17 00:00:00 2001
From: lauraporta <29216006+lauraporta@users.noreply.github.com>
Date: Thu, 2 Mar 2023 17:04:01 +0000
Subject: [PATCH 25/26] Add description of SF TF analysis

---
 README.md | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/README.md b/README.md
index 69c3c3e..7316ca9 100644
--- a/README.md
+++ b/README.md
@@ -66,3 +66,14 @@ The `stim` key is a dictionary containing information about the stimuli, with `s
 Overall, the `PhotonData` object provides a more organized and accessible format for analyzing the raw data.
 
 ## Spatial and temporal frequency analysis
+
+The goal of this analysis is to identify the cells that respond to the drifting gratings.
+
+The stimulus features we care about are the spatial and temporal frequency, in particular the combination of the two. This is why we focus on the `sf` and `tf` columns of the `signal` dataframe. Combinations of these two features are repeated n times, where `n = len(directions) * len(repetitions)`.
+
+In order to compute the various statistical analyses, two frame windows are taken into account: the response window, in the drifting-grating part, and the baseline window, in the static or grey part of the stimulus. The mean is computed across the frames in each window, and the difference between the two means is taken. These values are stored in the `responses` `pandas.DataFrame`.
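+
+A minimal sketch of this step for a single ROI and a single stimulus presentation (the window indices below are illustrative; in the code they are computed by `get_response_and_baseline_windows` from the trigger structure):
+
+```python
+import numpy as np
+
+rng = np.random.default_rng(101)
+trace = rng.normal(size=225)  # one stimulus presentation: 3 triggers x 75 frames
+
+# baseline window: first (static or grey) trigger, skipping the first 1.5 s
+baseline_window = np.arange(44, 75)
+# response window: last (drifting gratings) trigger, skipping the first 0.5 s
+response_window = np.arange(164, 225)
+
+subtracted = trace[response_window].mean() - trace[baseline_window].mean()
+```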
+
+Three non-parametric statistics are computed in order to quantify whether the response to each `sf`/`tf` combination is significant:
+- The Kruskal-Wallis test
+- The sign test (implemented with a binomial test)
+- The Wilcoxon signed-rank test

From b10195834308b550dd24cc9f3ec8dc706387bda8 Mon Sep 17 00:00:00 2001
From: Laura Porta <29216006+lauraporta@users.noreply.github.com>
Date: Mon, 13 Mar 2023 15:02:52 +0000
Subject: [PATCH 26/26] Update setup_for_demo.py

---
 setup_for_demo.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup_for_demo.py b/setup_for_demo.py
index 5e02719..6807c75 100644
--- a/setup_for_demo.py
+++ b/setup_for_demo.py
@@ -35,6 +35,7 @@
         "response_magnitude_threshold": 2.7,
         "consider_only_positive": False,
         "only_positive_threshold": 0.05,
+        "baseline": "static",
     }
 
     yaml.dump(content, f)
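
For reference, the three statistics listed in the README above map onto `scipy.stats` as in the following minimal sketch on synthetic data, which mirrors the calls made in `FrequencyAnalysis.nonparam_anova_over_rois` and `FrequencyAnalysis.perform_sign_tests`:

```python
import numpy as np
import scipy.stats as ss

rng = np.random.default_rng(101)
# synthetic "subtracted" responses for one ROI: 24 presentations
# (8 directions x 3 repetitions) of each of 36 sf/tf combinations
samples = rng.normal(size=(24, 36))
subtracted = samples.ravel()

# Kruskal-Wallis test: do responses differ across sf/tf combinations?
_, p_kw = ss.kruskal(*samples.T)

# sign test: is the proportion of positive responses greater than 0.5?
p_st = ss.binomtest(
    int((subtracted > 0).sum()), n=len(subtracted), alternative="greater"
).pvalue

# Wilcoxon signed-rank test: are the responses shifted above baseline?
_, p_wsrt = ss.wilcoxon(x=subtracted, alternative="greater")
```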