Commit 5b6d2bd
Merge pull request #23 from Becksteinlab/develop
[WIP] v0.1.1
ljwoods2 authored Jun 15, 2024
2 parents 10b92c5 + 86bb596 commit 5b6d2bd
Showing 23 changed files with 557 additions and 676 deletions.
2 changes: 1 addition & 1 deletion benchmarks/asv.conf.json
@@ -34,7 +34,7 @@
"conda_channels": ["conda-forge", "defaults"],

// A conda environment file that is used for environment creation.
// "conda_environment_file": "environment.yml",
// "conda_environment_file": "benchmark_env.yaml",

// The matrix of dependencies to test. Each key of the "req"
// requirements dictionary is the name of a package (in PyPI) and
14 changes: 14 additions & 0 deletions benchmarks/environment.yaml
@@ -0,0 +1,14 @@
name: asv-zarrtraj
channels:
- defaults
- conda-forge
dependencies:
- MDAnalysis>=2.7.0
- zarr>=2.11.0
- dask

### AWS dependencies ###
- s3fs=2024.3.0



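The pinned s3fs under "AWS dependencies" is what the AWS benchmarks rely on to reach S3. A minimal connectivity check, assuming the sample_profile credentials and the zarrtraj-test-data bucket referenced elsewhere in this commit:

import s3fs

# Profile, region, and bucket come from other files in this commit;
# they reflect the authors' setup and are assumptions here.
fs = s3fs.S3FileSystem(
    anon=False,
    profile="sample_profile",
    client_kwargs={"region_name": "us-west-1"},
)
print(fs.ls("zarrtraj-test-data"))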
106 changes: 92 additions & 14 deletions benchmarks/reader_bms.py
@@ -1,28 +1,106 @@
from zarrtraj import *

# from asv_runner.benchmarks.mark import skip_for_params
from zarr.storage import DirectoryStore, LRUStoreCache
import MDAnalysis.analysis.rms as rms

import os

BENCHMARK_DATA_DIR = os.getenv("BENCHMARK_DATA_DIR")

os.environ["S3_REGION_NAME"] = "us-west-1"
os.environ["AWS_PROFILE"] = "sample_profile"


class TrajReaderDiskBenchmarks(object):
"""Benchmarks for zarrtraj file striding."""
# parameterize the input zarr group
# these zarr groups should vary on
# compression, filter_precision, chunk_frames
# reads should be parameterized based on LRU cache_size- size + presence
# cache_size sizes are 1, 10, 50, 98 (all) frames
params = ([0, 1, 9], ["all", 3], [1, 10, 50], [40136, 401360, 2006800, 3933328])
param_names = ['compressor_level', 'filter_precision', 'chunk_frames', 'cache_size']

def setup(self, compressor_level, filter_precision, chunk_frames, cache_size):
store = DirectoryStore(f"{BENCHMARK_DATA_DIR}/short_{compressor_level}_{filter_precision}_{chunk_frames}.zarrtraj")
lruc = LRUStoreCache(store, max_size=cache_size)
self.traj_file = zarr.open_group(store=lruc, mode='r')

params = (
[0, 1, 9],
["all", 3],
[1, 10, 50],
)
param_names = [
"compressor_level",
"filter_precision",
"chunk_frames",
]

def setup(
self,
compressor_level,
filter_precision,
chunk_frames,
):
self.traj_file = f"{BENCHMARK_DATA_DIR}/short_{compressor_level}_{filter_precision}_{chunk_frames}.zarrtraj"
self.reader_object = ZarrTrajReader(self.traj_file)

def time_strides(self, compressor_level, filter_precision, chunk_frames, cache_size):
def time_strides(
self,
compressor_level,
filter_precision,
chunk_frames,
):
"""Benchmark striding over full trajectory"""
for ts in self.reader_object:
pass
pass


class TrajReaderAWSBenchmarks(object):
timeout = 86400
params = (
[0, 1, 9],
["all", 3],
[10, 100],
)

param_names = [
"compressor_level",
"filter_precision",
"chunk_frames",
]

def setup(self, compressor_level, filter_precision, chunk_frames):
self.traj_file = f"s3://zarrtraj-test-data/long_{compressor_level}_{filter_precision}_{chunk_frames}.zarrtraj"
self.reader_object = ZarrTrajReader(
self.traj_file,
)
# self.universe = mda.Universe(
# f"{BENCHMARK_DATA_DIR}/YiiP_system.pdb", self.traj_file
# )

def time_strides(self, compressor_level, filter_precision, chunk_frames):
"""Benchmark striding over full trajectory"""
for ts in self.reader_object:
pass

# def time_RMSD(self, compressor_level, filter_precision, chunk_frames):
# """Benchmark RMSF calculation"""
# R = rms.RMSD(
# self.universe,
# self.universe,
# select="backbone",
# ref_frame=0,
# ).run()


class RawZarrReadBenchmarks(object):
timeout = 86400
params = (
[0, 1, 9],
["all", 3],
[1, 10, 100],
)

param_names = [
"compressor_level",
"filter_precision",
"chunk_frames",
]

def setup(self, compressor_level, filter_precision, chunk_frames):
self.traj_file = f"s3://zarrtraj-test-data/long_{compressor_level}_{filter_precision}_{chunk_frames}.zarrtraj"
store = zarr.storage.FSStore(url=self.traj_file, mode="r")
# For consistency with zarrtraj defaults, use 256MB LRUCache store
cache = zarr.storage.LRUStoreCache(store, max_size=2**28)
self.zarr_group = zarr.open_group(store=cache, mode="r")
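For debugging outside asv, these benchmark classes can be driven by hand: asv instantiates the class, calls setup() with one parameter combination, then times the method with the same arguments. A sketch, assuming BENCHMARK_DATA_DIR points at a directory holding the pre-generated .zarrtraj groups that setup() names:

import os

# Must be set before importing reader_bms, which reads it at import time.
os.environ["BENCHMARK_DATA_DIR"] = "/tmp/zarrtraj-bm-data"  # assumed path

from reader_bms import TrajReaderDiskBenchmarks

bm = TrajReaderDiskBenchmarks()
# One combination from the params tuple above.
bm.setup(compressor_level=0, filter_precision="all", chunk_frames=1)
bm.time_strides(0, "all", 1)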
27 changes: 27 additions & 0 deletions benchmarks/tmp.py
@@ -0,0 +1,27 @@
import zarrtraj
import MDAnalysisData
import MDAnalysis as mda
import os

yiip = MDAnalysisData.yiip_equilibrium.fetch_yiip_equilibrium_long()


# os.environ["S3_REGION_NAME"] = "us-west-1"
# os.environ["AWS_PROFILE"] = "sample_profile"

storage_options = {
# "cache_type": "readahead",
"anon": False,
"profile": "sample_profile",
"client_kwargs": {
"region_name": "us-west-1",
},
}

u = mda.Universe(
yiip.topology,
"s3://zarrtraj-test-data/short_0_3_1.zarrtraj",
storage_options=storage_options,
)

print(u.trajectory[0])
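The storage_options dict above carries fsspec/s3fs settings. For comparison, a low-level read of the same store without MDAnalysis — a sketch under the same credential assumptions:

import zarr

# Same S3 options as storage_options above, passed through to fsspec.
store = zarr.storage.FSStore(
    "s3://zarrtraj-test-data/short_0_3_1.zarrtraj",
    mode="r",
    anon=False,
    profile="sample_profile",
    client_kwargs={"region_name": "us-west-1"},
)
root = zarr.open_group(store=store, mode="r")
print(list(root.group_keys()))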
7 changes: 7 additions & 0 deletions devtools/conda-envs/asv_env.yaml
@@ -0,0 +1,7 @@
name: zarrtraj-benchmark
channels:
- defaults
- conda-forge
dependencies:
- asv>=0.6.3

2 changes: 0 additions & 2 deletions devtools/conda-envs/test_env.yaml
@@ -31,8 +31,6 @@ dependencies:
### Notebooks ###
- jupyter

### Benchmarking ###
- asv

# Pip-only installs
# - pip:
20 changes: 16 additions & 4 deletions docs/source/getting_started.rst
@@ -21,13 +21,25 @@ Create a virtual environment and activate it::

Build this package from source::

pip install -e .
pip install -e <path/to/repo>

Development environment installation
------------------------------------

Perform a normal conda installation as described, and then
install the development and documentation dependencies::
After creating and activating a conda environment as described, install
the package with documentation and testing dependencies::

pip install -e <path/to/repo>[doc,test]

Then, to install the development dependencies::

conda env update --name zarrtraj --file devtools/conda-envs/test_env.yaml
conda env update --name zarrtraj --file docs/requirements.yaml

Or the documentation building dependencies::

conda env update --name zarrtraj --file docs/requirements.yaml

Or the benchmarking dependencies (these may need to go in a separate conda
environment, depending on package version conflicts)::

conda env update --name zarrtraj --file devtools/conda-envs/asv_env.yaml
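
A quick smoke test after any of the installs above — a minimal sketch, assuming the package exposes __version__:

import zarrtraj

print(zarrtraj.__version__)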