Commit 5b6d2bd
Merge pull request #23 from Becksteinlab/develop
[WIP] v0.1.1
ljwoods2 authored Jun 15, 2024
2 parents 10b92c5 + 86bb596 commit 5b6d2bd
Showing 23 changed files with 557 additions and 676 deletions.
2 changes: 1 addition & 1 deletion benchmarks/asv.conf.json
@@ -34,7 +34,7 @@
"conda_channels": ["conda-forge", "defaults"],

// A conda environment file that is used for environment creation.
// "conda_environment_file": "environment.yml",
// "conda_environment_file": "benchmark_env.yaml",

// The matrix of dependencies to test. Each key of the "req"
// requirements dictionary is the name of a package (in PyPI) and
14 changes: 14 additions & 0 deletions benchmarks/environment.yaml
@@ -0,0 +1,14 @@
name: asv-zarrtraj
channels:
- defaults
- conda-forge
dependencies:
- MDAnalysis>=2.7.0
- zarr>=2.11.0
- dask

### AWS dependencies ###
- s3fs=2024.3.0



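The pinned s3fs under "AWS dependencies" is what the AWS benchmarks rely on to reach S3. A minimal connectivity check, assuming the sample_profile credentials and the zarrtraj-test-data bucket referenced elsewhere in this commit:

import s3fs

# Profile, region, and bucket come from other files in this commit;
# they reflect the authors' setup and are assumptions here.
fs = s3fs.S3FileSystem(
    anon=False,
    profile="sample_profile",
    client_kwargs={"region_name": "us-west-1"},
)
print(fs.ls("zarrtraj-test-data"))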
106 changes: 92 additions & 14 deletions benchmarks/reader_bms.py
@@ -1,28 +1,106 @@
from zarrtraj import *

# from asv_runner.benchmarks.mark import skip_for_params
from zarr.storage import DirectoryStore, LRUStoreCache
import MDAnalysis.analysis.rms as rms

import os

BENCHMARK_DATA_DIR = os.getenv("BENCHMARK_DATA_DIR")

os.environ["S3_REGION_NAME"] = "us-west-1"
os.environ["AWS_PROFILE"] = "sample_profile"


class TrajReaderDiskBenchmarks(object):
"""Benchmarks for zarrtraj file striding."""
# parameterize the input zarr group
# these zarr groups should vary on
# compression, filter_precision, chunk_frames
# reads should be parameterized based on LRU cache_size- size + presence
# cache_size sizes are 1, 10, 50, 98 (all) frames
params = ([0, 1, 9], ["all", 3], [1, 10, 50], [40136, 401360, 2006800, 3933328])
param_names = ['compressor_level', 'filter_precision', 'chunk_frames', 'cache_size']

def setup(self, compressor_level, filter_precision, chunk_frames, cache_size):
store = DirectoryStore(f"{BENCHMARK_DATA_DIR}/short_{compressor_level}_{filter_precision}_{chunk_frames}.zarrtraj")
lruc = LRUStoreCache(store, max_size=cache_size)
self.traj_file = zarr.open_group(store=lruc, mode='r')

params = (
[0, 1, 9],
["all", 3],
[1, 10, 50],
)
param_names = [
"compressor_level",
"filter_precision",
"chunk_frames",
]

def setup(
self,
compressor_level,
filter_precision,
chunk_frames,
):
self.traj_file = f"{BENCHMARK_DATA_DIR}/short_{compressor_level}_{filter_precision}_{chunk_frames}.zarrtraj"
self.reader_object = ZarrTrajReader(self.traj_file)

def time_strides(self, compressor_level, filter_precision, chunk_frames, cache_size):
def time_strides(
self,
compressor_level,
filter_precision,
chunk_frames,
):
"""Benchmark striding over full trajectory"""
for ts in self.reader_object:
pass
pass


class TrajReaderAWSBenchmarks(object):
timeout = 86400
params = (
[0, 1, 9],
["all", 3],
[10, 100],
)

param_names = [
"compressor_level",
"filter_precision",
"chunk_frames",
]

def setup(self, compressor_level, filter_precision, chunk_frames):
self.traj_file = f"s3://zarrtraj-test-data/long_{compressor_level}_{filter_precision}_{chunk_frames}.zarrtraj"
self.reader_object = ZarrTrajReader(
self.traj_file,
)
# self.universe = mda.Universe(
# f"{BENCHMARK_DATA_DIR}/YiiP_system.pdb", self.traj_file
# )

def time_strides(self, compressor_level, filter_precision, chunk_frames):
"""Benchmark striding over full trajectory"""
for ts in self.reader_object:
pass

# def time_RMSD(self, compressor_level, filter_precision, chunk_frames):
# """Benchmark RMSF calculation"""
# R = rms.RMSD(
# self.universe,
# self.universe,
# select="backbone",
# ref_frame=0,
# ).run()


class RawZarrReadBenchmarks(object):
timeout = 86400
params = (
[0, 1, 9],
["all", 3],
[1, 10, 100],
)

param_names = [
"compressor_level",
"filter_precision",
"chunk_frames",
]

def setup(self, compressor_level, filter_precision, chunk_frames):
self.traj_file = f"s3://zarrtraj-test-data/long_{compressor_level}_{filter_precision}_{chunk_frames}.zarrtraj"
store = zarr.storage.FSStore(url=self.traj_file, mode="r")
# For consistency with zarrtraj defaults, use 256MB LRUCache store
cache = zarr.storage.LRUStoreCache(store, max_size=2**28)
self.zarr_group = zarr.open_group(store=cache, mode="r")
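For debugging outside asv, these benchmark classes can be driven by hand: asv instantiates the class, calls setup() with one parameter combination, then times the method with the same arguments. A sketch, assuming BENCHMARK_DATA_DIR points at a directory holding the pre-generated .zarrtraj groups that setup() names:

import os

# Must be set before importing reader_bms, which reads it at import time.
os.environ["BENCHMARK_DATA_DIR"] = "/tmp/zarrtraj-bm-data"  # assumed path

from reader_bms import TrajReaderDiskBenchmarks

bm = TrajReaderDiskBenchmarks()
# One combination from the params tuple above.
bm.setup(compressor_level=0, filter_precision="all", chunk_frames=1)
bm.time_strides(0, "all", 1)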
27 changes: 27 additions & 0 deletions benchmarks/tmp.py
@@ -0,0 +1,27 @@
import zarrtraj
import MDAnalysisData
import MDAnalysis as mda
import os

yiip = MDAnalysisData.yiip_equilibrium.fetch_yiip_equilibrium_long()


# os.environ["S3_REGION_NAME"] = "us-west-1"
# os.environ["AWS_PROFILE"] = "sample_profile"

storage_options = {
# "cache_type": "readahead",
"anon": False,
"profile": "sample_profile",
"client_kwargs": {
"region_name": "us-west-1",
},
}

u = mda.Universe(
yiip.topology,
"s3://zarrtraj-test-data/short_0_3_1.zarrtraj",
storage_options=storage_options,
)

print(u.trajectory[0])
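The storage_options dict above carries fsspec/s3fs settings. For comparison, a low-level read of the same store without MDAnalysis — a sketch under the same credential assumptions:

import zarr

# Same S3 options as storage_options above, passed through to fsspec.
store = zarr.storage.FSStore(
    "s3://zarrtraj-test-data/short_0_3_1.zarrtraj",
    mode="r",
    anon=False,
    profile="sample_profile",
    client_kwargs={"region_name": "us-west-1"},
)
root = zarr.open_group(store=store, mode="r")
print(list(root.group_keys()))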
7 changes: 7 additions & 0 deletions devtools/conda-envs/asv_env.yaml
@@ -0,0 +1,7 @@
name: zarrtraj-benchmark
channels:
- defaults
- conda-forge
dependencies:
- asv>=0.6.3

2 changes: 0 additions & 2 deletions devtools/conda-envs/test_env.yaml
@@ -31,8 +31,6 @@ dependencies:
### Notebooks ###
- jupyter

### Benchmarking ###
- asv

# Pip-only installs
# - pip:
20 changes: 16 additions & 4 deletions docs/source/getting_started.rst
@@ -21,13 +21,25 @@ Create a virtual environment and activate it::

Build this package from source::

pip install -e .
pip install -e <path/to/repo>

Development environment installation
------------------------------------

Perform a normal conda installation as described, and then
install the development and documentation dependencies::
After creating and activating a conda environment as described, install
the package with documentation and testing dependencies::

pip install -e <path/to/repo>[doc,test]

Then, to install the development dependencies::

conda env update --name zarrtraj --file devtools/conda-envs/test_env.yaml
conda env update --name zarrtraj --file docs/requirements.yaml

Or the documentation building dependencies::

conda env update --name zarrtraj --file docs/requirements.yaml

Or the benchmarking dependencies (these may need to go in a separate conda
environment, depending on package version conflicts)::

conda env update --name zarrtraj --file devtools/conda-envs/asv_env.yaml
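
A quick smoke test after any of the installs above — a minimal sketch, assuming the package exposes __version__:

import zarrtraj

print(zarrtraj.__version__)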