added dataset_size attribute to minari datasets (#158)

* added dataset_size attribute to minari datasets
* made changes as per review comments
* changed the way dataset_size is integrated with both methods; changed unit tests
* corrected pre-commit failures
* moved the get_dataset_size method inside the MinariStorage class under the name get_size
* removed duplicate data_path attribute
* code cleanup in test file
* removed more comments

Co-authored-by: Shreyans Jain <[email protected]>
Parent: ff11da1 · Commit: ee53b4c · 4 changed files with 191 additions and 2 deletions
@@ -1,8 +1,24 @@
import copy
import os

import gymnasium as gym
import numpy as np
import pytest
from gymnasium import spaces

import minari
from minari import DataCollectorV0
from minari.dataset.minari_storage import MinariStorage
from tests.common import (
    check_data_integrity,
    check_load_and_delete_dataset,
    register_dummy_envs,
)


register_dummy_envs()

file_path = os.path.join(os.path.expanduser("~"), ".minari", "datasets")


def _generate_episode_dict(
@@ -170,3 +186,144 @@ def test_episode_metadata(tmp_dataset_dir):

    ep_indices = [1, 4, 5]
    storage.update_episode_metadata(ep_metadatas, episode_indices=ep_indices)


@pytest.mark.parametrize(
    "dataset_id,env_id",
    [
        ("cartpole-test-v0", "CartPole-v1"),
        ("dummy-dict-test-v0", "DummyDictEnv-v0"),
        ("dummy-box-test-v0", "DummyBoxEnv-v0"),
        ("dummy-tuple-test-v0", "DummyTupleEnv-v0"),
        ("dummy-combo-test-v0", "DummyComboEnv-v0"),
        ("dummy-tuple-discrete-box-test-v0", "DummyTupleDiscreteBoxEnv-v0"),
    ],
)
def test_minari_get_dataset_size_from_collector_env(dataset_id, env_id):
    """Test the get_size method for a dataset made using create_dataset_from_collector_env."""
    # delete the test dataset if it already exists
    local_datasets = minari.list_local_datasets()
    if dataset_id in local_datasets:
        minari.delete_dataset(dataset_id)

    env = gym.make(env_id)

    env = DataCollectorV0(env)
    num_episodes = 100

    # Step the environment; the DataCollectorV0 wrapper does the data collection
    env.reset(seed=42)

    for episode in range(num_episodes):
        done = False
        while not done:
            action = env.action_space.sample()  # user-defined policy function
            _, _, terminated, truncated, _ = env.step(action)
            done = terminated or truncated

        env.reset()

    # Create Minari dataset and store locally
    dataset = minari.create_dataset_from_collector_env(
        dataset_id=dataset_id,
        collector_env=env,
        algorithm_name="random_policy",
        code_permalink="https://github.com/Farama-Foundation/Minari/blob/f095bfe07f8dc6642082599e07779ec1dd9b2667/tutorials/LocalStorage/local_storage.py",
        author="WillDudley",
        author_email="[email protected]",
    )

    assert dataset.storage.metadata["dataset_size"] == dataset.storage.get_size()

    check_data_integrity(dataset.storage, dataset.episode_indices)

    env.close()

    check_load_and_delete_dataset(dataset_id)
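The assertion above pins the metadata written at creation time to a fresh size computation. Downstream, a user can read the recorded size without recomputing it. A short usage sketch, assuming a dataset with this id has already been created locally (the key and accessor come from the diff; the units of the value depend on the implementation):

import minari

# Load a previously created dataset and read its recorded size.
dataset = minari.load_dataset("cartpole-test-v0")
print(dataset.storage.metadata["dataset_size"])
print(dataset.storage.get_size())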

@pytest.mark.parametrize(
    "dataset_id,env_id",
    [
        ("cartpole-test-v0", "CartPole-v1"),
        ("dummy-dict-test-v0", "DummyDictEnv-v0"),
        ("dummy-box-test-v0", "DummyBoxEnv-v0"),
        ("dummy-tuple-test-v0", "DummyTupleEnv-v0"),
        ("dummy-text-test-v0", "DummyTextEnv-v0"),
        ("dummy-combo-test-v0", "DummyComboEnv-v0"),
        ("dummy-tuple-discrete-box-test-v0", "DummyTupleDiscreteBoxEnv-v0"),
    ],
)
def test_minari_get_dataset_size_from_buffer(dataset_id, env_id):
    """Test the get_size method for a dataset made using create_dataset_from_buffers."""
    buffer = []

    # delete the test dataset if it already exists
    local_datasets = minari.list_local_datasets()
    if dataset_id in local_datasets:
        minari.delete_dataset(dataset_id)

    env = gym.make(env_id)

    observations = []
    actions = []
    rewards = []
    terminations = []
    truncations = []

    num_episodes = 10

    # Step the environment and record transitions into the buffer manually
    observation, _ = env.reset(seed=42)
    observations.append(observation)
    for episode in range(num_episodes):
        terminated = False
        truncated = False

        while not terminated and not truncated:
            action = env.action_space.sample()  # user-defined policy function
            observation, reward, terminated, truncated, _ = env.step(action)
            observations.append(observation)
            actions.append(action)
            rewards.append(reward)
            terminations.append(terminated)
            truncations.append(truncated)

        episode_buffer = {
            "observations": copy.deepcopy(observations),
            "actions": copy.deepcopy(actions),
            "rewards": np.asarray(rewards),
            "terminations": np.asarray(terminations),
            "truncations": np.asarray(truncations),
        }
        buffer.append(episode_buffer)

        observations.clear()
        actions.clear()
        rewards.clear()
        terminations.clear()
        truncations.clear()

        observation, _ = env.reset()
        observations.append(observation)

    # Create Minari dataset and store locally
    dataset = minari.create_dataset_from_buffers(
        dataset_id=dataset_id,
        env=env,
        buffer=buffer,
        algorithm_name="random_policy",
        code_permalink="https://github.com/Farama-Foundation/Minari/blob/f095bfe07f8dc6642082599e07779ec1dd9b2667/tutorials/LocalStorage/local_storage.py",
        author="WillDudley",
        author_email="[email protected]",
    )

    assert dataset.storage.metadata["dataset_size"] == dataset.storage.get_size()

    check_data_integrity(dataset.storage, dataset.episode_indices)

    env.close()

    check_load_and_delete_dataset(dataset_id)
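Both tests are parametrized over environment ids, so a single keyword filter runs the whole matrix. A sketch of a selective run, assuming pytest is installed and the tests live under the repository's tests directory (the exact file path is not shown in this diff):

import pytest

# Run only the dataset-size tests; the "tests" path and -k expression
# are assumptions about the repository layout, not taken from the diff.
pytest.main(["-k", "get_dataset_size", "tests"])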