From 2fbd37e0391de596ae1427e49d0e99ae49a8e018 Mon Sep 17 00:00:00 2001 From: Stephen Oman Date: Tue, 9 Apr 2024 09:10:40 +0100 Subject: [PATCH 01/17] Add new python versions --- .github/workflows/test_and_deploy.yml | 2 +- .github/workflows/test_package.yml | 2 +- .gitignore | 4 ++++ .pre-commit-config.yaml | 6 +++--- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test_and_deploy.yml b/.github/workflows/test_and_deploy.yml index 2812c2abb..eafa829da 100644 --- a/.github/workflows/test_and_deploy.yml +++ b/.github/workflows/test_and_deploy.yml @@ -18,7 +18,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ["3.6", "3.7", "3.8", "3.9"] + python-version: ["3.8", "3.9", "3.10", "3.11"] os: [ubuntu-20.04, macos-latest] fail-fast: false steps: diff --git a/.github/workflows/test_package.yml b/.github/workflows/test_package.yml index 0c8c07ffb..f501f7a42 100644 --- a/.github/workflows/test_package.yml +++ b/.github/workflows/test_package.yml @@ -11,7 +11,7 @@ jobs: runs-on: macos-latest strategy: matrix: - python-version: ["3.5", "3.6", "3.7", "3.8"] + python-version: ["3.8", "3.9", "3.10", "3.11"] fail-fast: false steps: - name: Setup Python ${{ matrix.python-version }} env diff --git a/.gitignore b/.gitignore index 9d4ea2863..e7dd6fe85 100644 --- a/.gitignore +++ b/.gitignore @@ -181,6 +181,7 @@ venv.bak/ # IDE .idea/ +.vscode/ # Rope project settings .ropeproject @@ -205,3 +206,6 @@ nle/version.py nle_data/ nle/fbs/ nle/nethackdir + +# Generated during tests +nle.ttyrec3.bz2 \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a6ffdd9bc..20bfce297 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,11 +1,11 @@ repos: - repo: https://github.com/ambv/black - rev: 22.3.0 + rev: 24.3.0 hooks: - id: black language_version: python3.8 - repo: https://github.com/pycqa/flake8 - rev: '3.9.2' + rev: '7.0.0' hooks: - id: flake8 additional_dependencies: 
[flake8-bugbear] @@ -23,7 +23,7 @@ repos: language: system files: ^(src\/nle|include\/nle|win\/rl|sys\/unix\/nle).*\.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$ - repo: https://github.com/pycqa/isort - rev: 5.8.0 + rev: 5.13.2 hooks: - id: isort name: isort (python) From 8ab7e928670942702e2f4d05f5a3885a986f2617 Mon Sep 17 00:00:00 2001 From: Stephen Oman Date: Tue, 9 Apr 2024 12:21:02 +0100 Subject: [PATCH 02/17] Add isort to dev dependencies --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 2eda2828e..f2862f1f6 100644 --- a/setup.py +++ b/setup.py @@ -102,6 +102,7 @@ def build_extension(self, ext): extras_deps = { "dev": [ "pre-commit>=2.0.1", + "isort>=5.13.2", "black>=19.10b0", "cmake_format>=0.6.10", "flake8>=3.7", From e5a0174bf16853d378182727bd8361a1848ea9df Mon Sep 17 00:00:00 2001 From: Stephen Oman Date: Tue, 9 Apr 2024 12:23:33 +0100 Subject: [PATCH 03/17] Switch GitHub Action PyPI publish dependency to release/v1 --- .github/workflows/test_and_deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_and_deploy.yml b/.github/workflows/test_and_deploy.yml index eafa829da..0c02c31a3 100644 --- a/.github/workflows/test_and_deploy.yml +++ b/.github/workflows/test_and_deploy.yml @@ -138,7 +138,7 @@ jobs: # NOTE: we assume that dist/ contains a built sdist (and only that). 
# Yes, we could be more defensively, but What Could Go Wrong?™ - name: Publish package to PyPI - uses: pypa/gh-action-pypi-publish@master + uses: pypa/gh-action-pypi-publish@release/v1 with: user: __token__ password: ${{ secrets.PYPI_TOKEN }} From 1e5eb059409ec2846c83286a1949ba9be6907680 Mon Sep 17 00:00:00 2001 From: Stephen Oman Date: Tue, 9 Apr 2024 12:54:48 +0100 Subject: [PATCH 04/17] Fix Black code format complaint --- nle/scripts/collect_env.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nle/scripts/collect_env.py b/nle/scripts/collect_env.py index 9fb5ed67d..41b842cbe 100644 --- a/nle/scripts/collect_env.py +++ b/nle/scripts/collect_env.py @@ -293,9 +293,9 @@ def get_env_info(): cuda_available_str = torch.cuda.is_available() cuda_version_str = torch.version.cuda else: - torch_version_str = ( + torch_version_str = cuda_available_str = cuda_version_str = ( torch_debug_mode_str - ) = cuda_available_str = cuda_version_str = "N/A" + ) = "N/A" return SystemEnv( nle_version=nle_version, From 990b8eb0eb5aa5a979fb26e0def8a6721cee8ade Mon Sep 17 00:00:00 2001 From: Stephen Oman Date: Wed, 10 Apr 2024 18:49:58 +0100 Subject: [PATCH 05/17] Move distribution package to Python 3.11 --- .github/workflows/test_and_deploy.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test_and_deploy.yml b/.github/workflows/test_and_deploy.yml index 0c02c31a3..c70b45061 100644 --- a/.github/workflows/test_and_deploy.yml +++ b/.github/workflows/test_and_deploy.yml @@ -62,14 +62,14 @@ jobs: path: nle_test_ci_${{ github.sha }}.tar.gz test_sdist: - name: Test sdist on MacOS w/ Py3.8 + name: Test sdist on MacOS w/ Py3.11 needs: test_repo runs-on: macos-latest steps: - - name: Setup Python 3.8 env + - name: Setup Python 3.11 env uses: actions/setup-python@v2 with: - python-version: 3.8 + python-version: 3.11 - name: Ensure latest pip & wheel run: "python -m pip install -q --upgrade pip wheel" - name: Install dependencies 
From 7656335d0397e9c108384bced6ad3aa95d1d235e Mon Sep 17 00:00:00 2001 From: Stephen Oman Date: Wed, 17 Apr 2024 13:11:21 +0100 Subject: [PATCH 06/17] Upgrade to Gym 26.2 and change render() calls for API compatibility --- nle/env/base.py | 7 +++++-- nle/scripts/play.py | 3 ++- nle/tests/test_envs.py | 4 ++-- setup.py | 2 +- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/nle/env/base.py b/nle/env/base.py index 2f3a71ec4..6f813c000 100644 --- a/nle/env/base.py +++ b/nle/env/base.py @@ -193,6 +193,7 @@ def __init__( allow_all_yn_questions=False, allow_all_modes=False, spawn_monsters=True, + render_mode=str | None, ): """Constructs a new NLE environment. @@ -230,6 +231,7 @@ def __init__( self._allow_all_yn_questions = allow_all_yn_questions self._allow_all_modes = allow_all_modes self._save_ttyrec_every = save_ttyrec_every + self.render_mode = render_mode if actions is None: actions = FULL_ACTIONS @@ -475,8 +477,9 @@ def get_seeds(self): """ return self.nethack.get_current_seeds() - def render(self, mode="human"): + def render(self): """Renders the state of the environment.""" + mode = self.render_mode if mode == "human": obs = self.last_observation @@ -515,7 +518,7 @@ def render(self, mode="human"): # TODO: Why return a string here but print in the other branches? 
return "\n".join([line.tobytes().decode("utf-8") for line in chars]) - return super().render(mode=mode) + return super().render() def __repr__(self): return "<%s>" % self.__class__.__name__ diff --git a/nle/scripts/play.py b/nle/scripts/play.py index 141d02eca..f8b3f354e 100644 --- a/nle/scripts/play.py +++ b/nle/scripts/play.py @@ -92,6 +92,7 @@ def play(): allow_all_yn_questions=True, allow_all_modes=True, wizard=FLAGS.wizard, + render_mode=FLAGS.render_mode, ) if FLAGS.seeds is not None: env.seed(FLAGS.seeds) @@ -117,7 +118,7 @@ def play(): act_str = repr(env.actions[action]) if action is not None else "" print(f"Previous action: {str(act_str):64s}") print("-" * 8) - env.render(FLAGS.render_mode) + env.render() print("-" * 8) print(obs["blstats"]) if not FLAGS.print_frames_separately: diff --git a/nle/tests/test_envs.py b/nle/tests/test_envs.py index 00704c8e6..682843aa2 100644 --- a/nle/tests/test_envs.py +++ b/nle/tests/test_envs.py @@ -331,14 +331,14 @@ def test_seed_rollout_seeded_int(self, env_name, rollout_len): compare_rollouts(env0, env1, rollout_len) def test_render_ansi(self, env_name, rollout_len): - env = gym.make(env_name) + env = gym.make(env_name, render_mode="ansi") env.reset() for _ in range(rollout_len): action = env.action_space.sample() _, _, done, _ = env.step(action) if done: env.reset() - output = env.render(mode="ansi") + output = env.render() assert isinstance(output, str) assert len(output.replace("\n", "")) == np.prod(nle.env.DUNGEON_SHAPE) diff --git a/setup.py b/setup.py index f2862f1f6..f48360a86 100644 --- a/setup.py +++ b/setup.py @@ -163,7 +163,7 @@ def build_extension(self, ext): ext_modules=[setuptools.Extension("nle", sources=[])], cmdclass={"build_ext": CMakeBuild}, setup_requires=["pybind11>=2.2"], - install_requires=["pybind11>=2.2", "numpy>=1.16", "gym>=0.15,<=0.23"], + install_requires=["pybind11>=2.2", "numpy>=1.16", "gym>=0.15"], extras_require=extras_deps, python_requires=">=3.5", classifiers=[ From 
861e72b5c4915473e656a168dfd6bf2f9ba22164 Mon Sep 17 00:00:00 2001 From: Stephen Oman Date: Wed, 17 Apr 2024 15:33:42 +0100 Subject: [PATCH 07/17] Environment registry changed from class to dict - changing access --- nle/tests/test_envs.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/nle/tests/test_envs.py b/nle/tests/test_envs.py index 682843aa2..3e849a22b 100644 --- a/nle/tests/test_envs.py +++ b/nle/tests/test_envs.py @@ -17,14 +17,12 @@ def get_nethack_env_ids(): - specs = gym.envs.registry.all() + specs = gym.envs.registry.keys() # Ignoring base environment, since we can't handle random actions yet with # the full action space, and this requires a whole different set of tests. # For now this is OK, since NetHackScore-v0 is very similar. return [ - spec.id - for spec in specs - if spec.id.startswith("NetHack") and spec.id != "NetHack-v0" + spec for spec in specs if spec.startswith("NetHack") and spec != "NetHack-v0" ] From b613742f229f3882a0b67ffa180971ec8c6c5d2d Mon Sep 17 00:00:00 2001 From: Stephen Oman Date: Wed, 17 Apr 2024 19:03:37 +0100 Subject: [PATCH 08/17] Change reset() return values for API compliance --- nle/env/base.py | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/nle/env/base.py b/nle/env/base.py index 6f813c000..824a11a41 100644 --- a/nle/env/base.py +++ b/nle/env/base.py @@ -331,6 +331,19 @@ def _get_observation(self, observation): for key, i in zip(self._original_observation_keys, self._original_indices) } + def _get_end_status(self, observation, done): + if self._check_abort(observation): + end_status = self.StepStatus.ABORTED + else: + end_status = self._is_episode_end(observation) + return self.StepStatus(done or end_status) + + def _get_information(self, end_status): + info = {} + info["end_status"] = end_status + info["is_ascended"] = self.nethack.how_done() == nethack.ASCENDED + return info + def print_action_meanings(self): for a_idx, a in 
enumerate(self.actions): print(a_idx, a) @@ -369,11 +382,7 @@ def step(self, action: int): self.last_observation = observation - if self._check_abort(observation): - end_status = self.StepStatus.ABORTED - else: - end_status = self._is_episode_end(observation) - end_status = self.StepStatus(done or end_status) + end_status = self._get_end_status(observation, done) reward = float( self._reward_fn(last_observation, action, observation, end_status) @@ -384,11 +393,12 @@ def step(self, action: int): self._quit_game(observation, done) done = True - info = {} - info["end_status"] = end_status - info["is_ascended"] = self.nethack.how_done() == nethack.ASCENDED - - return self._get_observation(observation), reward, done, info + return ( + self._get_observation(observation), + reward, + done, + self._get_information(end_status), + ) def _in_moveloop(self, observation): program_state = observation[self._program_state_index] @@ -416,6 +426,7 @@ def reset(self, wizkit_items=None): ) self._steps = 0 + done = False for _ in range(1000): # Get past initial phase of game. 
This should make sure @@ -434,7 +445,9 @@ def reset(self, wizkit_items=None): ) return self.reset(wizkit_items=wizkit_items) - return self._get_observation(self.last_observation) + return self._get_observation(self.last_observation), self._get_information( + self._get_end_status(self.last_observation, done) + ) def close(self): self._close_nethack() From b309918d8f169d608f6b7ff009c7cd4e4b41c330 Mon Sep 17 00:00:00 2001 From: Stephen Oman Date: Wed, 17 Apr 2024 19:25:24 +0100 Subject: [PATCH 09/17] Add additional parameters to reset() for API compliance --- nle/env/base.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/nle/env/base.py b/nle/env/base.py index 824a11a41..202406bdc 100644 --- a/nle/env/base.py +++ b/nle/env/base.py @@ -404,7 +404,7 @@ def _in_moveloop(self, observation): program_state = observation[self._program_state_index] return program_state[3] # in_moveloop - def reset(self, wizkit_items=None): + def reset(self, seed=None, options=None, wizkit_items=None): """Resets the environment. Note: @@ -413,9 +413,12 @@ def reset(self, wizkit_items=None): fail in case Nethack is initialized with some uncommon options. Returns: - [dict] Observation of the state as defined by - `self.observation_space`. 
+ (tuple) (Observation of the state as defined by + `self.observation_space`, + Extra information) """ + super().reset(seed=seed) + self._episode += 1 if self.savedir and self._episode % self._save_ttyrec_every == 0: new_ttyrec = self._ttyrec_pattern % self._episode From e7c4ea2273ebe78d81513497f44e9019b3b6462f Mon Sep 17 00:00:00 2001 From: Stephen Oman Date: Fri, 19 Apr 2024 16:36:43 +0100 Subject: [PATCH 10/17] Fix to rendering to match gym passive checkers --- nle/env/base.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/nle/env/base.py b/nle/env/base.py index 202406bdc..c0f724314 100644 --- a/nle/env/base.py +++ b/nle/env/base.py @@ -147,7 +147,7 @@ class NLE(gym.Env): >>> env.render() """ - metadata = {"render.modes": ["human", "ansi"]} + metadata = {"render_modes": ["human", "ansi", "full"]} class StepStatus(enum.IntEnum): """Specifies the status of the terminal state. @@ -193,7 +193,7 @@ def __init__( allow_all_yn_questions=False, allow_all_modes=False, spawn_monsters=True, - render_mode=str | None, + render_mode="human", ): """Constructs a new NLE environment. @@ -225,6 +225,9 @@ def __init__( If set to False, only skip click through 'MORE' on death. spawn_monsters: If False, disables normal NetHack behavior to randomly create monsters. + render_mode (str): mode used to render the screen. One of + "human" | "ansi" | "full". + Defaults to "human", i.e. what a human would see playing the game. """ self.character = character self._max_episode_steps = max_episode_steps @@ -534,8 +537,6 @@ def render(self): # TODO: Why return a string here but print in the other branches? 
return "\n".join([line.tobytes().decode("utf-8") for line in chars]) - return super().render() - def __repr__(self): return "<%s>" % self.__class__.__name__ From 0d662e4b7027d2753235406cc28a14c26b4faec8 Mon Sep 17 00:00:00 2001 From: Stephen Oman Date: Fri, 19 Apr 2024 17:13:06 +0100 Subject: [PATCH 11/17] Update calls to reset() for new API --- nle/agent/agent.py | 8 ++++---- nle/env/base.py | 10 +++++++--- nle/scripts/play.py | 2 +- nle/tests/test_envs.py | 12 ++++++------ 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/nle/agent/agent.py b/nle/agent/agent.py index 208126884..6498d11cf 100644 --- a/nle/agent/agent.py +++ b/nle/agent/agent.py @@ -141,7 +141,7 @@ def compute_policy_gradient_loss(logits, actions, advantages): def create_env(name, *args, **kwargs): - return gym.make(name, observation_keys=("glyphs", "blstats"), *args, **kwargs) + return gym.make(name, *args, observation_keys=("glyphs", "blstats"), **kwargs) def act( @@ -350,8 +350,8 @@ def initial(self): self.episode_return = torch.zeros(1, 1) self.episode_step = torch.zeros(1, 1, dtype=torch.int32) initial_done = torch.ones(1, 1, dtype=torch.uint8) - - result = _format_observations(self.gym_env.reset()) + obs, reset_info = self.gym_env.reset() + result = _format_observations(obs) result.update( reward=initial_reward, done=initial_done, @@ -368,7 +368,7 @@ def step(self, action): episode_step = self.episode_step episode_return = self.episode_return if done: - observation = self.gym_env.reset() + observation, reset_info = self.gym_env.reset() self.episode_return = torch.zeros(1, 1) self.episode_step = torch.zeros(1, 1, dtype=torch.int32) diff --git a/nle/env/base.py b/nle/env/base.py index c0f724314..d29d135b0 100644 --- a/nle/env/base.py +++ b/nle/env/base.py @@ -142,12 +142,16 @@ class NLE(gym.Env): Examples: >>> env = NLE() - >>> obs = env.reset() + >>> obs, reset_info = env.reset() >>> obs, reward, done, info = env.step(0) >>> env.render() """ - metadata = {"render_modes": 
["human", "ansi", "full"]} + # Gym expects an fps rate > 0 for render checks, but + # NetHack doesn't have any. Setting it to 42 because + # that's always the answer to life, the universe and + # everything. + metadata = {"render_modes": ["human", "ansi", "full"], "render_fps": 42} class StepStatus(enum.IntEnum): """Specifies the status of the terminal state. @@ -418,7 +422,7 @@ def reset(self, seed=None, options=None, wizkit_items=None): Returns: (tuple) (Observation of the state as defined by `self.observation_space`, - Extra information) + Extra game state information) """ super().reset(seed=seed) diff --git a/nle/scripts/play.py b/nle/scripts/play.py index f8b3f354e..5d4b5f5d1 100644 --- a/nle/scripts/play.py +++ b/nle/scripts/play.py @@ -97,7 +97,7 @@ def play(): if FLAGS.seeds is not None: env.seed(FLAGS.seeds) - obs = env.reset() + obs, reset_info = env.reset() steps = 0 episodes = 0 diff --git a/nle/tests/test_envs.py b/nle/tests/test_envs.py index 3e849a22b..1bf528af0 100644 --- a/nle/tests/test_envs.py +++ b/nle/tests/test_envs.py @@ -32,7 +32,7 @@ def rollout_env(env, max_rollout_len): Returns final reward. Does not assume that the environment has already been reset. 
""" - obs = env.reset() + obs, reset_info = env.reset() assert env.observation_space.contains(obs) for _ in range(max_rollout_len): @@ -97,14 +97,14 @@ def test_init(self, env_name, wizard): def test_reset(self, env_name, wizard): """Tests default initialization given standard env specs.""" env = gym.make(env_name, wizard=wizard) - obs = env.reset() + obs, reset_info = env.reset() assert env.observation_space.contains(obs) def test_chars_colors_specials(self, env_name, wizard): env = gym.make( env_name, observation_keys=("chars", "colors", "specials", "blstats") ) - obs = env.reset() + obs, reset_info = env.reset() assert "specials" in obs x, y = obs["blstats"][:2] @@ -159,7 +159,7 @@ def test_meatball_exists(self): """Test loading stuff via wizkit""" env = gym.make("NetHack-v0", wizard=True) found = dict(meatball=0) - obs = env.reset(wizkit_items=list(found.keys())) + obs, reset_info = env.reset(wizkit_items=list(found.keys())) for line in obs["inv_strs"]: if np.all(line == 0): break @@ -199,7 +199,7 @@ def test_inventory(self, env_name): "inv_oclasses", ), ) - obs = env.reset() + obs, reset_info = env.reset() found = dict(spellbook=0, apple=0) for line in obs["inv_strs"]: @@ -373,7 +373,7 @@ def test_kick_and_quit(self, env): assert reward == 0.0 def test_final_reward(self, env): - obs = env.reset() + obs, reset_info = env.reset() for _ in range(100): obs, reward, done, info = env.step(env.action_space.sample()) From 64f9851a59458e2ea5c04bcc821d92752428647e Mon Sep 17 00:00:00 2001 From: Stephen Oman Date: Fri, 19 Apr 2024 17:30:02 +0100 Subject: [PATCH 12/17] Replace deprecated pkg_resources with importlib.resources --- nle/nethack/nethack.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nle/nethack/nethack.py b/nle/nethack/nethack.py index a9794847f..f6639aeec 100644 --- a/nle/nethack/nethack.py +++ b/nle/nethack/nethack.py @@ -1,4 +1,5 @@ # Copyright (c) Facebook, Inc. and its affiliates. 
+import importlib.resources import os import shutil import sys @@ -7,7 +8,6 @@ import weakref import numpy as np -import pkg_resources from nle import _pynethack @@ -67,7 +67,7 @@ "time", ) -HACKDIR = pkg_resources.resource_filename("nle", "nethackdir") +HACKDIR = importlib.resources.files("nle") / "nethackdir" TTYREC_VERSION = 3 From b79bf1cf4163eeb872fa3901a79f2ee2f7d26daa Mon Sep 17 00:00:00 2001 From: Stephen Oman Date: Sun, 21 Apr 2024 13:09:43 +0100 Subject: [PATCH 13/17] Move wizkit_items inside reset() 'options' parameter --- nle/env/base.py | 8 +++----- nle/nethack/nethack.py | 13 +++++++------ nle/tests/test_envs.py | 8 ++++---- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/nle/env/base.py b/nle/env/base.py index d29d135b0..bdb00a41e 100644 --- a/nle/env/base.py +++ b/nle/env/base.py @@ -411,7 +411,7 @@ def _in_moveloop(self, observation): program_state = observation[self._program_state_index] return program_state[3] # in_moveloop - def reset(self, seed=None, options=None, wizkit_items=None): + def reset(self, seed=None, options=None): """Resets the environment. Note: @@ -431,9 +431,7 @@ def reset(self, seed=None, options=None, wizkit_items=None): new_ttyrec = self._ttyrec_pattern % self._episode else: new_ttyrec = None - self.last_observation = self.nethack.reset( - new_ttyrec, wizkit_items=wizkit_items - ) + self.last_observation = self.nethack.reset(new_ttyrec, options=options) self._steps = 0 done = False @@ -453,7 +451,7 @@ def reset(self, seed=None, options=None, wizkit_items=None): warnings.warn( "Not in moveloop after 1000 tries, aborting (ttyrec: %s)." 
% new_ttyrec ) - return self.reset(wizkit_items=wizkit_items) + return self.reset(seed=seed, options=options) return self._get_observation(self.last_observation), self._get_information( self._get_end_status(self.last_observation, done) diff --git a/nle/nethack/nethack.py b/nle/nethack/nethack.py index f6639aeec..c801f2b24 100644 --- a/nle/nethack/nethack.py +++ b/nle/nethack/nethack.py @@ -250,12 +250,13 @@ def step(self, action): self._pynethack.step(action) return self._step_return(), self._pynethack.done() - def reset(self, new_ttyrec=None, wizkit_items=None): - if wizkit_items is not None: - if not self._wizard: - raise ValueError("Set wizard=True to use the wizkit option.") - # TODO ideally we need to check the validity of the requested items - self._pynethack.set_wizkit("\n".join(wizkit_items)) + def reset(self, new_ttyrec=None, options=None): + if options is not None: + if options["wizkit_items"] is not None: + if not self._wizard: + raise ValueError("Set wizard=True to use the wizkit option.") + # TODO ideally we need to check the validity of the requested items + self._pynethack.set_wizkit("\n".join(options["wizkit_items"])) if new_ttyrec is None: self._pynethack.reset() else: diff --git a/nle/tests/test_envs.py b/nle/tests/test_envs.py index 1bf528af0..d32ca369d 100644 --- a/nle/tests/test_envs.py +++ b/nle/tests/test_envs.py @@ -159,7 +159,7 @@ def test_meatball_exists(self): """Test loading stuff via wizkit""" env = gym.make("NetHack-v0", wizard=True) found = dict(meatball=0) - obs, reset_info = env.reset(wizkit_items=list(found.keys())) + obs, reset_info = env.reset(options={"wizkit_items": list(found.keys())}) for line in obs["inv_strs"]: if np.all(line == 0): break @@ -173,16 +173,16 @@ def test_meatball_exists(self): def test_wizkit_no_wizard_mode(self): env = gym.make("NetHack-v0", wizard=False) with pytest.raises(ValueError) as e_info: - env.reset(wizkit_items=["meatball"]) + env.reset(options={"wizkit_items": ["meatball"]}) assert 
e_info.value.args[0] == "Set wizard=True to use the wizkit option." def test_wizkit_file(self): env = gym.make("NetHack-v0", wizard=True) req_items = ["meatball", "apple"] - env.reset(wizkit_items=req_items) + env.reset(options={"wizkit_items": req_items}) # TODO: Test inventory here. - env.reset(wizkit_items=req_items) + env.reset(options={"wizkit_items": req_items}) del env From 4ded60c06608edb96bacf30ccca9274151c07495 Mon Sep 17 00:00:00 2001 From: Stephen Oman Date: Sun, 21 Apr 2024 18:54:41 +0100 Subject: [PATCH 14/17] Updates to support newer step() API --- nle/agent/agent.py | 4 +++- nle/env/base.py | 6 +++++- nle/scripts/play.py | 2 +- nle/tests/test_envs.py | 18 +++++++++--------- nle/tests/test_profile.py | 2 +- nle/tests/test_system.py | 6 +++--- 6 files changed, 22 insertions(+), 16 deletions(-) diff --git a/nle/agent/agent.py b/nle/agent/agent.py index 6498d11cf..1b14ebf11 100644 --- a/nle/agent/agent.py +++ b/nle/agent/agent.py @@ -362,7 +362,9 @@ def initial(self): return result def step(self, action): - observation, reward, done, unused_info = self.gym_env.step(action.item()) + observation, reward, done, truncated, unused_info = self.gym_env.step( + action.item() + ) self.episode_step += 1 self.episode_return += reward episode_step = self.episode_step diff --git a/nle/env/base.py b/nle/env/base.py index bdb00a41e..0c02699d4 100644 --- a/nle/env/base.py +++ b/nle/env/base.py @@ -143,7 +143,7 @@ class NLE(gym.Env): Examples: >>> env = NLE() >>> obs, reset_info = env.reset() - >>> obs, reward, done, info = env.step(0) + >>> obs, reward, done, truncation, info = env.step(0) >>> env.render() """ @@ -371,6 +371,7 @@ def step(self, action: int): - (*float*): a reward; see ``self._reward_fn`` to see how it is specified. - (*bool*): True if the state is terminal, False otherwise. + - (*bool*): True if the episode is truncated, False otherwise. - (*dict*): a dictionary of extra information (such as `end_status`, i.e. a status info -- death, task win, etc. 
-- for the terminal state). @@ -400,10 +401,13 @@ def step(self, action: int): self._quit_game(observation, done) done = True + truncated = False + return ( self._get_observation(observation), reward, done, + truncated, self._get_information(end_status), ) diff --git a/nle/scripts/play.py b/nle/scripts/play.py index 5d4b5f5d1..187992320 100644 --- a/nle/scripts/play.py +++ b/nle/scripts/play.py @@ -142,7 +142,7 @@ def play(): if is_raw_env: obs, done = env.step(action) else: - obs, reward, done, info = env.step(action) + obs, reward, done, truncated, info = env.step(action) steps += 1 if is_raw_env: diff --git a/nle/tests/test_envs.py b/nle/tests/test_envs.py index d32ca369d..c316b7a43 100644 --- a/nle/tests/test_envs.py +++ b/nle/tests/test_envs.py @@ -37,7 +37,7 @@ def rollout_env(env, max_rollout_len): for _ in range(max_rollout_len): a = env.action_space.sample() - obs, reward, done, info = env.step(a) + obs, reward, done, truncated, info = env.step(a) assert env.observation_space.contains(obs) assert isinstance(reward, float) assert isinstance(done, bool) @@ -61,8 +61,8 @@ def compare_rollouts(env0, env1, max_rollout_len): step = 0 while True: a = env0.action_space.sample() - obs0, reward0, done0, info0 = env0.step(a) - obs1, reward1, done1, info1 = env1.step(a) + obs0, reward0, done0, truncated0, info0 = env0.step(a) + obs1, reward1, done1, truncated1, info1 = env1.step(a) step += 1 s0, s1 = term_screen(obs0), term_screen(obs1) @@ -73,7 +73,7 @@ def compare_rollouts(env0, env1, max_rollout_len): np.testing.assert_equal(obs0, obs1) assert reward0 == reward1 assert done0 == done1 - + assert truncated0 == truncated1 assert info0 == info1 if done0 or step >= max_rollout_len: @@ -333,7 +333,7 @@ def test_render_ansi(self, env_name, rollout_len): env.reset() for _ in range(rollout_len): action = env.action_space.sample() - _, _, done, _ = env.step(action) + _, _, done, _, _ = env.step(action) if done: env.reset() output = env.render() @@ -361,13 +361,13 @@ def 
env(self): def test_kick_and_quit(self, env): env.reset() kick = env.actions.index(nethack.Command.KICK) - obs, reward, done, _ = env.step(kick) + obs, reward, done, _, _ = env.step(kick) assert b"In what direction? " in bytes(obs["message"]) env.step(nethack.MiscAction.MORE) # Hack to quit. env.nethack.step(nethack.M("q")) - obs, reward, done, _ = env.step(env.actions.index(ord("y"))) + obs, reward, done, _, _ = env.step(env.actions.index(ord("y"))) assert done assert reward == 0.0 @@ -376,7 +376,7 @@ def test_final_reward(self, env): obs, reset_info = env.reset() for _ in range(100): - obs, reward, done, info = env.step(env.action_space.sample()) + obs, reward, done, _, info = env.step(env.action_space.sample()) if done: break @@ -391,7 +391,7 @@ def test_final_reward(self, env): # Hack to quit. env.nethack.step(nethack.M("q")) - _, reward, done, _ = env.step(env.actions.index(ord("y"))) + _, reward, done, _, _ = env.step(env.actions.index(ord("y"))) assert done assert reward == 0.0 diff --git a/nle/tests/test_profile.py b/nle/tests/test_profile.py index 273d092bc..21be4bcca 100644 --- a/nle/tests/test_profile.py +++ b/nle/tests/test_profile.py @@ -64,7 +64,7 @@ def seed(): def play_1k_steps(): env.reset() for a in actions: - _, _, done, _ = env.step(a) + _, _, done, _, _ = env.step(a) if done: seed() env.reset() diff --git a/nle/tests/test_system.py b/nle/tests/test_system.py index 0289c84be..375408a64 100644 --- a/nle/tests/test_system.py +++ b/nle/tests/test_system.py @@ -15,7 +15,7 @@ def new_env_one_step(): env = gym.make("NetHackScore-v0") env.reset() - obs, reward, done, _ = env.step(0) + obs, reward, done, _, _ = env.step(0) return done @@ -50,7 +50,7 @@ def test_two_nles(self): num_resets = 1 while num_resets < 4: - _, _, done, _ = env.step(random.choice(ACTIONS)) + _, _, done, _, _ = env.step(random.choice(ACTIONS)) if done: queue.append(env) env = queue.pop(0) @@ -82,7 +82,7 @@ def target(): while num_resets < 4: a = random.choice(ACTIONS) - _, _, 
done, _ = env.step(a) + _, _, done, _, _ = env.step(a) if done: resetqueue.put(env) env = readyqueue.get() From 361db5a997565721d61d0b3ef364fca0b26cee84 Mon Sep 17 00:00:00 2001 From: Stephen Oman Date: Mon, 22 Apr 2024 16:10:31 +0100 Subject: [PATCH 15/17] Switch to Gymnasium --- .github/workflows/test_package.yml | 2 +- nle/agent/agent.py | 2 +- nle/env/__init__.py | 4 +-- nle/env/base.py | 2 +- nle/scripts/play.py | 10 +++--- nle/tests/test_db.py | 2 +- nle/tests/test_envs.py | 50 +++++++++++++++--------------- nle/tests/test_profile.py | 4 +-- nle/tests/test_system.py | 2 +- setup.py | 10 +++--- 10 files changed, 45 insertions(+), 43 deletions(-) diff --git a/.github/workflows/test_package.yml b/.github/workflows/test_package.yml index f501f7a42..fccefba57 100644 --- a/.github/workflows/test_package.yml +++ b/.github/workflows/test_package.yml @@ -27,4 +27,4 @@ jobs: run: "pip install nle" - name: Check nethack is installed run: | - python -c 'import nle, gym; e = gym.make("NetHack-v0"); e.reset(); e.step(0)' + python -c 'import nle; import gymnasium as gym; e = gym.make("NetHack-v0"); e.reset(); e.step(0)' diff --git a/nle/agent/agent.py b/nle/agent/agent.py index 1b14ebf11..f64ab7302 100644 --- a/nle/agent/agent.py +++ b/nle/agent/agent.py @@ -37,7 +37,7 @@ '`pip install "nle[agent]"`' ) -import gym # noqa: E402 +import gymnasium as gym # noqa: E402 import nle # noqa: F401, E402 from nle import nethack # noqa: E402 diff --git a/nle/env/__init__.py b/nle/env/__init__.py index 107c23198..03fa147a7 100644 --- a/nle/env/__init__.py +++ b/nle/env/__init__.py @@ -1,6 +1,6 @@ # Copyright (c) Facebook, Inc. and its affiliates. 
-import gym -from gym.envs import registration +import gymnasium as gym +from gymnasium.envs import registration from nle.env.base import NLE, DUNGEON_SHAPE diff --git a/nle/env/base.py b/nle/env/base.py index 0c02699d4..68bc665fa 100644 --- a/nle/env/base.py +++ b/nle/env/base.py @@ -9,7 +9,7 @@ import warnings import weakref -import gym +import gymnasium as gym import numpy as np from nle import nethack diff --git a/nle/scripts/play.py b/nle/scripts/play.py index 187992320..d4fd6c938 100644 --- a/nle/scripts/play.py +++ b/nle/scripts/play.py @@ -11,7 +11,7 @@ import timeit import tty -import gym +import gymnasium as gym import nle # noqa: F401 from nle import nethack @@ -60,7 +60,7 @@ def get_action(env, is_raw_env): if is_raw_env: action = ch else: - action = env.actions.index(ch) + action = env.unwrapped.actions.index(ch) break except ValueError: print( @@ -95,7 +95,7 @@ def play(): render_mode=FLAGS.render_mode, ) if FLAGS.seeds is not None: - env.seed(FLAGS.seeds) + env.unwrapped.seed(FLAGS.seeds) obs, reset_info = env.reset() @@ -115,7 +115,9 @@ def play(): if not is_raw_env: print("-" * 8 + " " * 71) print(f"Previous reward: {str(reward):64s}") - act_str = repr(env.actions[action]) if action is not None else "" + act_str = ( + repr(env.unwrapped.actions[action]) if action is not None else "" + ) print(f"Previous action: {str(act_str):64s}") print("-" * 8) env.render() diff --git a/nle/tests/test_db.py b/nle/tests/test_db.py index e370cb0ed..770cada33 100644 --- a/nle/tests/test_db.py +++ b/nle/tests/test_db.py @@ -26,7 +26,7 @@ def gen_ttyrecs(savedir): for _ in range(5): # Need to end naturally to be recorded. 
for c in [ord(" "), ord(" "), ord("<"), ord("y")]: - _, _, done, *_ = env.step(env.actions.index(c)) + _, _, done, *_ = env.step(env.unwrapped.actions.index(c)) assert done env.reset() env.close() diff --git a/nle/tests/test_envs.py b/nle/tests/test_envs.py index c316b7a43..f356038de 100644 --- a/nle/tests/test_envs.py +++ b/nle/tests/test_envs.py @@ -7,7 +7,7 @@ import sys import tempfile -import gym +import gymnasium as gym import numpy as np import pytest @@ -120,11 +120,11 @@ def test_default_wizard_mode(self, env_name, wizard): if env_name.startswith("NetHackChallenge-"): pytest.skip("No wizard mode in NetHackChallenge") env = gym.make(env_name, wizard=wizard) - assert "playmode:debug" in env.nethack.options + assert "playmode:debug" in env.unwrapped.nethack.options else: # do not send a parameter to test a default env = gym.make(env_name) - assert "playmode:debug" not in env.nethack.options + assert "playmode:debug" not in env.unwrapped.nethack.options class TestWizardMode: @@ -254,7 +254,7 @@ def test_rollout(self, env_name, rollout_len): def test_rollout_no_archive(self, env_name, rollout_len): """Tests rollout_len steps (or until termination) of random policy.""" env = gym.make(env_name, savedir=None) - assert env.savedir is None + assert env.unwrapped.savedir is None rollout_env(env, rollout_len) def test_seed_interface_output(self, env_name, rollout_len): @@ -267,12 +267,12 @@ def test_seed_interface_output(self, env_name, rollout_len): env0 = gym.make(env_name) env1 = gym.make(env_name) - seed_list0 = env0.seed() + seed_list0 = env0.unwrapped.seed() env0.reset() - assert env0.get_seeds() == seed_list0 + assert env0.unwrapped.get_seeds() == seed_list0 - seed_list1 = env1.seed(*seed_list0) + seed_list1 = env1.unwrapped.seed(*seed_list0) assert seed_list0 == seed_list1 def test_seed_rollout_seeded(self, env_name, rollout_len): @@ -285,15 +285,15 @@ def test_seed_rollout_seeded(self, env_name, rollout_len): env0 = gym.make(env_name) env1 = 
gym.make(env_name) - env0.seed(123456, 789012) + env0.unwrapped.seed(123456, 789012) obs0 = env0.reset() - seeds0 = env0.get_seeds() + seeds0 = env0.unwrapped.get_seeds() assert seeds0 == (123456, 789012, False) - env1.seed(*seeds0) + env1.unwrapped.seed(*seeds0) obs1 = env1.reset() - seeds1 = env1.get_seeds() + seeds1 = env1.unwrapped.get_seeds() assert seeds0 == seeds1 @@ -315,13 +315,13 @@ def test_seed_rollout_seeded_int(self, env_name, rollout_len): random.randrange(sys.maxsize), False, ) - env0.seed(*initial_seeds) + env0.unwrapped.seed(*initial_seeds) obs0 = env0.reset() - seeds0 = env0.get_seeds() + seeds0 = env0.unwrapped.get_seeds() - env1.seed(*seeds0) + env1.unwrapped.seed(*seeds0) obs1 = env1.reset() - seeds1 = env1.get_seeds() + seeds1 = env1.unwrapped.get_seeds() assert seeds0 == seeds1 == initial_seeds @@ -360,14 +360,14 @@ def env(self): def test_kick_and_quit(self, env): env.reset() - kick = env.actions.index(nethack.Command.KICK) + kick = env.unwrapped.actions.index(nethack.Command.KICK) obs, reward, done, _, _ = env.step(kick) assert b"In what direction? " in bytes(obs["message"]) env.step(nethack.MiscAction.MORE) # Hack to quit. - env.nethack.step(nethack.M("q")) - obs, reward, done, _, _ = env.step(env.actions.index(ord("y"))) + env.unwrapped.nethack.step(nethack.M("q")) + obs, reward, done, _, _ = env.step(env.unwrapped.actions.index(ord("y"))) assert done assert reward == 0.0 @@ -387,11 +387,11 @@ def test_final_reward(self, env): # Hopefully, we got some positive reward by now. # Get out of any menu / yn_function. - env.step(env.actions.index(ord("\r"))) + env.step(env.unwrapped.actions.index(ord("\r"))) # Hack to quit. 
- env.nethack.step(nethack.M("q")) - _, reward, done, _, _ = env.step(env.actions.index(ord("y"))) + env.unwrapped.nethack.step(nethack.M("q")) + _, reward, done, _, _ = env.step(env.unwrapped.actions.index(ord("y"))) assert done assert reward == 0.0 @@ -403,7 +403,7 @@ def test_ttyrec_every(self): for episode in range(10): env.reset() for c in [ord(" "), ord(" "), ord("<"), ord("y")]: - _, _, done, *_ = env.step(env.actions.index(c)) + _, _, done, *_ = env.step(env.unwrapped.actions.index(c)) assert done if episode % 2 != 0: @@ -449,10 +449,10 @@ def test_no_seed_setting(self): with pytest.raises( RuntimeError, match="NetHackChallenge doesn't allow seed changes" ): - env.seed() + env.unwrapped.seed() with pytest.raises(RuntimeError, match="Should not try changing seeds"): - env.nethack.set_initial_seeds(0, 0, True) + env.unwrapped.nethack.set_initial_seeds(0, 0, True) if not nethack.NLE_ALLOW_SEEDING: with pytest.raises(RuntimeError, match="Seeding not enabled"): - env.nethack._pynethack.set_initial_seeds(0, 0, True) + env.unwrapped.nethack._pynethack.set_initial_seeds(0, 0, True) diff --git a/nle/tests/test_profile.py b/nle/tests/test_profile.py index 21be4bcca..220f385bb 100644 --- a/nle/tests/test_profile.py +++ b/nle/tests/test_profile.py @@ -7,7 +7,7 @@ # to run import os -import gym +import gymnasium as gym import numpy as np import pytest @@ -59,7 +59,7 @@ def seed(): return nonlocal seeds seeds += 1 - env.seed(seeds, 2 * seeds) + env.unwrapped.seed(seeds, 2 * seeds) def play_1k_steps(): env.reset() diff --git a/nle/tests/test_system.py b/nle/tests/test_system.py index 375408a64..b7c79df67 100644 --- a/nle/tests/test_system.py +++ b/nle/tests/test_system.py @@ -4,7 +4,7 @@ import random import threading -import gym +import gymnasium as gym import pytest import nle # noqa: F401 diff --git a/setup.py b/setup.py index f48360a86..979bf0ede 100644 --- a/setup.py +++ b/setup.py @@ -163,19 +163,19 @@ def build_extension(self, ext): 
ext_modules=[setuptools.Extension("nle", sources=[])], cmdclass={"build_ext": CMakeBuild}, setup_requires=["pybind11>=2.2"], - install_requires=["pybind11>=2.2", "numpy>=1.16", "gym>=0.15"], + install_requires=["pybind11>=2.2", "numpy>=1.16", "gymnasium>=0.29.1"], extras_require=extras_deps, - python_requires=">=3.5", + python_requires=">=3.8", classifiers=[ "License :: OSI Approved :: Nethack General Public License", "Development Status :: 4 - Beta", "Operating System :: POSIX :: Linux", "Operating System :: MacOS", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.5", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Programming Language :: C", "Programming Language :: C++", "Topic :: Scientific/Engineering :: Artificial Intelligence", From 818d24909773523d37fb54d892fa69cd8055d505 Mon Sep 17 00:00:00 2001 From: Stephen Oman Date: Tue, 23 Apr 2024 09:13:49 +0100 Subject: [PATCH 16/17] Fix to enable importlib.resources in Python 3.8 --- nle/nethack/nethack.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/nle/nethack/nethack.py b/nle/nethack/nethack.py index c801f2b24..d58188f4e 100644 --- a/nle/nethack/nethack.py +++ b/nle/nethack/nethack.py @@ -67,7 +67,16 @@ "time", ) -HACKDIR = importlib.resources.files("nle") / "nethackdir" +# "The past is a foreign country, they do things differently there." +# - L. P. Hartley (1895 - 1972), "The Go-Between" +# +# importlib.resources API was indeed different in Python 3.8. 
+if sys.version_info < (3, 9): + with importlib.resources.path("nle", "nethackdir") as nh: + HACKDIR = os.path.abspath(nh) +else: + HACKDIR = importlib.resources.files("nle") / "nethackdir" + TTYREC_VERSION = 3 From 8cb2d13e701b1deb35bf47a12493717329223c7c Mon Sep 17 00:00:00 2001 From: Stephen Oman Date: Tue, 23 Apr 2024 19:30:30 +0100 Subject: [PATCH 17/17] Update README.md with Gymnasium instructions --- README.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index bb18d9ff2..b465f8d74 100644 --- a/README.md +++ b/README.md @@ -76,12 +76,12 @@ to add papers. # Getting started Starting with NLE environments is extremely simple, provided one is familiar -with other gym / RL environments. +with other Gymnasium / RL environments. ## Installation -NLE requires `python>=3.5`, `cmake>=3.15` to be installed and available both when building the +NLE requires `python>=3.8`, `cmake>=3.15` to be installed and available both when building the package, and at runtime. On **MacOS**, one can use `Homebrew` as follows: @@ -136,7 +136,7 @@ README](docker/README.md). After installation, one can try out any of the provided tasks as follows: ```python ->>> import gym +>>> import gymnasium as gym >>> import nle >>> env = gym.make("NetHackScore-v0") >>> env.reset() # each reset generates a new dungeon @@ -174,8 +174,9 @@ $ pip install "nle[agent]" $ python -m nle.agent.agent --num_actors 80 --batch_size 32 --unroll_length 80 --learning_rate 0.0001 --entropy_cost 0.0001 --use_lstm --total_steps 1000000000 ``` -Plot the mean return over the last 100 episodes: +Plot the mean return over the last 100 episodes (requires gnuplotlib): ```bash +$ pip install gnuplotlib $ python -m nle.scripts.plot ``` ``` @@ -222,8 +223,8 @@ see [this document](./CONTRIBUTING.md). NLE is direct fork of [NetHack](https://github.com/nethack/nethack) and therefore contains code that operates on many different levels of abstraction. 
This ranges from low-level game logic, to the higher-level administration of -repeated nethack games, and finally to binding of these games to Python `gym` -environment. +repeated nethack games, and finally to binding of these games to Python +`gymnasium` environment. If you want to learn more about the architecture of `nle` and how it works under the hood, checkout the [architecture document](./doc/nle/ARCHITECTURE.md).