From f5ee34c5cd4187c514c601911bfdfab0c907066c Mon Sep 17 00:00:00 2001 From: Heinrich Kuttler Date: Fri, 4 Jun 2021 13:03:38 +0100 Subject: [PATCH 1/4] Make compiler happier. --- src/nle.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/nle.c b/src/nle.c index a3ed8292b..2dce4b89c 100644 --- a/src/nle.c +++ b/src/nle.c @@ -73,6 +73,8 @@ vt_char_color_extract(TMTCHAR *c) case (TMT_COLOR_WHITE): color = (c->a.bold) ? CLR_WHITE : CLR_GRAY; // c = 15:7 break; + case (TMT_COLOR_MAX): + break; } if (c->a.reverse) { @@ -220,8 +222,8 @@ nle_fflush(FILE *stream) /* Only act on fflush(stdout). */ if (stream != stdout) { fprintf(stderr, - "Warning: nle_flush called with unexpected FILE pointer %d ", - (int) stream); + "Warning: nle_flush called with unexpected FILE pointer %p ", + stream); return fflush(stream); } nle_ctx_t *nle = current_nle_ctx; From c10db1249e1d4f70a697441d7e0d6276894b7cd5 Mon Sep 17 00:00:00 2001 From: Heinrich Kuttler Date: Fri, 4 Jun 2021 13:41:17 +0100 Subject: [PATCH 2/4] Add how_done method to NetHack object to expose done reason (DIED, ASCENDED, ...). --- include/nleobs.h | 1 + nle/tests/test_nethack.py | 2 + src/end.c | 4 ++ src/nle.c | 8 +++ win/rl/pynethack.cc | 102 ++++++++++++++++++++++++-------------- 5 files changed, 81 insertions(+), 36 deletions(-) diff --git a/include/nleobs.h b/include/nleobs.h index f83cfe508..cb7bcb184 100644 --- a/include/nleobs.h +++ b/include/nleobs.h @@ -16,6 +16,7 @@ typedef struct nle_observation { int action; int done; char in_normal_game; /* Bool indicating if other obs are set. */ + int how_done; /* If game is really_done, how it ended. 
*/ short *glyphs; /* Size ROWNO * (COLNO - 1) */ unsigned char *chars; /* Size ROWNO * (COLNO - 1) */ unsigned char *colors; /* Size ROWNO * (COLNO - 1) */ diff --git a/nle/tests/test_nethack.py b/nle/tests/test_nethack.py index 370068f42..7cf52973f 100644 --- a/nle/tests/test_nethack.py +++ b/nle/tests/test_nethack.py @@ -74,6 +74,8 @@ def test_run_n_episodes(self, tmpdir, game, episodes=3): ch = random.choice(ACTIONS) _, done = game.step(ch) if done: + # This will typically be DIED, but could be POISONED, etc. + assert int(game._pynethack.how_done()) < int(nethack.GENOCIDED) break steps += 1 diff --git a/src/end.c b/src/end.c index 0c6366fb6..6501169e8 100644 --- a/src/end.c +++ b/src/end.c @@ -16,6 +16,8 @@ #endif #include "dlb.h" +extern void FDECL(nle_done, (int)); + /* add b to long a, convert wraparound to max value */ #define nowrap_add(a, b) (a = ((a + b) < 0 ? LONG_MAX : (a + b))) @@ -1473,6 +1475,8 @@ int how; /* don't bother counting to see whether it should be plural */ } + nle_done(how); + Sprintf(pbuf, "%s %s the %s...", Goodbye(), plname, (how != ASCENDED) ? (const char *) ((flags.female && urole.name.f) diff --git a/src/nle.c b/src/nle.c index 2dce4b89c..cd6baa4a6 100644 --- a/src/nle.c +++ b/src/nle.c @@ -330,6 +330,14 @@ nethack_exit(int status) nle_yield(NULL); } +/* Called in really_done() in end.c to get "how". */ +void +nle_done(int how) +{ + nle_ctx_t *nle = current_nle_ctx; + nle->observation->how_done = how; +} + nle_seeds_init_t *nle_seeds_init; /* See rng.c. 
*/ diff --git a/win/rl/pynethack.cc b/win/rl/pynethack.cc index 9dda96639..6fad2f3cc 100644 --- a/win/rl/pynethack.cc +++ b/win/rl/pynethack.cc @@ -212,6 +212,12 @@ class Nethack return obs_.in_normal_game; } + game_end_types + how_done() + { + return static_cast<game_end_types>(obs_.how_done); + } + private: void reset(FILE *ttyrec) @@ -268,7 +274,8 @@ PYBIND11_MODULE(_pynethack, m) .def("set_initial_seeds", &Nethack::set_initial_seeds) .def("set_seeds", &Nethack::set_seeds) .def("get_seeds", &Nethack::get_seeds) - .def("in_normal_game", &Nethack::in_normal_game); + .def("in_normal_game", &Nethack::in_normal_game) + .def("how_done", &Nethack::how_done); py::module mn = m.def_submodule( "nethack", "Collection of NetHack constants and functions"); @@ -354,6 +361,27 @@ PYBIND11_MODULE(_pynethack, m) // From monsym.h. mn.attr("MAXMCLASSES") = py::int_(static_cast<int>(MAXMCLASSES)); + // game_end_types from hack.h (used in end.c) + py::enum_<game_end_types>(mn, "game_end_types", + "This is the way the game ends.") + .value("DIED", DIED) + .value("CHOKING", CHOKING) + .value("POISONING", POISONING) + .value("STARVING", STARVING) + .value("DROWNING", DROWNING) + .value("BURNING", BURNING) + .value("DISSOLVED", DISSOLVED) + .value("CRUSHING", CRUSHING) + .value("STONING", STONING) + .value("TURNED_SLIME", TURNED_SLIME) + .value("GENOCIDED", GENOCIDED) + .value("PANICKED", PANICKED) + .value("TRICKED", TRICKED) + .value("QUIT", QUIT) + .value("ESCAPED", ESCAPED) + .value("ASCENDED", ASCENDED) + .export_values(); + // "Special" mapglyph mn.attr("MG_CORPSE") = py::int_(MG_CORPSE); mn.attr("MG_INVIS") = py::int_(MG_INVIS); @@ -392,19 +420,20 @@ PYBIND11_MODULE(_pynethack, m) [](int glyph) { return glyph_is_warning(glyph); }); py::class_(mn, "permonst", "The permonst struct.") - .def("__init__", - // See https://github.com/pybind/pybind11/issues/2394 - [](py::detail::value_and_holder &v_h, int index) { - if (index < 0 || index >= NUMMONS) - throw std::out_of_range( - "Index should be between 0 and NUMMONS (" - + 
std::to_string(NUMMONS) + ") but got " - + std::to_string(index)); - v_h.value_ptr() = &mons[index]; - v_h.inst->owned = false; - v_h.set_holder_constructed(true); - }, - py::detail::is_new_style_constructor()) + .def( + "__init__", + // See https://github.com/pybind/pybind11/issues/2394 + [](py::detail::value_and_holder &v_h, int index) { + if (index < 0 || index >= NUMMONS) + throw std::out_of_range( + "Index should be between 0 and NUMMONS (" + + std::to_string(NUMMONS) + ") but got " + + std::to_string(index)); + v_h.value_ptr() = &mons[index]; + v_h.inst->owned = false; + v_h.set_holder_constructed(true); + }, + py::detail::is_new_style_constructor()) .def_readonly("mname", &permonst::mname) /* full name */ .def_readonly("mlet", &permonst::mlet) /* symbol */ .def_readonly("mlevel", &permonst::mlevel) /* base monster level */ @@ -468,28 +497,29 @@ PYBIND11_MODULE(_pynethack, m) mn, "objclass", "The objclass struct.\n\n" "All fields are constant and don't reflect user changes.") - .def("__init__", - // See https://github.com/pybind/pybind11/issues/2394 - [](py::detail::value_and_holder &v_h, int i) { - if (i < 0 || i >= NUM_OBJECTS) - throw std::out_of_range( - "Index should be between 0 and NUM_OBJECTS (" - + std::to_string(NUM_OBJECTS) + ") but got " - + std::to_string(i)); - - /* Initialize. Cannot depend on o_init.c as it pulls - * in all kinds of other code. Instead, do what - * makedefs.c does at set it here. - * Alternative: Get the pointer from the game itself? - * Dangerous! 
- */ - objects[i].oc_name_idx = objects[i].oc_descr_idx = i; - - v_h.value_ptr() = &objects[i]; - v_h.inst->owned = false; - v_h.set_holder_constructed(true); - }, - py::detail::is_new_style_constructor()) + .def( + "__init__", + // See https://github.com/pybind/pybind11/issues/2394 + [](py::detail::value_and_holder &v_h, int i) { + if (i < 0 || i >= NUM_OBJECTS) + throw std::out_of_range( + "Index should be between 0 and NUM_OBJECTS (" + + std::to_string(NUM_OBJECTS) + ") but got " + + std::to_string(i)); + + /* Initialize. Cannot depend on o_init.c as it pulls + * in all kinds of other code. Instead, do what + * makedefs.c does at set it here. + * Alternative: Get the pointer from the game itself? + * Dangerous! + */ + objects[i].oc_name_idx = objects[i].oc_descr_idx = i; + + v_h.value_ptr() = &objects[i]; + v_h.inst->owned = false; + v_h.set_holder_constructed(true); + }, + py::detail::is_new_style_constructor()) .def_readonly("oc_name_idx", &objclass::oc_name_idx) /* index of actual name */ .def_readonly( From 8f13c34bccd7c55e26000981d93b096464fbdadc Mon Sep 17 00:00:00 2001 From: Heinrich Kuttler Date: Fri, 4 Jun 2021 17:09:55 +0100 Subject: [PATCH 3/4] Funnel through the how_done to Nethack class; add is_ascended to gym. 
--- nle/env/base.py | 20 +++++++++++--------- nle/nethack/nethack.py | 3 +++ nle/tests/test_envs.py | 5 +---- nle/tests/test_nethack.py | 2 +- 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/nle/env/base.py b/nle/env/base.py index b5c04a608..98c8495e8 100644 --- a/nle/env/base.py +++ b/nle/env/base.py @@ -401,16 +401,18 @@ def step(self, action: int): done = True info = {} - if end_status: - # TODO: fix stats - # stats = self._collect_stats(last_observation, end_status) - # stats = stats._asdict() - stats = {} - info["stats"] = stats - - if self._stats_logger is not None: - self._stats_logger.writerow(stats) + # TODO: fix stats + # if end_status: + # # stats = self._collect_stats(last_observation, end_status) + # # stats = stats._asdict() + # # stats = {} + # # info["stats"] = stats + # + # # if self._stats_logger is not None: + # # self._stats_logger.writerow(stats) + info["end_status"] = end_status + info["is_ascended"] = self.env.how_done() == nethack.ASCENDED return self._get_observation(observation), reward, done, info diff --git a/nle/nethack/nethack.py b/nle/nethack/nethack.py index 547fd7a98..2c8a818b5 100644 --- a/nle/nethack/nethack.py +++ b/nle/nethack/nethack.py @@ -224,3 +224,6 @@ def get_current_seeds(self): def in_normal_game(self): return self._pynethack.in_normal_game() + + def how_done(self): + return self._pynethack.how_done() diff --git a/nle/tests/test_envs.py b/nle/tests/test_envs.py index 87e687b4e..45637766f 100644 --- a/nle/tests/test_envs.py +++ b/nle/tests/test_envs.py @@ -44,6 +44,7 @@ def rollout_env(env, max_rollout_len): assert isinstance(done, bool) assert isinstance(info, dict) if done: + assert not info["is_ascended"] break env.close() return reward @@ -74,10 +75,6 @@ def compare_rollouts(env0, env1, max_rollout_len): assert reward0 == reward1 assert done0 == done1 - if done0: - assert "stats" in info0 # just to be sure - assert "stats" in info1 - assert info0 == info1 if done0 or step >= max_rollout_len: diff --git 
a/nle/tests/test_nethack.py b/nle/tests/test_nethack.py index 7cf52973f..326b4a81c 100644 --- a/nle/tests/test_nethack.py +++ b/nle/tests/test_nethack.py @@ -75,7 +75,7 @@ def test_run_n_episodes(self, tmpdir, game, episodes=3): _, done = game.step(ch) if done: # This will typically be DIED, but could be POISONED, etc. - assert int(game._pynethack.how_done()) < int(nethack.GENOCIDED) + assert int(game.how_done()) < int(nethack.GENOCIDED) break steps += 1 From c5f077c8cddb748ac9b25c5163a48f2a3987f590 Mon Sep 17 00:00:00 2001 From: Heinrich Kuttler Date: Fri, 4 Jun 2021 17:57:07 +0100 Subject: [PATCH 4/4] Add "score" to internal observation. What exactly NH means by score is a bit context-dependent. The value u.urexp factors into botl_score() in botl.c (if enabled) as well as into the computation in really_done() in end.c. In many cases, the result of botl_score() will be what really_done() reports as the end-of-game score. In many other cases, it won't: E.g. after dying (not quitting, escaping, ascending or bug-related exits), score for acquired gold is reduced by 10%. Also if escaped or ascended, valuable gems, artefacts etc will give extra score. The computation in end.c adds to u.urexp, which will reflect the end-of-game score for a few brief "steps" at the very end of the game (but not before). 
--- include/nleobs.h | 2 +- nle/tests/test_profile.py | 4 ++++ win/rl/winrl.cc | 5 ++++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/nleobs.h b/include/nleobs.h index cb7bcb184..e1b4684f6 100644 --- a/include/nleobs.h +++ b/include/nleobs.h @@ -5,7 +5,7 @@ #define NLE_MESSAGE_SIZE 256 #define NLE_BLSTATS_SIZE 25 #define NLE_PROGRAM_STATE_SIZE 6 -#define NLE_INTERNAL_SIZE 8 +#define NLE_INTERNAL_SIZE 9 #define NLE_INVENTORY_SIZE 55 #define NLE_INVENTORY_STR_LENGTH 80 #define NLE_SCREEN_DESCRIPTION_LENGTH 80 diff --git a/nle/tests/test_profile.py b/nle/tests/test_profile.py index af49894b5..709f7da14 100644 --- a/nle/tests/test_profile.py +++ b/nle/tests/test_profile.py @@ -2,6 +2,10 @@ # # Copyright (c) Facebook, Inc. and its affiliates. +# Requires +# pip install pytest-benchmark +# to run + import pytest import numpy as np diff --git a/win/rl/winrl.cc b/win/rl/winrl.cc index 970a61fee..fd5f3d517 100644 --- a/win/rl/winrl.cc +++ b/win/rl/winrl.cc @@ -256,6 +256,8 @@ NetHackRL::fill_obs(nle_obs *obs) obs->internal[5] = nle_seeds[0]; /* core */ obs->internal[6] = nle_seeds[1]; /* disp */ obs->internal[7] = u.uhunger; + obs->internal[8] = + u.urexp; /* score (careful! check botl_score() and end.c) */ } if ((!program_state.something_worth_saving && !program_state.in_moveloop) @@ -412,7 +414,8 @@ NetHackRL::fill_obs(nle_obs *obs) } } if (obs->screen_descriptions) { - memcpy(obs->screen_descriptions, &screen_descriptions_, screen_descriptions_.size()); + memcpy(obs->screen_descriptions, &screen_descriptions_, + screen_descriptions_.size()); } }