Skip to content

Commit

Permalink
[Breaking] Differentiate between velocity-cartpole and position-cartpole (#26)
Browse files Browse the repository at this point in the history

* Release v1.0.3

* Rename cartpole and pendulum envs to avoid confusion. Also add velocity cartpole env
  • Loading branch information
smorad authored Aug 23, 2023
1 parent 1b6dfdc commit b6b9c3b
Show file tree
Hide file tree
Showing 10 changed files with 182 additions and 72 deletions.
4 changes: 2 additions & 2 deletions examples/play_cartpole.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from popgym.envs.stateless_cartpole import StatelessCartPole
from popgym.envs.stateless_cartpole import PositionOnlyCartPole

if __name__ == "__main__":
game = StatelessCartPole()
game = PositionOnlyCartPole()
done = False
obs, info = game.reset(return_info=True)
reward = -float("inf")
Expand Down
13 changes: 8 additions & 5 deletions examples/play_minesweeper.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
from popgym.envs.minesweeper import MineSweeperHard
from popgym.envs.minesweeper import MineSweeperEasy

if __name__ == "__main__":
game = MineSweeperHard()
game = MineSweeperEasy()
done = False
obs, info = game.reset(return_info=True)
obs, info = game.reset()
reward = -float("inf")
game.render()
done = False

while not done:
action = input("input index:").split(",")
action_int = (int(action[0]), int(action[1]))
obs, reward, done, info = game.step(action_int)
game.render()
obs, reward, truncated, terminated, info = game.step(action_int)
done = truncated or terminated
print(obs)
# game.render()
print("reward:", reward)
4 changes: 2 additions & 2 deletions examples/play_pendulum.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from popgym.envs.stateless_pendulum import StatelessPendulum
from popgym.envs.stateless_pendulum import PositionOnlyPendulum

if __name__ == "__main__":
game = StatelessPendulum()
game = PositionOnlyPendulum()
done = False
obs, info = game.reset(return_info=True)
reward = -float("inf")
Expand Down
108 changes: 64 additions & 44 deletions popgym/envs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,29 @@
MultiarmedBanditHard,
MultiarmedBanditMedium,
)
from popgym.envs.noisy_stateless_cartpole import (
NoisyStatelessCartPole,
NoisyStatelessCartPoleEasy,
NoisyStatelessCartPoleHard,
NoisyStatelessCartPoleMedium,
from popgym.envs.noisy_position_only_cartpole import (
NoisyPositionOnlyCartPole,
NoisyPositionOnlyCartPoleEasy,
NoisyPositionOnlyCartPoleHard,
NoisyPositionOnlyCartPoleMedium,
)
from popgym.envs.noisy_stateless_pendulum import (
NoisyStatelessPendulum,
NoisyStatelessPendulumEasy,
NoisyStatelessPendulumHard,
NoisyStatelessPendulumMedium,
NoisyPositionOnlyPendulum,
NoisyPositionOnlyPendulumEasy,
NoisyPositionOnlyPendulumHard,
NoisyPositionOnlyPendulumMedium,
)
from popgym.envs.position_only_cartpole import (
PositionOnlyCartPole,
PositionOnlyCartPoleEasy,
PositionOnlyCartPoleHard,
PositionOnlyCartPoleMedium,
)
from popgym.envs.position_only_pendulum import (
PositionOnlyPendulum,
PositionOnlyPendulumEasy,
PositionOnlyPendulumHard,
PositionOnlyPendulumMedium,
)
from popgym.envs.repeat_first import (
RepeatFirst,
Expand All @@ -73,17 +85,11 @@
RepeatPreviousHard,
RepeatPreviousMedium,
)
from popgym.envs.stateless_cartpole import (
StatelessCartPole,
StatelessCartPoleEasy,
StatelessCartPoleHard,
StatelessCartPoleMedium,
)
from popgym.envs.stateless_pendulum import (
StatelessPendulum,
StatelessPendulumEasy,
StatelessPendulumHard,
StatelessPendulumMedium,
from popgym.envs.velocity_only_cartpole import (
VelocityOnlyCartPole,
VelocityOnlyCartPoleEasy,
VelocityOnlyCartPoleHard,
VelocityOnlyCartPoleMedium,
)

#
Expand Down Expand Up @@ -123,34 +129,46 @@
# Control envs
#
CONTROL: Dict[gym.Env, Dict[str, Any]] = {
StatelessCartPole: {"id": "popgym-StatelessCartPole-v0"},
StatelessPendulum: {
"id": "popgym-StatelessPendulum-v0",
PositionOnlyCartPole: {"id": "popgym-PositionOnlyCartPole-v0"},
PositionOnlyPendulum: {
"id": "popgym-PositionOnlyPendulum-v0",
},
VelocityOnlyCartPole: {
"id": "popgym-VelocityOnlyCartpole-v0",
}
# BipedalWalker: {"id": "popgym-BipedalWalker-v0"},
}

CONTROL_EASY: Dict[gym.Env, Dict[str, Any]] = {
StatelessCartPoleEasy: {"id": "popgym-StatelessCartPoleEasy-v0"},
StatelessPendulumEasy: {
"id": "popgym-StatelessPendulumEasy-v0",
PositionOnlyCartPoleEasy: {"id": "popgym-PositionOnlyCartPoleEasy-v0"},
PositionOnlyPendulumEasy: {
"id": "popgym-PositionOnlyPendulumEasy-v0",
},
VelocityOnlyCartPoleEasy: {
"id": "popgym-VelocityOnlyCartpoleEasy-v0",
}
# BipedalWalkerEasy: {"id": "popgym-BipedalWalkerEasy-v0"},
}

CONTROL_MEDIUM: Dict[gym.Env, Dict[str, Any]] = {
StatelessCartPoleMedium: {"id": "popgym-StatelessCartPoleMedium-v0"},
StatelessPendulumMedium: {
"id": "popgym-StatelessPendulumMedium-v0",
PositionOnlyCartPoleMedium: {"id": "popgym-PositionOnlyCartPoleMedium-v0"},
PositionOnlyPendulumMedium: {
"id": "popgym-PositionOnlyPendulumMedium-v0",
},
VelocityOnlyCartPoleMedium: {
"id": "popgym-VelocityOnlyCartpoleMedium-v0",
}
# BipedalWalkerMedium: {"id": "popgym-BipedalWalkerMedium-v0"},
}

CONTROL_HARD: Dict[gym.Env, Dict[str, Any]] = {
StatelessCartPoleHard: {"id": "popgym-StatelessCartPoleHard-v0"},
StatelessPendulumHard: {
"id": "popgym-StatelessPendulumHard-v0",
PositionOnlyCartPoleHard: {"id": "popgym-PositionOnlyCartPoleHard-v0"},
PositionOnlyPendulumHard: {
"id": "popgym-PositionOnlyPendulumHard-v0",
},
VelocityOnlyCartPoleHard: {
"id": "popgym-VelocityOnlyCartpoleHard-v0",
}
# BipedalWalkerHard: {"id": "popgym-BipedalWalkerHard-v0"},
}

Expand All @@ -160,36 +178,38 @@
# Noisy envs
#
NOISY: Dict[gym.Env, Dict[str, Any]] = {
NoisyStatelessCartPole: {"id": "popgym-NoisyStatelessCartPole-v0"},
NoisyStatelessPendulum: {
"id": "popgym-NoisyStatelessPendulum-v0",
NoisyPositionOnlyCartPole: {"id": "popgym-NoisyPositionOnlyCartPole-v0"},
NoisyPositionOnlyPendulum: {
"id": "popgym-NoisyPositionOnlyPendulum-v0",
},
MultiarmedBandit: {"id": "popgym-MultiarmedBandit-v0"},
# NonstationaryBandit: {"id": "popgym-NonstationaryBandit-v0"},
}

NOISY_EASY: Dict[gym.Env, Dict[str, Any]] = {
NoisyStatelessCartPoleEasy: {"id": "popgym-NoisyStatelessCartPoleEasy-v0"},
NoisyStatelessPendulumEasy: {
"id": "popgym-NoisyStatelessPendulumEasy-v0",
NoisyPositionOnlyCartPoleEasy: {"id": "popgym-NoisyPositionOnlyCartPoleEasy-v0"},
NoisyPositionOnlyPendulumEasy: {
"id": "popgym-NoisyPositionOnlyPendulumEasy-v0",
},
MultiarmedBanditEasy: {"id": "popgym-MultiarmedBanditEasy-v0"},
# NonstationaryBanditEasy: {"id": "popgym-NonstationaryBanditEasy-v0"},
}

NOISY_MEDIUM: Dict[gym.Env, Dict[str, Any]] = {
NoisyStatelessCartPoleMedium: {"id": "popgym-NoisyStatelessCartPoleMedium-v0"},
NoisyStatelessPendulumMedium: {
"id": "popgym-NoisyStatelessPendulumMedium-v0",
NoisyPositionOnlyCartPoleMedium: {
"id": "popgym-NoisyPositionOnlyCartPoleMedium-v0"
},
NoisyPositionOnlyPendulumMedium: {
"id": "popgym-NoisyPositionOnlyPendulumMedium-v0",
},
MultiarmedBanditMedium: {"id": "popgym-MultiarmedBanditMedium-v0"},
# NonstationaryBanditMedium: {"id": "popgym-NonstationaryBanditMedium-v0"},
}

NOISY_HARD: Dict[gym.Env, Dict[str, Any]] = {
NoisyStatelessCartPoleHard: {"id": "popgym-NoisyStatelessCartPoleHard-v0"},
NoisyStatelessPendulumHard: {
"id": "popgym-NoisyStatelessPendulumHard-v0",
NoisyPositionOnlyCartPoleHard: {"id": "popgym-NoisyPositionOnlyCartPoleHard-v0"},
NoisyPositionOnlyPendulumHard: {
"id": "popgym-NoisyPositionOnlyPendulumHard-v0",
},
MultiarmedBanditHard: {"id": "popgym-MultiarmedBanditHard-v0"},
# NonstationaryBanditHard: {"id": "popgym-NonstationaryBanditHard-v0"},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
import numpy as np
from gymnasium.core import ActType, ObsType

from popgym.envs.stateless_cartpole import StatelessCartPole
from popgym.envs.position_only_cartpole import PositionOnlyCartPole


class NoisyStatelessCartPole(StatelessCartPole):
class NoisyPositionOnlyCartPole(PositionOnlyCartPole):
def __init__(self, *args, **kwargs):
noise_sigma = kwargs.pop("noise_sigma", 0.1)
super().__init__(*args, **kwargs)
Expand All @@ -33,16 +33,16 @@ def reset(
return init_obs, info


class NoisyStatelessCartPoleEasy(NoisyStatelessCartPole):
class NoisyPositionOnlyCartPoleEasy(NoisyPositionOnlyCartPole):
def __init__(self):
super().__init__(noise_sigma=0.1)


class NoisyStatelessCartPoleMedium(NoisyStatelessCartPole):
class NoisyPositionOnlyCartPoleMedium(NoisyPositionOnlyCartPole):
def __init__(self):
super().__init__(noise_sigma=0.2)


class NoisyStatelessCartPoleHard(NoisyStatelessCartPole):
class NoisyPositionOnlyCartPoleHard(NoisyPositionOnlyCartPole):
def __init__(self):
super().__init__(noise_sigma=0.3)
10 changes: 5 additions & 5 deletions popgym/envs/noisy_stateless_pendulum.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
import numpy as np
from gymnasium.core import ActType, ObsType

from popgym.envs.stateless_pendulum import StatelessPendulum
from popgym.envs.position_only_pendulum import PositionOnlyPendulum


class NoisyStatelessPendulum(StatelessPendulum):
class NoisyPositionOnlyPendulum(PositionOnlyPendulum):
def __init__(self, *args, **kwargs):
noise_sigma = kwargs.pop("noise_sigma", 0.1)
super().__init__(*args, **kwargs)
Expand All @@ -32,16 +32,16 @@ def reset(
return init_obs, info


class NoisyStatelessPendulumEasy(NoisyStatelessPendulum):
class NoisyPositionOnlyPendulumEasy(NoisyPositionOnlyPendulum):
def __init__(self):
super().__init__(noise_sigma=0.1)


class NoisyStatelessPendulumMedium(NoisyStatelessPendulum):
class NoisyPositionOnlyPendulumMedium(NoisyPositionOnlyPendulum):
def __init__(self):
super().__init__(noise_sigma=0.2)


class NoisyStatelessPendulumHard(NoisyStatelessPendulum):
class NoisyPositionOnlyPendulumHard(NoisyPositionOnlyPendulum):
def __init__(self):
super().__init__(noise_sigma=0.3)
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from popgym.core.env import POPGymEnv


class StatelessCartPole(CartPoleEnv, POPGymEnv):
class PositionOnlyCartPole(CartPoleEnv, POPGymEnv):
"""Partially observable variant of the CartPole gym environment.
https://github.com/openai/gym/blob/master/gym/envs/classic_control/
Expand Down Expand Up @@ -73,15 +73,15 @@ def reset(
return init_obs, info


class StatelessCartPoleEasy(StatelessCartPole):
class PositionOnlyCartPoleEasy(PositionOnlyCartPole):
pass


class StatelessCartPoleMedium(StatelessCartPole):
class PositionOnlyCartPoleMedium(PositionOnlyCartPole):
def __init__(self, *args, **kwargs):
super().__init__(max_episode_length=400)


class StatelessCartPoleHard(StatelessCartPole):
class PositionOnlyCartPoleHard(PositionOnlyCartPole):
def __init__(self, *args, **kwargs):
super().__init__(max_episode_length=600)
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from popgym.core.env import POPGymEnv


class StatelessPendulum(PendulumEnv, POPGymEnv):
class PositionOnlyPendulum(PendulumEnv, POPGymEnv):
"""Partially observable variant of the Pendulum gym environment.
https://github.com/openai/gym/blob/master/gym/envs/classic_control/
Expand Down Expand Up @@ -64,15 +64,15 @@ def reset(
return init_obs[:-1], info


class StatelessPendulumEasy(StatelessPendulum):
class PositionOnlyPendulumEasy(PositionOnlyPendulum):
pass


class StatelessPendulumMedium(StatelessPendulum):
class PositionOnlyPendulumMedium(PositionOnlyPendulum):
def __init__(self):
super().__init__(max_episode_length=150)


class StatelessPendulumHard(StatelessPendulum):
class PositionOnlyPendulumHard(PositionOnlyPendulum):
def __init__(self):
super().__init__(max_episode_length=100)
Loading

0 comments on commit b6b9c3b

Please sign in to comment.