-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit f0136e7
Showing
84 changed files
with
3,475 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,186 @@ | ||
import random | ||
import gymnasium as gym | ||
from gymnasium.spaces import Discrete, Box | ||
from iphyre.simulator import IPHYRE | ||
import numpy as np | ||
|
||
|
||
class IPHYRE_inadvance(gym.Env):
    '''
    Plan in advance: generate one-step action times based on the initial scenes.

    The policy outputs six values in [0, 1]; each value is scaled by
    ``game_time`` to give the moment at which the corresponding candidate
    position is executed. A single call to ``step`` plays the whole game.
    '''
    def __init__(self, config):
        '''
        :param config: mapping read with ``.get``; uses ``game_list`` (games to
            sample from on reset) and ``seed`` (passed to ``random.seed``)
        '''
        self.total_reward = 0
        self.game_list = config.get("game_list")
        self.seed = config.get("seed")
        # Seeds the global ``random`` module, which ``reset`` uses to pick a game.
        random.seed(self.seed)
        self.game = None
        self.env = None
        self.action_list = None
        self.action_candidates = None
        self.cur_obs = None
        self.fps = 10
        self.game_time = 15.
        self.iter_len = 0
        self.action_space = Box(low=0., high=1., shape=(6,), dtype=np.float32)
        # NOTE(review): presumably 12 bodies x 9 features plus 6 (x, y) candidates — confirm against iphyre.
        self.observation_space = Box(low=0., high=1., shape=(12 * 9 + 6 * 2,), dtype=np.float32)
        self.reset_num = 0

    def reset(self, *, seed=None, options=None):
        '''
        Sample a game from ``game_list``, rebuild the simulator and return the
        processed initial observation.

        :param seed: accepted for API compatibility; not used here
        :param options: accepted for API compatibility; not used here
        :return: (observation, info) where info is an empty dict
        '''
        self.iter_len = 0
        self.game = random.choice(self.game_list)
        self.env = IPHYRE(game=self.game, fps=self.fps)
        self.total_reward = 0
        self.cur_obs = self.env.reset()
        # Drop the first entry of the simulator's action space (the leading no-action slot).
        self.action_list = self.env.get_action_space()[1:]
        # NOTE(review): 600 is presumably the scene size in pixels — confirm.
        self.action_candidates = np.array(self.action_list, dtype=np.float32).reshape(-1) / 600
        self.process()
        self.reset_num += 1
        return self.cur_obs, {}

    def step(self, action):
        '''
        Play the whole game, executing each candidate at its requested time.

        :param action: the time sequence of executing each action (1 * 6),
            each entry a fraction of ``game_time`` in [0, 1]
        :return: (observation, summed reward, terminated, truncated, info)
        '''
        total_reward = 0
        terminated = False
        truncated = True
        # Compare integer frame indices instead of rounded floats: float32
        # action times and a float64 time grid do not compare equal reliably.
        frames = np.rint(
            np.asarray(action, dtype=np.float64).reshape(-1) * self.game_time * self.fps
        ).astype(np.int64)
        # When several actions land on the same frame, the first one wins.
        first_action_at = {}
        for idx, frame in enumerate(frames.tolist()):
            first_action_at.setdefault(frame, idx)
        num_frames = int(round(self.game_time * self.fps))
        for frame in range(num_frames):
            # Frame 0 never triggers an action, so a time of 0 means "do not act".
            idx = first_action_at.get(frame) if frame > 0 else None
            pos = self.action_list[idx] if idx is not None else [0., 0.]
            self.cur_obs, reward, terminated = self.env.step(pos)
            total_reward += reward
            if terminated:
                truncated = False
                break
        self.process()
        return self.cur_obs, total_reward, terminated, truncated, {}

    def process(self):
        '''
        Normalise ``cur_obs`` in place: scale the first five columns by 1/600,
        clip to [0, 1], flatten and append the action candidates.
        '''
        self.cur_obs = np.array(self.cur_obs, dtype=np.float32)
        self.cur_obs[:, :5] /= 600
        self.cur_obs = self.cur_obs.clip(0., 1.)
        self.cur_obs = self.cur_obs.reshape(-1)
        self.cur_obs = np.concatenate((self.cur_obs, self.action_candidates))
|
||
|
||
class IPHYRE_onthefly(gym.Env):
    '''
    Plan on-the-fly: generate actions step by step based on the intermediate state.

    Each ``step`` takes the index of one entry of the simulator's action space
    and advances the simulator by a single call.
    '''
    def __init__(self, config):
        '''
        :param config: mapping read with ``.get``; uses ``game_list`` and ``seed``
        '''
        self.total_reward = 0
        self.game_list = config.get("game_list")
        self.seed = config.get("seed")
        # Seeds the global ``random`` module used by ``reset`` to pick a game.
        random.seed(self.seed)

        # Filled in by ``reset``.
        self.game = None
        self.env = None
        self.action_list = None
        self.action_candidates = None
        self.cur_obs = None

        self.fps = 10
        self.iter_len = 0
        self.reset_num = 0

        self.action_space = Discrete(7)
        obs_size = 12 * 9 + 7 * 2
        self.observation_space = Box(low=0., high=1., shape=(obs_size,), dtype=np.float32)

    def reset(self, *, seed=None, options=None):
        '''
        Start a new episode on a randomly chosen game.

        :param seed: accepted for API compatibility; not used here
        :param options: accepted for API compatibility; not used here
        :return: (observation, info) where info is an empty dict
        '''
        self.iter_len = 0
        self.game = random.choice(self.game_list)
        simulator = IPHYRE(game=self.game, fps=self.fps)
        self.env = simulator
        self.total_reward = 0
        self.cur_obs = simulator.reset()
        # The full action space is kept here, including its first entry.
        self.action_list = simulator.get_action_space()
        candidates = np.array(self.action_list, dtype=np.float32)
        self.action_candidates = candidates.reshape(-1) / 600
        self.process()
        self.reset_num += 1
        return self.cur_obs, {}

    def step(self, action):
        '''
        Execute one entry of ``action_list`` and advance one simulator step.

        :param action: index into ``action_list``
        :return: (observation, reward, terminated, truncated, info)
        '''
        self.iter_len += 1
        chosen_pos = self.action_list[action]
        next_obs, reward, terminated = self.env.step(chosen_pos)
        self.cur_obs = next_obs
        self.process()
        # The episode is cut off after 15 * fps steps.
        step_budget = 15 * self.fps
        truncated = (self.iter_len >= step_budget)
        self.total_reward += reward
        return self.cur_obs, reward, terminated, truncated, {}

    def process(self):
        '''
        Turn the raw ``cur_obs`` into the flat, normalised observation vector.
        '''
        raw = np.array(self.cur_obs, dtype=np.float32)
        # Only the first five columns are rescaled by 1/600.
        raw[:, :5] /= 600
        flat = raw.clip(0., 1.).reshape(-1)
        self.cur_obs = np.concatenate((flat, self.action_candidates))
|
||
|
||
class IPHYRE_combine(gym.Env):
    '''
    Combined strategy: generate one-step action times based on the initial scenes but update after each execution.

    ``step`` runs the simulator until one candidate has been executed (or the
    game ends), then returns so the policy can produce a new set of times.
    ``mask`` records which candidates have already been used in the episode.
    '''
    def __init__(self, config):
        '''
        :param config: mapping read with ``.get``; uses ``game_list`` and ``seed``
        '''
        self.total_reward = 0
        self.game_list = config.get("game_list")
        self.seed = config.get("seed")
        # Seeds the global ``random`` module used by ``reset`` to pick a game.
        random.seed(self.seed)
        self.game = None
        self.env = None
        self.action_list = None
        self.action_candidates = None
        self.cur_obs = None
        self.fps = 10
        self.game_time = 15.
        self.iter_len = 0
        self.action_space = Box(low=0., high=1., shape=(6,), dtype=np.float32)
        self.observation_space = Box(low=0., high=1., shape=(12 * 9 + 6 * 2,), dtype=np.float32)
        self.reset_num = 0
        self.mask = None

    def reset(self, *, seed=None, options=None):
        '''
        Start a new episode on a randomly chosen game and clear the used-action mask.

        :param seed: accepted for API compatibility; not used here
        :param options: accepted for API compatibility; not used here
        :return: (observation, info) where info is an empty dict
        '''
        self.iter_len = 0
        self.game = random.choice(self.game_list)
        self.env = IPHYRE(game=self.game, fps=self.fps)
        self.total_reward = 0
        self.cur_obs = self.env.reset()
        # Drop the first entry of the simulator's action space (the leading no-action slot).
        self.action_list = self.env.get_action_space()[1:]
        self.action_candidates = np.array(self.action_list, dtype=np.float32).reshape(-1) / 600
        # 1 = candidate still available, 0 = already executed this episode.
        self.mask = np.ones((6,))
        self.process()
        self.reset_num += 1
        return self.cur_obs, {}

    def step(self, action):
        '''
        Advance from the current frame until one candidate is executed, the
        game terminates, or the time budget runs out.

        :param action: the time sequence of executing each action (1 * 6),
            each entry a fraction of ``game_time`` in [0, 1]
        :return: (observation, summed reward, terminated, truncated, info)
        '''
        total_reward = 0
        terminated = False
        num_frames = int(round(self.game_time * self.fps))
        # Defined up front so a call after the budget is exhausted still returns.
        truncated = (self.iter_len >= num_frames)
        # Integer frame indices avoid float32/float64 equality mismatches and
        # the off-by-one that a float ``np.arange`` start can introduce.
        frames = np.rint(
            np.asarray(action, dtype=np.float64).reshape(-1) * self.game_time * self.fps
        ).astype(np.int64)
        # When several actions land on the same frame, the first one wins.
        first_action_at = {}
        for idx, frame in enumerate(frames.tolist()):
            first_action_at.setdefault(frame, idx)
        stepped = False
        for frame in range(self.iter_len, num_frames):
            self.iter_len += 1
            truncated = (self.iter_len >= num_frames)
            pos = [0., 0.]
            # Frame 0 never triggers an action.
            idx = first_action_at.get(frame) if frame > 0 else None
            if idx is not None and self.mask[idx]:
                pos = self.action_list[idx]
                self.mask[idx] = 0
            self.cur_obs, reward, terminated = self.env.step(pos)
            stepped = True
            total_reward += reward
            if terminated:
                truncated = False
                break
            # Hand control back to the policy as soon as a real action ran.
            if pos != [0., 0.]:
                break
        # ``cur_obs`` is only raw (2-D) if the simulator was stepped; the
        # previous observation is already processed and must not be redone.
        if stepped:
            self.process()
        return self.cur_obs, total_reward, terminated, truncated, {}

    def process(self):
        '''
        Normalise ``cur_obs`` in place: scale the first five columns by 1/600,
        clip to [0, 1], flatten and append the action candidates.
        '''
        self.cur_obs = np.array(self.cur_obs, dtype=np.float32)
        self.cur_obs[:, :5] /= 600
        self.cur_obs = self.cur_obs.clip(0., 1.)
        self.cur_obs = self.cur_obs.reshape(-1)
        self.cur_obs = np.concatenate((self.cur_obs, self.action_candidates))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
# <img src="./images/logo.png" width = "60" height = "20"> IPHYRE | ||
This is a project to explore **I**nteractive **PHY**sical **RE**asoning. | ||
|
||
<p align="left"> | ||
<a href='https://lishiqianhugh.github.io/IPHYRE/'> | ||
<img src='https://img.shields.io/badge/Web-Page-yellow?style=plastic&logo=Google%20chrome&logoColor=yellow' alt='Web'> | ||
</a> | ||
<a href='https://vimeo.com/793260764/2f77f9d5cb'> | ||
<img src='https://img.shields.io/badge/Video-Vimeo-green?style=plastic&logo=Google%20chrome&logoColor=green' alt='Video'> | ||
</a> | ||
<a href='https://www.youtube.com/watch?v=Ko2ZIc9YypY'> | ||
<img src='https://img.shields.io/badge/Games-YouTube-blue?style=plastic&logo=Google%20chrome&logoColor=blue' alt='YouTube Games'> | ||
</a> | ||
</p> | ||
|
||
<div align="center"> | ||
<kbd><img src='images/gifs/hole.0.gif' width="150"></kbd><kbd><img src='images/gifs/fill.0.gif' width="150"></kbd><kbd><img src='images/gifs/seesaw.0.gif' width="150"></kbd><kbd><img src='images/gifs/angle.0.gif' width="150"></kbd> | ||
</div> | ||
|
||
## Getting started | ||
Run the commands below to set the environment and install the packages required in this project. | ||
``` | ||
conda create -n iphyre python=3.10 | ||
conda activate iphyre | ||
pip install numpy pygame pymunk ray | ||
pip install iphyre | ||
``` | ||
|
||
## Easy-to-use package | ||
We build the `iphyre` package to promote research on interactive physical reasoning. | ||
|
||
The following shows how to define and play a game: | ||
``` | ||
from iphyre.simulator import IPHYRE | ||
env = IPHYRE(game='hole') | ||
env.play() | ||
``` | ||
|
||
You can get the names and configurations of all 40 games via `GAMES` and `PARAS`: | ||
``` | ||
from iphyre.games import GAMES, PARAS | ||
``` | ||
We integrate the games into Gym with three planning strategies in [`IPHYREEnv.py`](./IPHYREEnv.py) to enable training advanced RL agents using [`Ray RLlib`](https://docs.ray.io/en/latest/rllib/index.html). See [`train_online_RL.py`](./train_online_RL.py) for details. You can run this .py file using: | ||
``` | ||
python train_online_RL.py --strategy inadvance --model PPO --lr 1e-6 | ||
``` | ||
|
||
If you are running a customized RL agent, you may want to obtain the intermediate states like this: | ||
``` | ||
from iphyre.simulator import IPHYRE | ||
max_iter = 150 | ||
model = DQN() # define your own model | ||
env = IPHYRE(game='hole') | ||
s = env.reset() | ||
while iter < max_iter: | ||
# get the action (clicking coordinate [x, y]) based on the current state | ||
a = model.get_action(s) | ||
# get next state by executing action a; set use_image=True if using visual state. | ||
s_, r, done = env.step(a) | ||
s = s_ | ||
``` | ||
|
||
If you want to generate some successful and failed actions and collect some offline data, you can call the corresponding APIs like this: | ||
``` | ||
from iphyre.games import MAX_ELI_OBJ_NUM | ||
from iphyre.utils import generate_actions | ||
from iphyre.simulator import IPHYRE | ||
succeed_list, fail_list, _ = generate_actions(game='hole', | ||
num_succeed=1, | ||
num_fail=1, | ||
interval=0.1, | ||
max_game_time=15., | ||
max_action_time=7., | ||
max_step=MAX_ELI_OBJ_NUM, | ||
seed=0) | ||
env = IPHYRE(game='hole') | ||
# save the initial data and image | ||
env.collect_initial_data(save_path='./game_initial_data/') | ||
# build executable action list | ||
positions = env.get_action_space() | ||
act_lists = [] | ||
for a in succeed_list + fail_list: | ||
act_lists.append([positions[i + 1] + [a[i]] for i in range(MAX_ELI_OBJ_NUM)]) | ||
# run actions and save the sequential data and images | ||
env.collect_seq_data(save_path='./game_seq_data/', act_lists=act_lists) | ||
``` | ||
|
||
[//]: # (## Baselines) | ||
|
||
[//]: # (We utilize [ray [rllib]](https://docs.ray.io/en/latest/rllib/index.html) to implement the model-free RL baselines including PPO, A2C, SAC, DQN, and DDPG. We also evaluate model-based [World Model](https://arxiv.org/pdf/1803.10122.pdf) and offline [Decision Transformer](https://github.com/kzl/decision-transformer) on our benchmark.) | ||
|
||
## API | ||
Some useful APIs are provided in `iphyre`. | ||
* **iphyre.games.GAMES:** Get the names of all the games. | ||
* **iphyre.games.PARAS:** Get the design parameters of all the games. | ||
* **iphyre.simulator.IPHYRE.reset():** Reset the bodies in the game and return the initial state. | ||
* **iphyre.simulator.IPHYRE.step():** Apply an action and forward a timestep. Return the next state, reward and whether the game is finished. | ||
* **iphyre.simulator.IPHYRE.simulate():** Simulate the game with the specified actions and only return the final results without a UI. | ||
* **iphyre.simulator.IPHYRE.simulate_vis():** Simulate the game with the specified actions and display in a UI. | ||
* **iphyre.simulator.IPHYRE.play():** Play the game in a UI with mouse clicks to eliminate blocks. | ||
* **iphyre.simulator.IPHYRE.collect_initial_data():** Save images and body properties of only initial states without a UI. | ||
* **iphyre.simulator.IPHYRE.collect_seq_data():** Save raw data, actions and body properties of the dynamic state sequence without a UI. | ||
* **iphyre.simulator.IPHYRE.collect_while_play():** Save player's actions and rewards after playing with a UI. | ||
* **iphyre.simulator.IPHYRE.get_action_space():** Get the central positions of the eliminable blocks with `no_action` at the first place and at the padding place. | ||
* **iphyre.utils.generate_actions():** Randomly generate the specified numbers of successful and failed actions in one game. | ||
* **iphyre.utils.play_all():** Play all the games. | ||
* **iphyre.utils.collect_initial_all():** Save images and body properties of initial states in all the games. | ||
* **iphyre.utils.collect_play_all():** Play and save player's actions and rewards in all the games. | ||
|
||
## Customize your games | ||
See our game display for examples [here](https://lishiqianhugh.github.io/IPHYRE/). The parameters of different games are set in `iphyre.games`, which contains the vertices of the blocks and the central positions of the balls with their radii. See the following structure for an example: | ||
```angular2html | ||
''' | ||
block: [[x1, y1],[x2, y2]] with a radius of 10 | ||
ball: [X, Y, Radius] | ||
eli: 0/1 indicates whether the body can be eliminated | ||
dynamic: 0/1 indicates whether the body can move under Newtonian laws | ||
When designing, be careful about the following points: | ||
1. the original point is at the left-top corner of the screen | ||
2. when specifying the vertices of blocks, try to write from smaller numbers to larger numbers | ||
3. the total number of bodies (blocks plus balls) equals the length of eli and the length of dynamic. | ||
''' | ||
game_paras = {'support': | ||
{'block': [[[200., 400.], [300., 400.]], | ||
[[200., 500.], [300., 500.]]], | ||
'ball': [[250., 340., 20.]], | ||
'eli': [1, 1, 0], | ||
'dynamic': [0, 0, 1], | ||
}, | ||
'hinder': | ||
{'block': [[[200., 400.], [500., 400.]], | ||
[[450., 300.], [450., 380.]], | ||
[[500., 300.], [500., 380.]], | ||
[[200., 150.], [300., 200.]]], | ||
'ball': [[250., 100., 20.]], | ||
'eli': [0, 1, 1, 1, 0], | ||
'dynamic': [0, 0, 0, 0, 1], | ||
}, | ||
} | ||
``` | ||
You can easily create your own games based on the rules and structures above! |
Oops, something went wrong.