Initial upload

lishiqianhugh · Nov 30, 2023 · f0136e7 · f0136e7
commit f0136e7
Show file tree

Hide file tree

Showing 84 changed files with 3,475 additions and 0 deletions.
diff --git a/IPHYREEnv.py b/IPHYREEnv.py
@@ -0,0 +1,186 @@
+import random
+import gymnasium as gym
+from gymnasium.spaces import Discrete, Box
+from iphyre.simulator import IPHYRE
+import numpy as np
+
+
+class IPHYRE_inadvance(gym.Env):
+    '''
+    Plan in advance: generate one-step action times based on the initial scenes.
+
+    The policy observes the normalised initial scene once and outputs six
+    values in [0, 1]. Each value is scaled by ``game_time`` to obtain the time
+    at which the matching entry of ``action_list`` is executed; a single call
+    to ``step`` then plays the whole game.
+    '''
+    def __init__(self, config):
+        '''
+        :param config: mapping read with ``.get``; "game_list" holds the games
+            sampled on every reset and "seed" is passed to ``random.seed``
+        '''
+        self.total_reward = 0
+        self.game_list = config.get("game_list")
+        self.seed = config.get("seed")
+        # NOTE: this seeds the module-level ``random`` generator.
+        random.seed(self.seed)
+        self.game = None
+        self.env = None
+        self.action_list = None
+        self.action_candidates = None
+        self.cur_obs = None
+        self.fps = 10
+        self.game_time = 15.
+        self.iter_len = 0
+        self.action_space = Box(low=0., high=1., shape=(6,), dtype=np.float32)
+        # Flattened scene (12 * 9 values) followed by the 6 candidate (x, y) positions.
+        self.observation_space = Box(low=0., high=1., shape=(12 * 9 + 6 * 2,), dtype=np.float32)
+        self.reset_num = 0
+
+    def reset(self, *, seed=None, options=None):
+        '''
+        Sample a game from ``game_list``, rebuild the simulator and return the
+        processed initial observation together with an empty info dict.
+
+        ``seed`` and ``options`` are accepted for API compatibility but are
+        not used here.
+        '''
+        self.iter_len = 0
+        self.game = random.choice(self.game_list)
+        self.env = IPHYRE(game=self.game, fps=self.fps)
+        self.total_reward = 0
+        self.cur_obs = self.env.reset()
+        # Drop the first entry of the simulator's action space; only the
+        # remaining positions are schedulable in this strategy.
+        self.action_list = self.env.get_action_space()[1:]
+        self.action_candidates = np.array(self.action_list, dtype=np.float32).reshape(-1) / 600
+        self.process()
+        self.reset_num += 1
+        return self.cur_obs, {}
+
+    def step(self, action):
+        '''
+        Play the whole game with the given schedule.
+
+        :param action: the time sequence of executing each action (1 * 6),
+            every entry being a fraction of ``game_time``
+        :return: (observation, summed reward, terminated, truncated, info);
+            ``truncated`` stays True when all frames were played without the
+            simulator reporting termination
+        '''
+        total_reward = 0
+        terminated = False
+        truncated = True
+        # Cast to float64 before rounding: actions normally arrive as float32,
+        # and under NumPy 2 promotion rules a float32 value such as 0.1 does
+        # not compare equal to the float64 frame time 0.1, so the scheduled
+        # actions would silently never be executed.
+        fire_times = np.round(np.asarray(action, dtype=np.float64) * self.game_time, 1)
+        # Derive the frame times from integer frame indices so the number of
+        # frames does not depend on floating-point accumulation.
+        n_frames = int(round(self.game_time * self.fps))
+        frame_times = np.round(np.arange(n_frames) / self.fps, 1)
+        for time in frame_times:
+            # Nothing is ever executed on the very first frame (time 0).
+            hits = np.flatnonzero(fire_times == time) if time > 0. else []
+            if len(hits):
+                # When several actions share a time only the first one is used.
+                pos = self.action_list[hits[0]]
+            else:
+                pos = [0., 0.]
+            self.cur_obs, reward, terminated = self.env.step(pos)
+            total_reward += reward
+            if terminated:
+                truncated = False
+                break
+        self.process()
+        return self.cur_obs, total_reward, terminated, truncated, {}
+
+    def process(self):
+        '''
+        Turn the raw simulator state in ``cur_obs`` into the flat observation:
+        scale the first five columns by 1/600, clip to [0, 1], flatten and
+        append the normalised action candidates.
+        '''
+        self.cur_obs = np.array(self.cur_obs, dtype=np.float32)
+        # NOTE(review): 600 is presumably the scene size in pixels - confirm.
+        self.cur_obs[:, :5] /= 600
+        self.cur_obs = self.cur_obs.clip(0., 1.)
+        self.cur_obs = self.cur_obs.reshape(-1)
+        self.cur_obs = np.concatenate((self.cur_obs, self.action_candidates))
+
+
+class IPHYRE_onthefly(gym.Env):
+    '''
+    Plan on-the-fly: generate actions step by step based on the intermediate state.
+
+    Every call to ``step`` advances the simulator by one frame; the discrete
+    action indexes into the full action space returned by the simulator.
+    '''
+    def __init__(self, config):
+        '''
+        :param config: mapping read with ``.get``; uses the keys "game_list"
+            and "seed"
+        '''
+        # Settings taken from the config.
+        self.game_list = config.get("game_list")
+        self.seed = config.get("seed")
+        random.seed(self.seed)
+
+        # Per-episode state, filled in by reset().
+        self.game = None
+        self.env = None
+        self.action_list = None
+        self.action_candidates = None
+        self.cur_obs = None
+
+        # Counters.
+        self.total_reward = 0
+        self.iter_len = 0
+        self.reset_num = 0
+
+        # Simulation rate and gym spaces.
+        self.fps = 10
+        self.action_space = Discrete(7)
+        self.observation_space = Box(low=0., high=1., shape=(12 * 9 + 7 * 2,), dtype=np.float32)
+
+    def reset(self, *, seed=None, options=None):
+        '''
+        Start a new episode on a randomly chosen game and return the processed
+        initial observation plus an empty info dict.
+        '''
+        self.iter_len = 0
+        self.total_reward = 0
+        self.game = random.choice(self.game_list)
+        self.env = IPHYRE(game=self.game, fps=self.fps)
+        self.cur_obs = self.env.reset()
+        self.action_list = self.env.get_action_space()
+        flat_positions = np.array(self.action_list, dtype=np.float32).reshape(-1)
+        self.action_candidates = flat_positions / 600
+        self.process()
+        self.reset_num += 1
+        return self.cur_obs, {}
+
+    def step(self, action):
+        '''
+        Advance the simulator by one frame.
+
+        :param action: index into ``action_list`` selecting the position to act on
+        :return: (observation, reward, terminated, truncated, info);
+            ``truncated`` becomes True once 15 * fps frames have been played
+        '''
+        self.iter_len += 1
+        chosen_pos = self.action_list[action]
+        self.cur_obs, reward, terminated = self.env.step(chosen_pos)
+        self.process()
+        self.total_reward += reward
+        frame_budget = 15 * self.fps
+        truncated = self.iter_len >= frame_budget
+        return self.cur_obs, reward, terminated, truncated, {}
+
+    def process(self):
+        '''
+        Normalise the raw state in ``cur_obs``: divide the first five columns
+        by 600, clip to [0, 1], flatten, then append the action candidates.
+        '''
+        scene = np.array(self.cur_obs, dtype=np.float32)
+        scene[:, :5] /= 600
+        flat_scene = scene.clip(0., 1.).reshape(-1)
+        self.cur_obs = np.concatenate((flat_scene, self.action_candidates))
+
+
+class IPHYRE_combine(gym.Env):
+    '''
+    Combined strategy: generate one-step action times based on the initial scenes but update after each execution.
+
+    ``step`` runs the simulator until one scheduled action has been executed
+    (or the game ends), then hands the new observation back so that the
+    remaining schedule can be revised.
+    '''
+    def __init__(self, config):
+        '''
+        :param config: mapping read with ``.get``; uses the keys "game_list"
+            and "seed"
+        '''
+        self.total_reward = 0
+        self.game_list = config.get("game_list")
+        self.seed = config.get("seed")
+        # NOTE: this seeds the module-level ``random`` generator.
+        random.seed(self.seed)
+        self.game = None
+        self.env = None
+        self.action_list = None
+        self.action_candidates = None
+        self.cur_obs = None
+        self.fps = 10
+        self.game_time = 15.
+        self.iter_len = 0
+        self.action_space = Box(low=0., high=1., shape=(6,), dtype=np.float32)
+        self.observation_space = Box(low=0., high=1., shape=(12 * 9 + 6 * 2,), dtype=np.float32)
+        self.reset_num = 0
+        # 1 for actions that may still be executed, 0 once they have been used.
+        self.mask = None
+
+    def reset(self, *, seed=None, options=None):
+        '''
+        Sample a game, rebuild the simulator, re-enable all six actions and
+        return the processed initial observation with an empty info dict.
+        '''
+        self.iter_len = 0
+        self.game = random.choice(self.game_list)
+        self.env = IPHYRE(game=self.game, fps=self.fps)
+        self.total_reward = 0
+        self.cur_obs = self.env.reset()
+        self.action_list = self.env.get_action_space()[1:]
+        self.action_candidates = np.array(self.action_list, dtype=np.float32).reshape(-1) / 600
+        self.mask = np.ones((6,))
+        self.process()
+        self.reset_num += 1
+        return self.cur_obs, {}
+
+    def step(self, action):
+        '''
+        Simulate from the current frame until one scheduled action has been
+        executed, the game terminates, or the time limit is reached.
+
+        :param action: six execution times, each a fraction of ``game_time``
+        :return: (observation, summed reward, terminated, truncated, info)
+        '''
+        total_reward = 0
+        terminated = False
+        n_frames = int(round(self.game_time * self.fps))
+        # Defined up front so that a call made after the frame budget is
+        # exhausted returns truncated=True instead of raising
+        # UnboundLocalError.
+        truncated = self.iter_len >= n_frames
+        stepped = False
+        # Cast to float64 before rounding: float32 actions do not compare
+        # equal to the float64 frame times under NumPy 2 promotion rules.
+        fire_times = np.round(np.asarray(action, dtype=np.float64) * self.game_time, 1)
+        # Index frames with integers so the number of remaining frames is
+        # exact rather than subject to float rounding of the start value.
+        frame_times = np.round(np.arange(n_frames) / self.fps, 1)
+        for time in frame_times[self.iter_len:]:
+            self.iter_len += 1
+            truncated = self.iter_len >= n_frames
+            pos = [0., 0.]
+            # Nothing is ever executed on the very first frame (time 0).
+            hits = np.flatnonzero(fire_times == time) if time > 0. else []
+            if len(hits) and self.mask[hits[0]]:
+                # Each action can be executed at most once per episode.
+                pos = self.action_list[hits[0]]
+                self.mask[hits[0]] = 0
+            self.cur_obs, reward, terminated = self.env.step(pos)
+            stepped = True
+            total_reward += reward
+            if terminated:
+                truncated = False
+                break
+            # Hand control back after a real action so the plan can be revised.
+            if pos != [0., 0.]:
+                break
+        if stepped:
+            # cur_obs is already processed when no frame was simulated;
+            # processing it twice would fail on the flattened array.
+            self.process()
+        return self.cur_obs, total_reward, terminated, truncated, {}
+
+    def process(self):
+        '''
+        Turn the raw simulator state in ``cur_obs`` into the flat observation:
+        scale the first five columns by 1/600, clip to [0, 1], flatten and
+        append the normalised action candidates.
+        '''
+        self.cur_obs = np.array(self.cur_obs, dtype=np.float32)
+        self.cur_obs[:, :5] /= 600
+        self.cur_obs = self.cur_obs.clip(0., 1.)
+        self.cur_obs = self.cur_obs.reshape(-1)
+        self.cur_obs = np.concatenate((self.cur_obs, self.action_candidates))
diff --git a/README.md b/README.md
@@ -0,0 +1,150 @@
+# <img src="./images/logo.png" width = "60" height = "20"> IPHYRE
+This is a project to explore **I**nteractive **PHY**sical **RE**asoning.
+
+<p align="left">
+     <a href='https://lishiqianhugh.github.io/IPHYRE/'>
+      <img src='https://img.shields.io/badge/Web-Page-yellow?style=plastic&logo=Google%20chrome&logoColor=yellow' alt='Web'>
+    </a>
+    <a href='https://vimeo.com/793260764/2f77f9d5cb'>
+      <img src='https://img.shields.io/badge/Video-Vimeo-green?style=plastic&logo=Google%20chrome&logoColor=green' alt='Video'>
+    </a>
+    <a href='https://www.youtube.com/watch?v=Ko2ZIc9YypY'>
+      <img src='https://img.shields.io/badge/Games-YouTube-blue?style=plastic&logo=Google%20chrome&logoColor=blue' alt='YouTube Games'>
+    </a>
+</p>
+
+<div align="center">
+<kbd><img src='images/gifs/hole.0.gif' width="150"></kbd><kbd><img src='images/gifs/fill.0.gif' width="150"></kbd><kbd><img src='images/gifs/seesaw.0.gif' width="150"></kbd><kbd><img src='images/gifs/angle.0.gif' width="150"></kbd>
+</div>
+
+## Getting started
+Run the commands below to set the environment and install the packages required in this project.
+```
+conda create -n iphyre python=3.10
+conda activate iphyre
+pip install numpy pygame pymunk ray
+pip install iphyre
+```
+
+## Easy-to-use package
+We build the `iphyre` package to promote research on interactive physical reasoning.
+
+The following shows how to define and play a game:
+```
+from iphyre.simulator import IPHYRE
+
+env = IPHYRE(game='hole')
+env.play()
+```
+
+You can get the names and configurations of all 40 games from `GAMES` and `PARAS`:
+```
+from iphyre.games import GAMES, PARAS
+```
+We integrate the games into Gym with three planning strategies in [`IPHYREEnv.py`](./IPHYREEnv.py) to enable training advanced RL agents using [`Ray RLlib`](https://docs.ray.io/en/latest/rllib/index.html). See [`train_online_RL.py`](./train_online_RL.py) for details. You can run this .py file using:
+```
+python train_online_RL.py --strategy inadvance --model PPO --lr 1e-6 
+```
+
+If you are running a customized RL agent, you may want to obtain the intermediate states like this:
+```
+from iphyre.simulator import IPHYRE
+
+max_iter = 150
+model = DQN()  # define your own model
+env = IPHYRE(game='hole')
+s = env.reset()
+while iter < max_iter:
+    # get the action (clicking coordinate [x, y]) based on the current state
+    a = model.get_action(s)
+    # get next state by executing action a; set use_image=True if using visual state.
+    s_, r, done = env.step(a)
+    s = s_
+```
+
+If you want to generate some successful and failed actions and collect some offline data, you can call the corresponding APIs like this:
+```
+from iphyre.games import MAX_ELI_OBJ_NUM
+from iphyre.utils import generate_actions
+from iphyre.simulator import IPHYRE
+
+succeed_list, fail_list, _ = generate_actions(game='hole',
+                                              num_succeed=1,
+                                              num_fail=1,
+                                              interval=0.1,
+                                              max_game_time=15.,
+                                              max_action_time=7.,
+                                              max_step=MAX_ELI_OBJ_NUM,
+                                              seed=0)
+
+env = IPHYRE(game='hole')
+
+# save the initial data and image
+env.collect_initial_data(save_path='./game_initial_data/')
+
+# build executable action list
+positions = env.get_action_space()
+act_lists = []
+for a in succeed_list + fail_list:
+    act_lists.append([positions[i + 1] + [a[i]] for i in range(MAX_ELI_OBJ_NUM)])
+
+# run actions and save the sequential data and images
+env.collect_seq_data(save_path='./game_seq_data/', act_lists=act_lists)
+```
+
+[//]: # (## Baselines)
+
+[//]: # (We utilize [ray [rllib]]&#40;https://docs.ray.io/en/latest/rllib/index.html&#41; to implement the model-free RL baselines including PPO, A2C, SAC, DQN, and DDPG. We also evaluate model-based [World Model]&#40;https://arxiv.org/pdf/1803.10122.pdf&#41; and offline [Decision Transformer]&#40;https://github.com/kzl/decision-transformer&#41; on our benchmark.)
+
+## API
+Some useful APIs are provided in `iphyre`.
+* **iphyre.games.GAMES:** Get the names of all the games.
+* **iphyre.games.PARAS:** Get the design parameters of all the games.
+* **iphyre.simulator.IPHYRE.reset():** Reset the bodies in the game and return the initial state.
+* **iphyre.simulator.IPHYRE.step():** Apply an action and forward a timestep. Return the next state, reward and whether the game is finished.
+* **iphyre.simulator.IPHYRE.simulate():** Simulate the game with the specified actions and only return the final results without a UI.
+* **iphyre.simulator.IPHYRE.simulate_vis():** Simulate the game with the specified actions and display in a UI.
+* **iphyre.simulator.IPHYRE.play():** Play the game in a UI with mouse clicks to eliminate blocks.
+* **iphyre.simulator.IPHYRE.collect_initial_data():** Save images and body properties of only initial states without a UI.
+* **iphyre.simulator.IPHYRE.collect_seq_data():** Save raw data, actions and body properties of the dynamic state sequence without a UI.
+* **iphyre.simulator.IPHYRE.collect_while_play():** Save player's actions and rewards after playing with a UI.
+* **iphyre.simulator.IPHYRE.get_action_space():** Get the central positions of the eliminable blocks with `no_action` at the first place and at the padding place.
+* **iphyre.utils.generate_actions():** Randomly generate the specified numbers of successful and failed actions in one game.
+* **iphyre.utils.play_all():** Play all the games.
+* **iphyre.utils.collect_initial_all():** Save images and body properties of initial states in all the games.
+* **iphyre.utils.collect_play_all():** Play and save player's actions and rewards in all the games.
+
+## Customize your games
+See our game display for example [here](https://lishiqianhugh.github.io/IPHYRE/). The parameters of different games are set in `iphyre.games`, which contains the vertices of the blocks and the central positions of the balls with their radii. See the following structure for example:
+```angular2html
+'''
+block: [[x1, y1],[x2, y2]] with a radius of 10
+ball: [X, Y, Radius]
+eli: 0/1 indicates whether the body can be eliminated
+dynamic: 0/1 indicates whether the body can move under Newtonian laws
+
+When designing, be careful about the following points:
+1. the original point is at the left-top corner of the screen
+2. when specifying the vertices of blocks, try to write from smaller numbers to larger numbers
+3. the number of blocks equals to the number of eli and the number of dynamic.
+'''
+
+game_paras = {'support':
+                  {'block': [[[200., 400.], [300., 400.]],
+                             [[200., 500.], [300., 500.]]],
+                   'ball': [[250., 340., 20.]],
+                   'eli': [1, 1, 0],
+                   'dynamic': [0, 0, 1],
+                   },
+              'hinder':
+                  {'block': [[[200., 400.], [500., 400.]],
+                             [[450., 300.], [450., 380.]],
+                             [[500., 300.], [500., 380.]],
+                             [[200., 150.], [300., 200.]]],
+                   'ball': [[250., 100., 20.]],
+                   'eli': [0, 1, 1, 1, 0],
+                   'dynamic': [0, 0, 0, 0, 1],
+                   },
+              }
+```
+You can easily create your own games based on the rules and structures above!