Commit

manage illegal move
DHDev0 authored Jan 19, 2023
1 parent c86eded commit 08af790
Showing 2 changed files with 13 additions and 1 deletion.
1 change: 1 addition & 0 deletions README.md
@@ -161,6 +161,7 @@ Features
 * [x] MCTS with 0 simulation (use of prior) or any number of simulation.
 * [x] Model weights automatically saved at best selfplay average reward.
 * [x] Priority or Uniform for sampling in replay buffer.
+* [X] Manage illegal move with negative reward.
 * [X] Scale the loss using the importance sampling ratio.
 * [x] Custom "Loss function" class to apply transformation and loss on label/prediction.
 * [X] Load your pretrained model from tag number.
13 changes: 12 additions & 1 deletion game.py
@@ -137,7 +137,18 @@ def observation(self,observation_shape=None,
             state = self.flatten_state(self.tuple_test_obs(state))
         else:
             state = feedback[0]
+        self.feedback_state = state
         return state
+
+    def step(self,action):
+        try:
+            next_step = (self.env.step(action))
+        except:
+            obs = self.feedback_state
+            reward = min(-len(self.rewards),-self.limit_of_game_play,-1)
+            done = self.done
+            next_step = (obs,reward,done)
+        return next_step
 
 
     def store_search_statistics(self, root):
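
A note on the penalty line added above: min over three negative candidates selects the most negative one, so the illegal-move reward equals -max(len(self.rewards), self.limit_of_game_play, 1); with a typical game-length cap this reduces to -self.limit_of_game_play. A quick check, with illustrative values that are not from the commit:

    rewards = [1.0, 0.0, 1.0]   # three rewards collected so far (assumed values)
    limit_of_game_play = 500    # assumed cap on steps per game
    penalty = min(-len(rewards), -limit_of_game_play, -1)
    print(penalty)              # -500, i.e. -max(3, 500, 1)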
@@ -185,7 +196,7 @@ def policy_step(self, policy = None, action = None , temperature = 0 ):
         action_onehot_encoded[selected_action] = 1
 
         # # # apply move and return variable of the env
-        step_output = (self.env.step(self.action_map[selected_action]))
+        step_output = self.step(self.action_map[selected_action])
 
         # # # save game variable to a list to return them
         #contain [observation, reward, done, info] + [meta_data for some gym env]
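To see the whole mechanism outside the diff, here is a minimal, self-contained sketch. It is not the repository's code; the toy environment, class names, and numeric values are illustrative assumptions. A toy env raises on an illegal action, and a step() wrapper in the same spirit as the committed one converts that into a penalized transition:

    class ToyEnv:
        """Toy environment that raises on an illegal action, as some Gym envs do."""
        def __init__(self):
            self.pos = 0
        def step(self, action):
            if action not in (0, 1):          # only actions 0 and 1 are legal
                raise ValueError("illegal move")
            self.pos += 1 if action == 1 else -1
            done = abs(self.pos) >= 3
            return self.pos, 1.0, done        # (observation, reward, done)

    class Game:
        """Same idea as the committed step(): trap the exception and turn the
        illegal move into a penalized transition instead of a crash."""
        def __init__(self, env, limit_of_game_play=500):
            self.env = env
            self.limit_of_game_play = limit_of_game_play
            self.rewards = []
            self.feedback_state = 0           # last observation returned by the env
            self.done = False
        def step(self, action):
            try:
                obs, reward, done = self.env.step(action)
                self.feedback_state, self.done = obs, done
            except Exception:
                obs = self.feedback_state     # replay the last known state
                reward = min(-len(self.rewards), -self.limit_of_game_play, -1)
                done = self.done              # do not end the episode
            self.rewards.append(reward)
            return obs, reward, done

    game = Game(ToyEnv())
    print(game.step(0))   # legal move:   (-1, 1.0, False)
    print(game.step(7))   # illegal move: (-1, -500, False) -- penalized, no crash

Because policy_step now routes every action through this kind of wrapper (second hunk above), an exception raised by the underlying environment no longer aborts self-play; the agent simply receives a strongly negative reward for the illegal action.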
