Skip to content

Commit

Permalink
ToString() update: prefix Nim state strings with the current player, e.g. "(0): 1 3 5 7"
Browse files Browse the repository at this point in the history
  • Loading branch information
acforvs committed Jul 2, 2022
1 parent f3d776a commit 688b76d
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 106 deletions.
1 change: 1 addition & 0 deletions open_spiel/games/nim.cc
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ NimState::NimState(std::shared_ptr<const Game> game, int num_piles, std::vector<

std::string NimState::ToString() const {
std::string str;
absl::StrAppend(&str, "(", current_player_, "): ");
for (std::size_t pile_idx = 0; pile_idx < piles_.size(); pile_idx++) {
absl::StrAppend(&str, piles_[pile_idx]);
if (pile_idx != piles_.size() - 1) {
Expand Down
21 changes: 16 additions & 5 deletions open_spiel/games/nim_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,23 @@ void SinglePileMisereTest() {
SPIEL_CHECK_EQ(state->PlayerReturn(1), 1);
}

// Value iteration on a single 100-stone pile under normal play:
// the player to move can take the entire pile, so the initial
// state "(0): 100" is a win (+1) for player 0.
void VISinglePileNormalTest() {
  std::shared_ptr<const Game> single_pile_game =
      LoadGame("nim", {{"pile_sizes", GameParameter("100")},
                       {"is_misere", GameParameter(false)}});
  auto state_values =
      algorithms::ValueIteration(*single_pile_game, /*depth_limit=*/-1,
                                 /*threshold=*/0.01);
  SPIEL_CHECK_EQ(state_values["(0): 100"], 1);
}

// Value iteration on a single 100-stone pile under misere play (the
// game's default): the player to move can take 99 stones, forcing the
// opponent to take the last one, so the initial state is a win (+1)
// for player 0.
void VISinglePileMisereTest() {
  std::shared_ptr<const Game> game =
      LoadGame("nim", {{"pile_sizes", GameParameter("100")}});
  auto values = algorithms::ValueIteration(*game, -1, 0.01);
  // State keys use the "(player): piles" ToString() format; the old
  // bare "100" key no longer exists, so only the new key is checked
  // (looking up a missing key would default-insert 0 and fail).
  SPIEL_CHECK_EQ(values["(0): 100"], 1);
}

// See "Winning positions" here
Expand All @@ -97,7 +107,7 @@ void VIThreeOnesNormalTest() {
{"is_misere", GameParameter(false)},
});
auto values = algorithms::ValueIteration(*normal_game, -1, 0.01);
SPIEL_CHECK_EQ(values["1 1 1"], 1);
SPIEL_CHECK_EQ(values["(0): 1 1 1"], 1);
}

void VIThreeOnesMisereTest() {
Expand All @@ -106,7 +116,7 @@ void VIThreeOnesMisereTest() {
{"pile_sizes", GameParameter("1;1;1")},
});
auto values = algorithms::ValueIteration(*game, -1, 0.01);
SPIEL_CHECK_EQ(values["1 1 1"], -1);
SPIEL_CHECK_EQ(values["(0): 1 1 1"], -1);
}

void VIThreePilesTest() {
Expand All @@ -116,7 +126,7 @@ void VIThreePilesTest() {
{"is_misere", GameParameter(false)},
});
auto values = algorithms::ValueIteration(*normal_game, -1, 0.01);
SPIEL_CHECK_EQ(values["5 8 13"], -1);
SPIEL_CHECK_EQ(values["(0): 5 8 13"], -1);
}

void VIFourPilesTest() {
Expand All @@ -126,7 +136,7 @@ void VIFourPilesTest() {
{"is_misere", GameParameter(false)},
});
auto values = algorithms::ValueIteration(*normal_game, -1, 0.01);
SPIEL_CHECK_EQ(values["2 3 8 10"], 1);
SPIEL_CHECK_EQ(values["(0): 2 3 8 10"], 1);
}

} // namespace
Expand All @@ -137,6 +147,7 @@ int main(int argc, char **argv) {
open_spiel::nim::BasicNimTests();
open_spiel::nim::SinglePileNormalTest();
open_spiel::nim::SinglePileMisereTest();
open_spiel::nim::VISinglePileNormalTest();
open_spiel::nim::VISinglePileMisereTest();
open_spiel::nim::VIThreeOnesNormalTest();
open_spiel::nim::VIThreeOnesMisereTest();
Expand Down
182 changes: 81 additions & 101 deletions open_spiel/integration_tests/playthroughs/nim.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ MaxGameLength() = 16
ToString() = "nim()"

# State 0
# 1 3 5 7
# (0): 1 3 5 7
IsTerminal() = False
History() = []
HistoryString() = ""
Expand All @@ -40,161 +40,141 @@ IsSimultaneousNode() = False
CurrentPlayer() = 0
InformationStateString(0) = ""
InformationStateString(1) = ""
ObservationString(0) = "1 3 5 7"
ObservationString(1) = "1 3 5 7"
ObservationString(0) = "(0): 1 3 5 7"
ObservationString(1) = "(0): 1 3 5 7"
ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉
ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◉
Rewards() = [0, 0]
Returns() = [0, 0]
LegalActions() = [0, 1, 2, 3, 5, 6, 7, 9, 10, 11, 14, 15, 18, 19, 23, 27]
StringLegalActions() = ["pile:1, take:1;", "pile:2, take:1;", "pile:3, take:1;", "pile:4, take:1;", "pile:2, take:2;", "pile:3, take:2;", "pile:4, take:2;", "pile:2, take:3;", "pile:3, take:3;", "pile:4, take:3;", "pile:3, take:4;", "pile:4, take:4;", "pile:3, take:5;", "pile:4, take:5;", "pile:4, take:6;", "pile:4, take:7;"]

# Apply action "pile:4, take:3;"
action: 11
# Apply action "pile:4, take:5;"
action: 19

# State 1
# 1 3 5 4
# (1): 1 3 5 2
IsTerminal() = False
History() = [11]
HistoryString() = "11"
History() = [19]
HistoryString() = "19"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 1
InformationStateString(0) = "11"
InformationStateString(1) = "11"
ObservationString(0) = "1 3 5 4"
ObservationString(1) = "1 3 5 4"
ObservationTensor(0): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯
ObservationTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯
InformationStateString(0) = "19"
InformationStateString(1) = "19"
ObservationString(0) = "(1): 1 3 5 2"
ObservationString(1) = "(1): 1 3 5 2"
ObservationTensor(0): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉
ObservationTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉
Rewards() = [0, 0]
Returns() = [0, 0]
LegalActions() = [0, 1, 2, 3, 5, 6, 7, 9, 10, 11, 14, 15, 18]
StringLegalActions() = ["pile:1, take:1;", "pile:2, take:1;", "pile:3, take:1;", "pile:4, take:1;", "pile:2, take:2;", "pile:3, take:2;", "pile:4, take:2;", "pile:2, take:3;", "pile:3, take:3;", "pile:4, take:3;", "pile:3, take:4;", "pile:4, take:4;", "pile:3, take:5;"]
LegalActions() = [0, 1, 2, 3, 5, 6, 7, 9, 10, 14, 18]
StringLegalActions() = ["pile:1, take:1;", "pile:2, take:1;", "pile:3, take:1;", "pile:4, take:1;", "pile:2, take:2;", "pile:3, take:2;", "pile:4, take:2;", "pile:2, take:3;", "pile:3, take:3;", "pile:3, take:4;", "pile:3, take:5;"]

# Apply action "pile:3, take:1;"
action: 2
# Apply action "pile:3, take:5;"
action: 18

# State 2
# 1 3 4 4
# (0): 1 3 0 2
IsTerminal() = False
History() = [11, 2]
HistoryString() = "11, 2"
History() = [19, 18]
HistoryString() = "19, 18"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 0
InformationStateString(0) = "11, 2"
InformationStateString(1) = "11, 2"
ObservationString(0) = "1 3 4 4"
ObservationString(1) = "1 3 4 4"
ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯
ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯
InformationStateString(0) = "19, 18"
InformationStateString(1) = "19, 18"
ObservationString(0) = "(0): 1 3 0 2"
ObservationString(1) = "(0): 1 3 0 2"
ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉
ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉
Rewards() = [0, 0]
Returns() = [0, 0]
LegalActions() = [0, 1, 2, 3, 5, 6, 7, 9, 10, 11, 14, 15]
StringLegalActions() = ["pile:1, take:1;", "pile:2, take:1;", "pile:3, take:1;", "pile:4, take:1;", "pile:2, take:2;", "pile:3, take:2;", "pile:4, take:2;", "pile:2, take:3;", "pile:3, take:3;", "pile:4, take:3;", "pile:3, take:4;", "pile:4, take:4;"]
LegalActions() = [0, 1, 3, 5, 7, 9]
StringLegalActions() = ["pile:1, take:1;", "pile:2, take:1;", "pile:4, take:1;", "pile:2, take:2;", "pile:4, take:2;", "pile:2, take:3;"]

# Apply action "pile:2, take:1;"
action: 1
# Apply action "pile:1, take:1;"
action: 0

# State 3
# 1 2 4 4
# (1): 0 3 0 2
IsTerminal() = False
History() = [11, 2, 1]
HistoryString() = "11, 2, 1"
History() = [19, 18, 0]
HistoryString() = "19, 18, 0"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 1
InformationStateString(0) = "11, 2, 1"
InformationStateString(1) = "11, 2, 1"
ObservationString(0) = "1 2 4 4"
ObservationString(1) = "1 2 4 4"
ObservationTensor(0): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯
ObservationTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯
InformationStateString(0) = "19, 18, 0"
InformationStateString(1) = "19, 18, 0"
ObservationString(0) = "(1): 0 3 0 2"
ObservationString(1) = "(1): 0 3 0 2"
ObservationTensor(0): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉
ObservationTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉
Rewards() = [0, 0]
Returns() = [0, 0]
LegalActions() = [0, 1, 2, 3, 5, 6, 7, 10, 11, 14, 15]
StringLegalActions() = ["pile:1, take:1;", "pile:2, take:1;", "pile:3, take:1;", "pile:4, take:1;", "pile:2, take:2;", "pile:3, take:2;", "pile:4, take:2;", "pile:3, take:3;", "pile:4, take:3;", "pile:3, take:4;", "pile:4, take:4;"]
LegalActions() = [1, 3, 5, 7, 9]
StringLegalActions() = ["pile:2, take:1;", "pile:4, take:1;", "pile:2, take:2;", "pile:4, take:2;", "pile:2, take:3;"]

# Apply action "pile:4, take:2;"
action: 7
# Apply action "pile:2, take:1;"
action: 1

# State 4
# 1 2 4 2
# (0): 0 2 0 2
IsTerminal() = False
History() = [11, 2, 1, 7]
HistoryString() = "11, 2, 1, 7"
History() = [19, 18, 0, 1]
HistoryString() = "19, 18, 0, 1"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 0
InformationStateString(0) = "11, 2, 1, 7"
InformationStateString(1) = "11, 2, 1, 7"
ObservationString(0) = "1 2 4 2"
ObservationString(1) = "1 2 4 2"
ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯
ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯
InformationStateString(0) = "19, 18, 0, 1"
InformationStateString(1) = "19, 18, 0, 1"
ObservationString(0) = "(0): 0 2 0 2"
ObservationString(1) = "(0): 0 2 0 2"
ObservationTensor(0): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯
ObservationTensor(1): ◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯
Rewards() = [0, 0]
Returns() = [0, 0]
LegalActions() = [0, 1, 2, 3, 5, 6, 7, 10, 14]
StringLegalActions() = ["pile:1, take:1;", "pile:2, take:1;", "pile:3, take:1;", "pile:4, take:1;", "pile:2, take:2;", "pile:3, take:2;", "pile:4, take:2;", "pile:3, take:3;", "pile:3, take:4;"]
LegalActions() = [1, 3, 5, 7]
StringLegalActions() = ["pile:2, take:1;", "pile:4, take:1;", "pile:2, take:2;", "pile:4, take:2;"]

# Apply action "pile:2, take:1;"
action: 1
# Apply action "pile:4, take:2;"
action: 7

# State 5
# 1 1 4 2
# (1): 0 2 0 0
IsTerminal() = False
History() = [11, 2, 1, 7, 1]
HistoryString() = "11, 2, 1, 7, 1"
History() = [19, 18, 0, 1, 7]
HistoryString() = "19, 18, 0, 1, 7"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 1
InformationStateString(0) = "11, 2, 1, 7, 1"
InformationStateString(1) = "11, 2, 1, 7, 1"
ObservationString(0) = "1 1 4 2"
ObservationString(1) = "1 1 4 2"
ObservationTensor(0): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯
ObservationTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯
InformationStateString(0) = "19, 18, 0, 1, 7"
InformationStateString(1) = "19, 18, 0, 1, 7"
ObservationString(0) = "(1): 0 2 0 0"
ObservationString(1) = "(1): 0 2 0 0"
ObservationTensor(0): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯
ObservationTensor(1): ◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯
Rewards() = [0, 0]
Returns() = [0, 0]
LegalActions() = [0, 1, 2, 3, 6, 7, 10, 14]
StringLegalActions() = ["pile:1, take:1;", "pile:2, take:1;", "pile:3, take:1;", "pile:4, take:1;", "pile:3, take:2;", "pile:4, take:2;", "pile:3, take:3;", "pile:3, take:4;"]
LegalActions() = [1, 5]
StringLegalActions() = ["pile:2, take:1;", "pile:2, take:2;"]

# Apply action "pile:1, take:1;"
action: 0
# Apply action "pile:2, take:2;"
action: 5

# State 6
# Apply action "pile:3, take:3;"
action: 10

# State 7
# Apply action "pile:3, take:1;"
action: 2

# State 8
# Apply action "pile:4, take:1;"
action: 3

# State 9
# Apply action "pile:4, take:1;"
action: 3

# State 10
# Apply action "pile:2, take:1;"
action: 1

# State 11
# 0 0 0 0
# (0): 0 0 0 0
IsTerminal() = True
History() = [11, 2, 1, 7, 1, 0, 10, 2, 3, 3, 1]
HistoryString() = "11, 2, 1, 7, 1, 0, 10, 2, 3, 3, 1"
History() = [19, 18, 0, 1, 7, 5]
HistoryString() = "19, 18, 0, 1, 7, 5"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = -4
InformationStateString(0) = "11, 2, 1, 7, 1, 0, 10, 2, 3, 3, 1"
InformationStateString(1) = "11, 2, 1, 7, 1, 0, 10, 2, 3, 3, 1"
ObservationString(0) = "0 0 0 0"
ObservationString(1) = "0 0 0 0"
ObservationTensor(0): ◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯
ObservationTensor(1): ◯◉◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯
Rewards() = [-1, 1]
Returns() = [-1, 1]
InformationStateString(0) = "19, 18, 0, 1, 7, 5"
InformationStateString(1) = "19, 18, 0, 1, 7, 5"
ObservationString(0) = "(0): 0 0 0 0"
ObservationString(1) = "(0): 0 0 0 0"
ObservationTensor(0): ◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯
ObservationTensor(1): ◉◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◉◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯◯
Rewards() = [1, -1]
Returns() = [1, -1]

0 comments on commit 688b76d

Please sign in to comment.