datamllab · meiyoumingzile · May 5, 2023 · Jun 16, 2023 · Jun 16, 2023
diff --git a/examples/run_random.py b/examples/run_random.py
@@ -38,7 +38,7 @@ def run(args):
     parser.add_argument(
         '--env',
         type=str,
-        default='leduc-holdem',
+        default='bridge',
         choices=[
             'blackjack',
             'leduc-holdem',

diff --git a/rlcard/envs/tractor.py b/rlcard/envs/tractor.py
@@ -0,0 +1,8 @@
+import numpy as np
+from collections import OrderedDict
+
+from rlcard.envs import Env
+from rlcard.games.uno import Game
+from rlcard.games.uno.utils import encode_hand, encode_target
+from rlcard.games.uno.utils import ACTION_SPACE, ACTION_LIST
+from rlcard.games.uno.utils import cards2list
diff --git a/rlcard/games/tractor/__init__.py b/rlcard/games/tractor/__init__.py
diff --git a/rlcard/games/tractor/baselinePolicy.py b/rlcard/games/tractor/baselinePolicy.py
@@ -0,0 +1,86 @@
+import random
+import numpy as np
+from functools import cmp_to_key
+
+import tractor_game
+# from DNQ.mygame.TractorArtifice.cheater import mkDeck, cheator1
+from tractor_gameUtil import fenInd,getNum
+from tractor_game import Tractor
+from tractor_player import Player
+# Module-level engine instance, created as a side effect of importing this file.
+# NOTE(review): every live function below receives its own `env` parameter;
+# only the commented-out baselineAct draft reads this global - confirm it is still needed.
+env=Tractor()
+def _baselinecmp(a,b):
+    """Three-way comparator on the second element of two ``(kind, count)`` tuples.
+
+    Intended for ``functools.cmp_to_key``.
+
+    Returns:
+        1 when ``a[1] > b[1]``, -1 when ``a[1] < b[1]`` and 0 when both are
+        equal. Returning 0 for ties keeps the ordering consistent, so the
+        sort stays stable for suits of the same length.
+    """
+    return (a[1] > b[1]) - (a[1] < b[1])
+def baselineColdeck(env:Tractor,p: Player,undercard_i):
+    """Greedy callback that chooses one card of player ``p`` to discard.
+
+    The non-trump suits that still hold cards are ordered by hand length
+    (via ``_baselinecmp``). Each suit's hand is scanned from its last card
+    backwards; the first card that scores no points and ranks below the
+    best candidate so far becomes the new candidate, and the scan moves on
+    to the next suit.
+
+    Args:
+        env: game engine; only ``env.orderInd`` (card -> rank) is read.
+        p: player whose ``cards`` (indexed by suit) and ``lordDecor`` are read.
+        undercard_i: unused here; presumably the kitty slot being filled.
+
+    Returns:
+        The chosen card, or the last card of the trump suit when no
+        point-free side-suit card exists.
+    """
+    side_suits = [
+        (suit, len(p.cards[suit]))
+        for suit in range(4)
+        if suit != p.lordDecor and len(p.cards[suit]) > 0
+    ]
+    side_suits = sorted(side_suits, key=cmp_to_key(_baselinecmp))
+
+    best_card, best_order = 0, 100  # 0 doubles as the "nothing found" sentinel
+    for suit, _count in side_suits:
+        for card in reversed(p.cards[suit]):
+            order = env.orderInd[card]
+            if best_order > order and fenInd[getNum(card)] == 0:
+                best_card, best_order = card, order
+                break
+
+    if best_card != 0:
+        return best_card
+    # Only trumps (or only point cards) are available: give up a trump.
+    return p.cards[p.lordDecor][-1]
+
+def bidFun_random(env,p,round,allActList):
+    """Bidding callback: pick a uniformly random index into ``allActList``.
+
+    ``env``, ``p`` and ``round`` are accepted to match the callback
+    signature but are not used. Raises ``ValueError`` when the list is empty.
+    """
+    return random.randrange(len(allActList))
+def firstPolicyFun_random(env,p,usedAct,allActList):
+    """Lead-player callback: pick a uniformly random index into ``allActList``.
+
+    ``env``, ``p`` and ``usedAct`` are accepted to match the callback
+    signature but are not used. Raises ``ValueError`` when the list is empty.
+    """
+    choice = random.randrange(len(allActList))
+    # Debug aids (uncomment to inspect the candidates and the choice):
+    # dfsPrintActList(allActList)
+    # dfsPrintActList(allActList[choice])
+    # env.printAllInfo(p.id)
+    return choice
+def otherPolicyFun_random(env, p, usedAct, allActList):
+    """Follow-player callback: pick a uniformly random index into ``allActList``.
+
+    ``env``, ``p`` and ``usedAct`` are accepted to match the callback
+    signature but are not used. Raises ``ValueError`` when the list is empty.
+    """
+    choice = random.randrange(len(allActList))
+    # Debug aids (uncomment to inspect the candidates and the choice):
+    # dfsPrintActList(allActList)
+    # dfsPrintActList(allActList[choice])
+    # env.printAllInfo(p.id)
+    return choice
+
+# def baselineAct(p,knowCards,cardsCnt,knowCards_seq,maxi,kind):#player_cards代表自己玩家的手牌，knowCards是已知前置位的牌,knowCards_seq代表第几个出
+#     #cardsCnt代表knowCards每组牌的数量
+#     #maxi是最大的玩家的位置，kind代表出的花色，级牌算主牌的花色
+#     #player_cards通过排序被编码成了4个颜色，每个颜色又分为3组分别代表；oneList,doubleList,traList
+#     #
+#     cardsMax=knowCards[maxi]
+#     player_cards=p.toSortCardsList1(env)
+#     if knowCards_seq==0:#先出
+#         #算跑了多少分，如果外置位分数大于50，则无脑出大的。外置位分数计算方式：200-自己的分数-底牌的分数(如果可见)-已经出去的别人的分数
+#         #如果外置位分数不大于50，随机尽量出小的，可以出分
+#         #
+#         pass
+#     elif knowCards_seq==1:#第二个出
+#         #无脑大过0号，否则随机跟小牌且尽量不跟分
+#         if len(cardsMax[2])>0:#敌方又拖拉机
+#             if len(player_cards[kind][2])==0:#我方没拖拉机
+#                 if len(player_cards[kind][1])==0:#我方没对子
+#                     if len(player_cards[kind][0])>=cardsCnt:#这一类花色有牌可出
+#                         cards=baselineAct_followSmall1()#跟小牌
+#                         return
+#
+#             # else:#个数大于它且比他多
+#         for a in cardsMax[2]:  # 看看有没有拖拉机
+#             pass
+#         pass
+#     elif knowCards_seq == 2:#第三个
+#         #如果1号大，就无脑大过1号
+#         #如果0号大，且0号是王或级牌或大于1张的甩牌拖拉机对子，就跟分，没有分跟小牌
+#         #如果0号大，且0号较小，无脑大过他
+#
+#         pass
+#     elif knowCards_seq == 3:#最后出牌，策略是：
+#         #如果我方大:就无脑跟分,能用分杀就用分杀,没有分就随机跟小牌;
+#         #如果敌方大:且没有分:能用分杀就用分杀，否则就随机跟小牌且尽量不跟分；
+#         #如果敌方大且有分:就尽量大过前面的，大不过就随机跟小牌且尽量不跟分
+#         pass
diff --git a/rlcard/games/tractor/example.py b/rlcard/games/tractor/example.py
@@ -0,0 +1,81 @@
+
+#使用该游戏环境的样例
+import random
+
+from DNQ.mygame.TractorArtifice.game_env.tractor_cheat import mkDeck, cheator1
+from baselinePolicy import baselineColdeck
+from tractor_action import dfsPrintActList
+from tractor_game import Tractor
+def bidFun(env,p,round,allActList):
+    """Bidding callback: pick a uniformly random index into ``allActList``.
+
+    ``env``, ``p`` and ``round`` are accepted to match the callback
+    signature but are not used. Raises ``ValueError`` when the list is empty.
+    """
+    return random.randrange(len(allActList))
+def firstPolicyFun(env,p,usedAct,allActList):
+    """Lead-player callback: pick a uniformly random index into ``allActList``.
+
+    ``env``, ``p`` and ``usedAct`` are accepted to match the callback
+    signature but are not used. Raises ``ValueError`` when the list is empty.
+    """
+    choice = random.randrange(len(allActList))
+    # Debug aids (uncomment to inspect the candidates and the choice):
+    # dfsPrintActList(allActList)
+    # dfsPrintActList(allActList[choice])
+    # env.printAllInfo(p.id)
+    return choice
+def otherPolicyFun(env, p, usedAct, allActList):
+    """Follow-player callback: pick a uniformly random index into ``allActList``.
+
+    ``env``, ``p`` and ``usedAct`` are accepted to match the callback
+    signature but are not used. Raises ``ValueError`` when the list is empty.
+    """
+    choice = random.randrange(len(allActList))
+    # Debug aids (uncomment to inspect the candidates and the choice):
+    # dfsPrintActList(allActList)
+    # dfsPrintActList(allActList[choice])
+    # env.printAllInfo(p.id)
+    return choice
+def playAGame(env):
+    """Play one deal in which all four seats act randomly.
+
+    Deals and bids with ``bidFun``, lets the dealer swap the kitty using
+    the greedy ``baselineColdeck`` policy, then plays trick after trick
+    until ``env.step`` reports that the deal is finished.
+
+    Args:
+        env: the Tractor engine to drive.
+
+    Returns:
+        A 3-tuple ``(isTer, winPlayer, grade)``. ``isTer`` is True when
+        ``env.calcGameScore()`` returned a player id other than -1 (per the
+        original note: -1 means the match continues, anything else is the
+        player who reached level A first). ``winPlayer`` and ``grade`` are
+        passed through from ``calcGameScore``.
+    """
+    # To replay a rigged deal instead of a random one:
+    # deck1, setDecor, setNum, setDealer = mkDeck(cheator1)
+    # env.dealCards(deck1, bidFun, (setDecor, setNum, setDealer))
+    env.dealCards(None, bidFun)
+    env.setUnderCards(baselineColdeck)  # greedy kitty swap: discard small cards
+    env.printAllInfo()
+    dfsPrintActList(env.players[env.dealer].cards[env.lordDecor])
+    firstPlayerId = env.dealer
+    isTer = False
+    while not isTer:
+        act4 = [None, None, None, None]
+        # The leader moves first; the other three follow in seat order.
+        act4[firstPlayerId] = env.firstPolicy(firstPlayerId, firstPolicyFun)
+        for i in range(1, 4):
+            nextID = (firstPlayerId + i) % 4
+            act4[nextID] = env.otherPolicy(act4, firstPlayerId, nextID, otherPolicyFun)
+
+        # step returns the next leader, the trick score, the finished flag and an end score.
+        firstPlayerId, sc, isTer, endSc = env.step(act4, firstPlayerId)
+        if isTer:
+            sc = env.sumSc
+            winPlayer, playerId, grade = env.calcGameScore()
+            print(sc, winPlayer)
+            return playerId != -1, winPlayer, grade
+    # Fallback kept for safety; three values so callers can always unpack
+    # (isTer, winPlayer, grade).
+    return -1, -1, -1
+
+def train_game(trainMaxCnt):
+    """Run ``trainMaxCnt`` complete matches of random play on one engine.
+
+    The engine is reset before each match; deals are then played with
+    ``playAGame`` until it reports that the match is over (per the original
+    note: some player has passed level A).
+    """
+    env = Tractor()
+    for _ in range(trainMaxCnt):
+        env.reset_game()
+        match_over = False
+        while not match_over:
+            match_over, winPlayer, grade = playAGame(env)
+
+# Script entry: plays one full match as soon as this file is executed (or imported).
+train_game(1)
diff --git a/rlcard/games/tractor/game.py b/rlcard/games/tractor/game.py
@@ -0,0 +1,67 @@
+import random
+
+from tractor_game import Tractor
+
+class TractorGame(Tractor):
+    """Work-in-progress adapter exposing the Tractor engine through RLCard-style methods.
+
+    Attributes such as ``dealer``, ``currentPlayer``, ``game_stage`` and
+    ``isTer`` are read here but not set; presumably the ``Tractor`` base
+    class provides them - confirm against the engine.
+    """
+    def __init__(self):
+        super().__init__()  # run the engine's own initialisation
+        self.allow_step_back = False
+        self.isBeginGame=False  # whether the game is in its opening phase
+
+    # RLCard generic game interface follows.
+    def init_game(self):
+        """Start a new deal, resetting the whole match when it has been decided.
+
+        Returns:
+            ``(state, player_id)`` where ``state`` is currently always an
+            empty list.
+        """
+        playerId=0
+        if self.dealer!=-1:  # a dealer exists, so this is not the very first deal
+            winPlayer,playerId,grade=self.calcGameScore()
+        if playerId!=-1:
+            self.reset_game()
+            return [], self.currentPlayer
+        else:
+            return [], (self.dealer+1)%4
+
+    def get_num_players(self):
+        """Return the number of players, fixed at 4."""
+        return 4
+
+    def get_num_actions(self):
+        """Return the size of the action space, currently hard-coded to 25."""
+        return 25
+
+    def configure(self, game_config):
+        """
+        Specify some game specific parameters, such as number of players, initial chips, and dealer id.
+        If dealer_id is None, he will be randomly chosen
+
+        NOTE(review): ``get_num_players`` ignores the value stored here and
+        always returns 4 - confirm which one is authoritative.
+        """
+        self.num_players = game_config['game_num_players']
+        # must have num_players length
+        self.init_chips = [game_config['chips_for_each']] * game_config["game_num_players"]
+        self.dealer_id = game_config['dealer_id']
+
+    def step(self,act):
+        """Advance the game by one action (only the bid stage is wired up so far).
+
+        Args:
+            act: the action to apply; not consumed yet.
+
+        Returns:
+            ``(next_state, player_id)``; both are placeholders (0) for now.
+        """
+        next_state=0
+        player_id=0
+        if self.game_stage=="bid":
+            player_id=0
+            # Operate on this instance, not on a module-level singleton.
+            # TODO: split the callback-driven dealCards into per-action steps.
+            self.dealCards(None, bidFun)
+        elif self.game_stage=="ambush":
+            pass  # TODO: kitty-swap stage not implemented yet
+        elif self.game_stage == "play":
+            pass  # TODO: trick-play stage not implemented yet
+
+        return next_state, player_id
+
+    def step_back(self):
+        """Undo is unsupported (``allow_step_back`` is False); does nothing."""
+        pass
+
+    def is_over(self):
+        """Return the engine's ``isTer`` flag."""
+        return self.isTer
+
+    def get_player_id(self):
+        """Return the id of the player whose turn it is."""
+        return self.currentPlayer
+
+def bidFun(env,p,round,allActList):
+    """Bidding callback: pick a uniformly random index into ``allActList``.
+
+    ``env``, ``p`` and ``round`` are accepted to match the callback
+    signature but are not used. Raises ``ValueError`` when the list is empty.
+    """
+    return random.randrange(len(allActList))
+# Smoke run executed at import time: build a game, reset it, deal with the
+# random bidding callback and print the id of the player to act.
+env=TractorGame()
+env.reset_game()
+env.dealCards(None, bidFun)
+print(env.get_player_id())
diff --git a/rlcard/games/tractor/tractor_action.py b/rlcard/games/tractor/tractor_action.py
@@ -0,0 +1,143 @@
+import math
+import random
+from functools import cmp_to_key
+import numpy as np
+
+from tractor_gameUtil import getNum, fenInd, printCard, cardToString, INF, getKind
+
+
+class Action():
+    """The cards one player lays down in a single play.
+
+    Attributes:
+        one: flat list of single cards.
+        double: list of card groups, each a pair or a tractor,
+            e.g. ``[[3,3],[4,4,5,5]]``.
+        len: total number of cards held in ``one`` and ``double``.
+        playerId: id of the player who made the play (-1 when unset).
+    """
+    def __init__(self, one=None,double=None,playerId=-1):
+        one = [] if one is None else one
+        double = [] if double is None else double
+        self.one=one.copy()
+        self.double = double.copy()  # shallow copy: inner groups stay shared with the caller
+        self.len=len(one)+sum(len(dou) for dou in double)
+        self.playerId=playerId
+
+    def _iterCards(self):
+        """Yield every card, grouped cards first, then the singles."""
+        for dou in self.double:
+            yield from dou
+        yield from self.one
+
+    def add(self,one=None,double=None):
+        """Append singles and/or groups (groups are stored without copying)."""
+        # Snapshot the inputs first so passing this action's own lists is safe.
+        new_one = [] if one is None else list(one)
+        new_double = [] if double is None else list(double)
+        self.one.extend(new_one)
+        self.len += len(new_one)
+        for dou in new_double:
+            self.double.append(dou)
+            self.len += len(dou)
+
+    def addOne(self,a):
+        """Append a single card."""
+        self.one.append(a)
+        self.len += 1
+
+    def addDou(self,dou):
+        """Append a copy of one group (pair or tractor)."""
+        self.double.append(dou.copy())
+        self.len+=len(dou)
+
+    def setDou(self,i,dou):
+        """Replace group ``i``, or append when ``i`` is one past the end.
+
+        Any larger index is silently ignored.
+        """
+        if i<len(self.double):
+            self.len += len(dou) - len(self.double[i])
+            self.double[i]=dou
+        elif i==len(self.double):
+            self.double.append(dou)
+            self.len += len(dou)
+
+    def isCombination(self):
+        """Return True when the play has more than one component (a thrown combination)."""
+        return len(self.one)+len(self.double)>1
+
+    def getDouleCnt(self):
+        """Return the number of pairs, i.e. grouped cards divided by two."""
+        return (self.len-len(self.one))//2
+
+    def getDouleLen(self):
+        """Return the number of groups stored in ``double``."""
+        return len(self.double)
+
+    def isSeq(self):
+        """Same test as ``isCombination``: more than one component."""
+        return len(self.double)+len(self.one)>1
+
+    def getFen(self):
+        """Return the total points carried by all cards of the play."""
+        # getNum gives the rank ([1,13], jokers are 14); fenInd maps rank -> points.
+        return sum(fenInd[getNum(a)] for a in self._iterCards())
+
+    def print(self,i=0):
+        """Print ``act<i>:`` followed by every card, without a trailing newline."""
+        print("act"+str(i)+":",end="")
+        for pos, a in enumerate(self._iterCards()):
+            printCard(a,pos)
+
+    def println(self,i=0):
+        """Like ``print`` but terminates the line."""
+        self.print(i)
+        print("")
+
+    def toString(self):
+        """Return all cards rendered with ``cardToString`` and concatenated."""
+        return "".join(cardToString(a) for a in self._iterCards())
+
+    def tolist(self):
+        """Return a new flat list: the singles first, then the grouped cards."""
+        li=self.one.copy()
+        for dou in self.double:
+            li.extend(dou)
+        return li
+
+    def getKind(self,env):
+        """Return the suit kind of the play, judged from its first card.
+
+        Raises ``IndexError`` when the action holds no cards.
+        """
+        if len(self.one)>0:
+            return getKind(self.one[0],env.lordDecor,env.lordNum)
+        return getKind(self.double[0][0], env.lordDecor, env.lordNum)
+
+    def sort(self,env):
+        """Sort the singles and each group in place with the engine's comparator."""
+        order_key=cmp_to_key(env._sortCardList_cmp1)
+        self.one.sort(key=order_key)
+        for dou in self.double:
+            dou.sort(key=order_key)
+
+    def getMinCard(self,env):
+        """Return a one-card ``Action`` holding the lowest-ranked card.
+
+        Rank is ``env.orderInd[card]``; on ties the first card met wins
+        (groups are scanned before singles). An empty action yields the
+        placeholder card 0.
+        """
+        mincard=min(self._iterCards(), key=lambda a: env.orderInd[a], default=0)
+        return Action([mincard])
+
+def __dfsPrintActList(newLi, li0,printFun=None):
+    """Recursively render a nested card structure into ``newLi``.
+
+    Args:
+        newLi: output list, appended to in place.
+        li0: list or NumPy array whose items are card integers, ``Action``
+            objects, or further nested sequences.
+        printFun: optional predicate; an item is emitted only when it
+            returns a truthy value. For a nested sequence it receives the
+            already rendered sub-list.
+
+    NOTE(review): the predicate is not forwarded into nested levels; each
+    nested sequence is rendered in full and then filtered as a whole -
+    confirm this is intended.
+    """
+    for item in li0:
+        if isinstance(item,(int,np.integer)):
+            if printFun is None or printFun(item):
+                newLi.append(cardToString(item))
+        elif isinstance(item,Action):
+            if printFun is None or printFun(item):
+                newLi.append(item.toString())
+        else:
+            t=[]
+            __dfsPrintActList(t,item)
+            if printFun is None or printFun(t):
+                newLi.append(t)
+def dfsPrintActList(li,printFun=None):
+    """Print a card, an ``Action`` or a nested list of them in readable form.
+
+    Args:
+        li: a card integer (Python or any NumPy integer type), an
+            ``Action``, or a nested sequence of those.
+        printFun: optional predicate applied to the items of a sequence;
+            it is not applied when ``li`` itself is a single card or
+            ``Action``.
+    """
+    # np.integer covers every NumPy integer width, so 64-bit scalars take the
+    # single-card branch, matching the recursive helper.
+    if isinstance(li,(int,np.integer)):
+        newLi=[cardToString(li)]
+    elif isinstance(li,Action):
+        newLi=[li.toString()]
+    else:
+        newLi=[]
+        __dfsPrintActList(newLi,li,printFun)
+    print(newLi)
+
+def cardsListToAction(env,pid,cards):
+    """Build a sorted ``Action`` for player ``pid`` from a list of cards.
+
+    ``cards`` must all belong to one suit kind (per the original note).
+    The cards go through ``env.sortCardList2`` and ``env.sortCardList1``;
+    the result is read as three parts - presumably singles, pair cards and
+    tractors (confirm against the engine). Each pair card is stored twice,
+    and each tractor is expanded so every card appears twice in a row.
+    """
+    raw = env.sortCardList2(cards)
+    parts = env.sortCardList1(raw[0], raw[1], raw[2])
+    groups = [[card, card] for card in parts[1]]
+    for tra in parts[2]:
+        groups.append([tra[k] for k in range(len(tra)) for _ in range(2)])
+    result = Action(parts[0], groups, playerId=pid)
+    result.sort(env)
+    return result