Engine now uses MCTS algorithm.

author: InigoGutierrez <inigogf.95@gmail.com> 2021-06-17 20:13:41 +0200
committer: InigoGutierrez <inigogf.95@gmail.com> 2021-06-17 20:13:41 +0200
commit: 25e4e5fb3a77c8da65a2e7b42b7d77ba8f1b21a1 (patch)
tree: fc45d496d5a3b961ceaedd026afe8bb1f8c54994
parent: 4a39a8fd07e49db5feb0c403b784423f0b673f97 (diff)
download: imago-25e4e5fb3a77c8da65a2e7b42b7d77ba8f1b21a1.tar.gz
imago-25e4e5fb3a77c8da65a2e7b42b7d77ba8f1b21a1.zip
6 files changed, 170 insertions, 66 deletions
diff --git a/imago/engine/core.py b/imago/engine/core.py
index 48af241..3e52f8c 100644
--- a/imago/engine/core.py
+++ b/imago/engine/core.py
@@ -3,6 +3,7 @@
 from random import randrange
 
 from imago.data.enums import Player
+from imago.engine.monteCarlo import MCTS
 from imago.gameLogic.gameState import GameState
 
 DEF_SIZE = 7
@@ -14,6 +15,7 @@ class GameEngine:
     def __init__(self):
         self.komi = DEF_KOMI
         self.gameState = GameState(DEF_SIZE)
+        self.mcts = MCTS(self.gameState.lastMove)
 
     def setBoardsize(self, newSize):
         """Changes the size of the board.
@@ -47,30 +49,15 @@ class GameEngine:
         row = vertex[0]
         col = vertex[1]
         self.gameState.playMoveForPlayer(row, col, color)
+        self.mcts.forceNextMove(vertex)
 
     def genmove(self, color):
-        """The key of this TFG."""
-
-        # Get valid vertices to play at
-        validCells = []
-        board = self.gameState.getBoard().board
-        size = self.gameState.size
-        for row in range(size):
-            for col in range(size):
-                if board[row][col] == Player.EMPTY:
-                    # Don't play on eyes!
-                    if ( self.gameState.getBoard().getGroupCellsCount(row, col) != 1
-                        and self.gameState.getBoard().isCellEye(row, col) == Player.EMPTY ):
-                        validCells.append([row, col])
-        # Pass if no valid vertices
-        # Select a random vertex
-        randIndex = randrange(0, len(validCells))
-        move = validCells[randIndex]
-        self.gameState.playMoveForPlayer(move[0], move[1], color)
-        # NOTA: Esto usa gameState para hacer play, y en monteCarlo.py se usa GameMove
-        # para hacer add. Incoherente. Idealmente monteCarlo.py usaría un GameState en vez
-        # de acceder a los GameMove y gestionar un árbol...
-        return move
+        """Returns a list representing coordinates of the board in the form (row, col)."""
+        coords = self.mcts.pickMove().coords
+        #TODO: The move should NOT be played in its generation. This method is just for
+        #suggesting a move.
+        self.gameState.playMoveForPlayer(coords[0], coords[1], color)
+        return coords
 
     def undo(self):
         """The board configuration and number of captured stones are reset to the state
diff --git a/imago/engine/monteCarlo.py b/imago/engine/monteCarlo.py
index 6e5d54d..2a531b6 100644
--- a/imago/engine/monteCarlo.py
+++ b/imago/engine/monteCarlo.py
@@ -1,18 +1,49 @@
 """Monte Carlo Tree Search module."""
 
+import sys
+import random
+
+from imago.data.enums import Player
+
 class MCTS:
     """Monte Carlo tree."""
 
-    def __init__(self, root):
-        self.root = root
+    def __init__(self, move):
+        self.root = MCTSNode(move, None)
 
-    def selection(self):
-        """Select the most promising node with unexplored children."""
-        bestNode = self.root.selectionRec(self.root)
-        return bestNode
+    def forceNextMove(self, coords):
+        """Selects given move as next move."""
+        for node in self.root.children:
+            if (node.move.getRow() == coords[0]
+                and node.move.getCol() == coords[1]):
+                self.root = node
+                return
+        self.root = self.root.expansionForCoords(coords)
 
-    def backup(self, node):
-        """Update nodes backbards up to root."""
+    def pickMove(self):
+        """
+        Performs an exploratory cycle, updates the root to the best node and returns its
+        corresponding move."""
+        #NOTE: with only one selection-expansion the match is
+        #      completely random
+        for _ in range(5):
+            self.root.selection().expansion().simulation(10, 20)
+        self.root = self.selectBestNextNode()
+        return self.root.move
+
+    def selectBestNextNode(self):
+        """Returns best ucb node available for the current player."""
+
+        # Assumes at least one expansion has occured
+        bestUCB = -sys.maxsize - 1
+        bestNode = None
+        for node in self.root.children:
+            ucb = node.ucbForPlayer()
+            if ucb > bestUCB:
+                bestUCB = ucb
+                bestNode = node
+
+        return bestNode
 
 class MCTSNode:
     """Monte Carlo tree node."""
@@ -28,56 +59,92 @@ class MCTSNode:
     def ucb(self):
         """Returns Upper Confidence Bound of node"""
         # UCB = meanVictories + 1/visits
+        if self.visits == 0:
+            return 0
         mean = self.score / self.visits
         adjust = 1/self.visits
         return mean + adjust
 
+    def ucbForPlayer(self):
+        """
+        Returns Upper Confidence Bound of node changing the symbol if the move is for the
+        wite player."""
+
+        # Account for white player score being negative
+        if self.move.getPlayer() == Player.WHITE:
+            return self.ucb() * -1
+        return self.ucb()
+
+    def selection(self):
+        """Select the most promising node with unexplored children."""
+        bestNode = self.selectionRec(self)
+        return bestNode
+
     def selectionRec(self, bestNode):
         """Searches this node and its children for the node with the best UCB value."""
 
         # Check if node has unexplored children and better UCB than previously explored
         if len(self.unexploredVertices) > 0:
-            if self.ucb() > bestNode.ucb():
+            if self.ucbForPlayer() > bestNode.ucbForPlayer():
                 bestNode = self
 
         # Recursively search children for better UCB
         for child in self.children:
-            bestNode = child.selectionRec(bestNode)
+            bestChildNode = child.selectionRec(bestNode)
+            if bestChildNode.ucbForPlayer() > bestNode.ucbForPlayer():
+                bestNode = bestChildNode
 
         return bestNode
 
     def expansion(self):
         """Pick an unexplored vertex from this node and add it as a new MCTSNode."""
-        newVertex = self.unexploredVertices.pop() # Random?
-        newMove = self.move.addMove(newVertex[0], newVertex[1])
+        newVertex = random.choice(list(self.unexploredVertices))
+        return self.expansionForCoords(newVertex)
+
+    def expansionForCoords(self, coords):
+        """
+        Adds a move for the given coordinates as a new node to the children of this
+        node."""
+        newMove = self.move.addMove(coords[0], coords[1])
         newNode = MCTSNode(newMove, self)
         self.children.add(newNode)
+        self.unexploredVertices.remove((coords[0], coords[1]))
         return newNode
 
-    def simulation(self):
+    def simulation(self, nMatches, scoreDiffHeur):
         """Play random matches to accumulate reward information on the node."""
-        matches = 10
-        for _ in range(matches):
-            result = self._randomMatch()
+        scoreAcc = 0
+        for _ in range(nMatches):
+            result = self._randomMatch(scoreDiffHeur)
             self.visits += 1
             scoreDiff = result[0]-result[1]
-            self.score += scoreDiff / abs(scoreDiff)
-            self._printBoardInfo()
+            if scoreDiff != 0:
+                scoreAcc += scoreDiff / abs(scoreDiff)
+        # Backup
+        node = self
+        while node is not None:
+            node.score += scoreAcc
+            node.visits += nMatches
+            node = node.parent
 
-    def _randomMatch(self):
+    def _randomMatch(self, scoreDiffHeur):
         """Play a random match and return the resulting score."""
         #IMPORTANT: the score heuristic doesn't work for the first move of the game, since
         #the black player holds all except for one vertex!
         currentMove = self.move
-        scoreHeuristic = 15
         score = currentMove.board.score()
-        while currentMove.getGameLength() < 5 or abs(score[0] - score[1]) < scoreHeuristic:
-            validMoves = currentMove.getPlayableVertices()
-            selectedMove = validMoves.pop()
-            currentMove = currentMove.addMove(selectedMove[0], selectedMove[1])
-            print("Current move: %d, %d" % (currentMove.getRow(), currentMove.getCol()))
-            print("Current move game length: ", currentMove.getGameLength())
+        while currentMove.getGameLength() < 5 or abs(score[0] - score[1]) < scoreDiffHeur:
+            if currentMove.isPass and currentMove.previousMove.isPass:
+                return score
+            sensibleMoves = currentMove.getSensibleVertices()
+            if len(sensibleMoves) == 0:
+                currentMove = currentMove.addPass()
+            else:
+                selectedMove = random.choice(list(sensibleMoves))
+                currentMove = currentMove.addMoveByCoords(selectedMove)
             score = currentMove.board.score()
+            print("Current move: %s" % (currentMove.toString()))
+            print("Current move game length: ", currentMove.getGameLength())
             print("Score of the board: %d, %d (%d)"
                     % (score[0],
                         score[1],
diff --git a/imago/gameLogic/gameBoard.py b/imago/gameLogic/gameBoard.py
index eaa1545..74098ed 100644
--- a/imago/gameLogic/gameBoard.py
+++ b/imago/gameLogic/gameBoard.py
@@ -226,6 +226,16 @@ class GameBoard:
             return False, "Illegal by ko rule."
         return True, ""
 
+    def isSensible(self, row, col, player, prevBoards):
+        """Determines if a move is playable and sensible."""
+        playable, playableText = self.isPlayable(row, col, player, prevBoards)
+        if not playable:
+            return playable, playableText
+        if ( self.getGroupCellsCount(row, col) == 1
+            and self.isCellEye(row, col) == player ):
+            return False, "Move fills own eye."""
+        return True, ""
+
     def score(self):
         """Gets the current score given by the already surrounded territory for Japanese
         rules. The format of the returned score is (black, white).
diff --git a/imago/gameLogic/gameMove.py b/imago/gameLogic/gameMove.py
index 224e120..6d2d464 100644
--- a/imago/gameLogic/gameMove.py
+++ b/imago/gameLogic/gameMove.py
@@ -7,12 +7,13 @@ class GameMove:
     Tree: the board can be empty, or the move can consist of more than one added or
     removed stones."""
 
-    def __init__(self, board, coords=None, isPass=False):
+    def __init__(self, board, coords=None, isPass=False, playerWhoPassed=None):
         self.board = board
         self.nextMoves = []
         self.previousMove = None
         self.isPass = isPass
         self.coords = coords
+        self.playerWhoPassed = playerWhoPassed
 
     def getRow(self):
         """Returns the row of the vertex the move was played on."""
@@ -31,7 +32,7 @@ class GameMove:
         if self.isPass:
             if self.previousMove is None:
                 return Player.BLACK
-            return Player.otherPlayer(self.previousMove.getPlayer())
+            return self.playerWhoPassed
 
         if self.coords is None: # Not pass and no coordinates: root move of the tree
             return Player.EMPTY
@@ -65,15 +66,30 @@ class GameMove:
 
     def getPlayableVertices(self):
         """Returns a set with the playable vertices."""
-        playables = set()
+        return self._getVerticesByFilter(self.board.isPlayable)
+
+    def getSensibleVertices(self):
+        """Returns a set with the sensible vertices."""
+        return self._getVerticesByFilter(self.board.isSensible)
+
+    def _getVerticesByFilter(self, filterFunction):
+        """Returns a set with the vertices which fill a requirement."""
+        vertices = set()
         player = self.getNextPlayer()
         prevBoards = self.getThisAndPrevBoards()
         for row in range(self.board.getBoardHeight()):
             for col in range(self.board.getBoardWidth()):
-                isPlayable, _ = self.board.isPlayable(row, col, player, prevBoards)
-                if isPlayable:
-                    playables.add((row, col))
-        return playables
+                valid, _ = filterFunction(row, col, player, prevBoards)
+                if valid:
+                    vertices.add((row, col))
+        return vertices
+
+    def addMoveByCoords(self, coords):
+        """Adds a move to the next moves list creating its board from this move's board
+        plus a new stone at the specified coordinates.
+        """
+        return self.addMove(coords[0], coords[1])
+
 
     def addMove(self, row, col):
         """Adds a move to the next moves list creating its board from this move's board
@@ -98,12 +114,24 @@ class GameMove:
 
     def addPass(self):
         """Adds a pass move to the next moves list."""
+        return self.addPassForPlayer(self.getNextPlayer())
+
+    def addPassForPlayer(self, player):
+        """Adds a pass move for the given player to the next moves list."""
         newBoard = self.board.getDeepCopy()
-        newMove = GameMove(newBoard, True)
+        newMove = GameMove(newBoard, isPass=True, playerWhoPassed=player)
         newMove.previousMove = self
         self.nextMoves.append(newMove)
         return newMove
 
+    def toString(self):
+        """Returns the coordinates of the move as a string."""
+        if self.isPass:
+            return "Pass"
+        if self.coords is None:
+            return "Root move"
+        return "(%d, %d)" % (self.getRow(), self.getCol())
+
     def printBoard(self):
         """Prints the board as of this move."""
         self.board.printBoard()
diff --git a/imago/gameLogic/gameState.py b/imago/gameLogic/gameState.py
index 55bac5f..d596ba1 100644
--- a/imago/gameLogic/gameState.py
+++ b/imago/gameLogic/gameState.py
@@ -49,9 +49,13 @@ class GameState:
         return False
 
     def playPass(self):
-        """Passes the turn for the given player."""
+        """Passes the turn for the player who should make a move."""
         self.lastMove.addPass()
 
+    def passForPlayer(self, player):
+        """Passes the turn for the given player."""
+        self.lastMove.addPassForPlayer(player)
+
     def undo(self):
         """Sets the move before the last move as the new last move."""
         self.lastMove = self.lastMove.previousMove
diff --git a/tests/test_monteCarlo.py b/tests/test_monteCarlo.py
index 8fe6f47..283c657 100644
--- a/tests/test_monteCarlo.py
+++ b/tests/test_monteCarlo.py
@@ -2,21 +2,29 @@
 
 import unittest
 
-from imago.gameLogic.gameBoard import GameBoard
-from imago.gameLogic.gameMove import GameMove
+from imago.gameLogic.gameState import GameState
+from imago.engine.monteCarlo import MCTS
 from imago.engine.monteCarlo import MCTSNode
 
-TEST_BOARD_SIZE = 19
+TEST_BOARD_SIZE = 9
 
 class TestMonteCarlo(unittest.TestCase):
     """Test MonteCarlo algorithm."""
 
-    def testSimulation(self):
-        """Test calculation of group liberties."""
-        board = GameBoard(TEST_BOARD_SIZE, TEST_BOARD_SIZE)
-        move = GameMove(board)
-        node = MCTSNode(move, None)
-        node.simulation()
+    def testPickMove(self):
+        """Test picking a move."""
+        state = GameState(TEST_BOARD_SIZE)
+        tree = MCTS(state)
+        print(tree.pickMove().toString())
+
+    #def testSimulation(self):
+    #    """Test calculation of group liberties."""
+    #    board = GameBoard(TEST_BOARD_SIZE, TEST_BOARD_SIZE)
+    #    move = GameMove(board)
+    #    node = MCTSNode(move, None)
+    #    nMatches = 100
+    #    scoreDiffHeur = 15
+    #    node.simulation(nMatches, scoreDiffHeur)
 
 if __name__ == '__main__':
     unittest.main()
author	InigoGutierrez <inigogf.95@gmail.com>	2021-06-17 20:13:41 +0200
committer	InigoGutierrez <inigogf.95@gmail.com>	2021-06-17 20:13:41 +0200
commit	25e4e5fb3a77c8da65a2e7b42b7d77ba8f1b21a1 (patch)
tree	fc45d496d5a3b961ceaedd026afe8bb1f8c54994
parent	4a39a8fd07e49db5feb0c403b784423f0b673f97 (diff)
download	imago-25e4e5fb3a77c8da65a2e7b42b7d77ba8f1b21a1.tar.gz imago-25e4e5fb3a77c8da65a2e7b42b7d77ba8f1b21a1.zip