From 25e4e5fb3a77c8da65a2e7b42b7d77ba8f1b21a1 Mon Sep 17 00:00:00 2001 From: InigoGutierrez Date: Thu, 17 Jun 2021 20:13:41 +0200 Subject: Engine now uses MCTS algorithm. --- imago/engine/core.py | 31 ++++------- imago/engine/monteCarlo.py | 119 +++++++++++++++++++++++++++++++++---------- imago/gameLogic/gameBoard.py | 10 ++++ imago/gameLogic/gameMove.py | 44 +++++++++++++--- imago/gameLogic/gameState.py | 6 ++- tests/test_monteCarlo.py | 26 ++++++---- 6 files changed, 170 insertions(+), 66 deletions(-) diff --git a/imago/engine/core.py b/imago/engine/core.py index 48af241..3e52f8c 100644 --- a/imago/engine/core.py +++ b/imago/engine/core.py @@ -3,6 +3,7 @@ from random import randrange from imago.data.enums import Player +from imago.engine.monteCarlo import MCTS from imago.gameLogic.gameState import GameState DEF_SIZE = 7 @@ -14,6 +15,7 @@ class GameEngine: def __init__(self): self.komi = DEF_KOMI self.gameState = GameState(DEF_SIZE) + self.mcts = MCTS(self.gameState.lastMove) def setBoardsize(self, newSize): """Changes the size of the board. @@ -47,30 +49,15 @@ class GameEngine: row = vertex[0] col = vertex[1] self.gameState.playMoveForPlayer(row, col, color) + self.mcts.forceNextMove(vertex) def genmove(self, color): - """The key of this TFG.""" - - # Get valid vertices to play at - validCells = [] - board = self.gameState.getBoard().board - size = self.gameState.size - for row in range(size): - for col in range(size): - if board[row][col] == Player.EMPTY: - # Don't play on eyes! - if ( self.gameState.getBoard().getGroupCellsCount(row, col) != 1 - and self.gameState.getBoard().isCellEye(row, col) == Player.EMPTY ): - validCells.append([row, col]) - # Pass if no valid vertices - # Select a random vertex - randIndex = randrange(0, len(validCells)) - move = validCells[randIndex] - self.gameState.playMoveForPlayer(move[0], move[1], color) - # NOTA: Esto usa gameState para hacer play, y en monteCarlo.py se usa GameMove - # para hacer add. Incoherente. Idealmente monteCarlo.py usaría un GameState en vez - # de acceder a los GameMove y gestionar un árbol... - return move + """Returns a list representing coordinates of the board in the form (row, col).""" + coords = self.mcts.pickMove().coords + #TODO: The move should NOT be played in its generation. This method is just for + #suggesting a move. + self.gameState.playMoveForPlayer(coords[0], coords[1], color) + return coords def undo(self): """The board configuration and number of captured stones are reset to the state diff --git a/imago/engine/monteCarlo.py b/imago/engine/monteCarlo.py index 6e5d54d..2a531b6 100644 --- a/imago/engine/monteCarlo.py +++ b/imago/engine/monteCarlo.py @@ -1,18 +1,49 @@ """Monte Carlo Tree Search module.""" +import sys +import random + +from imago.data.enums import Player + class MCTS: """Monte Carlo tree.""" - def __init__(self, root): - self.root = root + def __init__(self, move): + self.root = MCTSNode(move, None) - def selection(self): - """Select the most promising node with unexplored children.""" - bestNode = self.root.selectionRec(self.root) - return bestNode + def forceNextMove(self, coords): + """Selects given move as next move.""" + for node in self.root.children: + if (node.move.getRow() == coords[0] + and node.move.getCol() == coords[1]): + self.root = node + return + self.root = self.root.expansionForCoords(coords) - def backup(self, node): - """Update nodes backbards up to root.""" + def pickMove(self): + """ + Performs an exploratory cycle, updates the root to the best node and returns its + corresponding move.""" + #NOTE: with only one selection-expansion the match is + # completely random + for _ in range(5): + self.root.selection().expansion().simulation(10, 20) + self.root = self.selectBestNextNode() + return self.root.move + + def selectBestNextNode(self): + """Returns best ucb node available for the current player.""" + + # Assumes at least one expansion has occured + bestUCB = -sys.maxsize - 1 + bestNode = None + for node in self.root.children: + ucb = node.ucbForPlayer() + if ucb > bestUCB: + bestUCB = ucb + bestNode = node + + return bestNode class MCTSNode: """Monte Carlo tree node.""" @@ -28,56 +59,92 @@ class MCTSNode: def ucb(self): """Returns Upper Confidence Bound of node""" # UCB = meanVictories + 1/visits + if self.visits == 0: + return 0 mean = self.score / self.visits adjust = 1/self.visits return mean + adjust + def ucbForPlayer(self): + """ + Returns Upper Confidence Bound of node changing the symbol if the move is for the + wite player.""" + + # Account for white player score being negative + if self.move.getPlayer() == Player.WHITE: + return self.ucb() * -1 + return self.ucb() + + def selection(self): + """Select the most promising node with unexplored children.""" + bestNode = self.selectionRec(self) + return bestNode + def selectionRec(self, bestNode): """Searches this node and its children for the node with the best UCB value.""" # Check if node has unexplored children and better UCB than previously explored if len(self.unexploredVertices) > 0: - if self.ucb() > bestNode.ucb(): + if self.ucbForPlayer() > bestNode.ucbForPlayer(): bestNode = self # Recursively search children for better UCB for child in self.children: - bestNode = child.selectionRec(bestNode) + bestChildNode = child.selectionRec(bestNode) + if bestChildNode.ucbForPlayer() > bestNode.ucbForPlayer(): + bestNode = bestChildNode return bestNode def expansion(self): """Pick an unexplored vertex from this node and add it as a new MCTSNode.""" - newVertex = self.unexploredVertices.pop() # Random? - newMove = self.move.addMove(newVertex[0], newVertex[1]) + newVertex = random.choice(list(self.unexploredVertices)) + return self.expansionForCoords(newVertex) + + def expansionForCoords(self, coords): + """ + Adds a move for the given coordinates as a new node to the children of this + node.""" + newMove = self.move.addMove(coords[0], coords[1]) newNode = MCTSNode(newMove, self) self.children.add(newNode) + self.unexploredVertices.remove((coords[0], coords[1])) return newNode - def simulation(self): + def simulation(self, nMatches, scoreDiffHeur): """Play random matches to accumulate reward information on the node.""" - matches = 10 - for _ in range(matches): - result = self._randomMatch() + scoreAcc = 0 + for _ in range(nMatches): + result = self._randomMatch(scoreDiffHeur) self.visits += 1 scoreDiff = result[0]-result[1] - self.score += scoreDiff / abs(scoreDiff) - self._printBoardInfo() + if scoreDiff != 0: + scoreAcc += scoreDiff / abs(scoreDiff) + # Backup + node = self + while node is not None: + node.score += scoreAcc + node.visits += nMatches + node = node.parent - def _randomMatch(self): + def _randomMatch(self, scoreDiffHeur): """Play a random match and return the resulting score.""" #IMPORTANT: the score heuristic doesn't work for the first move of the game, since #the black player holds all except for one vertex! currentMove = self.move - scoreHeuristic = 15 score = currentMove.board.score() - while currentMove.getGameLength() < 5 or abs(score[0] - score[1]) < scoreHeuristic: - validMoves = currentMove.getPlayableVertices() - selectedMove = validMoves.pop() - currentMove = currentMove.addMove(selectedMove[0], selectedMove[1]) - print("Current move: %d, %d" % (currentMove.getRow(), currentMove.getCol())) - print("Current move game length: ", currentMove.getGameLength()) + while currentMove.getGameLength() < 5 or abs(score[0] - score[1]) < scoreDiffHeur: + if currentMove.isPass and currentMove.previousMove.isPass: + return score + sensibleMoves = currentMove.getSensibleVertices() + if len(sensibleMoves) == 0: + currentMove = currentMove.addPass() + else: + selectedMove = random.choice(list(sensibleMoves)) + currentMove = currentMove.addMoveByCoords(selectedMove) score = currentMove.board.score() + print("Current move: %s" % (currentMove.toString())) + print("Current move game length: ", currentMove.getGameLength()) print("Score of the board: %d, %d (%d)" % (score[0], score[1], diff --git a/imago/gameLogic/gameBoard.py b/imago/gameLogic/gameBoard.py index eaa1545..74098ed 100644 --- a/imago/gameLogic/gameBoard.py +++ b/imago/gameLogic/gameBoard.py @@ -226,6 +226,16 @@ class GameBoard: return False, "Illegal by ko rule." return True, "" + def isSensible(self, row, col, player, prevBoards): + """Determines if a move is playable and sensible.""" + playable, playableText = self.isPlayable(row, col, player, prevBoards) + if not playable: + return playable, playableText + if ( self.getGroupCellsCount(row, col) == 1 + and self.isCellEye(row, col) == player ): + return False, "Move fills own eye.""" + return True, "" + def score(self): """Gets the current score given by the already surrounded territory for Japanese rules. The format of the returned score is (black, white). diff --git a/imago/gameLogic/gameMove.py b/imago/gameLogic/gameMove.py index 224e120..6d2d464 100644 --- a/imago/gameLogic/gameMove.py +++ b/imago/gameLogic/gameMove.py @@ -7,12 +7,13 @@ class GameMove: Tree: the board can be empty, or the move can consist of more than one added or removed stones.""" - def __init__(self, board, coords=None, isPass=False): + def __init__(self, board, coords=None, isPass=False, playerWhoPassed=None): self.board = board self.nextMoves = [] self.previousMove = None self.isPass = isPass self.coords = coords + self.playerWhoPassed = playerWhoPassed def getRow(self): """Returns the row of the vertex the move was played on.""" @@ -31,7 +32,7 @@ class GameMove: if self.isPass: if self.previousMove is None: return Player.BLACK - return Player.otherPlayer(self.previousMove.getPlayer()) + return self.playerWhoPassed if self.coords is None: # Not pass and no coordinates: root move of the tree return Player.EMPTY @@ -65,15 +66,30 @@ class GameMove: def getPlayableVertices(self): """Returns a set with the playable vertices.""" - playables = set() + return self._getVerticesByFilter(self.board.isPlayable) + + def getSensibleVertices(self): + """Returns a set with the sensible vertices.""" + return self._getVerticesByFilter(self.board.isSensible) + + def _getVerticesByFilter(self, filterFunction): + """Returns a set with the vertices which fill a requirement.""" + vertices = set() player = self.getNextPlayer() prevBoards = self.getThisAndPrevBoards() for row in range(self.board.getBoardHeight()): for col in range(self.board.getBoardWidth()): - isPlayable, _ = self.board.isPlayable(row, col, player, prevBoards) - if isPlayable: - playables.add((row, col)) - return playables + valid, _ = filterFunction(row, col, player, prevBoards) + if valid: + vertices.add((row, col)) + return vertices + + def addMoveByCoords(self, coords): + """Adds a move to the next moves list creating its board from this move's board + plus a new stone at the specified coordinates. + """ + return self.addMove(coords[0], coords[1]) + def addMove(self, row, col): """Adds a move to the next moves list creating its board from this move's board @@ -98,12 +114,24 @@ class GameMove: def addPass(self): """Adds a pass move to the next moves list.""" + return self.addPassForPlayer(self.getNextPlayer()) + + def addPassForPlayer(self, player): + """Adds a pass move for the given player to the next moves list.""" newBoard = self.board.getDeepCopy() - newMove = GameMove(newBoard, True) + newMove = GameMove(newBoard, isPass=True, playerWhoPassed=player) newMove.previousMove = self self.nextMoves.append(newMove) return newMove + def toString(self): + """Returns the coordinates of the move as a string.""" + if self.isPass: + return "Pass" + if self.coords is None: + return "Root move" + return "(%d, %d)" % (self.getRow(), self.getCol()) + def printBoard(self): """Prints the board as of this move.""" self.board.printBoard() diff --git a/imago/gameLogic/gameState.py b/imago/gameLogic/gameState.py index 55bac5f..d596ba1 100644 --- a/imago/gameLogic/gameState.py +++ b/imago/gameLogic/gameState.py @@ -49,9 +49,13 @@ class GameState: return False def playPass(self): - """Passes the turn for the given player.""" + """Passes the turn for the player who should make a move.""" self.lastMove.addPass() + def passForPlayer(self, player): + """Passes the turn for the given player.""" + self.lastMove.addPassForPlayer(player) + def undo(self): """Sets the move before the last move as the new last move.""" self.lastMove = self.lastMove.previousMove diff --git a/tests/test_monteCarlo.py b/tests/test_monteCarlo.py index 8fe6f47..283c657 100644 --- a/tests/test_monteCarlo.py +++ b/tests/test_monteCarlo.py @@ -2,21 +2,29 @@ import unittest -from imago.gameLogic.gameBoard import GameBoard -from imago.gameLogic.gameMove import GameMove +from imago.gameLogic.gameState import GameState +from imago.engine.monteCarlo import MCTS from imago.engine.monteCarlo import MCTSNode -TEST_BOARD_SIZE = 19 +TEST_BOARD_SIZE = 9 class TestMonteCarlo(unittest.TestCase): """Test MonteCarlo algorithm.""" - def testSimulation(self): - """Test calculation of group liberties.""" - board = GameBoard(TEST_BOARD_SIZE, TEST_BOARD_SIZE) - move = GameMove(board) - node = MCTSNode(move, None) - node.simulation() + def testPickMove(self): + """Test picking a move.""" + state = GameState(TEST_BOARD_SIZE) + tree = MCTS(state) + print(tree.pickMove().toString()) + + #def testSimulation(self): + # """Test calculation of group liberties.""" + # board = GameBoard(TEST_BOARD_SIZE, TEST_BOARD_SIZE) + # move = GameMove(board) + # node = MCTSNode(move, None) + # nMatches = 100 + # scoreDiffHeur = 15 + # node.simulation(nMatches, scoreDiffHeur) if __name__ == '__main__': unittest.main() -- cgit v1.2.1