aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorInigoGutierrez <inigogf.95@gmail.com>2021-06-17 20:13:41 +0200
committerInigoGutierrez <inigogf.95@gmail.com>2021-06-17 20:13:41 +0200
commit25e4e5fb3a77c8da65a2e7b42b7d77ba8f1b21a1 (patch)
treefc45d496d5a3b961ceaedd026afe8bb1f8c54994
parent4a39a8fd07e49db5feb0c403b784423f0b673f97 (diff)
downloadimago-25e4e5fb3a77c8da65a2e7b42b7d77ba8f1b21a1.tar.gz
imago-25e4e5fb3a77c8da65a2e7b42b7d77ba8f1b21a1.zip
Engine now uses MCTS algorithm.
-rw-r--r--imago/engine/core.py31
-rw-r--r--imago/engine/monteCarlo.py119
-rw-r--r--imago/gameLogic/gameBoard.py10
-rw-r--r--imago/gameLogic/gameMove.py44
-rw-r--r--imago/gameLogic/gameState.py6
-rw-r--r--tests/test_monteCarlo.py26
6 files changed, 170 insertions, 66 deletions
diff --git a/imago/engine/core.py b/imago/engine/core.py
index 48af241..3e52f8c 100644
--- a/imago/engine/core.py
+++ b/imago/engine/core.py
@@ -3,6 +3,7 @@
from random import randrange
from imago.data.enums import Player
+from imago.engine.monteCarlo import MCTS
from imago.gameLogic.gameState import GameState
DEF_SIZE = 7
@@ -14,6 +15,7 @@ class GameEngine:
def __init__(self):
self.komi = DEF_KOMI
self.gameState = GameState(DEF_SIZE)
+ self.mcts = MCTS(self.gameState.lastMove)
def setBoardsize(self, newSize):
"""Changes the size of the board.
@@ -47,30 +49,15 @@ class GameEngine:
row = vertex[0]
col = vertex[1]
self.gameState.playMoveForPlayer(row, col, color)
+ self.mcts.forceNextMove(vertex)
def genmove(self, color):
- """The key of this TFG."""
-
- # Get valid vertices to play at
- validCells = []
- board = self.gameState.getBoard().board
- size = self.gameState.size
- for row in range(size):
- for col in range(size):
- if board[row][col] == Player.EMPTY:
- # Don't play on eyes!
- if ( self.gameState.getBoard().getGroupCellsCount(row, col) != 1
- and self.gameState.getBoard().isCellEye(row, col) == Player.EMPTY ):
- validCells.append([row, col])
- # Pass if no valid vertices
- # Select a random vertex
- randIndex = randrange(0, len(validCells))
- move = validCells[randIndex]
- self.gameState.playMoveForPlayer(move[0], move[1], color)
- # NOTA: Esto usa gameState para hacer play, y en monteCarlo.py se usa GameMove
- # para hacer add. Incoherente. Idealmente monteCarlo.py usaría un GameState en vez
- # de acceder a los GameMove y gestionar un árbol...
- return move
+ """Returns a list representing coordinates of the board in the form (row, col)."""
+ coords = self.mcts.pickMove().coords
+ #TODO: The move should NOT be played in its generation. This method is just for
+ #suggesting a move.
+ self.gameState.playMoveForPlayer(coords[0], coords[1], color)
+ return coords
def undo(self):
"""The board configuration and number of captured stones are reset to the state
diff --git a/imago/engine/monteCarlo.py b/imago/engine/monteCarlo.py
index 6e5d54d..2a531b6 100644
--- a/imago/engine/monteCarlo.py
+++ b/imago/engine/monteCarlo.py
@@ -1,18 +1,49 @@
"""Monte Carlo Tree Search module."""
+import sys
+import random
+
+from imago.data.enums import Player
+
class MCTS:
"""Monte Carlo tree."""
- def __init__(self, root):
- self.root = root
+ def __init__(self, move):
+ self.root = MCTSNode(move, None)
- def selection(self):
- """Select the most promising node with unexplored children."""
- bestNode = self.root.selectionRec(self.root)
- return bestNode
+ def forceNextMove(self, coords):
+ """Selects given move as next move."""
+ for node in self.root.children:
+ if (node.move.getRow() == coords[0]
+ and node.move.getCol() == coords[1]):
+ self.root = node
+ return
+ self.root = self.root.expansionForCoords(coords)
- def backup(self, node):
- """Update nodes backbards up to root."""
+ def pickMove(self):
+ """
+ Performs an exploratory cycle, updates the root to the best node and returns its
+ corresponding move."""
+ #NOTE: with only one selection-expansion the match is
+ # completely random
+ for _ in range(5):
+ self.root.selection().expansion().simulation(10, 20)
+ self.root = self.selectBestNextNode()
+ return self.root.move
+
+ def selectBestNextNode(self):
+ """Returns best ucb node available for the current player."""
+
+ # Assumes at least one expansion has occured
+ bestUCB = -sys.maxsize - 1
+ bestNode = None
+ for node in self.root.children:
+ ucb = node.ucbForPlayer()
+ if ucb > bestUCB:
+ bestUCB = ucb
+ bestNode = node
+
+ return bestNode
class MCTSNode:
"""Monte Carlo tree node."""
@@ -28,56 +59,92 @@ class MCTSNode:
def ucb(self):
"""Returns Upper Confidence Bound of node"""
# UCB = meanVictories + 1/visits
+ if self.visits == 0:
+ return 0
mean = self.score / self.visits
adjust = 1/self.visits
return mean + adjust
+ def ucbForPlayer(self):
+ """
+ Returns Upper Confidence Bound of node changing the symbol if the move is for the
+ wite player."""
+
+ # Account for white player score being negative
+ if self.move.getPlayer() == Player.WHITE:
+ return self.ucb() * -1
+ return self.ucb()
+
+ def selection(self):
+ """Select the most promising node with unexplored children."""
+ bestNode = self.selectionRec(self)
+ return bestNode
+
def selectionRec(self, bestNode):
"""Searches this node and its children for the node with the best UCB value."""
# Check if node has unexplored children and better UCB than previously explored
if len(self.unexploredVertices) > 0:
- if self.ucb() > bestNode.ucb():
+ if self.ucbForPlayer() > bestNode.ucbForPlayer():
bestNode = self
# Recursively search children for better UCB
for child in self.children:
- bestNode = child.selectionRec(bestNode)
+ bestChildNode = child.selectionRec(bestNode)
+ if bestChildNode.ucbForPlayer() > bestNode.ucbForPlayer():
+ bestNode = bestChildNode
return bestNode
def expansion(self):
"""Pick an unexplored vertex from this node and add it as a new MCTSNode."""
- newVertex = self.unexploredVertices.pop() # Random?
- newMove = self.move.addMove(newVertex[0], newVertex[1])
+ newVertex = random.choice(list(self.unexploredVertices))
+ return self.expansionForCoords(newVertex)
+
+ def expansionForCoords(self, coords):
+ """
+ Adds a move for the given coordinates as a new node to the children of this
+ node."""
+ newMove = self.move.addMove(coords[0], coords[1])
newNode = MCTSNode(newMove, self)
self.children.add(newNode)
+ self.unexploredVertices.remove((coords[0], coords[1]))
return newNode
- def simulation(self):
+ def simulation(self, nMatches, scoreDiffHeur):
"""Play random matches to accumulate reward information on the node."""
- matches = 10
- for _ in range(matches):
- result = self._randomMatch()
+ scoreAcc = 0
+ for _ in range(nMatches):
+ result = self._randomMatch(scoreDiffHeur)
self.visits += 1
scoreDiff = result[0]-result[1]
- self.score += scoreDiff / abs(scoreDiff)
- self._printBoardInfo()
+ if scoreDiff != 0:
+ scoreAcc += scoreDiff / abs(scoreDiff)
+ # Backup
+ node = self
+ while node is not None:
+ node.score += scoreAcc
+ node.visits += nMatches
+ node = node.parent
- def _randomMatch(self):
+ def _randomMatch(self, scoreDiffHeur):
"""Play a random match and return the resulting score."""
#IMPORTANT: the score heuristic doesn't work for the first move of the game, since
#the black player holds all except for one vertex!
currentMove = self.move
- scoreHeuristic = 15
score = currentMove.board.score()
- while currentMove.getGameLength() < 5 or abs(score[0] - score[1]) < scoreHeuristic:
- validMoves = currentMove.getPlayableVertices()
- selectedMove = validMoves.pop()
- currentMove = currentMove.addMove(selectedMove[0], selectedMove[1])
- print("Current move: %d, %d" % (currentMove.getRow(), currentMove.getCol()))
- print("Current move game length: ", currentMove.getGameLength())
+ while currentMove.getGameLength() < 5 or abs(score[0] - score[1]) < scoreDiffHeur:
+ if currentMove.isPass and currentMove.previousMove.isPass:
+ return score
+ sensibleMoves = currentMove.getSensibleVertices()
+ if len(sensibleMoves) == 0:
+ currentMove = currentMove.addPass()
+ else:
+ selectedMove = random.choice(list(sensibleMoves))
+ currentMove = currentMove.addMoveByCoords(selectedMove)
score = currentMove.board.score()
+ print("Current move: %s" % (currentMove.toString()))
+ print("Current move game length: ", currentMove.getGameLength())
print("Score of the board: %d, %d (%d)"
% (score[0],
score[1],
diff --git a/imago/gameLogic/gameBoard.py b/imago/gameLogic/gameBoard.py
index eaa1545..74098ed 100644
--- a/imago/gameLogic/gameBoard.py
+++ b/imago/gameLogic/gameBoard.py
@@ -226,6 +226,16 @@ class GameBoard:
return False, "Illegal by ko rule."
return True, ""
+ def isSensible(self, row, col, player, prevBoards):
+ """Determines if a move is playable and sensible."""
+ playable, playableText = self.isPlayable(row, col, player, prevBoards)
+ if not playable:
+ return playable, playableText
+ if ( self.getGroupCellsCount(row, col) == 1
+ and self.isCellEye(row, col) == player ):
+ return False, "Move fills own eye."""
+ return True, ""
+
def score(self):
"""Gets the current score given by the already surrounded territory for Japanese
rules. The format of the returned score is (black, white).
diff --git a/imago/gameLogic/gameMove.py b/imago/gameLogic/gameMove.py
index 224e120..6d2d464 100644
--- a/imago/gameLogic/gameMove.py
+++ b/imago/gameLogic/gameMove.py
@@ -7,12 +7,13 @@ class GameMove:
Tree: the board can be empty, or the move can consist of more than one added or
removed stones."""
- def __init__(self, board, coords=None, isPass=False):
+ def __init__(self, board, coords=None, isPass=False, playerWhoPassed=None):
self.board = board
self.nextMoves = []
self.previousMove = None
self.isPass = isPass
self.coords = coords
+ self.playerWhoPassed = playerWhoPassed
def getRow(self):
"""Returns the row of the vertex the move was played on."""
@@ -31,7 +32,7 @@ class GameMove:
if self.isPass:
if self.previousMove is None:
return Player.BLACK
- return Player.otherPlayer(self.previousMove.getPlayer())
+ return self.playerWhoPassed
if self.coords is None: # Not pass and no coordinates: root move of the tree
return Player.EMPTY
@@ -65,15 +66,30 @@ class GameMove:
def getPlayableVertices(self):
"""Returns a set with the playable vertices."""
- playables = set()
+ return self._getVerticesByFilter(self.board.isPlayable)
+
+ def getSensibleVertices(self):
+ """Returns a set with the sensible vertices."""
+ return self._getVerticesByFilter(self.board.isSensible)
+
+ def _getVerticesByFilter(self, filterFunction):
+ """Returns a set with the vertices which fill a requirement."""
+ vertices = set()
player = self.getNextPlayer()
prevBoards = self.getThisAndPrevBoards()
for row in range(self.board.getBoardHeight()):
for col in range(self.board.getBoardWidth()):
- isPlayable, _ = self.board.isPlayable(row, col, player, prevBoards)
- if isPlayable:
- playables.add((row, col))
- return playables
+ valid, _ = filterFunction(row, col, player, prevBoards)
+ if valid:
+ vertices.add((row, col))
+ return vertices
+
+ def addMoveByCoords(self, coords):
+ """Adds a move to the next moves list creating its board from this move's board
+ plus a new stone at the specified coordinates.
+ """
+ return self.addMove(coords[0], coords[1])
+
def addMove(self, row, col):
"""Adds a move to the next moves list creating its board from this move's board
@@ -98,12 +114,24 @@ class GameMove:
def addPass(self):
"""Adds a pass move to the next moves list."""
+ return self.addPassForPlayer(self.getNextPlayer())
+
+ def addPassForPlayer(self, player):
+ """Adds a pass move for the given player to the next moves list."""
newBoard = self.board.getDeepCopy()
- newMove = GameMove(newBoard, True)
+ newMove = GameMove(newBoard, isPass=True, playerWhoPassed=player)
newMove.previousMove = self
self.nextMoves.append(newMove)
return newMove
+ def toString(self):
+ """Returns the coordinates of the move as a string."""
+ if self.isPass:
+ return "Pass"
+ if self.coords is None:
+ return "Root move"
+ return "(%d, %d)" % (self.getRow(), self.getCol())
+
def printBoard(self):
"""Prints the board as of this move."""
self.board.printBoard()
diff --git a/imago/gameLogic/gameState.py b/imago/gameLogic/gameState.py
index 55bac5f..d596ba1 100644
--- a/imago/gameLogic/gameState.py
+++ b/imago/gameLogic/gameState.py
@@ -49,9 +49,13 @@ class GameState:
return False
def playPass(self):
- """Passes the turn for the given player."""
+ """Passes the turn for the player who should make a move."""
self.lastMove.addPass()
+ def passForPlayer(self, player):
+ """Passes the turn for the given player."""
+ self.lastMove.addPassForPlayer(player)
+
def undo(self):
"""Sets the move before the last move as the new last move."""
self.lastMove = self.lastMove.previousMove
diff --git a/tests/test_monteCarlo.py b/tests/test_monteCarlo.py
index 8fe6f47..283c657 100644
--- a/tests/test_monteCarlo.py
+++ b/tests/test_monteCarlo.py
@@ -2,21 +2,29 @@
import unittest
-from imago.gameLogic.gameBoard import GameBoard
-from imago.gameLogic.gameMove import GameMove
+from imago.gameLogic.gameState import GameState
+from imago.engine.monteCarlo import MCTS
from imago.engine.monteCarlo import MCTSNode
-TEST_BOARD_SIZE = 19
+TEST_BOARD_SIZE = 9
class TestMonteCarlo(unittest.TestCase):
"""Test MonteCarlo algorithm."""
- def testSimulation(self):
- """Test calculation of group liberties."""
- board = GameBoard(TEST_BOARD_SIZE, TEST_BOARD_SIZE)
- move = GameMove(board)
- node = MCTSNode(move, None)
- node.simulation()
+ def testPickMove(self):
+ """Test picking a move."""
+ state = GameState(TEST_BOARD_SIZE)
+ tree = MCTS(state)
+ print(tree.pickMove().toString())
+
+ #def testSimulation(self):
+ # """Test calculation of group liberties."""
+ # board = GameBoard(TEST_BOARD_SIZE, TEST_BOARD_SIZE)
+ # move = GameMove(board)
+ # node = MCTSNode(move, None)
+ # nMatches = 100
+ # scoreDiffHeur = 15
+ # node.simulation(nMatches, scoreDiffHeur)
if __name__ == '__main__':
unittest.main()