diff options
-rw-r--r-- | doc/diagrams/analysisClasses.puml | 50 | ||||
-rw-r--r-- | doc/tex/planification.tex | 7 | ||||
-rw-r--r-- | doc/tex/systemAnalysis.tex | 230 | ||||
-rw-r--r-- | doc/tex/tfg.tex | 9 | ||||
-rw-r--r-- | imago/engine/keras/keras.py | 13 | ||||
-rw-r--r-- | imago/engine/monteCarlo.py | 44 | ||||
-rw-r--r-- | imago/sgfParser/sgf.py | 1 |
7 files changed, 277 insertions, 77 deletions
diff --git a/doc/diagrams/analysisClasses.puml b/doc/diagrams/analysisClasses.puml index d051cf9..8273930 100644 --- a/doc/diagrams/analysisClasses.puml +++ b/doc/diagrams/analysisClasses.puml @@ -1,32 +1,56 @@ @startuml -!include skinparams.puml +'!include skinparams.puml -package GameModule { +() Player +package "Game module" { class GameIO class GameState class GameBoard class GameMove - GameIO -> GameState - GameState -> GameMove - GameMove -> GameBoard + Player -> GameIO + GameIO ..> GameState + GameState ..> GameMove + GameMove ..> GameBoard } -package EngineModule { +() "Engine user" as EU +() "Model files" as MF +package "Engine module" { class EngineIO class EngineLogic - !include DecisionAlgorithm.pumlc + interface DecisionAlgorithm class MonteCarloTreeSearch - class OtherDecisionAlgorithm + class MCTSNode + class Keras + class NeuralNetwork - EngineIO --> EngineLogic - EngineLogic -> DecisionAlgorithm + EU -> EngineIO + EngineIO ..> EngineLogic + EngineLogic ..> DecisionAlgorithm DecisionAlgorithm <|.. MonteCarloTreeSearch - DecisionAlgorithm <|.. OtherDecisionAlgorithm + DecisionAlgorithm <|.. Keras + MonteCarloTreeSearch ..> MCTSNode + Keras ..> NeuralNetwork + NeuralNetwork --> MF } -MonteCarloTreeSearch --> GameMove -OtherDecisionAlgorithm --> GameMove +() "SGF files" as SGF +package "Training module" { + class Trainer + class Parser + class ASTNode + + Parser -> SGF + Trainer ..> Parser + Parser ..> ASTNode +} + +DecisionAlgorithm .> GameMove + +ASTNode .> GameMove + +Trainer .> NeuralNetwork @enduml diff --git a/doc/tex/planification.tex b/doc/tex/planification.tex index 942973f..df72486 100644 --- a/doc/tex/planification.tex +++ b/doc/tex/planification.tex @@ -182,7 +182,10 @@ The systems' interfaces are shown in Fig.~\ref{fig:interfaces}. \begin{figure}[h] \begin{center} \includegraphics[width=\textwidth]{diagrams/interfaces.png} - \caption{Interfaces of the three components of the project. 
- }\label{fig:interfaces} + \caption{Interfaces of the three components of the project.} + The Engine and Trainer components are shown to share the Neural network + model interface because they will interact with the same files (the + files generated by the Trainer will be used by the Engine). + \label{fig:interfaces} \end{center} \end{figure} diff --git a/doc/tex/systemAnalysis.tex b/doc/tex/systemAnalysis.tex index d0eb0b5..044f2ee 100644 --- a/doc/tex/systemAnalysis.tex +++ b/doc/tex/systemAnalysis.tex @@ -1,6 +1,28 @@ \section{System Analysis} -%\subsection{System reach determination} +\subsection{System reach determination} + +These are the main goals the final product must reach. + +\begin{enumerate} + + \item The implementation, analysis and comparison of different decision + algorithms for generating moves. This is the main goal and the following + ones are derived from the need of reaching it. + + \item A library for representing the game of Go. It can be used for the + decision algorithms to keep the state of the game and can also be used + in an interactive application for a user to play the game and test the + code. + + \item An engine program as a way of presenting an interface for using these + algorithms. The engine will use the GTP so it can be used with an + existing GUI or other tools. + + \item A parser for SGF files so they can be processed in the training of + neural networks. + +\end{enumerate} \subsection{System Requirements} @@ -144,6 +166,7 @@ requisites needed for the system. \begin{enumerate} + %TODO: Implement this \item The engine executable will include a help option with the different modes of execution. @@ -269,39 +292,45 @@ against another machine player. \paragraph{Train a neural network} -A neural network is trained over a given list of moves. +A neural network is trained by providing records of games. \subsection{Subsystems} -There will be two main subsystems. - -% TODO: Are there really two different subsystems? 
They look very coupled, since -% the engine will use some classes of the game. This section is more suited for -% independently run systems which communicate through some common interface. -% ...Or maybe not. From the template: "Subsystems are groupings of packages and -% classes with a common objective. Examples of subsystems are the classes which -% handle the database, classes joining a group of related services..." +There will be three main subsystems. \subsubsection{Game System} -The first, called the Game System, will be in charge of storing all the state -information regarding a Go match, such as the history of moves, the possible -variations, the state of the board at any given time or the current number of -captured stones. +The Game System will be in charge of storing all the state information regarding +a Go match, such as the history of moves, the possible variations, the state of +the board at any given time or the current number of captured stones. This system will include a command-line interface with which to play Go matches between human players to show and test its capabilities. \subsubsection{Engine System} -The second, called the Engine System, will implement the GTP interface and use -the Game System to analyze positions and generate moves via decision algorithms. +The Engine System will implement the GTP interface and use the Game System to +analyse positions and generate moves via decision algorithms. This system can be directly called to manually set up game states and ask for -moves or can be called by other programs to be used as backend for playing -matches against a computer player. +moves or can be called by other programs which use GTP to be used as backend for +playing matches against a computer player. + +\subsubsection{Training System} -%\subsubsection{Interface between subsystems} +The Training System will process SGF files storing records of games, train the +neural network models over those games and store the result. 
These models can +then be imported by the engine and be used to generate moves. + +\subsubsection{Interface between subsystems} + +The Training System depends on the NeuralNetwork interface of the Engine System +and uses it to train and store the neural network models. + +Both the Engine and Training systems depend on the GameMove class of the Game +System. The Engine System uses it to store the state of a game and provide it +to the decision algorithms. The Training System uses it to create the internal +representation of a game resulting from the processing of an SGF file. \subsection{Class analysis} @@ -322,7 +351,9 @@ The classes resulting from the analysis phase are shown in \newcommand{\interclassSpace}{30pt} -\indent +\paragraph{Engine System} + +\indent \\ \begin{tabular}{p{\linewidth}} \toprule @@ -368,6 +399,14 @@ The classes resulting from the analysis phase are shown in \vspace{\interclassSpace} +\newcommand{\decisionAlgorithmMethods}{ + \tabitem{\textbf{pickMove()}: Gives a move to play.} \\ + \tabitem{\textbf{forceNextMove(coords)}: Notifies the system of a played + move so it can update its state accordingly.} \\ + \tabitem{\textbf{clearBoard()}: Empties the move history. The algorithm will + now generate moves as from a new game.} \\ +} + \begin{tabular}{p{\linewidth}} \toprule \textbf{DecisionAlgorithm} \\ @@ -382,7 +421,7 @@ The classes resulting from the analysis phase are shown in \textit{(Depends on the algorithm.)} \\ \midrule \textbf{Proposed methods} \\ - \tabitem{\textbf{genmove()}: Gives the coordinates of a move to play.} \\ + \decisionAlgorithmMethods \bottomrule \end{tabular} @@ -390,20 +429,21 @@ The classes resulting from the analysis phase are shown in \begin{tabular}{p{\linewidth}} \toprule - \textbf{GameIO} \\ + \textbf{MonteCarloTreeSearch} \\ \midrule \textbf{Description} \\ - Offers the interface with the game. \\ + Implements the Monte Carlo Tree Search algorithm for exploring the tree of + the game and deciding on a move. 
\\ \midrule \textbf{Responsibilities} \\ - \tabitem{Read input.} \\ - \tabitem{Do some preprocessing.} \\ - \tabitem{Forward commands to the game state component.} \\ + \tabitem{Analyzing game states and generating moves.} \\ \midrule \textbf{Proposed attributes} \\ + \tabitem{\textbf{root}: The root node of a tree representing the current + game state and the explored possible moves from it.} \\ \midrule \textbf{Proposed methods} \\ - \tabitem{\textbf{start()}: Starts reading standard input in a loop.} \\ + \decisionAlgorithmMethods \bottomrule \end{tabular} @@ -411,6 +451,103 @@ The classes resulting from the analysis phase are shown in \begin{tabular}{p{\linewidth}} \toprule + \textbf{MCTSNode} \\ + \midrule + \textbf{Description} \\ + A node of the tree used by the Monte Carlo Tree Search algorithm. \\ + \midrule + \textbf{Responsibilities} \\ + \tabitem{Storing a specific state of a match.} \\ + \midrule + \textbf{Proposed attributes} \\ + \tabitem{\textbf{visits}: How many times the node has been visited.} \\ + \tabitem{\textbf{score}: The number of explorations of the node resulting in + victory.} \\ + \tabitem{\textbf{move}: A GameMove for accessing game state and logic.} \\ + \tabitem{\textbf{parent}: This node's parent in the tree.} \\ + \tabitem{\textbf{children}: The nodes following from this node in the tree.} + \\ + \tabitem{\textbf{unexploredVertices}: The plays which could be explored from + this node.} \\ + \midrule + \textbf{Proposed methods} \\ + \tabitem{\textbf{ucbForPlayer()}: Computes the Upper Confidence Bound of the + node from the perspective of the player making the move stored in the node.} + \\ + \tabitem{\textbf{selection()}: Monte Carlo Tree Search selection step. + Selects the most promising node which still has some unexplored children.} + \\ + \tabitem{\textbf{expansion()}: Monte Carlo Tree Search expansion step. 
Picks + an unexplored vertex from the node and adds it as a new MCTSNode.} \\ + \tabitem{\textbf{expansionForCoords()}: Performs an expansion for the given + coordinates. This represents forcing a move on the algorithm.} \\ + \tabitem{\textbf{simulation()}: Play random matches to accumulate reward + information on the node.} \\ + \bottomrule +\end{tabular} + +\vspace{\interclassSpace} + +\begin{tabular}{p{\linewidth}} + \toprule + \textbf{Keras} \\ + \midrule + \textbf{Description} \\ + Implements the DecisionAlgorithm interface to give access to a neural + network. \\ + \midrule + \textbf{Responsibilities} \\ + \tabitem{Analyzing game states and generating moves.} \\ + \midrule + \textbf{Proposed attributes} \\ + \tabitem{\textbf{currentMove}: A GameMove for accessing game state and + logic.} \\ + \tabitem{\textbf{neuralNetwork}: A NeuralNetwork instance for generating + moves.} \\ + \midrule + \textbf{Proposed methods} \\ + \decisionAlgorithmMethods + \bottomrule +\end{tabular} + +\begin{tabular}{p{\linewidth}} + \toprule + \textbf{NeuralNetwork} \\ + \midrule + \textbf{Description} \\ + Manages the neural networks used by the engine. 
\\ + \midrule + \textbf{Responsibilities} \\ + \tabitem{Analyzing game states and generating moves.} \\ + \tabitem{Generating a new neural network.} \\ + \tabitem{Loading a model file to use an existing trained neural network.} \\ + \midrule + \textbf{Proposed attributes} \\ + \tabitem{\textbf{currentMove}: A GameMove for accessing game state and + logic.} \\ + \tabitem{\textbf{neuralNetwork}: A NeuralNetwork instance for generating + moves.} \\ + \midrule + \textbf{Proposed methods} \\ + \tabitem{\textbf{pickMove()}: Uses the current internal model to pick a move + given a game state.} \\ + \tabitem{\textbf{trainModel()}: Receives a list of games, with each game + being a list of moves, and trains the network on them.} \\ + \tabitem{\textbf{saveModel()}: Saves the current internal neural network + model.} \\ + \tabitem{\textbf{saveHeatmap()}: Saves an image of a heatmap of move + likelihood.} \\ + \tabitem{\textbf{saveModelPlot()}: Saves an image of a plot of the model + configuration.} \\ + \bottomrule +\end{tabular} + +\paragraph{Game System} + +\indent \\ + +\begin{tabular}{p{\linewidth}} + \toprule \textbf{GameState} \\ \midrule \textbf{Description} \\ @@ -476,14 +613,17 @@ The classes resulting from the analysis phase are shown in \tabitem{Store information about a move (board, player, coordinates\ldots).} \\ \midrule \textbf{Proposed attributes} \\ - \tabitem{\textbf{GameBoard board}: The board as of this move.} \\ - \tabitem{\textbf{GameMove[] nextMoves}: The list of moves played after this + \tabitem{\textbf{board}: The board as of this move.} \\ + \tabitem{\textbf{nextMoves}: The list of moves played after this one. 
Different moves represent different game variations.} \\ - \tabitem{\textbf{GameMove previousMove}: The move before this one.} \\ - \tabitem{\textbf{boolean isPass}: True if the move is a pass and not a stone + \tabitem{\textbf{previousMove}: The move before this one.} \\ + \tabitem{\textbf{isPass}: True if the move is a pass and not a stone placement.} \\ - \tabitem{\textbf{int[] coords}: The coordinates of the board the move was - played at. Have no meaning if \textbf{isPass} is true.} \\ + \tabitem{\textbf{coords}: The coordinates of the board the move was + played at. Has no meaning if \textbf{isPass} is true.} \\ + \tabitem{\textbf{playerWhoPassed}: The player who made this move. Has no + meaning if \textbf{isPass} is false, since the player can be obtained from + the coordinates of the move when it is not a pass.} \\ \midrule \textbf{Proposed methods} \\ \tabitem{\textbf{getRow()}: Returns the row the move was played at.} \\ @@ -504,6 +644,30 @@ The classes resulting from the analysis phase are shown in \vspace{\interclassSpace} +%TODO: Finish the classes of the Game System + +\paragraph{Training System} + +\indent \\ + +\begin{tabular}{p{\linewidth}} + \toprule + \textbf{Trainer} \\ + \midrule + \textbf{Description} \\ + . \\ + \midrule + \textbf{Responsibilities} \\ + \tabitem{.} \\ + \midrule + \textbf{Proposed attributes} \\ + \tabitem{\textbf{}: .} \\ + \midrule + \textbf{Proposed methods} \\ + \tabitem{\textbf{}: .} \\ + \bottomrule +\end{tabular} + \subsection{Use case analysis and scenarios} \indent diff --git a/doc/tex/tfg.tex b/doc/tex/tfg.tex index 5af6278..22ee0e9 100644 --- a/doc/tex/tfg.tex +++ b/doc/tex/tfg.tex @@ -34,7 +34,8 @@ \frenchspacing -\title{\program: An AI capable of playing the game of Go} +\title{\program\\ +\large An AI player of the game of Go} \author{Íñigo Gutiérrez Fernández} @@ -50,6 +51,8 @@ \end{center} \end{figure} +\clearpage + \begin{abstract} This is the abstract. 
\end{abstract} @@ -109,6 +112,10 @@ inclusion to use this template is included here. \tableofcontents +\listoffigures + +\clearpage + \input{tex/introduction.tex} \input{tex/planification.tex} diff --git a/imago/engine/keras/keras.py b/imago/engine/keras/keras.py index 4f39818..0668cd1 100644 --- a/imago/engine/keras/keras.py +++ b/imago/engine/keras/keras.py @@ -11,10 +11,9 @@ class Keras(DecisionAlgorithm): def __init__(self, move): self.currentMove = move - self.boardSize = move.board.getBoardHeight() - self.nn = NeuralNetwork( + self.neuralNetwork = NeuralNetwork( MODEL_FILE, - self.boardSize + move.board.getBoardHeight() ) def forceNextMove(self, coords): @@ -23,8 +22,12 @@ class Keras(DecisionAlgorithm): def pickMove(self): """Returns a move to play.""" - return self.nn.pickMove(self.currentMove, self.currentMove.getNextPlayer()) + return self.neuralNetwork.pickMove( + self.currentMove, + self.currentMove.getNextPlayer() + ) def clearBoard(self): """Empties move history.""" - self.currentMove = GameMove(GameBoard(self.boardSize, self.boardSize)) + boardSize = self.currentMove.board.getBoardHeight() + self.currentMove = GameMove(GameBoard(boardSize, boardSize)) diff --git a/imago/engine/monteCarlo.py b/imago/engine/monteCarlo.py index 2113fdd..81bee1d 100644 --- a/imago/engine/monteCarlo.py +++ b/imago/engine/monteCarlo.py @@ -29,10 +29,10 @@ class MCTS(DecisionAlgorithm): # completely random for _ in range(5): self.root.selection().expansion().simulation(10, 20) - selectedNode = self.selectBestNextNode() + selectedNode = self._selectBestNextNode() return selectedNode.move.coords - def selectBestNextNode(self): + def _selectBestNextNode(self): """Returns best ucb node available for the current player.""" # Assumes at least one expansion has occured @@ -62,35 +62,35 @@ class MCTSNode: self.children = set() self.unexploredVertices = move.getPlayableVertices() - def ucb(self): - """Returns Upper Confidence Bound of node""" - # UCB = meanVictories + 1/visits - if 
self.visits == 0: - return 0 - mean = self.score / self.visits - adjust = 1/self.visits - return mean + adjust - def ucbForPlayer(self): """ Returns Upper Confidence Bound of node changing the symbol if the move is for the wite player.""" - return self.ucbForSpecificPlayer(self.move.getPlayer()) + return self._ucbForSpecificPlayer(self.move.getPlayer()) - def ucbForSpecificPlayer(self, player): + def _ucbForSpecificPlayer(self, player): """ Returns Upper Confidence Bound of node from the perspective of the given player.""" if player == Player.WHITE: - return self.ucb() * -1 - return self.ucb() + return self._ucb() * -1 + return self._ucb() + + def _ucb(self): + """Returns Upper Confidence Bound of node""" + # UCB = meanVictories + 1/visits + if self.visits == 0: + return 0 + mean = self.score / self.visits + adjust = 1/self.visits + return mean + adjust def selection(self): """Select the most promising node with unexplored children.""" - bestNode = self.selectionRec(self) + bestNode = self._selectionRec(self) return bestNode - def selectionRec(self, bestNode): + def _selectionRec(self, bestNode): """Searches this node and its children for the node with the best UCB value for the current player.""" @@ -109,15 +109,15 @@ class MCTSNode: # Check if node has unexplored children and better UCB than previously explored if len(self.unexploredVertices) > 0: - if self.ucbForSpecificPlayer( - player) > bestNode.ucbForSpecificPlayer(player): + if self._ucbForSpecificPlayer( + player) > bestNode._ucbForSpecificPlayer(player): bestNode = self # Recursively search children for better UCB for child in self.children: - bestChildNode = child.selectionRec(bestNode) - if bestChildNode.ucbForSpecificPlayer( - player) > bestNode.ucbForSpecificPlayer(player): + bestChildNode = child._selectionRec(bestNode) + if bestChildNode._ucbForSpecificPlayer( + player) > bestNode._ucbForSpecificPlayer(player): bestNode = bestChildNode return bestNode diff --git a/imago/sgfParser/sgf.py 
b/imago/sgfParser/sgf.py index 4b11cfb..154cbbe 100644 --- a/imago/sgfParser/sgf.py +++ b/imago/sgfParser/sgf.py @@ -3,7 +3,6 @@ from imago.sgfParser.sgfyacc import parser def loadGameTree(filename): -# PLY? """Parses a GameTree instance from a source SGF file.""" file = open(filename, "r") |