aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/diagrams/analysisClasses.puml50
-rw-r--r--doc/tex/planification.tex7
-rw-r--r--doc/tex/systemAnalysis.tex230
-rw-r--r--doc/tex/tfg.tex9
-rw-r--r--imago/engine/keras/keras.py13
-rw-r--r--imago/engine/monteCarlo.py44
-rw-r--r--imago/sgfParser/sgf.py1
7 files changed, 277 insertions, 77 deletions
diff --git a/doc/diagrams/analysisClasses.puml b/doc/diagrams/analysisClasses.puml
index d051cf9..8273930 100644
--- a/doc/diagrams/analysisClasses.puml
+++ b/doc/diagrams/analysisClasses.puml
@@ -1,32 +1,56 @@
@startuml
-!include skinparams.puml
+'!include skinparams.puml
-package GameModule {
+() Player
+package "Game module" {
class GameIO
class GameState
class GameBoard
class GameMove
- GameIO -> GameState
- GameState -> GameMove
- GameMove -> GameBoard
+ Player -> GameIO
+ GameIO ..> GameState
+ GameState ..> GameMove
+ GameMove ..> GameBoard
}
-package EngineModule {
+() "Engine user" as EU
+() "Model files" as MF
+package "Engine module" {
class EngineIO
class EngineLogic
- !include DecisionAlgorithm.pumlc
+ interface DecisionAlgorithm
class MonteCarloTreeSearch
- class OtherDecisionAlgorithm
+ class MCTSNode
+ class Keras
+ class NeuralNetwork
- EngineIO --> EngineLogic
- EngineLogic -> DecisionAlgorithm
+ EU -> EngineIO
+ EngineIO ..> EngineLogic
+ EngineLogic ..> DecisionAlgorithm
DecisionAlgorithm <|.. MonteCarloTreeSearch
- DecisionAlgorithm <|.. OtherDecisionAlgorithm
+ DecisionAlgorithm <|.. Keras
+ MonteCarloTreeSearch ..> MCTSNode
+ Keras ..> NeuralNetwork
+ NeuralNetwork --> MF
}
-MonteCarloTreeSearch --> GameMove
-OtherDecisionAlgorithm --> GameMove
+() "SGF files" as SGF
+package "Training module" {
+ class Trainer
+ class Parser
+ class ASTNode
+
+ Parser -> SGF
+ Trainer ..> Parser
+ Parser ..> ASTNode
+}
+
+DecisionAlgorithm .> GameMove
+
+ASTNode .> GameMove
+
+Trainer .> NeuralNetwork
@enduml
diff --git a/doc/tex/planification.tex b/doc/tex/planification.tex
index 942973f..df72486 100644
--- a/doc/tex/planification.tex
+++ b/doc/tex/planification.tex
@@ -182,7 +182,10 @@ The systems' interfaces are shown in Fig.~\ref{fig:interfaces}.
\begin{figure}[h]
\begin{center}
\includegraphics[width=\textwidth]{diagrams/interfaces.png}
- \caption{Interfaces of the three components of the project.
- }\label{fig:interfaces}
+ \caption{Interfaces of the three components of the project.
+ The Engine and Trainer components are shown to share the Neural network
+ model interface because they will interact with the same files (the
+ files generated by the Trainer will be used by the Engine).}
+ \label{fig:interfaces}
\end{center}
\end{figure}
diff --git a/doc/tex/systemAnalysis.tex b/doc/tex/systemAnalysis.tex
index d0eb0b5..044f2ee 100644
--- a/doc/tex/systemAnalysis.tex
+++ b/doc/tex/systemAnalysis.tex
@@ -1,6 +1,28 @@
\section{System Analysis}
-%\subsection{System reach determination}
+\subsection{System reach determination}
+
+These are the main goals the final product must reach.
+
+\begin{enumerate}
+
+ \item The implementation, analysis and comparison of different decision
+ algorithms for generating moves. This is the main goal and the following
+ ones are derived from the need of reaching it.
+
+ \item A library for representing the game of Go. It can be used for the
+ decision algorithms to keep the state of the game and can also be used
+ in an interactive application for a user to play the game and test the
+ code.
+
+ \item An engine program as a way of presenting an interface for using these
+ algorithms. The engine will use the GTP so it can be used with an
+ existing GUI or other tools.
+
+ \item A parser for SGF files so they can be processed in the training of
+ neural networks.
+
+\end{enumerate}
\subsection{System Requirements}
@@ -144,6 +166,7 @@ requisites needed for the system.
\begin{enumerate}
+ %TODO: Implement this
\item The engine executable will include a help option with the different
modes of execution.
@@ -269,39 +292,45 @@ against another machine player.
\paragraph{Train a neural network}
-A neural network is trained over a given list of moves.
+A neural network is trained by providing records of games.
\subsection{Subsystems}
-There will be two main subsystems.
-
-% TODO: Are there really two different subsystems? They look very coupled, since
-% the engine will use some classes of the game. This section is more suited for
-% independently run systems which communicate through some common interface.
-% ...Or maybe not. From the template: "Subsystems are groupings of packages and
-% classes with a common objective. Examples of subsystems are the classes which
-% handle the database, classes joining a group of related services..."
+There will be three main subsystems.
\subsubsection{Game System}
-The first, called the Game System, will be in charge of storing all the state
-information regarding a Go match, such as the history of moves, the possible
-variations, the state of the board at any given time or the current number of
-captured stones.
+The Game System will be in charge of storing all the state information regarding
+a Go match, such as the history of moves, the possible variations, the state of
+the board at any given time or the current number of captured stones.
This system will include a command-line interface with which to play Go matches
between human players to show and test its capabilities.
\subsubsection{Engine System}
-The second, called the Engine System, will implement the GTP interface and use
-the Game System to analyze positions and generate moves via decision algorithms.
+The Engine System will implement the GTP interface and use the Game System to
+analyse positions and generate moves via decision algorithms.
This system can be directly called to manually set up game states and ask for
-moves or can be called by other programs to be used as backend for playing
-matches against a computer player.
+moves or can be called by other programs which use GTP to be used as backend for
+playing matches against a computer player.
+
+\subsubsection{Training System}
-%\subsubsection{Interface between subsystems}
+The Training System will process SGF files storing records of games, train the
+neural network models over those games and store the result. These models can
+then be imported by the engine and be used to generate moves.
+
+\subsubsection{Interface between subsystems}
+
+The Training System depends on the NeuralNetwork interface of the Engine System
+and uses it to train and store the neural network models.
+
+Both the Engine and Training systems depend on the GameMove class of the Game
+System. The Engine System uses it to store the state of a game and provide it
+to the decision algorithms. The Training System uses it to create the internal
+representation of a game resulting from the processing of an SGF file.
\subsection{Class analysis}
@@ -322,7 +351,9 @@ The classes resulting from the analysis phase are shown in
\newcommand{\interclassSpace}{30pt}
-\indent
+\paragraph{Engine System}
+
+\indent \\
\begin{tabular}{p{\linewidth}}
\toprule
@@ -368,6 +399,14 @@ The classes resulting from the analysis phase are shown in
\vspace{\interclassSpace}
+\newcommand{\decisionAlgorithmMethods}{
+ \tabitem{\textbf{pickMove()}: Gives a move to play.} \\
+ \tabitem{\textbf{forceNextMove(coords)}: Notifies the system of a played
+ move so it can update its state accordingly.} \\
+ \tabitem{\textbf{clearBoard()}: Empties the move history. The algorithm will
+ now generate moves as from a new game.} \\
+}
+
\begin{tabular}{p{\linewidth}}
\toprule
\textbf{DecisionAlgorithm} \\
@@ -382,7 +421,7 @@ The classes resulting from the analysis phase are shown in
\textit{(Depends on the algorithm.)} \\
\midrule
\textbf{Proposed methods} \\
- \tabitem{\textbf{genmove()}: Gives the coordinates of a move to play.} \\
+ \decisionAlgorithmMethods
\bottomrule
\end{tabular}
@@ -390,20 +429,21 @@ The classes resulting from the analysis phase are shown in
\begin{tabular}{p{\linewidth}}
\toprule
- \textbf{GameIO} \\
+ \textbf{MonteCarloTreeSearch} \\
\midrule
\textbf{Description} \\
- Offers the interface with the game. \\
+ Implements the Monte Carlo Tree Search algorithm for exploring the tree of
+ the game and deciding on a move. \\
\midrule
\textbf{Responsibilities} \\
- \tabitem{Read input.} \\
- \tabitem{Do some preprocessing.} \\
- \tabitem{Forward commands to the game state component.} \\
+ \tabitem{Analyzing game states and generating moves.} \\
\midrule
\textbf{Proposed attributes} \\
+ \tabitem{\textbf{root}: The root node of a tree representing the current
+ game state and the explored possible moves from it.} \\
\midrule
\textbf{Proposed methods} \\
- \tabitem{\textbf{start()}: Starts reading standard input in a loop.} \\
+ \decisionAlgorithmMethods
\bottomrule
\end{tabular}
@@ -411,6 +451,103 @@ The classes resulting from the analysis phase are shown in
\begin{tabular}{p{\linewidth}}
\toprule
+ \textbf{MCTSNode} \\
+ \midrule
+ \textbf{Description} \\
+ A node of the tree used by the Monte Carlo Tree Search algorithm. \\
+ \midrule
+ \textbf{Responsibilities} \\
+ \tabitem{Storing a specific state of a match.} \\
+ \midrule
+ \textbf{Proposed attributes} \\
+ \tabitem{\textbf{visits}: How many times the node has been visited.} \\
+ \tabitem{\textbf{score}: The number of explorations of the node resulting in
+ victory.} \\
+ \tabitem{\textbf{move}: A GameMove for accessing game state and logic.} \\
+ \tabitem{\textbf{parent}: This node's parent in the tree.} \\
+ \tabitem{\textbf{children}: The nodes following from this node in the tree.}
+ \\
+ \tabitem{\textbf{unexploredVertices}: The plays which could be explored from
+ this node.} \\
+ \midrule
+ \textbf{Proposed methods} \\
+ \tabitem{\textbf{ucbForPlayer()}: Computes the Upper Confidence Bound of the
+ node from the perspective of the player making the move stored in the node.}
+ \\
+ \tabitem{\textbf{selection()}: Monte Carlo Tree Search selection step.
+ Selects the most promising node which still has some unexplored children.}
+ \\
+ \tabitem{\textbf{expansion()}: Monte Carlo Tree Search expansion step. Picks
+ an unexplored vertex from the node and adds it as a new MCTSNode.} \\
+ \tabitem{\textbf{expansionForCoords()}: Performs an expansion for the given
+ coordinates. This represents forcing a move on the algorithm.} \\
+ \tabitem{\textbf{simulation()}: Play random matches to accumulate reward
+ information on the node.} \\
+ \bottomrule
+\end{tabular}
+
+\vspace{\interclassSpace}
+
+\begin{tabular}{p{\linewidth}}
+ \toprule
+ \textbf{Keras} \\
+ \midrule
+ \textbf{Description} \\
+ Implements the DecisionAlgorithm interface to give access to a neural
+ network. \\
+ \midrule
+ \textbf{Responsibilities} \\
+ \tabitem{Analyzing game states and generating moves.} \\
+ \midrule
+ \textbf{Proposed attributes} \\
+ \tabitem{\textbf{currentMove}: A GameMove for accessing game state and
+ logic.} \\
+ \tabitem{\textbf{neuralNetwork}: A NeuralNetwork instance for generating
+ moves.} \\
+ \midrule
+ \textbf{Proposed methods} \\
+ \decisionAlgorithmMethods
+ \bottomrule
+\end{tabular}
+\vspace{\interclassSpace}
+\begin{tabular}{p{\linewidth}}
+ \toprule
+ \textbf{NeuralNetwork} \\
+ \midrule
+ \textbf{Description} \\
+ Manages the neural networks used by the engine. \\
+ \midrule
+ \textbf{Responsibilities} \\
+ \tabitem{Analyzing game states and generating moves.} \\
+ \tabitem{Generating a new neural network.} \\
+ \tabitem{Loading a model file to use an existing trained neural network.} \\
+ \midrule
+ \textbf{Proposed attributes} \\
+ \tabitem{\textbf{model}: The internal neural network model, either newly
+ generated or loaded from a model file.} \\
+ \tabitem{\textbf{boardSize}: The size of the board the network is
+ configured for.} \\
+ \midrule
+ \textbf{Proposed methods} \\
+ \tabitem{\textbf{pickMove()}: Uses the current internal model to pick a move
+ given a game state.} \\
+ \tabitem{\textbf{trainModel()}: Receives a list of games, with each game
+ being a list of moves, and trains the network on them.} \\
+ \tabitem{\textbf{saveModel()}: Saves the current internal neural network
+ model.} \\
+ \tabitem{\textbf{saveHeatmap()}: Saves an image of a heatmap of move
+ likelihood.} \\
+ \tabitem{\textbf{saveModelPlot()}: Saves an image of a plot of the model
+ configuration.} \\
+ \bottomrule
+\end{tabular}
+
+\paragraph{Game System}
+
+\indent \\
+
+\begin{tabular}{p{\linewidth}}
+ \toprule
\textbf{GameState} \\
\midrule
\textbf{Description} \\
@@ -476,14 +613,17 @@ The classes resulting from the analysis phase are shown in
\tabitem{Store information about a move (board, player, coordinates\ldots).} \\
\midrule
\textbf{Proposed attributes} \\
- \tabitem{\textbf{GameBoard board}: The board as of this move.} \\
- \tabitem{\textbf{GameMove[] nextMoves}: The list of moves played after this
+ \tabitem{\textbf{board}: The board as of this move.} \\
+ \tabitem{\textbf{nextMoves}: The list of moves played after this
one. Different moves represent different game variations.} \\
- \tabitem{\textbf{GameMove previousMove}: The move before this one.} \\
- \tabitem{\textbf{boolean isPass}: True if the move is a pass and not a stone
+ \tabitem{\textbf{previousMove}: The move before this one.} \\
+ \tabitem{\textbf{isPass}: True if the move is a pass and not a stone
placement.} \\
- \tabitem{\textbf{int[] coords}: The coordinates of the board the move was
- played at. Have no meaning if \textbf{isPass} is true.} \\
+ \tabitem{\textbf{coords}: The coordinates of the board the move was
+ played at. Has no meaning if \textbf{isPass} is true.} \\
+ \tabitem{\textbf{playerWhoPassed}: The player who made this move. Has no
+ meaning if \textbf{isPass} is false, since the player can be obtained from
+ the coordinates of the move when it is not a pass.} \\
\midrule
\textbf{Proposed methods} \\
\tabitem{\textbf{getRow()}: Returns the row the move was played at.} \\
@@ -504,6 +644,30 @@ The classes resulting from the analysis phase are shown in
\vspace{\interclassSpace}
+%TODO: Finish the classes of the Game System
+
+\paragraph{Training System}
+
+\indent \\
+
+\begin{tabular}{p{\linewidth}}
+ \toprule
+ \textbf{Trainer} \\
+ \midrule
+ \textbf{Description} \\
+ . \\
+ \midrule
+ \textbf{Responsibilities} \\
+ \tabitem{.} \\
+ \midrule
+ \textbf{Proposed attributes} \\
+ \tabitem{\textbf{}: .} \\
+ \midrule
+ \textbf{Proposed methods} \\
+ \tabitem{\textbf{}: .} \\
+ \bottomrule
+\end{tabular}
+
\subsection{Use case analysis and scenarios}
\indent
diff --git a/doc/tex/tfg.tex b/doc/tex/tfg.tex
index 5af6278..22ee0e9 100644
--- a/doc/tex/tfg.tex
+++ b/doc/tex/tfg.tex
@@ -34,7 +34,8 @@
\frenchspacing
-\title{\program: An AI capable of playing the game of Go}
+\title{\program\\
+\large An AI player of the game of Go}
\author{Íñigo Gutiérrez Fernández}
@@ -50,6 +51,8 @@
\end{center}
\end{figure}
+\clearpage
+
\begin{abstract}
This is the abstract.
\end{abstract}
@@ -109,6 +112,10 @@ inclusion to use this template is included here.
\tableofcontents
+\listoffigures
+
+\clearpage
+
\input{tex/introduction.tex}
\input{tex/planification.tex}
diff --git a/imago/engine/keras/keras.py b/imago/engine/keras/keras.py
index 4f39818..0668cd1 100644
--- a/imago/engine/keras/keras.py
+++ b/imago/engine/keras/keras.py
@@ -11,10 +11,9 @@ class Keras(DecisionAlgorithm):
def __init__(self, move):
self.currentMove = move
- self.boardSize = move.board.getBoardHeight()
- self.nn = NeuralNetwork(
+ self.neuralNetwork = NeuralNetwork(
MODEL_FILE,
- self.boardSize
+ move.board.getBoardHeight()
)
def forceNextMove(self, coords):
@@ -23,8 +22,12 @@ class Keras(DecisionAlgorithm):
def pickMove(self):
"""Returns a move to play."""
- return self.nn.pickMove(self.currentMove, self.currentMove.getNextPlayer())
+ return self.neuralNetwork.pickMove(
+ self.currentMove,
+ self.currentMove.getNextPlayer()
+ )
def clearBoard(self):
"""Empties move history."""
- self.currentMove = GameMove(GameBoard(self.boardSize, self.boardSize))
+ boardSize = self.currentMove.board.getBoardHeight()
+ self.currentMove = GameMove(GameBoard(boardSize, boardSize))
diff --git a/imago/engine/monteCarlo.py b/imago/engine/monteCarlo.py
index 2113fdd..81bee1d 100644
--- a/imago/engine/monteCarlo.py
+++ b/imago/engine/monteCarlo.py
@@ -29,10 +29,10 @@ class MCTS(DecisionAlgorithm):
# completely random
for _ in range(5):
self.root.selection().expansion().simulation(10, 20)
- selectedNode = self.selectBestNextNode()
+ selectedNode = self._selectBestNextNode()
return selectedNode.move.coords
- def selectBestNextNode(self):
+ def _selectBestNextNode(self):
"""Returns best ucb node available for the current player."""
# Assumes at least one expansion has occured
@@ -62,35 +62,35 @@ class MCTSNode:
self.children = set()
self.unexploredVertices = move.getPlayableVertices()
- def ucb(self):
- """Returns Upper Confidence Bound of node"""
- # UCB = meanVictories + 1/visits
- if self.visits == 0:
- return 0
- mean = self.score / self.visits
- adjust = 1/self.visits
- return mean + adjust
-
def ucbForPlayer(self):
"""
Returns Upper Confidence Bound of node changing the symbol if the move is for the
wite player."""
- return self.ucbForSpecificPlayer(self.move.getPlayer())
+ return self._ucbForSpecificPlayer(self.move.getPlayer())
- def ucbForSpecificPlayer(self, player):
+ def _ucbForSpecificPlayer(self, player):
"""
Returns Upper Confidence Bound of node from the perspective of the given
player."""
if player == Player.WHITE:
- return self.ucb() * -1
- return self.ucb()
+ return self._ucb() * -1
+ return self._ucb()
+
+ def _ucb(self):
+ """Returns Upper Confidence Bound of node"""
+ # UCB = meanVictories + 1/visits
+ if self.visits == 0:
+ return 0
+ mean = self.score / self.visits
+ adjust = 1/self.visits
+ return mean + adjust
def selection(self):
"""Select the most promising node with unexplored children."""
- bestNode = self.selectionRec(self)
+ bestNode = self._selectionRec(self)
return bestNode
- def selectionRec(self, bestNode):
+ def _selectionRec(self, bestNode):
"""Searches this node and its children for the node with the best UCB value for
the current player."""
@@ -109,15 +109,15 @@ class MCTSNode:
# Check if node has unexplored children and better UCB than previously explored
if len(self.unexploredVertices) > 0:
- if self.ucbForSpecificPlayer(
- player) > bestNode.ucbForSpecificPlayer(player):
+ if self._ucbForSpecificPlayer(
+ player) > bestNode._ucbForSpecificPlayer(player):
bestNode = self
# Recursively search children for better UCB
for child in self.children:
- bestChildNode = child.selectionRec(bestNode)
- if bestChildNode.ucbForSpecificPlayer(
- player) > bestNode.ucbForSpecificPlayer(player):
+ bestChildNode = child._selectionRec(bestNode)
+ if bestChildNode._ucbForSpecificPlayer(
+ player) > bestNode._ucbForSpecificPlayer(player):
bestNode = bestChildNode
return bestNode
diff --git a/imago/sgfParser/sgf.py b/imago/sgfParser/sgf.py
index 4b11cfb..154cbbe 100644
--- a/imago/sgfParser/sgf.py
+++ b/imago/sgfParser/sgf.py
@@ -3,7 +3,6 @@
from imago.sgfParser.sgfyacc import parser
def loadGameTree(filename):
-# PLY?
"""Parses a GameTree instance from a source SGF file."""
file = open(filename, "r")