diff options
author | InigoGutierrez <inigogf.95@gmail.com> | 2022-07-01 16:10:15 +0200 |
---|---|---|
committer | InigoGutierrez <inigogf.95@gmail.com> | 2022-07-01 16:10:15 +0200 |
commit | b08408d23186205e71dfc68634021e3236bfb45c (patch) | |
tree | 55e5679b6964902dadab1d5737546cfd4f0f2f0a /doc/tex | |
parent | ddde2a9a43daf870c26bef33f47abe45b414c3d0 (diff) | |
download | imago-b08408d23186205e71dfc68634021e3236bfb45c.tar.gz imago-b08408d23186205e71dfc68634021e3236bfb45c.zip |
First version.
Diffstat (limited to 'doc/tex')
-rw-r--r-- | doc/tex/biber.bib | 156 | ||||
-rw-r--r-- | doc/tex/conclusions.tex | 128 | ||||
-rw-r--r-- | doc/tex/imago.tex | 178 | ||||
-rw-r--r-- | doc/tex/implementation.tex | 181 | ||||
-rw-r--r-- | doc/tex/interface.tex | 6 | ||||
-rw-r--r-- | doc/tex/introduction.tex | 8 | ||||
-rw-r--r-- | doc/tex/planification.tex | 197 | ||||
-rw-r--r-- | doc/tex/previousWorks.tex | 21 | ||||
-rw-r--r-- | doc/tex/results.tex | 416 | ||||
-rw-r--r-- | doc/tex/systemAnalysis.tex | 892 | ||||
-rw-r--r-- | doc/tex/systemDesign.tex | 324 | ||||
-rw-r--r-- | doc/tex/tfg.tex | 60 |
12 files changed, 2417 insertions, 150 deletions
diff --git a/doc/tex/biber.bib b/doc/tex/biber.bib new file mode 100644 index 0000000..d0a714e --- /dev/null +++ b/doc/tex/biber.bib @@ -0,0 +1,156 @@ +@article{natureAlphaGo2016, + author = {David Silver and Aja Huang and Chris J. Maddison and Arthur Guez and Laurent Sifre and George van den Driessche and Julian Schrittwieser and Ioannis Antonoglou and Veda Panneershelvam and Marc Lanctot and Sander Dieleman and Dominik Grewe and John Nham and Nal Kalchbrenner and Ilya Sutskever and Timothy Lillicrap and Madeleine Leach and Koray Kavukcuoglu and Thore Graepel and Demis Hassabis}, + title = {Mastering the game of Go with deep neural networks and tree search}, + journaltitle = {Nature}, + date = {2016-01-27}, + url = {https://www.nature.com/articles/nature16961} +} + +@online{gtp, + author = {Gunnar Farnebäck}, + title = {GTP --- Go Text Protocol}, + date = {2002}, + urldate = {2021}, + url = {https://www.lysator.liu.se/~gunnar/gtp} +} + +@online{sgf, + author = {Arno Hollosi}, + title = {SGF File Format FF[4]}, + date = {2021}, + urldate = {2022}, + url = {https://www.red-bean.com/sgf} +} + +@online{katago, + author = {David J Wu ("lightvector")}, + title = {KataGo}, + date = {2021}, + urldate = {2021}, + url = {https://github.com/lightvector/KataGo} +} + +@online{gnugo, + title = {GNU Go}, + organization = {Free Software Foundation}, + date = {2019}, + urldate = {2021}, + url = {https://www.gnu.org/software/gnugo} +} + +@online{sabaki, + author = {Yichuan Shen}, + title = {Sabaki --- An elegant Go board and SGF editor for a more civilized age.}, + date = {2019}, + urldate = {2021}, + url = {https://sabaki.yichuanshen.de} +} + +@online{keras, + title = {Keras: the Python deep learning API}, + urldate = {2022}, + url = {https://keras.io} +} + +@online{sl_go, + title = {Go}, + organization = {Sensei's Library}, + date = {2019}, + urldate = {2021}, + url = {https://senseis.xmp.net/?go} +} + +@online{plantillaRedondo, + author = {J. M. Redondo}, + title = {Documentos-modelo para Trabajos de Fin de Grado/Master de la Escuela de Informática de Oviedo}, + date = {2019-06-17}, + url = {https://www.researchgate.net/publication/327882831_Plantilla_de_Proyectos_de_Fin_de_Carrera_de_la_Escuela_de_Informatica_de_Oviedo} +} + +@online{python_unittest, + title = {unittest --- Unit testing framework}, + urldate = {2021}, + url = {https://docs.python.org/3/library/unittest.html} +} + +@online{python_coverage, + title = {Coverage.py}, + urldate = {2021}, + url = {https://coverage.readthedocs.io} +} + +@online{matplotlib_heatmaps, + title = {Creating annotated heatmaps — Matplotlib 3.5.2 documentation}, + urldate = {2022}, + url = {https://matplotlib.org/stable/gallery/images_contours_and_fields/image_annotated_heatmap.html} +} + +@online{python, + title = {Welcome to Python.org}, + urldate = {2022}, + url = {https://www.python.org} +} + +@online{numpy, + title = {NumPy}, + urldate = {2022}, + url = {https://numpy.org} +} + +@online{matplotlib, + title = {Matplotlib — Visualization with Python}, + urldate = {2022}, + url = {https://matplotlib.org} +} + +@online{ply, + title = {PLY (Python Lex-Yacc)}, + urldate = {2022}, + url = {http://www.dabeaz.com/ply} +} + +@online{vim, + title = {welcome home : vim online}, + urldate = {2022}, + url = {https://www.vim.org} +} + +@online{neovim, + title = {Home - Neovim}, + urldate = {2022}, + url = {http://neovim.io} +} + +@online{latex, + title = {LaTeX - A document preparation system}, + urldate = {2022}, + url = {https://www.latex-project.org} +} + +@online{puml, + title = {Open-source tool that uses simple textual descriptions to draw beautiful UML diagrams.}, + urldate = {2022}, + url = {https://plantuml.com} +} + +@online{sl_sword, + title = {9x9 Tengen Openings at Sensei's Library}, + urldate = {2022}, + url = {https://senseis.xmp.net/?9x9TengenOpenings} +} + +@online{ogsLifeAndDeath, + author = {sunspark}, + title = {Cho Chikun's Encyclopedia of Life and Death - Elementary: 1 / 900}, + date = {2016}, + urldate = {2022}, + url = {https://online-go.com/puzzle/2824} +} + +@online{fsf_free, + author = {Free Software Foundation}, + title = {What is Free Software? - GNU Project - Free Software Foundation}, + date = {2022}, + urldate = {2022}, + url = {https://www.gnu.org/philosophy/free-sw.html} +} diff --git a/doc/tex/conclusions.tex b/doc/tex/conclusions.tex new file mode 100644 index 0000000..ebbf7fd --- /dev/null +++ b/doc/tex/conclusions.tex @@ -0,0 +1,128 @@ +\section{Conclusions} + +Some problems found during the project, an evaluation of the results and what +could be done in the future to improve the system are discussed here. + +\subsection{Problems with the Implementation of the Game} + +While Go has a simple set of rules, they lead to many borderline cases which +must be specifically addressed. For example, the ko rule obliges to check the +previous board positions after each move so no boards are repeated. + +These problems have been solved by designing the tree of moves of a match so +each move stores the full board layout after it has been played. This of course +increases the memory needed to run the application, but has been chosen over the +alternative of having to recreate the board parsing the tree backwards for each +move to check if ko is occurring, if a move makes a capture and which stones +have already been captured, etc. It is the old problem of memory versus +processing, which in this case has been answered with memory. Which one would be +the best solution would require a deep analysis out of the scope of the project. + +\subsection{Problems with the Monte Carlo Tree Search Algorithm} + +The implementation Monte Carlo Tree Search algorithm was not precise by itself +when asked for moves. This could be attributed to various factors: + +\begin{itemize} + + \item The way the score is counted makes it difficult to evaluate who is + winning at the beginning or middle of the game, so the score of a move + has to be evaluated by running playing simulations. These simulations + are not precise on analyzing the result since the moves in them are + played at random. + + \item The configuration used, 5 explorations with 10 simulations each for + cycle, does not allow for many good moves to be found. + +\end{itemize} + +To solve these problems, either a good score evaluation engine has to be +implemented, which is another machine learning problem by itself, or the +explorations and simulations of the engine tuned up to values unable to be run +on the development hardware. + +\subsection{Problems with Neural Networks} + +Some concerns with the design and results of the neural networks are discussed +on this section. + +\subsubsection{Finding the correct design} + +When approaching neural networks design with a basic understanding of their +inner workings it is easy to get lost in all the different factors that can be +tuned. The number and variant of layers, their number of nodes, their activation +function, the learning rate; they can all be tweaked and doing so in the favour +of the design requires a good knowledge of what is being done. + +The basic approach was to design the input and output shapes and then adjust the +network based on that. At first, the desired output was a 9x9 two-dimensional +matrix with the probabilities of playing on each move. A heatmap generated from +a failed solution of this approach can be seen on \fref{fig:oldHM}, where the +network distributed the probability for each row instead of the whole board. The +fitting of shapes of input, internal layers and output has found to be tricky at +first. + +\begin{figure}[h] + \begin{center} + \includegraphics[width=\textwidth]{img/matches/oldLineByLineHeatmap.png} + \caption{A heatmap from an old design of the dense neural network.} + \label{fig:oldHM} + Note that each row adds up to one, meaning that the engine is selecting + the best move for each row and not for the whole board. + \end{center} +\end{figure} + +Finally, based on the design of classification networks where each possible +class is mapped to a value in the one-dimensional output vector, it was decided +that the output was not a two-dimensional matrix but a one-dimensional vector. +This also helped address the possibility of passing, treating it as just another +possible move that was previously left out because of not knowing how to fit it +on the matrix of possible moves. This vector is then reshaped to a matrix +representing the board when needed, for example to generate the heatmaps used +for evaluation. + +\subsubsection{Mistakes on Networks' Play Patterns} + +One notable mistake made by the networks, specially the convolutional network, +was passing to much. Passing is considered just another move, so the networks +have no grasp that they should not pass until there are no more valuable moves +to be made. A new design problem could be to create a specific passing policy. + +Another observation is that the engines seem to follow traditional openings and +make reasonable decisions at the start of the game, with some interesting +variations. But as the game progresses their moves make less sense. This could +be because they have seen many examples of starting moves but complications at +the middle and end game are much more diverse and the networks had not been +trained on similar situations. + +\subsection{Viable Extensions} + +There are several ways this project could be improved, but were left out by lack +of resources or time. Some interesting ones are explored on this section. + +\subsubsection{Joining Monte Carlo Tree Search and Neural Networks} + +The tree search algorithm is better at the end of the game, where the tree to +explore is much more manageable. On the other hand, the neural networks are +better at the beginning, where the positions are more common and patterns are +simpler. The logical next step would be to join both: the network could select +the best moves to be made, and the tree search algorithm could explore them to +select the best and grow its explored tree in meaningful directions. + +This is also what was done by AlphaGo. By seeing the results obtained in this +project it makes sense they made that decision. + +\subsubsection{Train the Engine by Itself} + +By training a game engine on human matches, it can at best get as good as the +human players it has learned from. + +Top engines learn not from humans but by playing against themselves. This would +be an interesting problem to tackle, if requiring a new approach to the design +of the networks and a lot of training resources. + +\subsubsection{Presenting the Engine to the World} + +Some Go servers allow for bot players to be registered. This would provide a +source of training against players from all around the world and also an +evaluation over time of the engine strength. diff --git a/doc/tex/imago.tex b/doc/tex/imago.tex new file mode 100644 index 0000000..cc77673 --- /dev/null +++ b/doc/tex/imago.tex @@ -0,0 +1,178 @@ +\documentclass[12pt]{article} + +\usepackage{geometry} +\usepackage{graphicx} +\usepackage{booktabs} +\usepackage{hyperref} +\usepackage{csquotes} +\usepackage{enumitem} +\usepackage[indent=20pt]{parskip} % Space between paragraphs +\usepackage{indentfirst} % Indent first paragraph of sections + +\geometry{left=3cm,top=2cm,bottom=2cm,right=3cm} + +\usepackage{chngcntr} + +\hypersetup{colorlinks=false, + linkcolor=black, + filecolor=black, + urlcolor=black, + bookmarks=true +} + +\urlstyle{mono} + +\usepackage[backend=biber, style=numeric, sorting=none]{biblatex} +\addbibresource{tex/biber.bib} + +\usepackage{minted} % Code importing and formatting +\setminted{linenos, breaklines} + +\newcommand{\program}{Imago} + +\newcommand{\fref}[1]{Fig.~\ref{#1}} +\newcommand{\flist}[1]{Listing~\ref{#1}} +%\newcommand{\uurl}[1]{\underline{\url{#1}}} + +\newcommand{\tabitem}{~~\llap{\textbullet}~~} + +\begin{document} + +\frenchspacing + +\title{ + \begin{center} + \includegraphics[width=0.4\textwidth]{img/logoUniovi.png}\hfill + \includegraphics[width=0.3\textwidth]{img/logoEII.png} + \end{center}~\\[10pt] + \program\\ + \large An AI player of the game of Go +} + +\author{Íñigo Gutiérrez Fernández} + +\date{} + +\maketitle + +\thispagestyle{empty} + +\begin{figure}[h] + \begin{center} + \includegraphics[width=0.6\textwidth]{img/imago.jpg} + \end{center} +\end{figure} + +\clearpage + +\begin{abstract} + The game of Go presents a complex problem for machine learning by virtue of + containing a very wide and deep decision tree. This project has tried to + tackle the problem by using different decision algorithms and also provides + a full implementation of the game. These tools could be used by players and + developers as a foundation for other machine learning projects or to simply + keep studying the game. +\end{abstract} + +\clearpage + +\section*{Acknowledgements} + +To Vicente García Díaz, for helping me learning to program on my first year at +the school and directing me in this project on my last. + +To José Manuel Redondo López\cite{plantillaRedondo}, for making an extensive +template on which the structure of this documentation is extensively based. + +To all the people who have provided their time, support, ideas and company, all +fundamental for this project. + +\clearpage + +\section*{Copyright notice} + +\begin{displayquote} + + Copyright (C) 2021 \textbf{ÍÑIGO GUTIÉRREZ FERNÁNDEZ}. + + \textit{Permission is granted to copy, distribute and/or modify this + document under the terms of the GNU Free Documentation License, Version 1.3 + or any later version published by the Free Software Foundation; with the + Copyright notice as an Invariant Section, no Front-Cover Texts, and no + Back-Cover Texts. A copy of the license is included in the section + entitled ``GNU Free Documentation License''.} + +\end{displayquote} + +Although this document uses the GNU Free Documentation License to keep its +contained information free, bear in mind that it isn't software documentation or +a manual or guide of any kind, and serves just as the documentation for the +author's Degree's Final Project. + +This document is based on a template by José Manuel Redondo López, associate +professor of the University of Oviedo. The copyright notice he asks for +inclusion to use this template is included here. + +\begin{displayquote} + + Copyright (C) 2019 \textbf{JOSÉ MANUEL REDONDO + LÓPEZ}.\cite{plantillaRedondo} + + \textit{Permission is granted to copy, distribute and/or modify this + document under the terms of the GNU Free Documentation License, Version 1.3 + or any later version published by the Free Software Foundation; with no + Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy + of the license is included in the section entitled ``GNU Free + Documentation License''.} + +\end{displayquote} + +\clearpage + +\setcounter{secnumdepth}{3} +\setcounter{tocdepth}{4} +\tableofcontents + +\clearpage + +\counterwithin{figure}{section} +\renewcommand{\listfigurename}{Figures} +\listoffigures + +\clearpage + +\renewcommand{\listoflistingscaption}{Listings} +\counterwithin{listing}{section} +\listoflistings + +\clearpage + +\input{tex/introduction.tex} +\clearpage + +\input{tex/planification.tex} +\clearpage + +\input{tex/systemAnalysis.tex} +\clearpage + +\input{tex/systemDesign.tex} +\clearpage + +\input{tex/implementation.tex} +\clearpage + +\input{tex/results.tex} +\clearpage + +\input{tex/conclusions.tex} +\clearpage + +% References (bibliography) + +\setcounter{secnumdepth}{0} +\section{References} +\printbibliography[type=article,title={Cited articles},heading=subbibintoc]{} +\printbibliography[type=online,title={Online resources},heading=subbibintoc]{} + +\end{document} diff --git a/doc/tex/implementation.tex b/doc/tex/implementation.tex index 9e42313..d46e805 100644 --- a/doc/tex/implementation.tex +++ b/doc/tex/implementation.tex @@ -1,64 +1,121 @@ \section{Implementation} -\subsection{Engine} - -An engine implementing GTP.\@ It is designed to be used by a software controller -but can also be directly run, mostly for debugging purposes. Its design is shown -in \fref{fig:engine} - -\begin{figure}[h] - \begin{center} - \includegraphics[width=\textwidth]{diagrams/gtpEngine.png} - \caption{Design of the GTP engine.}\label{fig:engine} - \end{center} -\end{figure} - -\subsection{Modules} - -One module to store the state of the game and the game tree. One module to parse -moves. One module to read and write SGF files. Modules are shown in -\fref{fig:modules}. - -\begin{figure}[h] - \begin{center} - \includegraphics[width=\textwidth]{diagrams/modules.png} - \caption{Modules.}\label{fig:modules} - \end{center} -\end{figure} - -\subsection{Representation of a match} - -Strictly said, a match is composed of a series of moves. But since game review -and variants exploration is an important part of Go learing, \program{} allows -for navigation back and forth through the board states of a match and for new -variants to be created from each of these board states. Therefore, a match is -represented as a tree of moves. The state of the game must also be present so -liberties, captures and legality of moves can be addressed, so it is represented -with its own class, which holds a reference both to the game tree and the -current move. This classes and their relationship can be seen in -\fref{fig:gameRepresentation}. - -\begin{figure}[h] - \begin{center} - \includegraphics[width=0.7\textwidth]{diagrams/gameRepresentation.png} - \caption{A game is represented as a tree of moves.}\label{fig:gameRepresentation} - \end{center} -\end{figure} - -\subsection{SGF} - -To parse SGF files a lexer and parser have been implemented using PLY.\@ The -result of the parsing is an AST (Annotated Syntax Tree) reflecting the contents -of the text input, each node with zero or more properties, and with the ability -to convert themselves and their corresponding subtree into a GameTree. This is -done for the root node, since from the SGF specification there are some -properties only usable in the root node, like those which specify general game -information and properties such as rank of players or komi. These components are -shown in \fref{fig:sgfModule}. - -\begin{figure}[h] - \begin{center} - \includegraphics[width=\textwidth]{diagrams/sgfModule.png} - \caption{Components of the SGF file parsing module.}\label{fig:sgfModule} - \end{center} -\end{figure} +\subsection{Development Hardware} + +The development and evaluation machine is a Satellite L50-C laptop. Its +specifications are shown as a list for readability. + +\begin{itemize} + \item \textbf{CPU}: Intel i5-6200U, 2.800GHz + \item \textbf{Integrated GPU}: Intel Skylake GT2 + \item \textbf{Dedicated GPU}: Nvidia Geforce 930M + \item \textbf{RAM}: 8 GiB +\end{itemize} + +\subsection{Development Software} + +The tools selected for the development of the project and the documentation are +listed and explained on this section. All the tools used are either +free\cite{fsf_free} or open source software. The development machine runs 64 +bits Arch Linux as its operating system. + +\subsubsection{Language} + +The programming language of choice is Python\cite{python}. The rationale behind +this decision has been stated on Section \ref{sec:programmingLanguage}. It also +allows easy use of the Keras library for implementing neural networks. + +Various python libraries have been used to easy the development process or +assist in the analysis of results. These are: + +\paragraph{Keras/Tensorflow\cite{keras}} + +Tensorflow is a platform for machine learning which provides a diverse range of +tools, one of which is a python library for machine learning. + +Keras is a high-level API for Tensorflow allowing for the easy definition of +neural networks. It permits easily testing and comparing different network +layouts. + +\paragraph{NumPy\cite{numpy}} + +A scientific package for python providing a lot of mathematical tools. The most +interesting for this project are its capabilities to create and transform +matrices. + +\paragraph{Matplotlib\cite{matplotlib}} + +A python library for creating graphs and other visualizations. It is used to +show the likelihood of moves the neural networks of the project create from a +board configuration. + +\paragraph{PLY\cite{ply}} + +A tool for generating compilers in Python. It is an implementation of the lex +and yacc utilities, allowing to create lexers and parsers. It is used in the +project to create the SGF parser which transform SGF files to internal +representations of Go matches. + +\paragraph{Other utility libraries} + +These are some utility libraries commonly used for frequent programming tasks: + +\begin{itemize} + \item \textbf{sys}: to stop the execution of the program or access system info such + as primitives maximum values. + \item \textbf{os}: to interact with files. + \item \textbf{re}: to check strings with regular expressions. + \item \textbf{random}: to get random values, for example to obtain a random + item from a list. + \item \textbf{copy}: to obtain deep copies of multidimensional arrays. +\end{itemize} + +\subsubsection{Development Tools} + +\paragraph{Neovim\cite{neovim}} + +A text editor based on Vim\cite{vim}, providing its same functionality with +useful additions and defaults for modern computers and terminal emulators. With +some extensions and configuration it can become a powerful development +environment with a very fluid usage experience. That, and the fact that the +developer considers Vim's modal editing the best writing experience possible on +a computer, have made Neovim the editor of choice. + +%TODO: Write about neovim extensions +%\begin{itemize} +% \item FZF +% \item Extensions +% +%\end{itemize} + +\subsubsection{Documentation Tools} + +\paragraph{\LaTeX\cite{latex}} + +A typesetting system widely used in the investigation field, among others. It +allows for documentation like this text to be written in plain text and then +compiled to PDF or other formats, which permits keeping the source files of the +documentation small and organized plus other benefits of plain text such as +being able to use version control. + +\paragraph{PlantUML\cite{puml}} + +A program which creates diagrams from plain text files. PlantUML supports syntax +for many different sorts of diagrams, mainly but not only UML. It has been used +to generate the diagrams used in this text. + +\paragraph{Make} + +A tool for specifying and handling dependencies on a build system. It reads a +file, typically named ``Makefile'', containing which files are needed and on +which other files each of them depends, and then generates those files or +updates them if they already exist but their source files are newer than them. + +It has been used to generate this text from \LaTeX{} and PlantUML source files. +The contents of the Makefile with which this document has been compiled are +shown in \flist{code:makefile}. + +\begin{listing}[h] + \inputminted{make}{Makefile} + \caption{Documentation Makefile}\label{code:makefile} +\end{listing} diff --git a/doc/tex/interface.tex b/doc/tex/interface.tex deleted file mode 100644 index 9caa78d..0000000 --- a/doc/tex/interface.tex +++ /dev/null @@ -1,6 +0,0 @@ -\section{Interface} - -\subsection{Importing and exporting games} - -The format chosen to read and write games is SGF (Smart Game Format). It is a -widely used text format which allows for variants, comments and other metadata. diff --git a/doc/tex/introduction.tex b/doc/tex/introduction.tex index 6defbd9..fe5c205 100644 --- a/doc/tex/introduction.tex +++ b/doc/tex/introduction.tex @@ -12,5 +12,11 @@ game at heart, Go has nonetheless been interpreted as a stylized representation of fighting a war, settling a frontier, cornering a market, thrashing out an argument, or even of fortune-telling and prophecy. Go has always been one of the - most played games in the world. \parencite{sl_go} + most played games in the world.\cite{sl_go} \end{displayquote} + +As old and deep as Go is it has recently lived a revolution by the appearance of +artificial intelligences with superhuman strength. While not expecting to +achieve what a full team of developers and computer scientists at Google did, +this project aims to evaluate how an engine able to play the game of Go could be +created, implement such an engine and evaluate the results of the whole process. diff --git a/doc/tex/planification.tex b/doc/tex/planification.tex new file mode 100644 index 0000000..f0f7535 --- /dev/null +++ b/doc/tex/planification.tex @@ -0,0 +1,197 @@ +\section{Planning} + +\subsection{Driving Needs} + +As one of the deepest and most studied games in the world, Go presents a very +interesting problem for artificial intelligence. Implementing not only the +game's simple but subtle rules, but a system capable of playing it with a +satisfying level of skill, is a task worth of pursuing as an exercise on +software design, algorithmics and AI research. + +On the practical level, this project can be a foundation for the development of +different Go analysis algorithms by providing an existing engine to house them, +which can be of interest to Go players and software scientists alike. + +\subsection{Reach} + +Presented here are the ideal targets of the project. + +\begin{itemize} + \item An implementation of the game of Go, that is, a system for holding the + moves and variants of a match (a tree of moves) and the logic for the + game's rules. + \item An engine capable of analyzing board positions and generating strong + moves via various decision algorithms. + \item Either a GUI specifically developed for the project or an + implementation of an existing protocol so the engine can be used with + existing tools and GUIs. + \item A way for processing existing records of games, which are usually + recorded in the SGF format. +\end{itemize} + +\subsection{Project Stages} + +The project will be organized in several stages based on the different +components and needs. + +\subsubsection{Game Implementation} + +The rules of the game must be implemented, ideally in a way they can be tested +by direct human play. This system will at its bare minimum represent the +Japanese Go rules (area scoring, no superko rule, no suicide moves). + +\subsubsection{Engine Implementation} + +The key of this project is to create some kind of system able to generate strong +moves based on any given board configuration: this will be such system. It will +implement an existing protocol so it can be used with other compatible tools. It +has to be able to receive game updates and configuration and to output moves for +either player. It should also be modular enough so different algorithms can be +selected and tested against each other as an experimental search for the best of +them. + +\subsubsection{Artificial Intelligence Algorithms} + +Different algorithms for the engine to use should be implemented and tested. The +results of this development and testing process should be presented as part of +the final version of the project. + +\subsection{Logistics} + +The project will be developed by Íñigo Gutiérrez Fernández, student of the +Computer Software Engineering Degree at the University of Oviedo, with +supervision from Vicente García Díaz, Associate Professor in the Department of +Computer Science at the University of Oviedo. + +The used material consists of a development and testing machine owned by the +student with specifications stated later on the project plan. + +\subsection{Work Plan} + +The sole developer will be the student, who is currently working as a Junior +Software Engineer on a 35 hour per week schedule and with no university +responsibilities other than this project. Taking this into account, a sensible +initial assumption is that he will be able to work 3 hours a day, Monday to +Friday. Gantt diagrams for the planned working schedule are shown as +\fref{fig:planificationWorkPlanGame} and +\fref{fig:planificationWorkPlanEngine}. + +\begin{figure}[h] + \begin{center} + \includegraphics[width=\textwidth]{diagrams/planificationWorkPlanGame.png} + \caption{Initial work plan for implementing the game. + }\label{fig:planificationWorkPlanGame} + \end{center} +\end{figure} + +\begin{figure}[h] + \begin{center} + \includegraphics[width=\textwidth]{diagrams/planificationWorkPlanEngine.png} + \caption{Initial work plan for implementing the engine and algorithms. + }\label{fig:planificationWorkPlanEngine} + \end{center} +\end{figure} + +\subsection{Previous Works} + +\subsubsection{Existing Engines} + +\paragraph{AlphaGo} + +A Go play and analysis engine developed by DeepMind Technologies, a company +owned by Google. It revolutionized the world of Go in 2015 and 2016 when it +respectively became the first AI to win against a professional Go player and +then won against Lee Sedol, a Korean player of the highest professional rank and +one of the strongest players in the world at the time. Its source code is +closed, but a paper\cite{natureAlphaGo2016} written by the team and +published on Nature is available on +https://storage.googleapis.com/deepmind-media/alphago/AlphaGoNaturePaper.pdf. + +The unprecedented success of AlphaGo served as inspiration for many AI projects, +including this one. + +\paragraph{KataGo\cite{katago}} + +An open source project based on the AlphaGo paper that also achieved superhuman +strength of play. The availability of its implementation and documentation +presents a great resource for this project. + +\paragraph{GnuGo\cite{gnugo}} + +A software capable of playing Go part of the GNU project. Although not a strong +engine anymore, it is interesting for historic reasons as the free software +engine for which the GTP protocol was first defined. + +\subsubsection{Existing Standards} + +\paragraph{GTP\cite{gtp}} + +GTP (\textit{Go Text Protocol}) is a text based protocol for +communication with computer go programs. It is the protocol used by GNU Go and +the more modern and powerful KataGo. By supporting GTP the engine developed for +this project can be used with existing GUIs and other programs, making it easier +to use it with the tools users are already familiar with. + +\paragraph{SGF\cite{sgf}} + +SGF (\textit{Smart Go Format} or, in a more general context, \textit{Smart Game +Format}) is a text format widely used for storing records of Go matches which +allows for variants, comments and other metadata. It was devised for Go but it +supports other games with similar turn-based structure. Many popular playing +tools use it. By supporting SGF vast existing collections of games, such as +those played on online Go servers, can be used to train the decision algorithms +based on neural networks. + +\subsubsection{Sabaki\cite{sabaki}} + +Sabaki is a go board software compatible with GTP engines. It can serve as a GUI +for the engine developed in this project and as an example of the advantages of +following a standardized protocol. + +\subsubsection{Keras\cite{keras}} + +Keras is a deep learning API for Python allowing for the high-level definition +of neural networks. This permits easily testing and comparing different network +layouts. + +\subsection{Technological Infrastructure} + +\subsubsection{Programming Language}\label{sec:programmingLanguage} + +The resulting product of this project will be one or more pieces of software +able to be run locally on a personal computer. The programming language of +choice is Python\cite{python}, for various reasons: + +\begin{itemize} + + \item It has established a reputation on scientific fields and more + specifically on AI research and development. + \item Interpreters are available for many platforms, which allows the most + people possible to access the product. + \item Although not very deeply, it has been used by the developer student + during its degree including in AI and game theory contexts. + +\end{itemize} + +\subsubsection{Interface} + +Both the game and the engine will offer a text interface. For the game this +allows for quick human testing. For the engine it is mandated by the protocol, +since GTP is a text based protocol for programs using text interfaces. +Independent programs compatible with this interface can be used as a GUI. + +There is also the need of an interface with SGF files so existing games can be +processed by the trainer. + +Both the engine and the trainer will need to interface with the files storing +the neural network models. + +The systems' interfaces are shown in \fref{fig:interfaces}. + +\begin{figure}[h] + \begin{center} + \includegraphics[width=\textwidth]{diagrams/interfaces.png} + \caption{Interfaces of the three components of the project.} + \label{fig:interfaces} + \end{center} +\end{figure} diff --git a/doc/tex/previousWorks.tex b/doc/tex/previousWorks.tex deleted file mode 100644 index 6ba5dce..0000000 --- a/doc/tex/previousWorks.tex +++ /dev/null @@ -1,21 +0,0 @@ -\section{Previous works} - -\subsection{SGF} - -SGF (\textit{Smart Go Format} or \textit{Smart Game Format}) is a text file -format specification for records of games or even collections of them. It was -devised for Go but it supports other games with similar turns structure. It -supports move variations, annotations, setups and game metadata. By supporting -SGF, our application can be used to analyse existing games registered by other -applications, such as those played on online Go servers. - -The SGF specification can be found at -\url{https://www.red-bean.com/sgf/user_guide/index.html} - -\subsection{GTP} - -GTP (\textit{Go Text Protocol}) is a text based protocol for communication with -computer go programs. [ref https://www.lysator.liu.se/~gunnar/gtp/] It is the -protocol used by GNU Go and the more modern and powerful KataGo. By supporting -GTP, our application can be used with existing GUIs and other programs, making -it easier to use it with the tools users are already familiar with. diff --git a/doc/tex/results.tex b/doc/tex/results.tex new file mode 100644 index 0000000..3c586e4 --- /dev/null +++ b/doc/tex/results.tex @@ -0,0 +1,416 @@ +\newcommand{\boards}[5]{ +\begin{figure}[h] + \begin{center} + \includegraphics[width=0.4\textwidth]{#1} + \includegraphics[width=0.4\textwidth]{#2} + \caption{#3} + \label{#4} + #5 + \end{center} +\end{figure} +} + +\newcommand{\moveHeatmap}[9]{ +\begin{figure}[h] + \begin{center} + \includegraphics[width=0.35\textwidth]{#1} + \includegraphics[width=0.5\textwidth]{#2} + \\[\smallskipamount] + \includegraphics[width=0.35\textwidth]{#3} + \includegraphics[width=0.5\textwidth]{#4} + \\[\smallskipamount] + \includegraphics[width=0.35\textwidth]{#5} + \includegraphics[width=0.5\textwidth]{#6} + \caption{#7} + \label{#8} + #9 + \end{center} +\end{figure} +} + +\section{Results} + +\subsection{Monte Carlo Tree Search Evaluation} + +The Monte Carlo Algorithm tries to explore the tree of possibilities as +efficiently as possible. With this approach, it can be expected to fail when +alone on a problem such big as the game of Go. Nonetheless, there are some areas +where it can be useful. It will be evaluated by its capabilities while playing +games but also when presented with go problems. + +The Monte Carlo algorithm has been set to do 5 explorations with 10 simulations +each when it is asked for a move. In the hardware used this makes it think for +40 seconds on an empty 9x9 board. + +\subsubsection{Monte Carlo Tree Search against itself} + +\boards +{img/matches/mctsVSmcts01.jpg} +{img/matches/mctsVSmcts02.jpg} +{Monte Carlo Tree Search against itself.} +{fig:mctsVSmcts} +{Both boards are from the same match. The first shows only the first 12 moves +for clarity. The second shows the finished game.} + +When set to play against itself the algorithm shows what looks like random play. +Some moves could be debated as sensible, but many of them are on the second or +fist line, which so early on the game are the two worst places to play at. + +It seems clear that this algorithm, at least with this specific configuration, +can't handle the size of an empty Go board, even on the 9x9 size. + +A record of the game is shown in \fref{fig:mctsVSmcts}. + +\subsubsection{Monte Carlo Tree Search against Go problems} + +\boards +{img/problems/mctsProblem01.jpg} +{img/problems/mctsProblem02.jpg} +{Monte Carlo against the first of Cho Chikun's problems.} +{fig:mctsProblem01} + +Since tree exploration on smaller boards or advanced games with little empty +spaces should be easier the algorithm has also been tested on some Go problems. + +A Go problem or tsumego is a predefined layout of the board, or of a section of +the board, for which the player must find some beneficial move. Life and death +problems are a subset of tsumegos in which the survival of a group depends on +finding the correct sequence to save or kill the group. One collection of such +tsumegos is \textit{Cho Chikun's Encyclopedia of Life and Death}, part of which +are available on OGS\cite{ogsLifeAndDeath}, an online go server. + +The first of these problems and what the algorithm suggested as moves is shown +in \fref{fig:mctsProblem01}. + +Black makes the first move, which means the solution is to find some favorable +outcome for black, which in this case is killing the white group. The white +group has a critical point on B1. If white plays on B1 they make two eyes and +live, but if black plays there first white can't make two eyes and dies, so B1 +is the solution to the tsumego. This is one of the easiest Go problems. + +The algorithm neglects this solution. While asked five times to generate a move +for the starting position it suggested B1 only once. + +But notably, after making another move, it consistently suggested B1 for white, +which is the solution now that white has to play. So in the end it was able to +solve the tsumego, probably because after making a move it had already explored +part of the tree but it was difficult that it explored the solution for the +first move. + +The engine was tested against other tsumegos but it was not able to solve them, +so no more are shown here. + +\subsection{Neural Network Training} + +Each network has been training by receiving batches of SGF files which are first +converted to lists of moves by the Training System and then provided to the +train function of the networks. This has been executed with the following +command: + +\inputminted[fontsize=\small]{bash}{listings/trainCommand.sh} + +Which lists the contents of a folder containing multiple SGF files, shuffles the +list, takes some of them and passes them to train.py as arguments. The combined +list of game positions and moves made from them in all the games selected make +up the sample of the training. The number of files selected can of course be +adapted to the situation. + +The networks were configured to train for 20 epochs, that is, processing the +full batch and updating the weights on the network 20 times. 10\% of the sample +were used as validation. This means they were not used for training but to +check the accuracy and loss of the network after training with the rest of +the batch. This technique of validation can help detect overfitting, which +happens when a network does a very good job on the population it has been +trained on but it fails when receiving unseen data. + +The training shows loss decrementing and accuracy incrementing on each epoch for +both training and validation samples, which suggests there is learning happening +and no overfitting occurring. + +The number of files to train on has been selected by trying to keep the +resulting training time manageable for the developer. It was found that +incrementing the files per batch did not increment the training time linearly, +as with batches of 10 games an epoch of training could be completed in one +minute, in three minutes batches of 20 games, and in up to an hour with batches +of 100 games. + +The outputs from this training process can be seen on \flist{code:denseTraining} +and \flist{code:convTraining} for the dense network and the convolutional +network respectively. + +\begin{listing}[h] + \inputminted[fontsize=\scriptsize]{text}{listings/denseTraining.txt} + \caption{Dense neural network training log.} + \label{code:denseTraining} +\end{listing} + +\begin{listing}[h] + \inputminted[fontsize=\scriptsize]{text}{listings/convTraining.txt} + \caption{Convolutional neural network training log.} + \label{code:convTraining} +\end{listing} + +\subsection{Neural Network Evaluation} + +One way of studying the results of the training could be to programmatically +test if the network is able to predict human moves when provided with a game +file. This has the drawback of not directly testing the strength of the network, +since it would be tested on predicting the moves of some human player, whose +level could greatly vary, and not on its playing capabilities alone. This also +has already been tested to some extent by the validation portion of the samples +providing for training, and the results show the network increasing its accuracy +of predicting human moves through the training epochs. + +Another way is to make the network play either against itself, another network, +or a human player, and analyze the score it provides to each possible play. The +output of the network is a vector with the likelihood of making each move, so +we can take this values as how strongly the engine suggests each of them. + +\subsection{Neural Networks Against Themselves} + +The case of neural networks playing against themselves is interesting in that it +shows moves with high certainty, that is, the network strongly suggests a +specific move. It can be thought that this would be the most common line of play +the engine has seen, with one certain move leading into another. + +This happened for both the dense neural network and the convolutional network. + +\subsubsection{Dense network Against Itself} + +The initial moves of a match of the dense network against itself, along with +heatmaps showing the output from the network (which chance it gives for each +move), can be seen on Figs.~\ref{fig:denseVSdense01}, \ref{fig:denseVSdense02}, +\ref{fig:denseVSdense03} and \ref{fig:denseVSdense04}. + +The dense network starts on the center of the board, which is one of the +standard openings in the 9x9 board. It starts on a very good track, but we must +acknowledge that the empty board is a position present on every go match it has +trained on and so it should know it well. It probably means the center was the +most played opening in the sample. It is interesting to check the heatmap of +this move, since the selected move has only a score of 0.27. Other common +openings on the 9x9 are represented on the vertices with some significant score. + +The heatmap of the response to the center play is also interesting. The four +highest scored moves are equivalent, since the first move has not broken any +symmetry. The move they represent, two spaces from the first stone, is also the +consensual response. The three moves following in score are also equivalent +between them. + +The first white stone has broken the symmetry in one direction, and so the +second black move is suggested with more likelihood than the previous one. This +is also a very common sequence of play. + +The following moves are a white attack followed by black's sensible response, +and a strong white extension (arguably to the wrong side, but the minutia of why +the other side would be better is probably not grasped by the engine). The +following moves could be called looking for complication, which is a strategy. + +Overall some moves could be criticised but the engine is showing at least a +basic knowledge of the game's strategy. + +\subsubsection{Convolutional Network Against Itself} + +The same information on a match of the convolutional network against itself can +be seen on Figs.~\ref{fig:convVSconv01}, \ref{fig:convVSconv02}, +\ref{fig:convVSconv03} and \ref{fig:convVSconv04}. + +The convolutional network also likes to start on the center of the board and is +more certain of the move than the dense network. Its next two plays are also the +same, but the match starts to diverge from the fourth move. + +Instead of attacking the black stone on the side, white plays directly under at +B5, keeping the edge of symmetry. This is strategically important because while +symmetry is present playing on either side makes no difference, while playing +after the opponent has broken symmetry gives the opportunity to respond and +capitalize on the now better side to play. + +Black responds to the white stone by attacking it at B4 and white then extends +to the more open side with F7. + +The match up to this point is discussed on Sensei's Library as an interesting +sequence deriving from the center first stone\cite{sl_sword}. The discussion +there states that white should have extended to the other side. + +The following moves are some sensible continuations, with approaches and +responses on each side. They are probably not the best moves, but the idea +behind them can be read from the board. + +\subsubsection{Dense Network Against Convolutional} + +The first 12 and the last 6 moves of a match of the dense network against the +convolutional network can be seen on be seen on Figs.~\ref{fig:denseVSconv01}, +\ref{fig:denseVSconv02}, \ref{fig:denseVSconv03}, \ref{fig:denseVSconv04}, +\ref{fig:denseVSconv05} and \ref{fig:denseVSconv06}. + +The dense network is given black. The convolutional network plays then as white. + +The first three moves are known territory for both networks, since both play the +same sequence when matched against themselves. + +After white plays under the black stone as the fourth move, black makes a maybe +too careful extension, but it seemed very sure about it. Now white has some +doubts. It seems to like the extensions to both sides, if a bit more to the more +open side, a preference both engines have previously shown at this point of the +match. The move it prefers the most, though, would be to play again under black. +So, as this move is impossible, it defers to the extension to the open side with +F3. + +Black now makes the mistake of overattacking the lone white stone on B5 with B6, +and white capitalizes on it by keeping extending with D3. B5 is still a move +with a not negligible score, but it is no longer the highest. + +The following moves are questionable, with black keeping building a very tight +group and getting no territory and white attacking too directly with a lone +stone into that strong group. + +In the final moves of the game we see that black has built a living group on the +right side and things are looking good for it on the rest of the board. White +has passed a bunch of times, starting too early, which means it gives too high +an importance to passing. Since the networks learned only by seeing moves in +response to boards, it has not learnt that it should not pass until there is no +more to gain. But it is also important to note that passing was not the most +likely move for the engine to make, it just happens that the move it would like +to make is already taken and the highest chance available move is to pass. + +\moveHeatmap +{img/matches/denseVSdense_board_01.jpg} +{img/matches/denseVSdense_heatmap_01.png} +{img/matches/denseVSdense_board_02.jpg} +{img/matches/denseVSdense_heatmap_02.png} +{img/matches/denseVSdense_board_03.jpg} +{img/matches/denseVSdense_heatmap_03.png} +{Dense network against itself, moves 1 through 3.} +{fig:denseVSdense01} + +\moveHeatmap +{img/matches/denseVSdense_board_04.jpg} +{img/matches/denseVSdense_heatmap_04.png} +{img/matches/denseVSdense_board_05.jpg} +{img/matches/denseVSdense_heatmap_05.png} +{img/matches/denseVSdense_board_06.jpg} +{img/matches/denseVSdense_heatmap_06.png} +{Dense network against itself, moves 4 through 6.} +{fig:denseVSdense02} + +\moveHeatmap +{img/matches/denseVSdense_board_07.jpg} +{img/matches/denseVSdense_heatmap_07.png} +{img/matches/denseVSdense_board_08.jpg} +{img/matches/denseVSdense_heatmap_08.png} +{img/matches/denseVSdense_board_09.jpg} +{img/matches/denseVSdense_heatmap_09.png} +{Dense network against itself, moves 7 through 9.} +{fig:denseVSdense03} + +\moveHeatmap +{img/matches/denseVSdense_board_10.jpg} +{img/matches/denseVSdense_heatmap_10.png} +{img/matches/denseVSdense_board_11.jpg} +{img/matches/denseVSdense_heatmap_11.png} +{img/matches/denseVSdense_board_12.jpg} +{img/matches/denseVSdense_heatmap_12.png} +{Dense network against itself, moves 10 through 12.} +{fig:denseVSdense04} + +\moveHeatmap +{img/matches/convVSconv_board_01.jpg} +{img/matches/convVSconv_heatmap_01.png} +{img/matches/convVSconv_board_02.jpg} +{img/matches/convVSconv_heatmap_02.png} +{img/matches/convVSconv_board_03.jpg} +{img/matches/convVSconv_heatmap_03.png} +{Convolutional network against itself, moves 1 through 3.} +{fig:convVSconv01} + +\moveHeatmap +{img/matches/convVSconv_board_04.jpg} +{img/matches/convVSconv_heatmap_04.png} +{img/matches/convVSconv_board_05.jpg} +{img/matches/convVSconv_heatmap_05.png} +{img/matches/convVSconv_board_06.jpg} +{img/matches/convVSconv_heatmap_06.png} +{Convolutional network against itself, moves 4 through 6.} +{fig:convVSconv02} + +\moveHeatmap +{img/matches/convVSconv_board_07.jpg} +{img/matches/convVSconv_heatmap_07.png} +{img/matches/convVSconv_board_08.jpg} +{img/matches/convVSconv_heatmap_08.png} +{img/matches/convVSconv_board_09.jpg} +{img/matches/convVSconv_heatmap_09.png} +{Convolutional network against itself, moves 7 through 9.} +{fig:convVSconv03} + +\moveHeatmap +{img/matches/convVSconv_board_10.jpg} +{img/matches/convVSconv_heatmap_10.png} +{img/matches/convVSconv_board_11.jpg} +{img/matches/convVSconv_heatmap_11.png} +{img/matches/convVSconv_board_12.jpg} +{img/matches/convVSconv_heatmap_12.png} +{Convolutional network against itself, moves 10 through 12.} +{fig:convVSconv04} + +\moveHeatmap +{img/matches/denseVSconv_board_01.jpg} +{img/matches/denseVSconv_heatmap_01.png} +{img/matches/denseVSconv_board_02.jpg} +{img/matches/denseVSconv_heatmap_02.png} +{img/matches/denseVSconv_board_03.jpg} +{img/matches/denseVSconv_heatmap_03.png} +{Dense network against convolutional, moves 1 through 3.} +{fig:denseVSconv01} +{The dense network plays as black.} + +\moveHeatmap +{img/matches/denseVSconv_board_04.jpg} +{img/matches/denseVSconv_heatmap_04.png} +{img/matches/denseVSconv_board_05.jpg} +{img/matches/denseVSconv_heatmap_05.png} +{img/matches/denseVSconv_board_06.jpg} +{img/matches/denseVSconv_heatmap_06.png} +{Dense network against convolutional, moves 4 through 6.} +{fig:denseVSconv02} + +\moveHeatmap +{img/matches/denseVSconv_board_07.jpg} +{img/matches/denseVSconv_heatmap_07.png} +{img/matches/denseVSconv_board_08.jpg} +{img/matches/denseVSconv_heatmap_08.png} +{img/matches/denseVSconv_board_09.jpg} +{img/matches/denseVSconv_heatmap_09.png} +{Dense network against convolutional, moves 7 through 9.} +{fig:denseVSconv03} + +\moveHeatmap +{img/matches/denseVSconv_board_10.jpg} +{img/matches/denseVSconv_heatmap_10.png} +{img/matches/denseVSconv_board_11.jpg} +{img/matches/denseVSconv_heatmap_11.png} +{img/matches/denseVSconv_board_12.jpg} +{img/matches/denseVSconv_heatmap_12.png} +{Dense network against convolutional, moves 10 through 12.} +{fig:denseVSconv04} + +\moveHeatmap +{img/matches/denseVSconv_board_72.jpg} +{img/matches/denseVSconv_heatmap_72.png} +{img/matches/denseVSconv_board_73.jpg} +{img/matches/denseVSconv_heatmap_73.png} +{img/matches/denseVSconv_board_74.jpg} +{img/matches/denseVSconv_heatmap_74.png} +{Dense network against convolutional, moves 72 through 74.} +{fig:denseVSconv05} + +\moveHeatmap +{img/matches/denseVSconv_board_75.jpg} +{img/matches/denseVSconv_heatmap_75.png} +{img/matches/denseVSconv_board_76.jpg} +{img/matches/denseVSconv_heatmap_76.png} +{img/matches/denseVSconv_board_77.jpg} +{img/matches/denseVSconv_heatmap_77.png} +{Dense network against convolutional, moves 75 through 77.} +{fig:denseVSconv06} + +% Needed line so the processing of the last command doesn't find an EOF diff --git a/doc/tex/systemAnalysis.tex b/doc/tex/systemAnalysis.tex new file mode 100644 index 0000000..c3c6aa8 --- /dev/null +++ b/doc/tex/systemAnalysis.tex @@ -0,0 +1,892 @@ +\section{System Analysis} + +\subsection{System Reach Determination} + +These are the main goals the final product must reach. + +\begin{enumerate} + + \item The implementation, analysis and comparison of different decision + algorithms for genarating moves. This is the main goal and the following + ones are derived from the need of reaching it. + + \item A library for representing the game of Go. It can be used for the + decision algorithms to keep the state of the game and can also be used + in an interactive application for a user to play the game and test the + code. + + \item An engine program as a way of presenting an interface for using these + algorithms. The engine will use the GTP so it can be used with an + existing GUI or other tools. + + \item A parser for SGF files so they can be processed in the training of + neural networks. + +\end{enumerate} + +\subsection{System Requirements} + +The requirements for the system are expressed here in a nested list way, each of +them with a textual and numeric reference for them to be traceable. The +functional requirements are exposed first, followed by the other kinds of +requisites needed for the system. + +\setlist[enumerate,2]{label*=\arabic*.} +\setlist[enumerate,3]{label*=\arabic*.} + +\subsubsection{Functional Requirements} + +\paragraph{Game Requirements} + +\setlist[enumerate,1]{label=FRG \arabic*.} + +\begin{enumerate} + + \item The game program is interactive. + + \item Movements can be introduced to be played on the board. + \begin{enumerate} + \item A move is introduced as the textual representation of the + coordinates of the vertex to play on or as ``pass''. + \begin{enumerate} + \item The text introduced for the move must follow the + regular expression \texttt{([A-Z][0-9]+|pass)} + \item If the move is not valid it must be notified to the + user and another move asked for. + \end{enumerate} + \end{enumerate} + + \item The state of the board can be shown to the user. + \begin{enumerate} + \item A text representation of each cell is printed. + \begin{enumerate} + \item A different character is used for each different state + of a cell. + \end{enumerate} + \item The coordinates system is shown around the board. + \begin{enumerate} + \item Columns are shown as capital letters left to right + starting with ``A'' and skipping ``I''. + \item Rows are shown as numbers starting with 1 on the + lowest row and increasing upwards. + \end{enumerate} + \end{enumerate} + + \item The board will behave according to the Japanese rules of Go. + +\end{enumerate} + +\paragraph{Engine Requirements} + +\setlist[enumerate,1]{label=FRE \arabic*.} + +\begin{enumerate} + + \item The engine program is interactive. + + \item The engine implements the GTP (\textit{Go Text Protocol}) for its + interface. + \begin{enumerate} + \item Commands are read from standard input. + \item Responses are provided via standard output. + \item There exist commands to set up the conditions of the match. + \begin{enumerate} + \item The size of the board can be set. + \item The komi can be set. + \end{enumerate} + \item There exist commands to manipulate the internal representation + of the match. + \begin{enumerate} + \item It is possible to indicate a move being played. + \item It is possible to clear the board. + \end{enumerate} + \item There exists a command to generate a move. + \begin{enumerate} + \item The generated move must be a playable move. + \item Generating a move does not change the internal + representation of the match. + \end{enumerate} + \item There exist commands to ask for information about the engine. + \begin{enumerate} + \item It is possible to ask for the protocol version + implemented. + \item It is possible to ask for the name of the engine. + \item It is possible to ask for the version of the engine. + \item It is possible to ask whether a specific command is + known to the engine. + \item It is possible to ask for a list of the known commands. + \end{enumerate} + \item There exists a command to stop the engine. + \end{enumerate} + + \item The engine can be executed from the command line. + \begin{enumerate} + \item The engine can be executed directly from an interactive shell. + \item The engine can be executed by another program to be used as + backend. + \end{enumerate} + +\end{enumerate} + +\paragraph{Trainer Requirements} + +\setlist[enumerate,1]{label=FRT \arabic*.} + +\begin{enumerate} + + \item The trainer program is non-interactive. + + \item The trainer can be executed from the command line. + \begin{enumerate} + \item The trainer can be executed directly from an interactive shell. + \end{enumerate} + + \item The trainer can interact with stored neural network models. + \begin{enumerate} + \item The trainer can read stored models to continue training them. + \item The trainer can store model files after their training. + \end{enumerate} + + \item The trainer can import existing games. + \begin{enumerate} + \item Records of games stored as SGF can be imported. + \item Files containing records of games are provided as arguments to + the trainer. + \end{enumerate} + +\end{enumerate} + +%\subsubsection{Security Requirements} +% +%\setlist[enumerate,1]{label=SR \arabic*.} + +\subsubsection{Usability Requirements} + +\setlist[enumerate,1]{label=UR \arabic*.} + +\begin{enumerate} + + %TODO: Implement this + \item The engine executable will include a help option with the different + modes of execution. + +\end{enumerate} + +\subsubsection{User Requirements} + +\setlist[enumerate,1]{label=USR \arabic*.} + +\begin{enumerate} + + \item For understanding the workings of the application the user needs to be + familiar with the basics of the game of Go. + + \item For directly using the engine the user needs to be familiar with + command line interfaces. + + \item For directly using the trainer the user needs to know the different + network models available. + +\end{enumerate} + +\subsubsection{Technological Requirements} + +\setlist[enumerate,1]{label=TR \arabic*.} + +\begin{enumerate} + + \item The game program will be a python file able to be executed by the + python interpreter. + + \item The game program will make use of standard input and standard output + for communication. + \begin{enumerate} + \item Standard input will be used for reading moves. + \item Standard output will be used for showing the board. + \item Standard output will be used for messages directed to the user. + \end{enumerate} + + \item The engine program will be a python file able to be executed by the + python interpreter. + + \item The engine program will make use of standard input and standard output + for communication. + \begin{enumerate} + \item Standard input will be used for reading commands. + \item Standard output will be used for showing the result of + commands. + \end{enumerate} + + \item The trainer program will be a python file able to be executed by the + python interpreter. + + \item The engine program will make use of standard input and standard output + for communication. + \begin{enumerate} + \item Standard input will be used for reading commands. + \item Standard output will be used for showing the result of + commands. + \end{enumerate} + +\end{enumerate} + +\subsubsection{Response Time Requirements} + +\setlist[enumerate,1]{label=RTR \arabic*.} + +\begin{enumerate} + +%TODO: Check and update this to something feasible + \item The maximum thinking time of the engine will be configurable. + \begin{enumerate} + \item It will be possible to pass the maximum time as a launch + argument. + \item It will be possible to store the maximum time as a setting + in a configuration file. + \end{enumerate} + +\end{enumerate} + +\setlist[enumerate,1]{label=\arabic*.} + +\subsection{Subsystems} + +There will be three main subsystems. + +\subsubsection{Game System} + +The Game System will be in charge of storing all the state information regarding +a Go match, such as the history of moves, the possible variations, the state of +the board at any given time or the current number of captured stones. + +This system will include a command-line interface with which to play Go matches +between human players to show and test its capabilities. + +\subsubsection{Engine System} + +The Engine System will implement the GTP interface and use the Game System to +analyse positions and generate moves via decision algorithms. + +This system can be directly called to manually set up game states and ask for +moves or can be called by other programs which use GTP to be used as backend for +playing matches against a computer player. + +\subsubsection{Training System} + +The Training System will process SGF files storing records of games, train the +neural network models over those games and store the result. These models can +then be imported by the engine and be used to generate moves. + +\subsubsection{Interface Between Subsystems} + +The Training System depends on the NeuralNetwork interface of the Engine System +and uses it to train and store the neural network models. + +Both the Engine and Training systems depend on the GameMove class of the Game +System. The Engine System uses it to store the state of a game and provide it +to the decision algorithms. The Training System uses it to create the internal +representation of a game resulting from the processing of an SGF file. + +\subsection{Class Analysis} + +\subsubsection{Class Diagram} + +The classes resulting from the analysis phase are shown in +\fref{fig:analysisClasses}. + +\begin{figure}[h] + \begin{center} + \includegraphics[width=\textwidth]{diagrams/analysisClasses.png} + \caption{General classes obtained from the analysis + phase.}\label{fig:analysisClasses} + \end{center} +\end{figure} + +\subsubsection{Class Description} + +\newcommand{\interclassSpace}{30pt} + +\paragraph{Engine System} +\indent \\ + +\begin{tabular}{p{0.9\linewidth}} + \toprule + \textbf{EngineIO} \\ + \midrule + \textbf{Description} \\ + Offers the interface with the engine. \\ + \midrule + \textbf{Responsibilities} \\ + \tabitem{Read input.} \\ + \tabitem{Do some preprocessing.} \\ + \tabitem{Forward commands to the engine logic component.} \\ + \midrule + \textbf{Proposed attributes} \\ + \midrule + \textbf{Proposed methods} \\ + \tabitem{\textbf{start()}: Starts reading standard input in a loop.} \\ + \bottomrule +\end{tabular} + +\vspace{\interclassSpace} + +\begin{tabular}{p{0.9\linewidth}} + \toprule + \textbf{EngineLogic} \\ + \midrule + \textbf{Description} \\ + Does the core logic and connects the different components of the engine. \\ + \midrule + \textbf{Responsibilities} \\ + \tabitem{Processes the commands and arguments forwarded by the IO + component.} \\ + \tabitem{Handles the logic of the game by using components from the game + module.} \\ + \tabitem{Calls a decision algorithm to generate moves.} \\ + \midrule + \textbf{Proposed attributes} \\ + \midrule + \textbf{Proposed methods} \\ + \bottomrule +\end{tabular} + +\vspace{\interclassSpace} + +\newcommand{\decisionAlgorithmMethods}{ + \tabitem{\textbf{pickMove()}: Gives a move to play.} \\ + \tabitem{\textbf{forceNextMove(coords)}: Notifies the system of a played + move so it can update its state accordingly.} \\ + \tabitem{\textbf{clearBoard()}: Empties the move history. The algorithm will + now generate moves as from a new game.} \\ +} + +\begin{tabular}{p{0.9\linewidth}} + \toprule + \textbf{DecisionAlgorithm} \\ + \midrule + \textbf{Description} \\ + Interface for the decision algorithms to be used by the engine. \\ + \midrule + \textbf{Responsibilities} \\ + \tabitem{Analyzing game states and generating moves.} \\ + \midrule + \textbf{Proposed attributes} \\ + \textit{(Depends on the algorithm.)} \\ + \midrule + \textbf{Proposed methods} \\ + \decisionAlgorithmMethods + \bottomrule +\end{tabular} + +\vspace{\interclassSpace} + +\begin{tabular}{p{0.9\linewidth}} + \toprule + \textbf{MonteCarloTreeSearch} \\ + \midrule + \textbf{Description} \\ + Implements the Monte Carlo Tree Search algorithm for exploring the tree of + the game and deciding on a move. \\ + \midrule + \textbf{Responsibilities} \\ + \tabitem{Analyzing game states and generating moves.} \\ + \midrule + \textbf{Proposed attributes} \\ + \tabitem{\textbf{root}: The root node of a tree representing of the current + game state and the explored possible moves from it.} \\ + \midrule + \textbf{Proposed methods} \\ + \decisionAlgorithmMethods + \bottomrule +\end{tabular} + +\vspace{\interclassSpace} + +\begin{tabular}{p{0.9\linewidth}} + \toprule + \textbf{MCTSNode} \\ + \midrule + \textbf{Description} \\ + A node of the tree used by the Monte Carlo Tree Search algorithm. \\ + \midrule + \textbf{Responsibilities} \\ + \tabitem{Storing a specific state of a match.} \\ + \midrule + \textbf{Proposed attributes} \\ + \tabitem{\textbf{visits}: How many times the node has been visited.} \\ + \tabitem{\textbf{score}: The number of explorations of the node resulting in + victory.} \\ + \tabitem{\textbf{move}: A GameMove for accessing game state and logic.} \\ + \tabitem{\textbf{parent}: This node's parent in the tree.} \\ + \tabitem{\textbf{children}: The nodes following from this node in the tree.} + \\ + \tabitem{\textbf{unexploredVertices}: The plays which could be explored from + this node.} \\ + \midrule + \textbf{Proposed methods} \\ + \tabitem{\textbf{ucbForPlayer()}: Computes the Upper Confidence Bound of the + node from the perspective of the player making the move stored in the node.} + \\ + \tabitem{\textbf{selection()}: Monte Carlo Tree Search selection step. + Selects the most promising node which still has some unexplored children.} + \\ + \tabitem{\textbf{expansion()}: Monte Carlo Tree Search expansion step. Picks + an unexplored vertex from the node and adds it as a new MCTSNode.} \\ + \tabitem{\textbf{expansionForCoords()}: Performs an expansion for the given + coordinates. This represents forcing a move on the algorithm.} \\ + \tabitem{\textbf{simulation()}: Play random matches to accumulate reward + information on the node.} \\ + \bottomrule +\end{tabular} + +\vspace{\interclassSpace} + +\begin{tabular}{p{0.9\linewidth}} + \toprule + \textbf{Keras} \\ + \midrule + \textbf{Description} \\ + Implements the DecisionAlgorithm interface to give access to a neural + network. \\ + \midrule + \textbf{Responsibilities} \\ + \tabitem{Analyzing game states and generating moves.} \\ + \midrule + \textbf{Proposed attributes} \\ + \tabitem{\textbf{currentMove}: A GameMove for accessing game state and + logic.} \\ + \tabitem{\textbf{neuralNetwork}: A NeuralNetwork instance for generating + moves.} \\ + \midrule + \textbf{Proposed methods} \\ + \decisionAlgorithmMethods + \bottomrule +\end{tabular} + +\vspace{\interclassSpace} + +\begin{tabular}{p{0.9\linewidth}} + \toprule + \textbf{NeuralNetwork} \\ + \midrule + \textbf{Description} \\ + Manages the neural networks used by the engine. \\ + \midrule + \textbf{Responsibilities} \\ + \tabitem{Analyzing game states and generating moves.} \\ + \tabitem{Generating a new neural network.} \\ + \tabitem{Loading a model file to use an existing trained neural network.} \\ + \midrule + \textbf{Proposed attributes} \\ + \tabitem{\textbf{currentMove}: A GameMove for accessing game state and + logic.} \\ + \tabitem{\textbf{neuralNetwork}: A NeuralNetwork instance for generating + moves.} \\ + \midrule + \textbf{Proposed methods} \\ + \tabitem{\textbf{pickMove()}: Uses the current internal model to pick a move + given a game state.} \\ + \tabitem{\textbf{trainModel()}: Receives a list of games, with each game + being a list of moves, and trains the network on them.} \\ + \tabitem{\textbf{saveModel()}: Saves the current internal neural network + model.} \\ + \tabitem{\textbf{saveHeatmap()}: Saves an image of a heatmap of move + likelihood.} \\ + \tabitem{\textbf{saveModelPlot()}: Saves an image of a plot of the model + configuration.} \\ + \bottomrule +\end{tabular} + +\vspace{\interclassSpace} + +\paragraph{Game System} + +\indent \\ + +\begin{tabular}{p{0.9\linewidth}} + \toprule + \textbf{GameState} \\ + \midrule + \textbf{Description} \\ + Stores the state of a match. \\ + \midrule + \textbf{Responsibilities} \\ + \tabitem{Store state information.} \\ + \tabitem{Offer methods to manipulate the game state.} \\ + \midrule + \textbf{Proposed attributes} \\ + \midrule + \textbf{Proposed methods} \\ + \tabitem{\textbf{getCurrentPlayer()}: Returns the player who should play + next.} \\ + \tabitem{\textbf{playMove()}: Play a move in the specified coordinates + for the specified player.} \\ + \tabitem{\textbf{playMoveForPlayer()}: Play a move in the specified coordinates + for the player who should play next.} \\ + \tabitem{\textbf{undo()}: Reset the state to how it was just before the + last move was played.} \\ + \bottomrule +\end{tabular} + +\vspace{\interclassSpace} + +\begin{tabular}{p{0.9\linewidth}} + \toprule + \textbf{GameBoard} \\ + \midrule + \textbf{Description} \\ + Stores the state of a board position and handles its logic. \\ + \midrule + \textbf{Responsibilities} \\ + \tabitem{Store the vertices of a board position.} \\ + \tabitem{Logic related to a board position.} \\ + \midrule + \textbf{Proposed attributes} \\ + \tabitem{\textbf{board}: An array of the stones on the board.} \\ + \midrule + \textbf{Proposed methods} \\ + \tabitem{\textbf{getGroupLiberties()}: Returns a set with the empty vertices + adjacent to the group occupying a vertex.} \\ + \tabitem{\textbf{getGroupLibertiesCount()}: Returns the number of liberties + of the group occupying a vertex.} \\ + \tabitem{\textbf{getGroupVertices()}: Returns a set with the vertices of the + group occupying a vertex.} \\ + \tabitem{\textbf{getGroupVerticesCount()}: Returns the number of stones of + the group occupying a vertex.} \\ + \bottomrule +\end{tabular} + +\vspace{\interclassSpace} + +\begin{tabular}{p{0.9\linewidth}} + \toprule + \textbf{GameMove} \\ + \midrule + \textbf{Description} \\ + Stores information about a specific move and its relationships to the + previous and next moves. \\ + \midrule + \textbf{Responsibilities} \\ + \tabitem{Store information about a move (board, player, coordinates\ldots).} \\ + \midrule + \textbf{Proposed attributes} \\ + \tabitem{\textbf{board}: The board as of this move.} \\ + \tabitem{\textbf{nextMoves}: The list of moves played after this + one. Different moves represent different game variations.} \\ + \tabitem{\textbf{previousMove}: The move before this one.} \\ + \tabitem{\textbf{isPass}: True if the move is a pass and not a stone + placement.} \\ + \tabitem{\textbf{coords}: The coordinates of the board the move was + played at. Has no meaning if \textbf{isPass} is true.} \\ + \tabitem{\textbf{playerWhoPassed}: The player who made this move. Has no + meaning if \textbf{isPass} is false, since the player can be obtained from + the coordinates of the move when it is not a pass.} \\ + \midrule + \textbf{Proposed methods} \\ + \tabitem{\textbf{getRow()}: Returns the row the move was played at.} \\ + \tabitem{\textbf{getCol()}: Returns the col the move was played at.} \\ + \tabitem{\textbf{getPlayer()}: Returns the player who played the move.} \\ + \tabitem{\textbf{getNextPlayer()}: Returns the player who should play after + this move.} \\ + \tabitem{\textbf{getGameLength()}: Returns the number of moves the game has + had.} \\ + \tabitem{\textbf{getPlayableVertices()}: Returns the legal vertices for the + next move.} \\ + \tabitem{\textbf{addMove()}: Inserts a new children move for the given + coordinates and for the player who should make the next move.} \\ + \tabitem{\textbf{addMoveForPlayer()}: Inserts a new children move for the given + coordinates and player.} \\ + \bottomrule +\end{tabular} + +\vspace{\interclassSpace} + +\begin{tabular}{p{0.9\linewidth}} + \toprule + \textbf{GameBoard} \\ + \midrule + \textbf{Description} \\ + Represents a board. Contains played stones and the amount of captures made + by each player. \\ + \midrule + \textbf{Responsibilities} \\ + \tabitem{Store a specific layout of stones in the board.} \\ + \midrule + \textbf{Proposed attributes} \\ + \tabitem{\textbf{board}: An array containing the stone layout.} \\ + \tabitem{\textbf{capturesBlack}: The stones captured by black before the + position.} \\ + \tabitem{\textbf{capturesWhite}: The stones captured by white before the + position.} \\ + \midrule + \textbf{Proposed methods} \\ + \tabitem{\textbf{getBoardHeight()}: Returns the number of rows of the board.} \\ + \tabitem{\textbf{getBoardWidth()}: Returns the number of columns of the board.} \\ + \tabitem{\textbf{getGroupLiberties()}: Returns a list with the empty + vertices adjacent to the group occupying a vertex.} \\ + \tabitem{\textbf{getGroupVertices()}: Returns a list with the vertices + occupied by the group occupying a vertex.} \\ + \tabitem{\textbf{moveAndCapture()}: Makes a move and captures the + corresponding stones if the move results in the capture of a group.} \\ + \tabitem{\textbf{score()}: Gets the current score based on the already + surrounded territory. This follows Japanese rules.} \\ + \bottomrule +\end{tabular} + +\vspace{\interclassSpace} + +\paragraph{Training System} + +\indent \\ + +\begin{tabular}{p{0.9\linewidth}} + \toprule + \textbf{Trainer} \\ + \midrule + \textbf{Description} \\ + Provides the neural networks with moves to train on. \\ + \midrule + \textbf{Responsibilities} \\ + \tabitem{Obtain moves from stored records of matches.} \\ + \tabitem{Provide neural networks with moves to train on.} \\ + \midrule + \textbf{Proposed attributes} \\ + %TODO: Explain why this is empty + \midrule + \textbf{Proposed methods} \\ + \tabitem{\textbf{loadGameTree()}: Reads a file and generates a GameMove tree + from its contents.} \\ + \bottomrule +\end{tabular} + +\vspace{\interclassSpace} + +\begin{tabular}{p{0.9\linewidth}} + \toprule + \textbf{Parser} \\ + \midrule + \textbf{Description} \\ + Reads SGF files and converts them to a tree of GameMove from the Game + System. \\ + \midrule + \textbf{Responsibilities} \\ + \tabitem{Read SGF files.} \\ + \tabitem{Convert the content of the SGF files to a tree of GameMove.} \\ + \midrule + \textbf{Proposed attributes} \\ + %TODO: Explain why this is empty + \midrule + \textbf{Proposed methods} \\ + %TODO: Explain why this is empty + \bottomrule +\end{tabular} + +\vspace{\interclassSpace} + +\begin{tabular}{p{0.9\linewidth}} + \toprule + \textbf{ASTNode} \\ + \midrule + \textbf{Description} \\ + Makes up the tree resulting from the parsing of an SGF file.\\ + \midrule + \textbf{Responsibilities} \\ + \tabitem{Obtain a GameMove tree from itself and its children.} \\ + \midrule + \textbf{Proposed attributes} \\ + \tabitem{\textbf{children}: The nodes following from itself.} \\ + \tabitem{\textbf{props}: The properties of the tree read from an SGF file.} + \\ + \midrule + \textbf{Proposed methods} \\ + \tabitem{\textbf{toGameTree()}: Returns a GameMove tree corresponding to the + tree following from this node.} \\ + \bottomrule +\end{tabular} + +\vspace{\interclassSpace} + +\subsection{System Actors} + +There are various actors who will interact with the system, both human and +non-human. + +\begin{itemize} + + \item The human player who interacts with the playing interface. + \item The human user who interacts with the engine. + \item A GUI software which uses the engine to generate moves. + +\end{itemize} + +\subsection{Use Cases} + +\begin{figure}[h] + \begin{center} + \includegraphics[width=\textwidth]{diagrams/useCases.png} + \caption{Use cases.} + \label{fig:useCases} + \end{center} +\end{figure} + +The different actors and use cases are represented on \fref{fig:useCases}. Each +use case is explained next. + +\paragraph{Play a match} + +The game interface reads the moves presented by the player and shows their +result on the board. + +\paragraph{Use as backend for machine player} + +The engine is used as the backend for generating moves for a machine player, +this is, for automated play, either against a human who is using the GUI or +against another machine player. + +\paragraph{Generate a move} + +The engine interface reads the input for generating a move as stated by the +GTP protocol and outputs the coordinates of the board to play. + +\subsection{Use Case Analysis and Scenarios} + +\begin{figure}[h] + \begin{center} + \includegraphics[width=0.8\textwidth]{diagrams/useCase_playAMatch.png} + \caption{Use case: Play a match.} + \label{fig:useCase_playAMatch} + \end{center} +\end{figure} + +\begin{tabular}{lp{0.6\linewidth}} + \toprule + \multicolumn{2}{c}{\textbf{Play a match}} \\ + \midrule + \textbf{Preconditions} & The game interface has been started. \\ + \midrule + \textbf{Postconditions} & The program terminates after a match has been + played. \\ + \midrule + \textbf{Actors} & Human player \\ + \midrule + \textbf{Description} & + 1. The user enters the move to make.\newline + 2. The result of playing that move is outputted by the program.\newline + 3. Stop the program if the game has ended or go back to 1 if not. \\ + \midrule + \textbf{Secondary scenarios} & + \textbf{The move is illegal}: An error message is shown. Go back to step 1 of + main scenario. \\ + \midrule + \textbf{Exceptions} & + \textbf{The input is wrong}: An error message is shown. Go back to step 1 of + main scenario. \\ + \midrule + \textbf{Notes} & + This scenario does not pretend to be a complete recreation of a go match. It + will be playable, but its main purpose is to see the Game implementation in + action.\newline + A robustness diagram for this scenario is shown in + \fref{fig:useCase_playAMatch}.\\ + \bottomrule +\end{tabular} + +\vspace{\interclassSpace} + +\begin{figure}[h] + \begin{center} + \includegraphics[width=\textwidth]{diagrams/useCase_generateAMove.png} + \caption{Use case: Generate a move.} + \label{fig:useCase_generateAMove} + \end{center} +\end{figure} + +\begin{tabular}{lp{0.6\linewidth}} + \toprule + \multicolumn{2}{c}{\textbf{Generate a move}} \\ + \midrule + \textbf{Preconditions} & The game engine has been started. \newline + Optionally, some moves have already been played. \\ + \midrule + \textbf{Postconditions} & A move is suggested via the engine output. \\ + \midrule + \textbf{Actors} & Human user and GUI program. \\ + \midrule + \textbf{Description} & + 1. The user or program enters the player to generate the move + for.\newline + 2. The suggested move is outputted by the engine, either as + coordinates or as an indication to pass. + \\ + \midrule + \textbf{Secondary scenarios} & + \textbf{The move is illegal}: An error message is shown. Go back to step 1 of + main scenario. \\ + \midrule + \textbf{Exceptions} & + \textbf{The input is wrong}: An error message is shown. Go back to step 1 of + main scenario. \\ + \midrule + \textbf{Notes} & + A robustness diagram for this scenario is shown in + \fref{fig:useCase_generateAMove}.\\ + \bottomrule +\end{tabular} + +\vspace{\interclassSpace} + +\begin{figure}[h] + \begin{center} + \includegraphics[width=\textwidth]{diagrams/useCase_useAsBackend.png} + \caption{Use case: Use as backend for machine player.} + \label{fig:useCase_useAsBackend} + \end{center} +\end{figure} + +\begin{tabular}{lp{0.6\linewidth}} + \toprule + \multicolumn{2}{c}{\textbf{Use as backend for machine player}} \\ + \midrule + \textbf{Preconditions} & The game engine has been configured as engine for + the software. \\ + \midrule + \textbf{Postconditions} & A match has been played against the engine. \\ + \midrule + \textbf{Actors} & GUI program. \\ + \midrule + \textbf{Description} & + 1. The program gives commands to the engine to set up the game. The + specific commands will vary from program to program.\newline + 2. The program asks the engine for a move.\newline + 3. The engine suggest a move to the program.\newline + 4. The moves are shown by the program as if made by a player.\newline + 5. The opponent gives a move to the program.\newline + 6. Repeat from step 2 until the game ends. \\ + \midrule + \textbf{Secondary scenarios} & + ---\\ + \midrule + \textbf{Exceptions} & + ---\\ + \midrule + \textbf{Notes} & + A robustness diagram for this scenario is shown in + \fref{fig:useCase_useAsBackend}.\\ + \bottomrule +\end{tabular} + +\vspace{\interclassSpace} + +\subsection{Testing Plan Specification} + +\subsubsection{Unitary Testing} + +Tests for the python code are developed using the unittest\cite{python_unittest} +testing framework. It has been chosen by virtue of being thoroughly documented +and widely used. + +The coverage of unit testing is checked with Coverage.py\cite{python_coverage}, +which can by itself run the unittest tests and generate coverage reports based +on the results. + +% Maybe put an example report here? diff --git a/doc/tex/systemDesign.tex b/doc/tex/systemDesign.tex new file mode 100644 index 0000000..80a2ccb --- /dev/null +++ b/doc/tex/systemDesign.tex @@ -0,0 +1,324 @@ +\section{System Design} + +\subsection{Class Diagram} + +The full class diagram is shown in \fref{fig:fullClasses}. + +\begin{figure}[h] + \begin{center} + \makebox[0pt]{\begin{minipage}{1.2\textwidth} + \includegraphics[width=\textwidth]{diagrams/fullClasses.png} + \caption{Full class diagram.} + \label{fig:fullClasses} + \end{minipage}} + \end{center} +\end{figure} + +The design of each system of the diagram is explained after this section +together with diagrams for each subsystem, since the full class diagram can be +too big to be comfortably analyzed. + +\subsection{Game} + +\begin{figure}[h] + \begin{center} + \includegraphics[width=0.55\textwidth]{diagrams/gameModule.png} + \caption{Design of the implementation of the game of Go.} + \label{fig:game} + A game is represented as a tree of moves. + \end{center} +\end{figure} + +A regular Go match is composed of a list of moves. But since game review and +variants exploration is an important part of Go learning, \program{} and most +playing and analysis existing programs allow for navigation back and forth +through the board states of a match and for new variants to be created from each +of these board states. Therefore, a match is represented as a tree of moves. The +GameMove class has the information about a specific move and also a reference to +the previous move and to a list of following moves, implementing this tree +structure and allowing for navigating both forward and backwards in the move +history. + +The state of the board at any given move must be stored so liberties, captures +count and legality of moves can be addressed, so it is represented with the +GameState class, which holds a reference to the current move. + +Moves depend on a representation of the game board to have access to its current +layout and count of captured stones. There are also many logic operations needed +to be performed on the board, such as getting the stones in a group, counting +their liberties or checking if a move is playable. The layout of the board and +these operations are implemented as the GameBoard class. + +A game can be started by the executable \texttt{go.py}. + +These classes and their relationships can be seen in \fref{fig:game}. + +\subsection{Engine} + +\begin{figure}[h] + \begin{center} + \includegraphics[width=0.8\textwidth]{diagrams/engineModule.png} + \caption{Design of the GTP engine.}\label{fig:engine} + \end{center} +\end{figure} + +An implementation of GTP, that is, the piece of software which offers the GTP +interface to other applications. It is designed to be used by a software +controller but can also be directly run, mostly for debugging purposes. Its +design is shown in \fref{fig:engine}. The core of the engine is related with +three components, each with a separate responsibility: + +\begin{itemize} + + \item The ImagoIO component is the one imported from executables or other + applications and offers the text interface. It reads and processes input + and calls corresponding commands from the core of the engine. + + \item The GameEngine contains the logic of the commands available from the + IO component. It uses a GameState to keep a record of the game and uses + a DecisionAlgorithm to generate moves. + + \item The DecisionAlgorithm component is responsible of analyzing the match + and generate moves. The engine core uses it when a decision has to be + made by the AI, such as when a move needs to be generated by the engine. + +\end{itemize} + +Two implementations of DecisionAlgorithm have been made: one for the Monte +Carlo Tree Search algorithm (on the MCTS class) and the other for neural +networks (on the Keras class). + +The Keras class also makes use of the NeuralNetwork class, which offers +functions for creating, training, saving and using neural network models. The +designs of the network are implemented in the subclasses DenseNeuralNetwork and +ConvNeuralNetwork as examples of dense and convolutional networks, respectively. + +The engine can be started with the executable \texttt{imagocli.py}. + +\subsubsection{Monte Carlo Tree Search Explained} + +Monte Carlo Tree Search is an algorithm that can be useful for exploring +decision trees. It was used by AlphaGo in conjunction with neural networks as +explained in the AlphaGo 2016 paper\cite{natureAlphaGo2016}. + +The algorithm assigns a score to each explored node based on how likely the +player who makes the corresponding move is to win and updates this score on each +exploration cycle. + +The exploration of the tree has 4 steps: + +\begin{enumerate} + + \item \textbf{Selection}: The most promising move with unexplored children + is selected for exploration. Unexplored children are viable moves which + are not yet part of the tree. + + \item \textbf{Expansion}: An unexplored children of the selected move is + added to the tree. This children is selected at random. + + \item \textbf{Simulation}: The score of the new move is evaluated by playing + random matches from it. + + \item \textbf{Backpropagation}: The score of the new move, as well as its + previous moves up to the root of the tree, is updated based on the + results of the simulation. + +\end{enumerate} + +The suggested move is the children of the current move with the best score from +the perspective of the player which has to make the move. + +The implementation of the algorithm will use the existing GameMove class from +the Game System to access the game logic it needs, such as to get the possible +children from a node or to simulate random games. + +\subsubsection{Neural Networks Explained} + +A neural network is composed of nodes or ``neurons''. Each node contains a value +named weight. During execution the node receives a numeric input, multiplies it +for its weight and applies a function called activation function to the result. + +Nodes are organized in layers so that a network contains several layers and each +layer one or more neurons. The input to the network forms the input layer, which +contents are forwarded to the second layer. The second layer applies the weight +and activation function as discussed before in each of its nodes and the result +is forwarded to the next layer, and so on. At the end the last layer, called the +output layer, contains the result of the input evaluation. Each layer can have a +unique activation function for all its nodes and a different strategy of +connecting to the previous and next layers. + +Several kinds of layers have been used in this project: + +\begin{itemize} + + \item \textbf{Dense layers}, which connects each of its nodes to each of the + nodes of the previous layers. + + \item \textbf{Convolutional layers}, which process their input by applying + a filter function to adjacent values. In the case of this project, the + board is filtered by grouping its vertices in 3x3 squares. The aim of + these layers is to detect patterns in the input, such as curves, edges + or more complex shapes, so they are used a lot on neural networks + processing images. They are used in this project because a configuration + of the Go board is not that different from a two-dimensional image. + + \item \textbf{Max pooling layers}, which process their input in a similar + way to convolutional layers, but reducing the size of the input by + keeping the highest value in each of the groups they process. This + reduction accentuates the patterns detected by convolutional layers and + helps on the detection of bigger, more complex patterns. + + \item \textbf{Flatten layers}, which just change the shape of the input so + that all the values are on one dimension. + +\end{itemize} + +Combinations of these layers have been used to define two neural networks. + +First, a network using mainly dense layers as an example of a more general +purpose design of a network. + +Then, a network with convolutional and max pooling layers to compare the +approach used on image processing to the more general one and studying its +utility on the analysis of the Go board. + +These networks have been implemented on the DenseNeuralNetwork and +ConvNeuralNetwork classes, respectively. + +The networks have been designed to process boards of size 9x9, which is the +introductory size to the game. It is the easiest both for the hardware to handle +and for the analysis of results while keeping able to support meaningful +matches. + +Both networks have the same design for their input and output. + +Their input is a three-dimensional matrix of size 9x9x2 with values either 0 or +1. It represents two views of the board, one with ones as the stones of a player +and the other with ones as the stones of the other player. + +Their output is a vector with 82 elements of type float. Classification networks +typically use a vector of probabilities with one element for each class they are +trying to classify. Here the classes are the 81 positions of the 9x9 board and +the pass move, hence 82 total elements. Each element signifies the chance of +playing that move for the input board position, so the element with the highest +value represents the suggested move. + +\subsubsection{Dense Neural Network Design} + +\begin{listing}[h] + \inputminted{text}{listings/denseModel.txt} + \caption{Dense neural network model.} + \label{code:denseModel} +\end{listing} + +\begin{figure}[h] + \begin{center} + \includegraphics[width=0.7\textwidth]{img/models/denseModel.png} + \caption{Dense neural network model.} + \label{fig:denseNN} + \end{center} +\end{figure} + +This network first uses two dense layers with 81 nodes each. This number has +been selected so each node can have as input each of the vertices of the board. +A flatten layer acts then to make the output one-dimensional, and a final dense +layer provides the vector containing the likelihood of each possible move. + +The design of this network is shown in \flist{code:denseModel} and +\fref{fig:denseNN} as provided by Keras' summary and plot\_model functions +respectively. + +\subsubsection{Convolutional Neural Network Design} + +\begin{listing}[h] + \inputminted{text}{listings/convModel.txt} + \caption{Convolutional neural network model.} + \label{code:convModel} +\end{listing} + +\begin{figure}[h] + \begin{center} + \includegraphics[width=0.7\textwidth]{img/models/convModel.png} + \caption{Convolutional neural network model.} + \label{fig:convNN} + \end{center} +\end{figure} + +This network uses two pairs of convolutional and max pooling layers with the aim +of being trained to recognize patterns on the board. A flatten layer acts then +to make the output one-dimensional, and a final dense layer provides the vector +containing the likelihood of each possible move. + +The design of this network is shown in \flist{code:convModel} and +\fref{fig:convNN} as provided by Keras' summary and plot\_model functions +respectively. + +\subsection{Training} + +\begin{figure}[h] + \begin{center} + \includegraphics[width=\textwidth]{diagrams/trainingModule.png} + \caption{Components of the SGF file parsing module.} + \label{fig:trainingModule} + Components not showing a capital C are not classes, as in they not + follow the object-oriented paradigm and do not implement any classes, + only functions. + \end{center} +\end{figure} + +Neural networks can be powerful machine learning algorithms, but they have to be +trained first so they can provide meaningful results. For a Go AI it makes sense +to have its algorithms trained on Go games. There exists a common text format to +store Go games: SGF. If the system is able to process SGF files, it can provide +the games stored on them to the neural networks for training. And so the need +for an SGF parser arises. + +To parse SGF files a lexer and parser have been implemented using PLY.\@ The +result of the parsing is an AST (Annotated Syntax Tree) reflecting the contents +of the text input, each node with zero or more properties, and with the ability +to convert themselves and their corresponding subtree into a GameMove tree. This +is done for the root node, since from the SGF specification there are some +properties only usable in the root node, like those which specify general game +information and properties such as rank of players or komi. + +Here follows an explanation of the role and motivation before each component of +the Training module to show how these previous concerns have been addressed and +solved. These components are shown in \fref{fig:trainingModule}. + +\begin{itemize} + + \item \textbf{SGF}: Provides a high-level method to convert a path to a SGF + file to a GameMove tree. + + \item \textbf{sgfyacc}: The implementation of a SGF parser using PLY. Takes + the tokens generated by \textbf{sgflex} and creates an ASTNode tree from + them. + + \item \textbf{sgflex}: The implementation of a SGF lexer using PLY.\@ Takes + text input and generates the tokens of the SGF language from them. + + \item \textbf{ASTNode}: The AST resulting from the parsing of a SGF file. + Has a method to convert it to a tree of GameMove and so obtain the + contents of the SGF in the internal representation used by the project's + systems. + + \item \textbf{Property}: The representation of a property of an ASTNode + tree. Each property is made of a name and one or more values and this + class helps handling this specific situation. + +The training can be started with the executable \texttt{train.py}. + +\end{itemize} + +%\subsection{Modules} +% +%One module to store the state of the game and the game tree. One module to parse +%moves. One module to read and write SGF files. Modules are shown in +%\fref{fig:modules}. +% +%\begin{figure}[h] +% \begin{center} +% \includegraphics[width=\textwidth]{diagrams/modules.png} +% \caption{Modules.}\label{fig:modules} +% \end{center} +%\end{figure} diff --git a/doc/tex/tfg.tex b/doc/tex/tfg.tex deleted file mode 100644 index 9248748..0000000 --- a/doc/tex/tfg.tex +++ /dev/null @@ -1,60 +0,0 @@ -\documentclass{article} - -\usepackage{geometry} -\usepackage{graphicx} -\usepackage{booktabs} -\usepackage{hyperref} -\usepackage{csquotes} - -\usepackage[backend=biber, style=numeric-comp]{biblatex} -\addbibresource{/home/taamas/docs/biber.bib} - -\geometry{left=4.5cm,top=2cm,bottom=2cm,right=4.5cm} - -\hypersetup{colorlinks=true, - linkcolor=black, - filecolor=magenta, - urlcolor=black, - bookmarks=true -} - -\urlstyle{mono} - -%\renewcommand{\contentsname}{Contenidos} -%\renewcommand{\figurename}{Figura} - -\newcommand{\program}{Go-AI} - -\newcommand{\inputtex}[1]{\input{tex/#1}} -\newcommand{\fref}[1]{Fig.~\ref{#1}} -%\newcommand{\uurl}[1]{\underline{\url{#1}}} - -\begin{document} - -\frenchspacing - -\title{\program} - -\author{Íñigo Gutiérrez Fernández} - -\date{} - -\maketitle - -\begin{abstract} - This is the abstract. -\end{abstract} - -\tableofcontents - -\inputtex{introduction.tex} - -\inputtex{previousWorks.tex} - -\inputtex{interface.tex} - -\inputtex{implementation.tex} - -\printbibliography{} - -\end{document} |