ctw_rps

Author	R Daneel Olivaw
Submission date	2018-06-08 18:48:19.734010
Rating	2552
Matches played	292
Win rate	21.92
Use rpsrunner.py to play unranked matches on your computer.
Source code:

import math
import random

class CTNode:
    
    def __init__(self, depth):
        self.counters = None
        self.children = {}
        self.depth = depth
        self.beta = 1.0
        self.n = 0.0
        self.prob = None

def array_add(a, b):
    return [x + y for x,y in zip(a,b)]

def scalar_array_mult(s, a):
    return [s*x for x in a]

def scalar_array_add(s, a):
    return [s+x for x in a]

class ContextTree:

    def __init__(self, char_to_index, base_pref='', max_depth=5):
        self.num_abc = len(char_to_index)
        self.char_to_index = char_to_index
        self.max_depth = max_depth
        self.curr_prob_array = [0]*self.num_abc
        self.base_pref = base_pref
        self.pref_string = base_pref
        self.q = len(char_to_index) / float(2)

        self.root = CTNode(0)
        self.root.counters = {char: 0 for char in char_to_index}

    def update_prob(self, curr_char):
        curr_context = self.pref_string[-self.max_depth:] if self.max_depth != 0 else curr_char
        self.curr_prob_array = self.compute_prob_recursive(curr_context, self.root)
        self.update_beta(curr_char, curr_context, self.root)
        self.update_counters(curr_char, curr_context, self.root)
        self.pref_string += curr_char

    def compute_prob_recursive(self, context, root):
        kt = [0]*len(self.char_to_index)
        for char,j in self.char_to_index.items():
            kt[j] = (root.counters[char] + 0.5) / float(root.n + self.q)
        if root.depth == self.max_depth:
            root.prob = kt
            return kt
        
        next_char = context[-1]
        
        # if the context does not exist yet - create it and enter
        if next_char not in root.children:
            N = CTNode(root.depth+1)
            N.counters = {char: 0 for char in self.char_to_index}
            N.parent = root
            root.children[next_char] = N    
        
        next_root = root.children[next_char]
        recursive_prob = self.compute_prob_recursive(context[:-1], next_root)
        curr_node_prob = scalar_array_mult(math.pow(float(root.beta + 1), -1),
            array_add(scalar_array_mult(root.beta, kt), recursive_prob))
        root.prob = curr_node_prob
        
        return curr_node_prob

    def update_beta(self, char, context, root):
        kt = (root.counters[char] + 0.5) / float(root.n + self.q)
        if len(root.children) == 0:
            return
        
        next_char = context[-1]
        next_root = root.children[next_char]
        ix = self.char_to_index[char]
        recursive_prob = next_root.prob[ix]
        root.beta = root.beta * (kt/float(recursive_prob))
        self.update_beta(char, context[:-1], next_root)

    def update_counters(self, char, context, root):
        root.counters[char] += 1
        root.n += 1
        if len(root.children) == 0:
            return
        next_char = context[-1]
        next_root = root.children[next_char]
        self.update_counters(char, context[:-1], next_root)

class RPSPlayer:

    def __init__(self, initial_state='RRPRS', use_noise=True):
        self.char_to_index = {'R': 0, 'P': 1, 'S': 2}
        self.tree = ContextTree(self.char_to_index, initial_state)
        self.counter = 1
        self.use_noise = use_noise

    def predict_move(self):
        if self.use_noise:
            val = 1/math.sqrt(self.counter)
            noise = [random.uniform(-val, val) for x in range(len(self.char_to_index))]
        else:
            noise = [0]*len(self.char_to_index)
        probabilities = array_add(self.tree.curr_prob_array, noise)
        predicted_state = filter(
            lambda x: x[1] == self._argmax(probabilities), 
            self.char_to_index.items())[0][0]
        return predicted_state

    def update_prob(self, char):
        self.tree.update_prob(char)
        self.counter += 1

    @staticmethod
    def _argmax(array):
        return sorted(zip(array, range(len(array))))[0][1]

if __name__=='__main__':
    if input == '':
        initial_state = ''.join([random.choice(['R','P','S']) for x in range(5)])
        RPS = RPSPlayer(initial_state)
    else:
        RPS.update_prob(input)
    output = RPS.predict_move()