| Author | rlagent | 
| Submission date | 2013-01-21 14:09:42.620829 | 
| Rating | 4766 | 
| Matches played | 719 | 
| Win rate | 46.87 | 
Use rpsrunner.py to play unranked matches on your computer.
# a simple RL agent using SARSA to learn
#
# input is in input, output has to be in output
import random
states = {"R": 0, "P": 1, "S": 2, "start": 3}
actions = ["R", "P", "S"]
rewards = [[0.0, 1.0, -1.0],
           [-1.0, 0.0, 1.0],
           [-1.0, 1.0, 0.0],
           [0.0, 0.0, 0.0]]
   
def is_undefined(var):
    return not (var in vars() or var in globals())
class RLAgent():
    def __init__(self, states, actions, alpha=0.2, gamma=0.9,
                 epsilon=0.9):
        self.states = states
        self.actions = actions    
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.Q = [ [(1.0/len(actions)) for i in range(len(actions))] for j in
              range(len(states))]
        self.last_action = 0    
        self.last_state = states["start"]
        self.last_reward = 0.0  
    def get_action(self, state):
        if random.random() > self.epsilon:
            qs = self.Q[state]
            m = max(qs)
            midx = qs.index(m)
            return midx
        return random.randint(0, len(self.actions) - 1)
    def update_Q(self, state, action, reward):
        Q = self.Q
        Q[self.last_state][self.last_action] += \
                self.alpha * (self.last_reward + \
                              self.gamma * Q[state][action] - \
                              Q[self.last_state][self.last_action])
        self.last_reward = reward
        self.last_state = state
        self.last_action = action
def reward(state, action):
    return rewards[state][action]
              
if is_undefined('rlagent'):
    rlagent = RLAgent(states, actions,    
                      alpha=0.2, gamma=0.9,
                      epsilon=0.9)
  
if input == '':
    input = "start"
state = states[input]
a = rlagent.get_action(state)
r = reward(state, a)
rlagent.update_Q(state, a, r)
                
output = actions[a]