Author | rlagent |
Submission date | 2013-01-21 14:09:42.620829 |
Rating | 4766 |
Matches played | 719 |
Win rate | 46.87 |
Use rpsrunner.py to play unranked matches on your computer.
# a simple RL agent using SARSA to learn
#
# input is in input, output has to be in output
import random

# State index per opponent previous move; "start" marks the first round,
# when there is no previous move yet.
states = {"R": 0, "P": 1, "S": 2, "start": 3}
# The three playable moves; indices match the state indices above.
actions = ["R", "P", "S"]
# rewards[state][action]: payoff (+1 win / 0 tie / -1 loss) for playing
# `action` when the opponent's previous move was `state`.  Row "start" is
# all zeros since there is nothing to score against.
# BUG FIX: the "S" row was inverted ([-1, 1, 0]).  Rock beats Scissors,
# so action R must score +1 and action P (loses to Scissors) must score
# -1, matching the "action beats state -> +1" convention of the other
# rows and making the 3x3 sub-matrix antisymmetric.
rewards = [[0.0, 1.0, -1.0],
           [-1.0, 0.0, 1.0],
           [1.0, -1.0, 0.0],
           [0.0, 0.0, 0.0]]
def is_undefined(var):
    """Return True if no module-level variable named *var* exists.

    Used to create the persistent agent only once: the contest runner
    re-executes this whole script every round with the same globals.

    BUG FIX: the original also tested ``var in vars()``, but inside a
    function ``vars()`` is just the local namespace (only the parameter
    ``var`` itself), so that check was meaningless and would misfire for
    the literal name "var".  Checking ``globals()`` alone is the
    intended behavior.
    """
    return var not in globals()
class RLAgent():
    """Tabular SARSA learner over small discrete state/action spaces.

    Q-values start uniform at 1/len(actions).  The learning update is
    applied one step late: each call to update_Q folds the *previous*
    round's (state, action, reward) together with the current pair
    (SARSA bootstrapping), then stashes the current triple for the next
    call.
    """

    def __init__(self, states, actions, alpha=0.2, gamma=0.9,
                 epsilon=0.9):
        self.states = states      # mapping: state label -> row index
        self.actions = actions    # list of playable moves
        self.alpha = alpha        # learning rate
        self.gamma = gamma        # discount factor
        self.epsilon = epsilon    # exploration probability
        # One uniform Q row per state.
        uniform = 1.0 / len(actions)
        self.Q = [[uniform for _ in actions] for _ in states]
        # Pending transition, consumed by the next update_Q call.
        self.last_action = 0
        self.last_state = states["start"]
        self.last_reward = 0.0

    def get_action(self, state):
        """Epsilon-greedy choice: with probability epsilon pick a random
        action index, otherwise the argmax of the Q row for *state*."""
        if random.random() <= self.epsilon:
            return random.randint(0, len(self.actions) - 1)
        row = self.Q[state]
        return row.index(max(row))

    def update_Q(self, state, action, reward):
        """SARSA update for the stored previous transition, then record
        (state, action, reward) as the pending transition."""
        prev_row = self.Q[self.last_state]
        td_target = self.last_reward + self.gamma * self.Q[state][action]
        prev_row[self.last_action] += \
            self.alpha * (td_target - prev_row[self.last_action])
        self.last_reward = reward
        self.last_state = state
        self.last_action = action
def reward(state, action):
    """Payoff for playing action index *action* when the opponent's
    previous-move state index is *state* (module-level `rewards` table)."""
    row = rewards[state]
    return row[action]
# Create the persistent agent exactly once: the contest runner re-runs
# this whole script every round with the same globals.
if is_undefined('rlagent'):
    rlagent = RLAgent(states, actions,
                      alpha=0.2, gamma=0.9,
                      epsilon=0.9)
# `input` is supplied by the match runner (see header comment): the
# opponent's previous move ("R"/"P"/"S"), or '' on the first round.
if input == '':
    input = "start"
state = states[input]
# Choose our move, score it against the opponent's *previous* move (the
# current one is not known yet), and apply the SARSA update.
a = rlagent.get_action(state)
r = reward(state, a)
rlagent.update_Q(state, a, r)
# The runner reads our move for this round from `output`.
output = actions[a]