Author | Ethan |
Submission date | 2019-03-15 02:38:35.581692 |
Rating | 6603 |
Matches played | 254 |
Win rate | 65.35 |
Use rpsrunner.py to play unranked matches on your computer.
import random
# Numeric code for each throw, and the reverse lookup from code to letter.
moves = dict(zip("RPS", range(3)))
moves_inv = {code: letter for letter, code in moves.items()}
# reward[(opponent move, bot move)] is the bot's payoff: +1 win, 0 draw, -1 loss.
# Each throw is beaten by the next code up (mod 3), so the payoff depends only
# on (bot - opponent) % 3: 0 -> draw, 1 -> bot wins, 2 -> bot loses.
reward = {
    (opp, bot): (0, 1, -1)[(bot - opp) % 3]
    for opp in range(3)
    for bot in range(3)
}
def zero_array(shape):
    """Build a nested list of zeros with the given dimensions.

    shape -- sequence of non-negative ints, outermost dimension first.

    Returns a list holding ``shape[0]`` independently allocated sub-arrays
    of shape ``shape[1:]``.  An empty shape is the base case of the
    recursion and yields the scalar ``0`` (it previously raised IndexError).
    """
    if not shape:
        return 0
    # Recurse once per slot so that no two rows alias the same inner list.
    return [zero_array(shape[1:]) for _ in range(shape[0])]
class Bot:
    """Tabular Q-learning rock-paper-scissors player.

    The state is the opponent's last ``num_past_moves`` moves (numeric codes
    0/1/2) and the action is the bot's own move.  ``Q`` is a nested list
    indexed as ``Q[m1][m2]...[mk][action]``, e.g. ``Q[a][b][c][d]`` is the
    value of playing ``d`` after the opponent played ``a, b, c``.

    Note: ``update`` indexes one level below the state prefix, so it needs
    ``num_past_moves >= 1``.
    """

    def __init__(self, alpha, gamma, num_past_moves):
        """Create a bot with an all-zero Q table and an empty history.

        alpha -- weight given to the new estimate in each Q update.
        gamma -- discount applied to the estimated future value.
        num_past_moves -- number of opponent moves that make up the state.
        """
        self.past_moves = []
        # One dimension per remembered opponent move, plus one for the action.
        self.Q = zero_array([3] * (num_past_moves + 1))
        self.alpha = alpha
        self.gamma = gamma
        self.num_past_moves = num_past_moves

    def _q_slice(self, history):
        """Return the sub-table of Q reached by indexing with each move in history."""
        table = self.Q
        for prev_move in history:
            table = table[prev_move]
        return table

    def next_move(self):
        """Return the bot's next move as "R", "P" or "S".

        Plays uniformly at random until ``num_past_moves`` opponent moves have
        been recorded; afterwards plays an action with the highest Q value for
        the current history, choosing uniformly among tied actions.
        """
        # Not enough history to form a state yet: play randomly.
        if len(self.past_moves) < self.num_past_moves:
            return moves_inv[random.randint(0, 2)]
        action_values = self._q_slice(self.past_moves)
        best_value = max(action_values)
        # Each maximal action is listed exactly once.  The previous loop seeded
        # the tie list with action 0 and then visited action 0 again, so "R"
        # was counted twice whenever it was among the best actions.
        best_actions = [a for a, value in enumerate(action_values) if value == best_value]
        return moves_inv[random.choice(best_actions)]

    def update(self, opponent_move, current_move):
        """Learn from one finished round and record the opponent's move.

        Both arguments are numeric move codes (0/1/2), not the letter strings.

        opponent_move -- what the opponent just played.
        current_move -- what the bot just played.
        """
        # Q is only updated once the history is full, i.e. the state is defined.
        if len(self.past_moves) == self.num_past_moves:
            reward_outcome = reward[(opponent_move, current_move)]
            action_values = self._q_slice(self.past_moves)
            # Dropping the oldest move leaves the prefix shared by all three
            # candidate next states; successors[m] holds the action values
            # for the state that ends with opponent move m.
            successors = self._q_slice(self.past_moves[1:])
            # NOTE(review): opponent_move is already known here, so textbook
            # Q-learning would use max(successors[opponent_move]); the average
            # over all three successors is kept as originally written --
            # confirm this is intended.
            best_per_successor = [max(successors[0]), max(successors[1]), max(successors[2])]
            avg_future_Q = sum(best_per_successor) / float(len(best_per_successor))
            action_values[current_move] = (1 - self.alpha) * action_values[current_move] + self.alpha * (reward_outcome + self.gamma * avg_future_Q)
            # Make room so the history keeps exactly num_past_moves entries.
            self.past_moves.pop(0)
        self.past_moves.append(opponent_move)
# Per-round driver.  `input` is read and `output` is assigned here, but neither
# is defined in this file; presumably the match runner supplies the opponent's
# previous move in `input` ("" on the first round), reads our move back from
# `output`, and keeps globals such as `bot1` alive between rounds -- confirm
# against rpsrunner.py.
if input == "":
    # First round: nothing to learn from yet, so just create the learner.
    bot1 = Bot(0.5, 0.99, 5)
else:
    # Later rounds: score our previous throw against the opponent's reply.
    bot1.update(moves[str(input)], moves[str(output)])
# Either way, publish the move for the upcoming round.
output = str(bot1.next_move())