Author | alainchau |
Submission date | 2018-06-13 03:21:00.963300 |
Rating | 5755 |
Matches played | 272 |
Win rate | 54.41 |
Use rpsrunner.py to play unranked matches on your computer.
# Idea: Use mixture of three strategies
# Strategy 1: Use mixture of kth order markov probabilities from HW 2 to make sequential predictions.
# Note: The following code has been adapted to avoid numpy since rpscontest doesn't allow it.
# This is good for learning patterns like
# RRRR...
# RPS RPS RPS ...
# Strategy 2: Counter the naive frequentist strategy. The opponent will probability keep track
# of how many times we used rock,paper,or scissors. If we tend to use scissors often, the opponent
# is more likely to pick rock and, anticipating this reaction, we should pick paper.
# Strategy 3: Random choice. Revert to this strategy if our other strategies don't work.
# This is to avoid getting dominated by superior bots.
if input == "":
import random, collections, math
def argmax(lst):
return lst.index(max(lst))
# Make prediction based on probability distribution defined by:
# probs = [p1, p2, p3] = [prob(Rock), prob(Paper), prob(Scissors)]
def get_pred(probs, thresh = 0.5):
# if one probability dominates others, choose that one
# ex. [0.1, 0.1, 0.8] -> just pick 3 (scissors)
if max(probs) > thresh:
return argmax(probs)
roll = random.random()
if roll <= probs[0]:
return 0 # rock
elif roll <= probs[0] + probs[1]:
return 1 # paper
else:
return 2 #scissors
# ===== HYPERPARAMETERS =====
alpha = 1/2 # KT
k = 2 # max order of markov
# ===========================
dct = {"R": 0, "P":1, "S":2}
lst = ["R", "P", "S"]
rps_counts = {"R": 0, "P":0, "S":0} #how many times have we used each
# weights for "experts". wts[0]=1.1 is a hack to favor markov strategy
n_experts = 2 + k
wts = [1] * (n_experts)
outputs = [0] * (n_experts)
firstround = True # dont adjust weights using results of first round
counter = [{} for i in range(0,k)] # key=state, value=counts
# random initial state
suffix = "".join(random.choice("RPS") for i in range(0, k))
for i in range(0,k):
suff = suffix[k-i:]
counter[i][suff] = counter[i].get(suff, [alpha] * 3)
# random
output = random.choice("RPS")
rps_counts[output] += 1
else:
# for system tests
try:
# weight the experts
if firstround:
firstround = False
else:
for i in range(0,n_experts):
if (dct[outputs[i]] - dct[input]) % 3 == 1: wts[i] += 1
# update probabilities
for i in range(0,k):
counter[i][suffix[k-i:]][dct[input]] += 1
suffix = suffix[1:] + input
for i in range(0,k):
suff = suffix[k-i:]
counter[i][suff] = counter[i].get(suff, [alpha] * 3)
# strategy 1
for i in range(0,k):
T = counter[i][suffix[k-i:]]
probs = [float(x)/sum(T) for x in T]
outputs[i+2] = lst[(get_pred(probs) + 1) % 3]
# strategy 2
mypred = max(rps_counts, key=rps_counts.get)
outputs[1] = lst[(dct[mypred] + 2) % 3]
# strategy 3
outputs[0] = random.choice("RPS")
output = outputs[argmax(wts)]
rps_counts[output] += 1
except:
output = random.choice("RPS")