Author          | jlubea
Submission date | 2019-11-15 19:27:40.466664
Rating          | 7515
Matches played  | 212
Win rate        | 74.06%
Use rpsrunner.py to play unranked matches on your computer.
# Hybrid of two strong players with a first attempt at a Markov strategy switcher
#
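# The rpscontest sandbox evaluates this file once per round: it binds the
# global `input` to the opponent's previous move ('R', 'P', or 'S'; "" on the
# first round) and reads our move back from the global `output`. The branch
# below therefore builds everything on round one and only learns/plays after.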
if input == "":
import random
import math
import operator
from operator import add
from operator import truediv
from operator import mul
from collections import defaultdict
from itertools import combinations
rps = ['R', 'P', 'S']
beatBy = {'R':'P', 'P':'S', 'S':'R'}
values = { 'R': 0, 'P': 1, 'S': 2 }
chars = { 0: 'R', 1: 'P', 2: 'S' }
output = random.choice(rps)
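    # Weighted-sampling helper: draws `count` items from `arr` by accumulating
    # the probability vector `p` into a CDF and inverting a uniform draw.
    # Currently only referenced from the commented-out probabilistic selector
    # in FreqStrategySelector.evaluate().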
def choice(arr, count, p):
result = [0] * count
accum = p[:]
for i in range(1, len(accum)):
accum[i] += accum[i - 1]
for c in range(count):
r = random.randrange(0, 100001) / 100000.0
for i in range(0, len(accum)):
if r <= accum[i]:
result[c] = arr[i]
                    break
return result
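    # Small numeric helpers: normalize() turns a count vector into a
    # probability distribution (uniform if all counts are zero), rotate()
    # cyclically shifts a list, distance() is Euclidean, and squash_to_bounds()
    # linearly rescales a vector into [lo, hi] (returning `default` everywhere
    # when the vector is flat).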
def normalize(counts):
c = counts[:]
total = sum(counts)
if total == 0:
c = [1] * len(c)
total = sum(c)
c = list(map(truediv, c, [total * 1.0] * len(c)))
return c
def rotate(l, by):
return l[by:] + l[:by]
def distance(l1, l2):
        total = 0.0
        for i in range(len(l1)):
            diff = l1[i] - l2[i]
            total += diff * diff
        return math.sqrt(total)
def squash_to_bounds(counts, bounds, default):
maximum = max(counts)
minimum = min(counts)
rb = bounds[1] - bounds[0]
diff = float(maximum - minimum)
s = counts[:]
for i in range(len(s)):
s[i] = (default if (diff == 0) else (rb * (counts[i] - minimum)) / diff + bounds[0])
return s
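    # Per-context bookkeeping for Freq: `scores` tallies outright wins per
    # action, while `scores_for_cond_hist` credits/blames rotations of the
    # played action on wins/losses. cond_hist() and wins_hist() squash the raw
    # tallies into [-1, 1] so contexts with different sample sizes compare.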
class ScoreAccumulator:
def __init__(self, actions):
self.actions = actions
self.scores = [0] * len(self.actions)
self.scores_for_cond_hist = [0] * len(self.actions)
self.obs_count = 0
self.modified_count = 0
def update(self, me, opp):
self.obs_count += 1
if opp == me:
return
elif beatBy[opp] == me:
self.scores[values[me]] += 1
self.scores_for_cond_hist[values[beatBy[me]]] += 1
self.modified_count += 1
elif beatBy[me] == opp:
# self.scores[values[me]] -= 1
self.scores_for_cond_hist[values[beatBy[beatBy[me]]]] -= 1
self.modified_count += 1
# print "scores, obs ", self.scores, self.obs_count
def cond_hist(self):
conditional_probs = squash_to_bounds(self.scores_for_cond_hist, [-1.0, 1.0], 0)
# print "cond_hist ", conditional_probs, self.scores_for_cond_hist, self.scores
return conditional_probs
def wins_hist(self):
conditional_probs = squash_to_bounds(self.scores, [-1.0, 1.0], 0)
return conditional_probs
def select_max_action(self):
confidence = self.cond_hist()
# print "scores ", self.scores
# print "cond_hist ", self.cond_hist()
# print "confidence ", confidence
selected_action = random.choice([i for i,c in enumerate(confidence) if max(confidence) == c])
return self.actions[selected_action].evaluate()
        def clone(self):
            c = ScoreAccumulator(self.actions)
            c.scores = self.scores[:]
            c.scores_for_cond_hist = self.scores_for_cond_hist[:]
            c.obs_count = self.obs_count
            c.modified_count = self.modified_count
            return c
def __repr__(self):
# return self.cond_hist().__repr__()
return self.scores.__repr__()
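    # Constant actions: each always plays one move. The hard-coded cond_hist()
    # vectors stand in for learned statistics so the constants can be queried
    # like the adaptive models; they appear pre-rotated to match the
    # double-beatBy convention used by Freq below.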
class Rock:
def reset(self, actions):
return self
def evaluate(self):
return rps[0]
def learn(self, me, opp):
return
def clone(self):
return self
def cond_hist(self, me = None):
return [0, -1, 1]
class Paper:
def reset(self, actions):
return self
def evaluate(self):
return rps[1]
def learn(self, me, opp):
return
def clone(self):
return self
def cond_hist(self, me = None):
return [1, 0, -1]
class Scissors:
def reset(self, actions):
return self
def evaluate(self):
return rps[2]
def learn(self, me, opp):
return
def clone(self):
return self
def cond_hist(self, me = None):
return [-1, 1, 0]
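    # Order-N frequency model: keys a ScoreAccumulator on the last N opponent
    # moves. Each round it looks up the current context, takes the
    # accumulator's strongest action as a prediction, and answers with the
    # double beatBy rotation of it; unseen or too-short contexts fall back to
    # a random action.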
class Freq:
def __init__(self, N, actions):
self.N = N
self.actions = actions
self.history = []
self.accumulators = {}
self.rewards = None
self.total_obs = 0
self.empty_acc = ScoreAccumulator(self.actions)
def evaluate(self):
if (len(self.history) >= self.N):
key = tuple(self.history[-self.N:])
if not (key in self.accumulators):
self.accumulators.update({key: ScoreAccumulator(self.actions)})
e = random.choice(self.actions)
else:
prediction = self.actions[values[self.accumulators[key].select_max_action()]]
e = self.actions[values[beatBy[beatBy[prediction.evaluate()]]]]
# print key, self.accumulators[key].cond_hist(), self.accumulators[key].scores, self.accumulators[key].scores_for_cond_hist
else:
e = random.choice(self.actions)
self.last_action = action = e.evaluate()
# print "playing action ", action
return action
def learn(self, me, opp):
# print "actual ", me, opp
self.rewards = None
self.history.append(opp)
if (len(self.history) > self.N):
key = tuple(self.history[-self.N-1:-1])
if not (key in self.accumulators):
self.accumulators.update({key: ScoreAccumulator(self.actions)})
self.total_obs += 1
# print me, opp
self.accumulators[key].update(me, opp)
# print self.accumulators
# print key, self.evaluate()
return
def clone(self):
c = Freq(self.N, self.actions)
c.history = self.history[:]
for key in self.accumulators.keys():
c.accumulators[key] = self.accumulators[key].clone()
c.total_obs = self.total_obs
return c
def cond_hist(self, me = None):
action = self.evaluate()
return self.actions[values[action]].cond_hist()
def __repr__(self):
return self.accumulators.__repr__()
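    # The first of the two "strong players" from the header: apparently a
    # lightly modified bayes13 from the rpscontest leaderboard. It keeps
    # Laplace-smoothed counts (every defaultdict starts at 1) of the
    # opponent's next move conditioned on matching suffixes of the joint,
    # opponent-only, and self-only histories, multiplies all applicable
    # conditionals together, and plays the hand with the best expected score.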
class Bayes:
name = "Bayes13-Modified"
def __init__(self, lookback):
            self.score = {'RR': 0, 'PP': 0, 'SS': 0, 'PR': 1, 'RS': 1, 'SP': 1, 'RP': -1, 'SR': -1, 'PS': -1}
            self.cscore = {'RR': 'r', 'PP': 'r', 'SS': 'r', 'PR': 'b', 'RS': 'b', 'SP': 'b', 'RP': 'c', 'SR': 'c', 'PS': 'c'}
self.beat = {'P': 'S', 'S': 'R', 'R': 'P'}
self.cede = {'P': 'R', 'S': 'P', 'R': 'S'}
self.lookback = lookback
self.reset()
def reset(self):
self.played_probs = defaultdict(lambda: 1)
self.opp_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.my_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.both_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singleopp_opp_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singleopp_my_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singleopp_both_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singlemy_opp_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singlemy_my_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singlemy_both_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.opp2_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.my2_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.both2_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singleopp_opp2_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singleopp_my2_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singleopp_both2_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singlemy_opp2_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singlemy_my2_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singlemy_both2_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.win_probs = defaultdict(lambda: 1)
self.lose_probs = defaultdict(lambda: 1)
self.tie_probs = defaultdict(lambda: 1)
self.opp_answers = {'c': 1, 'b': 1, 'r': 1}
self.my_answers = {'c': 1, 'b': 1, 'r': 1}
self.opp2_answers = {'c': 1, 'b': 1, 'r': 1}
self.my2_answers = {'c': 1, 'b': 1, 'r': 1}
self.singleopp_opp_answers = {'c': 1, 'b': 1, 'r': 1}
self.singleopp_my_answers = {'c': 1, 'b': 1, 'r': 1}
self.singleopp_opp2_answers = {'c': 1, 'b': 1, 'r': 1}
self.singleopp_my2_answers = {'c': 1, 'b': 1, 'r': 1}
self.singlemy_opp_answers = {'c': 1, 'b': 1, 'r': 1}
self.singlemy_my_answers = {'c': 1, 'b': 1, 'r': 1}
self.singlemy_opp2_answers = {'c': 1, 'b': 1, 'r': 1}
self.singlemy_my2_answers = {'c': 1, 'b': 1, 'r': 1}
self.patterndict = defaultdict(str)
self.patterndict2 = defaultdict(str)
self.opppatterndict = defaultdict(str)
self.opppatterndict2 = defaultdict(str)
self.mypatterndict = defaultdict(str)
self.mypatterndict2 = defaultdict(str)
self.csu = [0] * 6 # consecutive strategy usage
self.csc = [] # consecutive strategy candidates
            # Dead assignment kept from the standalone bot; `output` here is a
            # local and is never read.
            output = random.choice(["R", "P", "S"])
self.hist = ""
self.myhist = ""
self.opphist = ""
self.my = self.opp = self.my2 = self.opp2 = ""
self.singleopp_my = self.singleopp_opp = self.singleopp_my2 = self.singleopp_opp2 = ""
self.singlemy_my = self.singlemy_opp = self.singlemy_my2 = self.singlemy_opp2 = ""
self.sc = 0
self.opp_strats = []
def counter_prob(self, probs):
weighted_list = []
for h in ['R', 'P', 'S']:
weighted = 0
for p in probs.keys():
points = self.score[h+p]
prob = probs[p]
weighted += points * prob
weighted_list.append((h, weighted))
return max(weighted_list, key=operator.itemgetter(1))[0]
def learn(self, output, input):
self.previous_opp_strats = self.opp_strats[:]
self.previous_sc = self.sc
self.sc = self.score[output + input]
for i, c in enumerate(self.csc):
if c == input:
self.csu[i] += 1
else:
self.csu[i] = 0
m = max(self.csu)
self.opp_strats = [i for i, c in enumerate(self.csc) if self.csu[i] == m]
if self.previous_sc == 1:
for s1 in self.previous_opp_strats:
for s2 in self.opp_strats:
self.win_probs[chr(s1)+chr(s2)] += 1
if self.previous_sc == 0:
for s1 in self.previous_opp_strats:
for s2 in self.opp_strats:
self.tie_probs[chr(s1)+chr(s2)] += 1
if self.previous_sc == -1:
for s1 in self.previous_opp_strats:
for s2 in self.opp_strats:
self.lose_probs[chr(s1)+chr(s2)] += 1
if self.my and self.opp:
self.opp_answers[self.cscore[input+self.opp]] += 1
self.my_answers[self.cscore[input+self.my]] += 1
if self.my2 and self.opp2:
self.opp2_answers[self.cscore[input+self.opp2]] += 1
self.my2_answers[self.cscore[input+self.my2]] += 1
if self.singleopp_my and self.singleopp_opp:
self.singleopp_opp_answers[self.cscore[input+self.singleopp_opp]] += 1
self.singleopp_my_answers[self.cscore[input+self.singleopp_my]] += 1
if self.singleopp_my2 and self.singleopp_opp2:
self.singleopp_opp2_answers[self.cscore[input+self.singleopp_opp2]] += 1
self.singleopp_my2_answers[self.cscore[input+self.singleopp_my2]] += 1
if self.singlemy_my and self.singlemy_opp:
self.singlemy_opp_answers[self.cscore[input+self.singlemy_opp]] += 1
self.singlemy_my_answers[self.cscore[input+self.singlemy_my]] += 1
if self.singlemy_my2 and self.singlemy_opp2:
self.singlemy_opp2_answers[self.cscore[input+self.singlemy_opp2]] += 1
self.singlemy_my2_answers[self.cscore[input+self.singlemy_my2]] += 1
for length in range(min(self.lookback, len(self.hist)), 0, -2):
pattern = self.patterndict[self.hist[-length:]]
if pattern:
for length2 in range(min(self.lookback, len(pattern)), 0, -2):
self.patterndict2[pattern[-length2:]] += output + input
self.patterndict[self.hist[-length:]] += output + input
# singleopp
            for length in range(min(self.lookback // 2, len(self.opphist)), 0, -1):
pattern = self.opppatterndict[self.opphist[-length:]]
if pattern:
for length2 in range(min(self.lookback, len(pattern)), 0, -2):
self.opppatterndict2[pattern[-length2:]] += output + input
self.opppatterndict[self.opphist[-length:]] += output + input
# singlemy
            for length in range(min(self.lookback // 2, len(self.myhist)), 0, -1):
pattern = self.mypatterndict[self.myhist[-length:]]
if pattern:
for length2 in range(min(self.lookback, len(pattern)), 0, -2):
self.mypatterndict2[pattern[-length2:]] += output + input
self.mypatterndict[self.myhist[-length:]] += output + input
self.played_probs[input] += 1
self.opp_probs[self.opp][input] += 1
self.my_probs[self.my][input] += 1
self.both_probs[self.my+self.opp][input] += 1
self.opp2_probs[self.opp2][input] += 1
self.my2_probs[self.my2][input] += 1
self.both2_probs[self.my2+self.opp2][input] += 1
self.hist += output + input
self.myhist += output
self.opphist += input
self.my = self.opp = self.my2 = self.opp2 = ""
self.singleopp_my = self.singleopp_opp = self.singleopp_my2 = self.singleopp_opp2 = ""
self.singlemy_my = self.singlemy_opp = self.singlemy_my2 = self.singlemy_opp2 = ""
for length in range(min(self.lookback, len(self.hist)), 0, -2):
pattern = self.patterndict[self.hist[-length:]]
if pattern != "":
self.my = pattern[-2]
self.opp = pattern[-1]
for length2 in range(min(self.lookback, len(pattern)), 0, -2):
pattern2 = self.patterndict2[pattern[-length2:]]
if pattern2 != "":
self.my2 = pattern2[-2]
self.opp2 = pattern2[-1]
break
break
# singleopp
            for length in range(min(self.lookback // 2, len(self.opphist)), 0, -1):
pattern = self.opppatterndict[self.opphist[-length:]]
if pattern != "":
self.singleopp_my = pattern[-2]
self.singleopp_opp = pattern[-1]
for length2 in range(min(self.lookback, len(pattern)), 0, -2):
pattern2 = self.opppatterndict2[pattern[-length2:]]
if pattern2 != "":
self.singleopp_my2 = pattern2[-2]
self.singleopp_opp2 = pattern2[-1]
break
break
# singlemy
            for length in range(min(self.lookback // 2, len(self.myhist)), 0, -1):
pattern = self.mypatterndict[self.myhist[-length:]]
if pattern != "":
self.singlemy_my = pattern[-2]
self.singlemy_opp = pattern[-1]
for length2 in range(min(self.lookback, len(pattern)), 0, -2):
pattern2 = self.mypatterndict2[pattern[-length2:]]
if pattern2 != "":
self.singlemy_my2 = pattern2[-2]
self.singlemy_opp2 = pattern2[-1]
break
break
def evaluate(self):
probs = {}
for hand in rps:
probs[hand] = self.played_probs[hand]
if self.my and self.opp:
for hand in rps:
probs[hand] *= self.opp_probs[self.opp][hand] * self.my_probs[self.my][hand] * self.both_probs[self.my+self.opp][hand]
probs[hand] *= self.opp_answers[self.cscore[hand+self.opp]] * self.my_answers[self.cscore[hand+self.my]]
self.csc = [self.opp, self.beat[self.opp], self.cede[self.opp], self.my, self.cede[self.my], self.beat[self.my]]
strats_for_hand = {'R': [], 'P': [], 'S': []}
for i, c in enumerate(self.csc):
strats_for_hand[c].append(i)
if self.sc == 1:
pr = self.win_probs
if self.sc == 0:
pr = self.tie_probs
if self.sc == -1:
pr = self.lose_probs
for hand in rps:
for s1 in self.opp_strats:
for s2 in strats_for_hand[hand]:
probs[hand] *= pr[chr(s1)+chr(s2)]
else:
self.csc = []
if self.singleopp_my and self.singleopp_opp:
for hand in rps:
probs[hand] *= self.singleopp_opp_probs[self.singleopp_opp][hand] * \
self.singleopp_my_probs[self.singleopp_my][hand] * \
self.singleopp_both_probs[self.singleopp_my+self.singleopp_opp][hand]
probs[hand] *= self.singleopp_opp_answers[self.cscore[hand+self.singleopp_opp]] * self.singleopp_my_answers[self.cscore[hand+self.singleopp_my]]
if self.singlemy_my and self.singlemy_opp:
for hand in rps:
probs[hand] *= self.singlemy_opp_probs[self.singlemy_opp][hand] * \
self.singlemy_my_probs[self.singlemy_my][hand] * \
self.singlemy_both_probs[self.singlemy_my+self.singlemy_opp][hand]
probs[hand] *= self.singlemy_opp_answers[self.cscore[hand+self.singlemy_opp]] * self.singlemy_my_answers[self.cscore[hand+self.singlemy_my]]
if self.my2 and self.opp2:
for hand in rps:
probs[hand] *= self.opp2_probs[self.opp2][hand] * self.my2_probs[self.my2][hand] * self.both2_probs[self.my2+self.opp2][hand]
probs[hand] *= self.opp2_answers[self.cscore[hand+self.opp2]] * self.my2_answers[self.cscore[hand+self.my2]]
if self.singleopp_my2 and self.singleopp_opp2:
for hand in rps:
probs[hand] *= self.singleopp_opp2_probs[self.singleopp_opp2][hand] *\
self.singleopp_my2_probs[self.singleopp_my2][hand] *\
self.singleopp_both2_probs[self.singleopp_my2+self.singleopp_opp2][hand]
probs[hand] *= self.singleopp_opp2_answers[self.cscore[hand+self.singleopp_opp2]] * \
self.singleopp_my2_answers[self.cscore[hand+self.singleopp_my2]]
if self.singlemy_my2 and self.singlemy_opp2:
for hand in rps:
probs[hand] *= self.singlemy_opp2_probs[self.singlemy_opp2][hand] *\
self.singlemy_my2_probs[self.singlemy_my2][hand] *\
self.singlemy_both2_probs[self.singlemy_my2+self.singlemy_opp2][hand]
probs[hand] *= self.singlemy_opp2_answers[self.cscore[hand+self.singlemy_opp2]] * \
self.singlemy_my2_answers[self.cscore[hand+self.singlemy_my2]]
output = self.counter_prob(probs)
return output
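    # Thin adapter giving Bayes the evaluate/learn/clone/cond_hist interface
    # the strategy universe expects. Note it is commented out of
    # opponent_models below, so only MarkovCore and the Freq family compete.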
class Bayes13Core:
def __init__(self):
self.model = Bayes(10)
self.payoff_accumulator = ScoreAccumulator(actions)
def evaluate(self):
return self.model.evaluate()
def learn(self, me, opp):
self.payoff_accumulator.update(me, opp)
self.model.learn(me, opp)
def clone(self):
# don't actually clone since it's expensive
return self
def cond_hist(self, me = None):
return self.payoff_accumulator.cond_hist()
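    # The second strong player: a Markov ensemble. For each order m up to
    # max_markov_order it counts, per history context, what each side played
    # next, and derives six candidate moves per order (own-move and
    # opponent-move predictors, each with two beatBy rotations as meta
    # strategies) plus one purely random slot. Exponentially decayed,
    # weight-scaled scores decide which slot to trust each round; clone()
    # deliberately returns a fresh, untrained instance.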
class MarkovCore:
def __init__(self, max_markov_order):
self.debug = 0
self.USE_RANDOM = 1
self.USE_MARKOW = 1
self.LAST_ROUND = 1000
self.ROUND = 1
self.max_markov_order = max_markov_order
self.history = []
self.moves = ["R","P","S"]
self.beatedBy = {"R":"P", "P":"S", "S":"R"}
self.result = {"R":{"R":0, "P":-1, "S":1}, "P":{"R":1, "P":0, "S":-1}, "S":{"R":-1, "P":1, "S":0}}
self.alpha = 0.01
self.M = 0
if self.USE_RANDOM == 1:
self.M += 1
if self.USE_MARKOW == 1:
self.markov_orders = []
for m in range(0, max_markov_order + 1):
self.markov_orders.append(m)
self.historyCount = {}
self.M += 6 * len(self.markov_orders)
self.weight = [1] * self.M
self.decay = [0.85] * self.M
self.score = [0] * self.M
self.selected = [0] * self.M
self.move = [random.choice(self.moves) for i in range(self.M)]
self.last = output
def selectBest(self, s):
return random.choice([i for i in range(len(s)) if max(s) == s[i]])
def selectBestDict(self, s):
            ew = {i: s[self.beatedBy[self.beatedBy[i]]] - s[self.beatedBy[i]] for i in s.keys()}
return random.choice([i for i in ew.keys() if max(ew.values()) == ew[i]])
def learn(self, output, input):
if not input:
return
self.ROUND += 1
self.history += [(self.last,input)]
self.score = [ self.decay[i] * self.score[i] + self.weight[i] * self.result[self.move[i]][input] for i in range(self.M)]
self.weight = [ self.weight[i] + self.alpha * self.result[self.move[i]][input] for i in range(self.M)]
index = 0
# random optimal
if self.USE_RANDOM == 1:
self.move[index] = random.choice(self.moves)
# adjust random optimal score to zero
self.score[index] = 0
index += 1
first_meta_index = index
if self.USE_MARKOW == 1:
# markow with meta strategies
for m in self.markov_orders:
if len(self.history) > m:
key = tuple(self.history[-m-1:-1])
if not (key in self.historyCount):
self.historyCount[key] = [{"R":0,"P":0,"S":0},{"R":0,"P":0,"S":0}]
self.historyCount[key][0][self.history[-1][0]] += 1
self.historyCount[key][1][self.history[-1][1]] += 1
for m in self.markov_orders:
if len(self.history) >= m:
key = tuple(self.history[-m:])
if key in self.historyCount:
self.move[index] = self.selectBestDict(self.historyCount[key][0])
self.move[index+3] = self.selectBestDict(self.historyCount[key][1])
else:
self.move[index] = random.choice(self.moves)
self.move[index+3] = random.choice(self.moves)
else:
self.move[index] = random.choice(self.moves)
self.move[index+3] = random.choice(self.moves)
index += 6
# set other meta strategies
for i in range(first_meta_index, self.M, 3):
self.move[i+1] = self.beatedBy[self.move[i]]
self.move[i+2] = self.beatedBy[self.move[i+1]]
def evaluate(self):
best = self.selectBest(self.score)
self.selected[best] += 1
output = self.move[best]
self.last = output
if self.debug == 1:
if self.ROUND == self.LAST_ROUND:
print "score =", self.score
print "selected =", self.selected
return output
def clone(self):
return MarkovCore(self.max_markov_order)
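    # Wraps any model and plays the move that beats the wrapped model's
    # output. `depth` counts stacked Beats layers so that a triple wrap can be
    # collapsed back to the base model in Strategy.get_beat_by; cond_hist()
    # rotates the inner histogram by one to match.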
class Beats:
def __init__(self, model, inner_name, depth = 0):
self.model = model
self.depth = depth
self.inner_name = inner_name
def evaluate(self):
predicted_action = self.model.evaluate()
action = beatBy[predicted_action]
return action
def learn(self, me, opp):
self.model.learn(me, opp)
def clone(self):
return Beats(self.model.clone(), self.inner_name, self.depth)
def cond_hist(self, me = None):
payoff = self.model.cond_hist()
payoff = rotate(payoff, 1)
return payoff
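    # Binds a model to a name and an index in the shared strategy universe,
    # and can locate or lazily build its counter B(X). Because three beatBy
    # rotations cycle back to the original move, the universe stays closed at
    # 3 * len(opponent_models) strategies.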
class Strategy:
def __init__(self, name, myIndex, strategy_universe, universe_size, actions, inner_model):
self.name = name
self.myIndex = myIndex
self.strategy_universe = strategy_universe
self.universe_size = universe_size
self.actions = actions
self.model = inner_model
self.beaten_by = None
def evaluate(self):
action = self.model.evaluate()
# print "pred ", predicted_action, " played ", action
return action
def learn(self, me, opp):
self.model.learn(me, opp)
def cond_hist(self, me = None):
return self.model.cond_hist()
def find_strategy(self, target_name):
for s in self.strategy_universe:
if s.name == target_name:
# print "found ", s.name
return s
return None
def find_beat_by(self):
target_name = "B(" + self.name + ")"
s = self.find_strategy(target_name)
if s:
return s
depth = 1
if (isinstance(self.model, Beats)):
depth = self.model.depth
s = Strategy(
"B(" + self.name + ")",
(self.myIndex + self.universe_size) % (self.universe_size * 3),
self.strategy_universe,
self.universe_size,
self.actions,
Beats(self.model.clone(), self.name, depth + 1)
)
# print "new ", s.name
return s
def get_beat_by(self):
if (isinstance(self.model, Beats)):
if (self.model.depth + 1) % len(self.actions) == 0:
return self.find_strategy(self.model.inner_name)
return self.find_beat_by()
def beatenBy(self):
if self.beaten_by:
return self.beaten_by
self.beaten_by = beaten_by = self.get_beat_by()
return beaten_by
def clone(self):
c = Strategy(self.name, self.myIndex, self.strategy_universe, self.universe_size, self.actions, self.model.clone())
return c
def __hash__(self):
# NOTE: in the online learning version the pd can change, so we can't use it in the hash result
# return self.pd_hash
return self.myIndex
def __eq__(self, other):
return self.myIndex == other.myIndex
def __ne__(self, other):
return not self.__eq__(other)
def __repr__(self):
return self.name
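    # The "markov strategy switcher" from the header comment: a decayed-score
    # hedge over the whole universe. Every round each strategy observes the
    # real moves and proposes its next action; each proposal's score is
    # decayed by 0.85 and nudged by its would-have-been payoff, and the bot
    # plays the current top scorer's proposal (ties broken at random). Scores
    # are seeded with a descending cubic ramp so earlier-listed strategies are
    # preferred before any evidence accumulates.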
class FreqStrategySelector:
def __init__(self, strategy_universe):
self.debug = 0
self.ROUND = 1
self.LAST_ROUND = 1000
self.strategy_universe = strategy_universe
self.N = len(self.strategy_universe)
self.last_action = [None] * self.N
self.score = [0] * self.N
for i in range(self.N):
scalar = (self.N-1 - i) / float(self.N)
self.score[i] = scalar * scalar * scalar
# print self.score
self.weight = [1] * self.N
self.decay = [0.85] * self.N
self.selected = [0] * self.N
# self.alpha = 0.005
self.alpha = 0.009
# print self.alpha
def learn(self, me, opp):
self.ROUND += 1
if self.last_action[0]:
# update scores and weights
self.score = [ self.decay[i] * self.score[i] + self.weight[i] * (1 if self.last_action[i] == beatBy[opp] else -1 if beatBy[self.last_action[i]] == opp else 0) for i in range(self.N)]
self.weight = [ self.weight[i] + self.alpha * (1 if self.last_action[i] == beatBy[opp] else -1 if beatBy[self.last_action[i]] == opp else 0) for i in range(self.N)]
for i,s in enumerate(self.strategy_universe):
s.learn(me, opp)
self.last_action[i] = s.evaluate()
def evaluate(self):
# # try pd
# pd = normalize(self.score)
# sel = choice([i for i in range(len(self.strategy_universe))], 1, pd)[0]
# try max
sel = random.choice([i for i in range(len(self.strategy_universe)) if max(self.score) == self.score[i]])
self.selected[sel] += 1
if self.debug == 1:
if self.ROUND == self.LAST_ROUND:
print "score ", self.score
print "weight ", self.weight
print "selected ", self.selected
return self.last_action[sel]
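    # First-round setup: instantiate the base models, then add B(model) and
    # B(B(model)) rotations of each, wire the shared universe into every
    # Strategy, and hand the full set to the switcher.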
actions = [Rock(), Paper(), Scissors()]
for a in actions:
a.reset(actions)
strategy_universe = []
opponent_models = [
("M", MarkovCore(6)),
# ("B", Bayes13Core()),
("F10", Freq(10, actions)),
("F9", Freq(9, actions)),
("F8", Freq(8, actions)),
("F7", Freq(7, actions)),
("F6", Freq(6, actions)),
("F5", Freq(5, actions)),
("F4", Freq(4, actions)),
("F3", Freq(3, actions)),
("F2", Freq(2, actions)),
("F1", Freq(1, actions)),
]
universe_size = len(opponent_models) * 3
length = len(strategy_universe)
for i,model in enumerate(opponent_models):
strategy_universe.append(Strategy(model[0], i, None, universe_size, actions, model[1]))
length = len(strategy_universe)
for i,model in enumerate(opponent_models):
strategy_universe.append(Strategy("B(" + model[0] + ")", length + i, None, universe_size, actions, Beats(model[1].clone(), model[0], 1)))
length = len(strategy_universe)
for i,model in enumerate(opponent_models):
strategy_universe.append(Strategy("B(B(" + model[0] + "))", length + i, None, universe_size, actions, Beats(Beats(model[1].clone(), model[0], 1), model[0], 2)))
for s in strategy_universe:
s.strategy_universe = strategy_universe
strategy_set = FreqStrategySelector(strategy_universe)
else:
# print "actual ", input, output
strategy_set.learn(output, input)
output = strategy_set.evaluate()