| Author | zdg |
| --- | --- |
| Submission date | 2012-04-21 02:22:16.816997 |
| Rating | 6969 |
| Matches played | 831 |
| Win rate | 74.97% |
# greedy history pattern match (rpscontest-style bot: `input` holds the
# opponent's previous hand, '' on the first round; `output` must be set to
# our next hand)
# greedy: always use the highest-order context that matches and, within an
# order, its most recent occurrence
# maximum context order: 7
# match against my hands, the opponent's hands, and the paired history to
# predict both players' next hands
# switch with an FSA between all 6+6+6 = 18 strategies
# (3 histories x 2 predicted hands x 3 counter-responses)
# update only the orders that are >= the order last used to make the
# prediction - an idea borrowed from PPM compression
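# A made-up illustration of the idea: if my last two hands were (P, R) and
# that order-2 context last occurred just before round 12, the order-2
# match predicts that the hands of round 12 repeat now, and each strategy
# picks a response to one of the two predicted hands.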
# --------------------- initialization -----------------------------
if not input:
import random, collections
# micro-optimizations
rchoice = random.choice
randint = random.randint
uniform = random.uniform
# global constants and maps
R, P, S = 0, 1, 2
RPS = [R, P, S]
T, W, L = 0, 1, 2
PAYOFFS = [T, W, L]
tonum = {'R':R, 'P':P, 'S':S}
tostr = {R:'R', P:'P', S:'S'}
scoreround = [[T, L, W], [W, T, L], [L, W, T]]
frompayoff = [[R, P, S], [P, S, R], [S, R, P]]
ties, beats, loses = frompayoff[T], frompayoff[W], frompayoff[L]
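# Worked examples of the payoff tables (these follow directly from the
# definitions above):
#   scoreround[R][S] == W    # my rock beats their scissors
#   scoreround[R][P] == L    # my rock loses to their paper
#   frompayoff[W][S] == R    # rock is the hand that beats scissors
#   frompayoff[L][R] == S    # scissors is the hand that loses to rock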
# more specific variables
topoints = [0.0, 1.0, -1.0]
encode1hand = [1,2,3]
decode1hand = [None, R, P, S]
encode2hands = [[1,2,3], [4,5,6], [7,8,9]]
decode2hands = [None,(R,R),(R,P),(R,S),(P,R),(P,P),(P,S),(S,R),(S,P),(S,S)]
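# Encoding examples: hands are stored 1-based so that 0 never appears as a
# digit in the context indices built below.
#   encode1hand[P] == 2
#   encode2hands[R][P] == 2 and decode2hands[2] == (R, P)
#   encode2hands[S][R] == 7 and decode2hands[7] == (S, R)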
ORDER = 7
power3 = [0] + [3 ** i for i in xrange(ORDER)]
power9 = [0] + [9 ** i for i in xrange(ORDER)]
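# power3[order] == 3**(order-1) (with power3[0] == 0) is the place value
# of the oldest hand in a context. Because the digits run 1..3 rather than
# 0..2, each order occupies a disjoint index range (order 1: 1-3,
# order 2: 4-12, order 3: 13-39, ...), so one dict can hold the match
# positions of every order at once; power9 does the same for the 9-symbol
# paired history.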
my_history = collections.defaultdict(lambda: None)
op_history = collections.defaultdict(lambda: None)
both_history = collections.defaultdict(lambda: None)
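# Each history dict maps a context index to the 0-based round index of the
# round that followed the most recent occurrence of that context; key 0 is
# the empty (order-0) context, i.e. simply the latest round. Unseen
# contexts read back as None, which the prediction loop treats as
# "no match".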
my_last_used_order = 0
op_last_used_order = 0
both_last_used_order = 0
# DECAY = 1.0 # no decay seems to be better
STRATEGIES = 18
# strategy layout: 0-2 my hist -> my hand, 3-5 my hist -> op hand,
# 6-8 op hist -> my hand, 9-11 op hist -> op hand,
# 12-14 both hist -> my hand, 15-17 both hist -> op hand
# (each triple holds the tie/win/lose response to the predicted hand)
next_hands = [None for _ in xrange(STRATEGIES)]
transitions = [[0] * STRATEGIES for _ in xrange(STRATEGIES)]
next_state = randint(0, STRATEGIES-1)
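# transitions[s][i] accumulates how well strategy i scored on rounds that
# followed a round played from state s, so the next strategy is chosen
# conditional on the previous one - a stochastic FSA over strategies
# rather than a single global scoreboard. next_hands[i] caches the hand
# strategy i would play in the coming round.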
# bookkeeping
my_hands_encoded = []
op_hands_encoded = []
both_hands_encoded = []
hands_played = 0
# first hand: no history to match yet, so play uniformly at random
output = tostr[rchoice(RPS)]
# --------------------- turn -----------------------------
else:
# bookkeeping
last_hand_index = hands_played
hands_played += 1
my_last_hand = tonum[output]
my_last_hand_encoded = encode1hand[my_last_hand]
op_last_hand = tonum[input]
op_last_hand_encoded = encode1hand[op_last_hand]
both_last_hand_encoded = encode2hands[my_last_hand][op_last_hand]
my_hands_encoded.append(my_last_hand_encoded)
op_hands_encoded.append(op_last_hand_encoded)
both_hands_encoded.append(both_last_hand_encoded)
# update the transition scores once at least two hands have been played
if hands_played > 1:
last_transition = transitions[last_state]
for i in xrange(STRATEGIES):
# last_transition[i] *= DECAY
last_transition[i] += topoints[scoreround[next_hands[i]][op_last_hand]]
if last_transition[i] < 0.0:
last_transition[i] = 0.0
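# Example of the update above: if strategy i would have played P and the
# opponent actually played R, scoreround[P][R] == W and the entry gains
# topoints[W] == 1.0; a loss subtracts 1.0 and a tie adds nothing.
# Clamping at 0.0 keeps the roulette-wheel weights non-negative.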
# update the history, order 0 as a special case
my_update_index = 0
op_update_index = 0
both_update_index = 0
if my_last_used_order == 0:
my_history[0] = last_hand_index
if op_last_used_order == 0:
op_history[0] = last_hand_index
if both_last_used_order == 0:
both_history[0] = last_hand_index
# start the prediction with the zeroth order
my_predict = my_history[0]
op_predict = op_history[0]
both_predict = both_history[0]
# update the higher orders and predict the next hand
for order in xrange(1, ORDER+1 if hands_played > ORDER else hands_played):
my_predict_index = my_update_index * 3 + my_last_hand_encoded
op_predict_index = op_update_index * 3 + op_last_hand_encoded
both_predict_index = both_update_index * 9 + both_last_hand_encoded
my_update_index += my_hands_encoded[-order-1] * power3[order]
op_update_index += op_hands_encoded[-order-1] * power3[order]
both_update_index += both_hands_encoded[-order-1] * power9[order]
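# Index arithmetic, with h(t) the encoded hand of round t and the last
# round at t-1: at order 2 the lookup key is h(t-2)*3 + h(t-1) (most
# recent hand in the low digit), while the update key h(t-3)*3 + h(t-2)
# names the context that preceded the round just played. Each iteration
# prepends one older hand at place value power3[order] (power9 for the
# paired history).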
if order >= my_last_used_order:
my_history[my_update_index] = last_hand_index
if order >= op_last_used_order:
op_history[op_update_index] = last_hand_index
if order >= both_last_used_order:
both_history[both_update_index] = last_hand_index
my_try_get = my_history[my_predict_index]
op_try_get = op_history[op_predict_index]
both_try_get = both_history[both_predict_index]
if my_try_get is not None:
my_predict = my_try_get
my_last_used_order = order
if op_try_get is not None:
op_predict = op_try_get
op_last_used_order = order
if both_try_get is not None:
both_predict = both_try_get
both_last_used_order = order
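# Higher orders overwrite my_predict/op_predict/both_predict whenever they
# hit, so the surviving prediction comes from the longest matching
# context, and *_last_used_order records its length so that next turn
# only orders at least that long are refreshed (the PPM-style rule from
# the header comment).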
my_base_prediction = decode2hands[both_hands_encoded[my_predict]]
op_base_prediction = decode2hands[both_hands_encoded[op_predict]]
both_base_prediction = decode2hands[both_hands_encoded[both_predict]]
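# Each *_predict is the round index that followed the matched context, and
# both_hands_encoded[...] recovers the (my hand, op hand) pair played in
# that round, e.g. a stored code of 6 decodes to (P, S).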
# update each strategy's prediction
for f in PAYOFFS:
next_hands[f] = frompayoff[f][my_base_prediction[0]]
next_hands[f+3] = frompayoff[f][my_base_prediction[1]]
next_hands[f+6] = frompayoff[f][op_base_prediction[0]]
next_hands[f+9] = frompayoff[f][op_base_prediction[1]]
next_hands[f+12] = frompayoff[f][both_base_prediction[0]]
next_hands[f+15] = frompayoff[f][both_base_prediction[1]]
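# Concrete case: with f == W and the op history predicting an opponent S,
# next_hands[W+9] == frompayoff[W][S] == R, i.e. play rock to beat the
# predicted scissors. The f == T and f == L variants cover opponents who
# shift their play to dodge the obvious counter.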
# pick the next state
last_payoff = scoreround[my_last_hand][op_last_hand]  # note: computed but never used below
last_state = next_state
current_transition = transitions[last_state]
next_state = randint(0, STRATEGIES-1)
total = sum(current_transition)
u = uniform(0.0, total)
acc = 0.0
for i in xrange(STRATEGIES):
acc += current_transition[i]
if u < acc:
next_state = i
break
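# Roulette-wheel selection: with scores [0.0, 2.0, 1.0, 0.0, ...] strategy
# 1 is picked with probability 2/3 and strategy 2 with 1/3. If every score
# is zero, u == 0.0 is never strictly less than acc, so the uniformly
# random fallback chosen above is kept.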
# play the next hand
output = tostr[next_hands[next_state]]