univprobs4_4

Authoralainchau
Submission date2018-06-13 03:21:24.402010
Rating4450
Matches played297
Win rate46.13

Use rpsrunner.py to play unranked matches on your computer.

Source code:

# Idea: Use mixture of three strategies

# Strategy 1: Use mixture of kth order markov probabilities from HW 2 to make sequential predictions.
# Note: The following code has been adapted to avoid numpy since rpscontest doesn't allow it.
# This is good for learning patterns like
# RRRR...
# RPS RPS RPS ...

# Strategy 2: Counter the naive frequentist strategy. The opponent will probability keep track
# of how many times we used rock,paper,or scissors. If we tend to use scissors often, the opponent
# is more likely to pick rock and, anticipating this reaction, we should pick paper.

# Strategy 3: Random choice. Revert to this strategy if our other strategies don't work.
# This is to avoid getting dominated by superior bots.
if input == "":
    import random, collections, math
    
    def argmax(lst):
        return lst.index(max(lst))
    
    # Make prediction based on probability distribution defined by:
    # probs = [p1, p2, p3] = [prob(Rock), prob(Paper), prob(Scissors)]
    def get_pred(probs, thresh = 0.5):
        # if one probability dominates others, choose that one
        # ex. [0.1, 0.1, 0.8] -> just pick 3 (scissors)
        if max(probs) > thresh:
            return argmax(probs)
        
        roll = random.random()
        if roll <= probs[0]:
            return 0 # rock
        elif roll <= probs[0] + probs[1]:
            return 1 # paper
        else:
            return 2 #scissors
    
    # ===== HYPERPARAMETERS =====
    alpha = 1/2 # KT
    k = 4    # max order of markov
    # ===========================
    
    dct        = {"R": 0, "P":1, "S":2}
    lst        = ["R", "P", "S"]
    rps_counts = {"R": 0, "P":0, "S":0} #how many times have we used each
    
    # weights for "experts". wts[0]=1.1 is a hack to favor markov strategy 
    n_experts = 2 + k
    wts     = [1] * (n_experts)
    outputs = [0] * (n_experts)
    firstround = True # dont adjust weights using results of first round
    
    
    counter = [{} for i in range(0,k)] # key=state, value=counts
    
    # random initial state
    suffix = "".join(random.choice("RPS") for i in range(0, k))
    for i in range(0,k):
        suff = suffix[k-i:]
        counter[i][suff] = counter[i].get(suff, [alpha] * 3)
    
    # random
    output = random.choice("RPS")
    rps_counts[output] += 1

else:
    # for system tests
    try:
        # weight the experts
        if firstround:
            firstround = False
        else:
            for i in range(0,n_experts):
                if (dct[outputs[i]] - dct[input]) % 3 == 1: wts[i] += 1    

        # update probabilities
        for i in range(0,k):
            counter[i][suffix[k-i:]][dct[input]] += 1
        suffix = suffix[1:] + input
        for i in range(0,k):
            suff = suffix[k-i:]
            counter[i][suff] = counter[i].get(suff, [alpha] * 3)

        # strategy 1
        for i in range(0,k):
            T = counter[i][suffix[k-i:]]
            probs = [float(x)/sum(T) for x in T]
            outputs[i+2] = lst[(get_pred(probs) + 1) % 3]

        # strategy 2
        mypred = max(rps_counts, key=rps_counts.get)
        outputs[1] = lst[(dct[mypred] + 2) % 3]

        # strategy 3
        outputs[0] = random.choice("RPS")

        output = outputs[argmax(wts)]
        rps_counts[output] += 1
    except:
        output = random.choice("RPS")