qn2

Author: kera
Submission date: 2018-07-11 05:33:14.918150
Rating: 5829
Matches played: 290
Win rate: 52.41

Use rpsrunner.py to play unranked matches on your computer.

Source code:

import math
import sys
import random

def calcOutput(hist):
  """Run the network forward on a history vector and return 3 scores.

  hist must hold at least c*6 numbers.  The hidden layer has c*6*m units;
  unit i uses row w1[i], whose last entry (index c*6) is its bias.  Each of
  the three outputs uses row w2[i], whose last entry (index c*6*m) is its
  bias.

  Side effect: the global scratch buffer wo1 is overwritten in place --
  wo1[0] receives the hidden pre-activations and wo1[1] the rectified
  (negative values clamped to 0) activations.
  """
  nIn = c * 6
  nHidden = nIn * m
  preAct, postAct = wo1[0], wo1[1]

  # Hidden layer: bias first, then the inputs in index order.
  for unit in range(nHidden):
    weights = w1[unit]
    acc = weights[nIn]
    for idx in range(nIn):
      acc = acc + hist[idx] * weights[idx]
    preAct[unit] = acc
    postAct[unit] = acc if acc > 0 else 0

  # Output layer: plain weighted sum of the rectified hidden values.
  scores = []
  for outIdx in range(3):
    weights = w2[outIdx]
    total = weights[nHidden]
    for unit in range(nHidden):
      total = total + postAct[unit] * weights[unit]
    scores.append(total)
  return scores

def cvtHand(a):
  """Translate a move index into its letter: 0 -> 'R', 1 -> 'P', 2 -> 'S'.

  Any other value matches nothing and yields None.
  """
  for code, letter in enumerate('RPS'):
    if a == code:
      return letter
  return None

def cvtNum(a):
  """Translate a move letter into its index: 'R' -> 0, 'P' -> 1, 'S' -> 2.

  Any other value matches nothing and yields None.
  """
  for code, letter in enumerate('RPS'):
    if a == letter:
      return code
  return None

def calcHand(q):
  """Return the move letter for the largest of the three scores in q.

  Only a strictly larger score displaces the current best, so on ties the
  lowest index wins.
  """
  bestIdx = 0
  for idx in (1, 2):
    if q[bestIdx] < q[idx]:
      bestIdx = idx
  return cvtHand(bestIdx)

def calcNextHist(op):
  """Return a copy of the global hist advanced by one round.

  The oldest six slots are dropped and six new ones are appended: a one-hot
  encoding of the opponent move op followed by three zeros where the bot's
  own move would go.  The global hist itself is left untouched.
  """
  oppOneHot = [0, 0, 0]
  oppOneHot[cvtNum(op)] = 1
  return list(hist)[6:] + oppOneHot + [0, 0, 0]

def maxA(tmp):
  """Return the largest element of the non-empty sequence tmp.

  The first element is the initial candidate and is replaced only by a
  strictly greater one, so the earliest maximum is the one returned.
  """
  best = tmp[0]
  for candidate in tmp[1:]:
    if best < candidate:
      best = candidate
  return best

def calcTeacher(op):
  """Build the three training targets once the opponent has played op.

  For every move the bot could have played (0=R, 1=P, 2=S) the target is
      reward * (1 - g) + g * best_next_score
  where reward is +1 if that move beats op, -1 if it loses to op and 0 on a
  tie, and best_next_score is the largest network output for the history
  window advanced with op and that candidate move.

  Each calcOutput call made here overwrites the global wo1 buffer.
  """
  nHist = calcNextHist(op)
  opNum = cvtNum(op)
  winHandNum = (opNum + 1) % 3
  loseHandNum = (opNum + 2) % 3
  mySlot = c * 6 - 3  # first of the three "own move" slots of the newest round

  teacher = []
  for hand in range(3):
    # Temporarily mark `hand` as the bot's move to score the follow-up state.
    nHist[mySlot + hand] = 1
    bestNext = maxA(calcOutput(nHist))
    nHist[mySlot + hand] = 0

    if hand == loseHandNum:
      reward = -1
    elif hand == winHandNum:
      reward = 1
    else:
      reward = 0
    teacher.append(reward * (1.0 - g) + g * bestNext)
  return teacher

def learn(op):
  """Apply one gradient step to w1 and w2 for the opponent move op.

  The targets come from calcTeacher(op); the prediction is the network
  output for the current global hist.  Every weight moves against its error
  gradient scaled by the global step size r.  Nothing is returned; w1, w2
  and the wo1 scratch buffer are modified in place.
  """
  nIn = c * 6
  nHidden = nIn * m

  # Order matters: calcTeacher clobbers wo1, so the forward pass on hist has
  # to come second to leave the activations for hist in wo1.
  teacher = calcTeacher(op)
  out = calcOutput(hist)
  hidden = wo1[1]

  # Output layer.  The error pushed back to each hidden unit is accumulated
  # with the weight value from before that weight is updated.
  gradSum = [0] * (nHidden + 1)
  for target, predicted, weights in zip(teacher, out, w2):
    err = predicted - target
    weights[nHidden] -= err * r
    for unit in range(nHidden):
      gradSum[unit] += err * weights[unit]
      weights[unit] -= err * hidden[unit] * r

  # Hidden layer.  A unit whose rectified output is 0 gets a zero gradient.
  for unit in range(nHidden):
    gate = 1 if hidden[unit] > 0 else 0
    delta = gate * gradSum[unit]
    row = w1[unit]
    row[nIn] -= delta * r
    for idx in range(nIn):
      row[idx] -= delta * hist[idx] * r

  # NOTE(review): the result of this forward pass is discarded; it is kept
  # only because it refreshes wo1 with the updated weights -- confirm
  # whether anything relies on that before removing it.
  calcOutput(hist)
 # print(out)
  #print "-----"

def updateHist(op, my, h):
  """Slide the history window h forward by one round, in place.

  The six oldest slots are removed, then a one-hot encoding of the opponent
  move op and a one-hot encoding of the bot's own move my are appended.
  """
  del h[:6]
  oppOneHot = [0, 0, 0]
  oppOneHot[cvtNum(op)] = 1
  myOneHot = [0, 0, 0]
  myOneHot[cvtNum(my)] = 1
  h.extend(oppOneHot + myOneHot)


# Per-round driver.  `input` is not assigned anywhere in this file and
# `oldOutput` is read before this pass assigns it; presumably the match
# runner sets `input` to the opponent's previous move ('' on the first
# round), reads `output`, and keeps the globals alive between rounds --
# confirm against rpsrunner.py.
if input != '':
  # A round has been played: train on the opponent's move, then slide the
  # window so it ends with that move and the move this bot answered with.
  learn(input)
  updateHist(input, oldOutput, hist)
else:
  # First round: choose the hyper-parameters and allocate the network.
  c = 1     # rounds of history fed to the network (6 input slots each)
  g = 0.9   # weight of the next-state score in calcTeacher's targets
  m = 1     # hidden layer width is c*6*m
  r = 0.45  # step size used for every weight update in learn

  # Hidden-layer weights: Gaussian noise, one extra column for the bias.
  # Rows are filled in order, each row left to right.
  w1 = [[random.gauss(0, math.sqrt(2.0 / (c * 2 + 1) / 3.0))
         for _ in range(c * 6 + 1)]
        for _ in range(c * 6 * m + 1)]
  # Output-layer weights start at zero, one extra column for the bias.
  w2 = [[0] * (c * 6 * m + 1) for _ in range(3)]
  # Scratch buffer written by calcOutput: row 0 pre-activation, row 1 ReLU.
  wo1 = [[0] * (c * 6 * m + 1) for _ in range(2)]
  # Not referenced by any function visible in this file.
  wo2 = [[0] * 3 for _ in range(2)]
  # All-zero history window until real moves arrive.
  hist = [0] * (6 * c)

# Play the move with the highest score and remember it for the next round.
oldOutput = output = calcHand(calcOutput(hist))