# Author | kera |
# Submission date | 2018-07-11 05:33:14.918150 |
# Rating | 5829 |
# Matches played | 290 |
# Win rate | 52.41 |
# Use rpsrunner.py to play unranked matches on your computer.
import math
import sys
import random
def calcOutput(hist):
    """Run the two-layer network forward on ``hist`` and return three scores.

    Reads the module-level sizes ``c`` and ``m`` and the weight tables
    ``w1`` (hidden layer, bias stored at column ``c*6``) and ``w2``
    (output layer, bias stored at column ``c*6*m``).

    Side effect: for every hidden unit the weighted sum is stored in
    ``wo1[0]`` and its rectified value (anything not above zero becomes 0)
    in ``wo1[1]``.  ``learn`` reads ``wo1[1]`` after calling this function.

    hist -- indexable sequence; only its first ``c*6`` entries are read.
    Returns a new list with one value per output row of ``w2`` (three).
    """
    inputCount = c * 6
    hiddenCount = inputCount * m
    rawSums = wo1[0]
    rectified = wo1[1]

    # Hidden layer: bias first, then inputs added in index order so the
    # floating-point accumulation order stays fixed.
    for unit in range(hiddenCount):
        unitWeights = w1[unit]
        total = unitWeights[inputCount]
        for k in range(inputCount):
            total = total + hist[k] * unitWeights[k]
        rawSums[unit] = total
        rectified[unit] = total if total > 0 else 0

    # Output layer: same bias-then-accumulate scheme over the hidden values.
    scores = []
    for outIndex in range(3):
        outWeights = w2[outIndex]
        total = outWeights[hiddenCount]
        for k in range(hiddenCount):
            total = total + rectified[k] * outWeights[k]
        scores.append(total)
    return scores
def cvtHand(a):
    """Convert a move number to its letter: 0 -> 'R', 1 -> 'P', 2 -> 'S'.

    Returns None (explicitly) for any other value, as the original
    if-chain did implicitly by falling off the end.
    """
    for index, symbol in enumerate('RPS'):
        if a == index:
            return symbol
    return None
def cvtNum(a):
    """Convert a move letter to its number: 'R' -> 0, 'P' -> 1, 'S' -> 2.

    Returns None (explicitly) for any other value, as the original
    if-chain did implicitly by falling off the end.
    """
    for index, symbol in enumerate('RPS'):
        if a == symbol:
            return index
    return None
def calcHand(q):
    """Return the move letter for the largest of the first three scores.

    q -- indexable sequence with at least three comparable values.
    On ties the lowest index wins, because a later entry only replaces
    the current best when it is strictly greater.
    """
    bestIndex = 0
    # Index 0 is the starting candidate, so only 1 and 2 need comparing.
    for index in (1, 2):
        if q[bestIndex] < q[index]:
            bestIndex = index
    return cvtHand(bestIndex)
def calcNextHist(op):
    """Build a shifted copy of the global ``hist`` for opponent move ``op``.

    The copy drops the first six entries of ``hist`` and appends a
    three-slot one-hot encoding of ``op`` followed by three zeros.  The
    global ``hist`` itself is not modified.

    op -- opponent move letter understood by ``cvtNum``.
    Returns the new list.
    """
    shifted = list(hist)[6:]
    opponentOneHot = [0, 0, 0]
    opponentOneHot[cvtNum(op)] = 1
    # The trailing three zeros are a blank slot that calcTeacher fills in.
    return shifted + opponentOneHot + [0, 0, 0]
def maxA(tmp):
    """Return the largest element of ``tmp``.

    tmp -- non-empty sequence of mutually comparable values.
    Raises IndexError when ``tmp`` is empty, matching the error the
    original hand-written loop produced by reading ``tmp[0]``.
    """
    if not tmp:
        raise IndexError("maxA() called with an empty sequence")
    return max(tmp)
def calcTeacher(op):
    """Compute the three training targets after the opponent played ``op``.

    For each of the three possible own moves, the move is written as a
    one-hot into the last three slots of the next-history vector, the
    network is evaluated on it, and the largest output is taken.  The
    target mixes that value with an immediate reward using the global
    factor ``g``: ``reward * (1 - g) + g * bestNextOutput``.

    The reward is 1 for the move numbered one above ``op`` (mod 3), -1 for
    the move numbered two above ``op`` (mod 3), and 0 otherwise.

    Note: every ``calcOutput`` call here overwrites the global ``wo1``.

    op -- opponent move letter understood by ``cvtNum``.
    Returns a list of three target values, indexed by own move number.
    """
    nextHist = calcNextHist(op)
    opponentNum = cvtNum(op)
    rewardedMove = (opponentNum + 1) % 3
    penalisedMove = (opponentNum + 2) % 3
    ownSlotStart = c * 6 - 3

    targets = []
    for move in range(3):
        # Temporarily mark this own move, evaluate, then clear the slot.
        nextHist[ownSlotStart + move] = 1
        bestNextOutput = maxA(calcOutput(nextHist))
        nextHist[ownSlotStart + move] = 0

        if move == rewardedMove:
            reward = 1
        elif move == penalisedMove:
            reward = -1
        else:
            reward = 0
        targets.append(reward * (1.0 - g) + g * bestNextOutput)
    return targets
def learn(op):
    """Apply one weight update after the opponent played ``op``.

    Computes the targets with ``calcTeacher``, evaluates the network on
    the current global ``hist``, and moves every weight against the error
    ``output - target`` scaled by the global learning rate ``r``.  Updates
    the global tables ``w1`` and ``w2`` in place.

    The original version ended with a second ``calcOutput(hist)`` whose
    result was only used by a commented-out debug print.  It is omitted
    here: within this file every later read of ``wo1`` is preceded by a
    fresh ``calcOutput`` call, so the extra forward pass had no effect.

    op -- opponent move letter understood by ``cvtNum``.
    Returns None.
    """
    inputCount = c * 6
    hiddenCount = inputCount * m

    teacher = calcTeacher(op)
    # Must run after calcTeacher: calcTeacher's own forward passes overwrite
    # wo1, and the hidden values read below must belong to ``hist``.
    out = calcOutput(hist)
    hidden = wo1[1]

    # Output layer.  The error sent back to each hidden unit is accumulated
    # with the weight value from *before* that weight is updated.
    hiddenGrad = [0] * hiddenCount
    for i in range(3):
        grad = out[i] - teacher[i]
        outWeights = w2[i]
        outWeights[hiddenCount] -= grad * r
        for j in range(hiddenCount):
            hiddenGrad[j] += grad * outWeights[j]
            outWeights[j] -= grad * hidden[j] * r

    # Hidden layer.  A unit whose rectified value is not positive passes no
    # gradient, so its weights are left untouched.
    for i in range(hiddenCount):
        if not hidden[i] > 0:
            continue
        grad = hiddenGrad[i]
        unitWeights = w1[i]
        unitWeights[inputCount] -= grad * r
        for j in range(inputCount):
            unitWeights[j] -= grad * hist[j] * r
def updateHist(op, my, h):
    """Shift history list ``h`` in place by one round.

    Removes the first six entries of ``h`` and appends a three-slot
    one-hot of the opponent move followed by a three-slot one-hot of the
    own move.

    op -- opponent move letter understood by ``cvtNum``.
    my -- own move letter understood by ``cvtNum``.
    h  -- mutable list to update; modified in place, nothing is returned.
    """
    opponentOneHot = [0, 0, 0]
    ownOneHot = [0, 0, 0]
    del h[:6]
    opponentOneHot[cvtNum(op)] = 1
    ownOneHot[cvtNum(my)] = 1
    h.extend(opponentOneHot)
    h.extend(ownOneHot)
# Per-round driver.  NOTE(review): ``input`` is presumably injected by the
# hosting runner (the header mentions rpsrunner.py) as the opponent's last
# move, with '' on the first round -- confirm against the runner.
if input == '':
    # First round: set the hyper-parameters and allocate the network state.
    c = 1      # history length in rounds; each round occupies 6 slots
    g = 0.9    # weight of the best next-state output in calcTeacher
    m = 1      # hidden units per input slot (hidden size is c*6*m)
    r = 0.45   # learning rate used by learn
    # Hidden-layer weights are drawn row by row, column by column; the last
    # column of each row is the bias.
    w1 = [
        [random.gauss(0, math.sqrt(2.0/(c*2+1)/3.0)) for _ in range(c*6+1)]
        for _ in range(c*6*m+1)
    ]
    # Output-layer weights start at zero; the last column is the bias.
    w2 = [[0] * (c*6*m+1) for _ in range(3)]
    # wo1[0] holds hidden weighted sums, wo1[1] their rectified values.
    wo1 = [[0] * (c*6*m+1) for _ in range(2)]
    wo2 = [[0] * 3 for _ in range(2)]
    hist = [0] * (6*c)
else:
    # Later rounds: train on the opponent's move, then record both moves.
    learn(input)
    updateHist(input, oldOutput, hist)
# Choose this round's move and remember it for the next updateHist call.
output = calcHand(calcOutput(hist))
oldOutput = output