Author | kera |
Submission date | 2017-07-19 05:39:23.276020 |
Rating | 7782 |
Matches played | 342 |
Win rate | 74.27 |
Use rpsrunner.py to play unranked matches on your computer.
def calcOutput(hist):
    """Return the three linear scores for the feature vector ``hist``.

    Reads the module-level weight table ``w`` (three rows) and the window
    size ``c``.  For each row the score is the bias weight stored at index
    ``c*6`` plus the dot product of the first ``c*6`` entries of ``hist``
    with the first ``c*6`` weights of that row.

    Note: the parameter ``hist`` shadows the module-level ``hist``; only
    the argument is used here.
    """
    width = c * 6
    scores = []
    for row in (w[0], w[1], w[2]):
        # Start from the bias term, then accumulate left to right so the
        # floating-point rounding order stays fixed.
        total = 1 * row[width]
        for k in range(width):
            total = total + hist[k] * row[k]
        scores.append(total)
    return scores
def cvtHand(a):
    """Map a hand index to its letter: 0 -> 'R', 1 -> 'P', 2 -> 'S'.

    Any other value falls through and yields None.
    """
    for index, letter in enumerate('RPS'):
        if a == index:
            return letter
    return None
def cvtNum(a):
    """Map a hand letter to its index: 'R' -> 0, 'P' -> 1, 'S' -> 2.

    Any other value falls through and yields None.
    """
    for letter, index in (('R', 0), ('P', 1), ('S', 2)):
        if a == letter:
            return index
    return None
def calcHand(q):
    """Return the hand letter ('R', 'P' or 'S') with the highest score.

    Only ``q[0]``, ``q[1]`` and ``q[2]`` are examined.  On ties the lowest
    index wins, because a later score must be strictly greater to replace
    the current best.

    The search starts from negative infinity with index 0 as the fallback,
    so a valid hand is returned even when every score is extremely
    negative (or not comparable, e.g. NaN) instead of passing -1 to
    ``cvtHand`` and returning None.  The local names also no longer shadow
    the builtin ``max``.
    """
    best_idx = 0
    best_score = float('-inf')
    for i in range(3):
        if best_score < q[i]:
            best_score = q[i]
            best_idx = i
    return cvtHand(best_idx)
def calcNextHist(op):
    """Return a new feature list for the round after the opponent plays ``op``.

    The module-level ``hist`` is not modified.  The result is ``hist``
    without its first six entries, followed by a one-hot triple for ``op``
    and three zeros (the slot that ``calcTeacher`` fills in per candidate
    move).
    """
    opp_onehot = [0] * 3
    opp_onehot[cvtNum(op)] = 1
    return list(hist)[6:] + opp_onehot + [0] * 3
def maxA(tmp):
    """Return the largest element of the non-empty sequence ``tmp``.

    The first element is the initial candidate; a later element replaces
    it only when it is strictly greater.  An empty sequence raises
    IndexError.
    """
    best = tmp[0]
    for value in tmp[1:]:
        if best < value:
            best = value
    return best
def calcTeacher(op):
    """Build the three training targets after the opponent played ``op``.

    For each candidate own move ``i`` the target is:

    * ``g`` times the best score ``calcOutput`` gives for the next-round
      features with move ``i`` marked in the last three slots, plus
    * +1 if ``i`` is the hand that beats ``op`` (``(op + 1) % 3``), or
      -1 if ``i`` is the hand that loses to ``op`` (``(op + 2) % 3``).

    Reads the module-level ``c`` and ``g``.
    """
    next_hist = calcNextHist(op)
    op_idx = cvtNum(op)
    winning = (op_idx + 1) % 3
    losing = (op_idx + 2) % 3
    own_slot = c * 6 - 3  # first of the three "own move" entries

    teacher = []
    for i in range(3):
        # Temporarily mark candidate move i, score it, then clear the mark.
        next_hist[own_slot + i] = 1
        target = g * maxA(calcOutput(next_hist))
        next_hist[own_slot + i] = 0
        if i == winning:
            target += 1
        if i == losing:
            target -= 1
        teacher.append(target)
    return teacher
def learn(op):
    """Update the weight table ``w`` in place from the opponent's move ``op``.

    Targets come from ``calcTeacher(op)`` and current scores from
    ``calcOutput(hist)`` (both taken before any weight is changed).  For
    each output the step is a forward finite difference of the halved
    squared error with respect to the residual, which works out to roughly
    ``(teacher - out) + dw/2``, scaled by the module-level rate ``r``.
    The bias weight (index ``c*6``) receives the step directly; every
    other weight receives it multiplied by the matching ``hist`` entry.
    """
    dw = 0.000001
    idw = 1.0 / dw
    teacher = calcTeacher(op)
    out = calcOutput(hist)
    bias_at = c * 6
    for i in range(3):
        residual = teacher[i] - out[i]
        base_error = residual ** 2 / 2
        bumped_error = (residual + dw) ** 2 / 2
        diff = bumped_error - base_error
        row = w[i]
        row[bias_at] += diff * idw * r
        for j in range(bias_at):
            # Keep this multiplication order; it fixes the rounding.
            row[j] += hist[j] * diff * idw * r
def updateHist(op, my, h):
    """Slide the feature window ``h`` forward by one round, in place.

    Removes the first six entries of ``h`` and then appends six new ones:
    a one-hot triple for the opponent's move ``op`` followed by a one-hot
    triple for our own move ``my``.
    """
    del h[:6]
    fresh = []
    # Encode both moves before touching h again, opponent first.
    for move in (op, my):
        onehot = [0] * 3
        onehot[cvtNum(move)] = 1
        fresh += onehot
    h.extend(fresh)
# Per-round driver.  The names `input` and `output` are presumably supplied
# and read by the contest runner (rpsrunner.py) -- confirm against it.
# `input` is compared with '' to detect the first round.
if input != '':
    # Later rounds: train on the opponent's last move, then shift the window.
    learn(input)
    updateHist(input, oldOutput, hist)
else:
    # First round: set up the model state.
    c = 7                                      # rounds kept in the window
    g = 0.5                                    # factor on the best next-round score
    r = 1.0 / (c * 2 + 1)                      # step size used by learn()
    w = [[0] * (c * 6 + 1) for _ in range(3)]  # 3 rows: c*6 weights + bias
    hist = [0] * (6 * c)                       # 6 features per remembered round
# Choose this round's move and remember it for the next history update.
output = oldOutput = calcHand(calcOutput(hist))