I am simulating the epsilon-greedy algorithm on a bandit problem with 3 arms and Bernoulli returns. After running the experiment, I want to plot the return of each arm: at each time slot, the arm that was chosen takes its return (0 or 1) as its value, and the other 2 arms are set to -1. Now I would like to plot the return of one arm against the time slot. (The value is -1, 0, or 1.)
import random

import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
class greedy():
    """Epsilon-greedy agent for a 3-armed bandit with Bernoulli rewards.

    At every time slot the agent explores (pulls a uniformly random arm)
    with probability ``epsilon`` and otherwise exploits (pulls the arm with
    the highest estimated value).  The outcome of every slot is recorded in
    ``greedy_reward`` so that it can be plotted per arm afterwards.
    """

    def __init__(self, epsilon, n):
        """Set up the agent.

        Args:
            epsilon: Probability of exploring in a time slot (0 <= epsilon <= 1).
            n: Number of time slots simulated by ``exp``.
        """
        self.epsilon = epsilon
        self.n = n
        # Running estimate of each arm's expected reward.
        self.value = [0, 0, 0]
        # Number of times each arm has been pulled so far.
        self.count = [0, 0, 0]
        # True success probability of each arm's Bernoulli reward.
        self.prob = [0.4, 0.6, 0.8]
        # greedy_reward[arm][t] is the reward (0 or 1) if `arm` was pulled at
        # time slot t and -1 if another arm was pulled.  One column per time
        # slot, so the table always matches the length of the experiment.
        self.greedy_reward = [[0] * n for _ in range(3)]

    def _pull(self, arm, i):
        """Pull `arm` at time slot `i`, record the reward and update its estimate."""
        p = self.prob[arm]
        r = np.random.choice([0, 1], p=(1 - p, p))  # Bernoulli(p) draw
        self.count[arm] += 1
        # Mark every arm as "not pulled" first, then overwrite the pulled arm.
        for row in self.greedy_reward:
            row[i] = -1
        self.greedy_reward[arm][i] = r
        # Incremental sample mean: Q <- Q + (1/N) * (r - Q).
        self.value[arm] = self.value[arm] + (1 / self.count[arm]) * (r - self.value[arm])

    def exploration(self, i):
        """Pull an arm chosen uniformly at random at time slot `i`."""
        self._pull(np.random.choice([0, 1, 2]), i)

    def exploitation(self, i):
        """Pull the arm with the highest current estimate at time slot `i`.

        Ties are resolved in favour of the lowest arm index.
        """
        self._pull(self.value.index(max(self.value)), i)

    def EE_choice(self, i):
        """Decide between exploration and exploitation for time slot `i`."""
        output = np.random.choice(  # 0 is exploitation, 1 is exploration
            [0, 1],
            p=[1 - self.epsilon, self.epsilon],
        )
        if output == 1:
            self.exploration(i)
        else:
            self.exploitation(i)

    def exp(self):
        """Run the whole experiment: one explore/exploit decision per time slot."""
        for i in range(self.n):
            self.EE_choice(i)
Then we extract the return of one arm, for example arm 3.
import matplotlib.pyplot as plt
# Returns recorded for arm 3 (index 2), one entry per time slot:
# -1 means another arm was pulled, 0/1 is the Bernoulli reward it produced.
arm_3_y = list(greedy_1.greedy_reward[2])
# Time slots are numbered from 1; take the length from the data rather than
# hard-coding it so the plot follows the length of the experiment.
x = list(range(1, len(arm_3_y) + 1))
# Thousands of default-size markers smear into solid bands, so draw them
# small and semi-transparent to keep individual time slots distinguishable.
plt.scatter(x, arm_3_y, marker='o', s=4, alpha=0.5)
# Leave a margin around -1 and 1; with limits exactly at the data values the
# markers sit on the axes border and are half clipped.
plt.ylim([-1.2, 1.2])
plt.yticks([-1, 0, 1])
plt.xlabel('time slot')
plt.ylabel('return of arm 3')
plt.show()
As we can see, the points that lie on the same vertical line overlap one another. Is there any way to avoid this?
