-
Notifications
You must be signed in to change notification settings - Fork 0
/
Double_DQN_for_Gym_LunarLander.py
68 lines (50 loc) · 2.01 KB
/
Double_DQN_for_Gym_LunarLander.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Build the LunarLander environment and the agent that will be trained on it.
env = gym.make('LunarLander-v2')
agent = Agent(
    stateShape=env.observation_space.shape[0],
    actionShape=env.action_space.n,
    exploreRate=1.0,
    exploreRateDecay=0.9995,
    minimumExploreRate=0.01,
    gamma=0.99,
    copyNetsCycle=100,
)

# Training loop: 200 episodes.
# To continue training from the pretrained model, uncomment the following line
# (the pretrained weights are included in the GitHub directory "pretrained_model").
# agent.loadModel("DoubleDQN_LunarLanderV2.h")
averageRewards = []  # running mean of totalRewards after each episode
totalRewards = []    # undiscounted reward sum of each episode
# range(200) runs exactly 200 episodes; the log line prints i+1, so episodes
# are reported as 1..200 (the previous range(1, 200) ran 199, labelled 2..200).
for i in range(200):
    done = False
    truncated = False
    state, _ = env.reset()
    rewards = 0
    # The episode stops as soon as env.step reports either end-of-episode flag.
    while not (done or truncated):
        action = agent.getAction(state)
        nextState, reward, done, truncated, _ = env.step(action)
        # Only `done` is stored with the transition; the truncation flag is not.
        agent.memory.save(state, action, reward, nextState, int(done))
        rewards += reward
        state = nextState
        # NOTE(review): the source's indentation was lost; one learning call per
        # environment step is assumed here -- confirm against the original file.
        agent.learn(batchSize=64)
    totalRewards.append(rewards)
    averageRewards.append(sum(totalRewards) / len(totalRewards))
    print(f"episode: {i+1} reward: {rewards} avg so far:{averageRewards[-1]} exploreRate:{agent.exploreRate}")
# Save one figure per reward curve; the curve name is used both as the plot
# title and as the output file name passed to savefig.
# (For a symmetric-log y axis, call plt.yscale('symlog') before plotting.)
reward_curves = (
    ("Total Rewards", totalRewards),
    ("Average Rewards", averageRewards),
)
for curve_index, (curve_name, curve_values) in enumerate(reward_curves):
    if curve_index > 0:
        # Clear the previous curve so the figures do not overlap.
        plt.clf()
    plt.title(curve_name)
    plt.plot(curve_values)
    plt.savefig(curve_name, dpi=200)
# Evaluation loop: 20 episodes, with no learning and no replay-memory writes.
# To load the pretrained model for evaluation, uncomment the following line
# (the pretrained weights are included in the GitHub directory "pretrained_model").
# agent.loadModel("DoubleDQN_LunarLanderV2.h")
evaluation_episodes = 20  # single source of truth for the loop bound and the average
sum_evaluation_rewards = 0
for i in range(evaluation_episodes):
    done = False
    truncated = False
    state, _ = env.reset()
    rewards = 0
    while not (done or truncated):
        # NOTE(review): the second positional argument presumably switches the
        # agent to evaluation (non-exploring) action selection -- confirm in Agent.getAction.
        action = agent.getAction(state, True)
        nextState, reward, done, truncated, _ = env.step(action)
        rewards += reward
        state = nextState
        # No learning here: the model is only being tested.
    print(f" Test episode: {i+1} gained reward: {rewards}")
    sum_evaluation_rewards += rewards
print(f"average of evaluation rewards: {sum_evaluation_rewards/evaluation_episodes}")