-
Notifications
You must be signed in to change notification settings - Fork 7
/
run_matrix_game.py
46 lines (39 loc) · 1.17 KB
/
run_matrix_game.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import numpy as np
import matplotlib.pyplot as plt
from nash_q_learner import NashQLearner
from matrix_game import MatrixGame
def main(nb_episode: int = 1000) -> None:
    """Train two NashQLearner agents against each other on a matrix game.

    Each episode both agents pick an action, the game returns a reward
    pair, and each agent observes its own reward, the opponent's reward,
    and the opponent's previous action.  The probability each agent
    assigns to action 0 is recorded every episode and plotted at the end
    (saved to ``result.jpg`` and shown on screen).

    Args:
        nb_episode: number of self-play episodes to run (default 1000,
            matching the original script).
    """
    # Two-action agents (actions 0 and 1), i.e. a 2x2 matrix game.
    agent1 = NashQLearner(actions=np.arange(2))
    agent2 = NashQLearner(actions=np.arange(2))
    game = MatrixGame()

    # Per-episode probability each agent assigns to action 0.
    pi1_history = []
    pi2_history = []
    for _ in range(nb_episode):
        action1 = agent1.act()
        action2 = agent2.act()
        # step() returns (state, reward1, reward2); the state is unused here.
        _, r1, r2 = game.step(action1, action2)
        # NOTE(review): each agent observes the opponent's `prev_action`
        # rather than the action just played — presumably act() stores the
        # chosen action into prev_action before this point; confirm against
        # NashQLearner before relying on it.
        agent1.observe(reward=r1, reward_o=r2,
                       opponent_action=agent2.prev_action)
        agent2.observe(reward=r2, reward_o=r1,
                       opponent_action=agent1.prev_action)
        pi1_history.append(agent1.get_pi()[0])
        pi2_history.append(agent2.get_pi()[0])

    # Plot the evolution of pi(0) for both agents over training.
    episodes = np.arange(len(pi1_history))
    plt.plot(episodes, pi1_history, label="agent1's pi(0)")
    plt.plot(episodes, pi2_history, label="agent2's pi(0)")
    plt.xlabel("episode")
    plt.ylabel("pi(0)")
    plt.ylim(0, 1)
    plt.legend()
    plt.savefig("result.jpg")
    plt.show()


if __name__ == '__main__':
    main()