-
Notifications
You must be signed in to change notification settings - Fork 0
/
Actor.py
43 lines (31 loc) · 1.51 KB
/
Actor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Dense, Input, GaussianNoise
from tensorflow.keras.utils import plot_model
import numpy as np
import tensorflow as tf
# Trieda hraca
# Actor (policy) network for an actor-critic / DDPG-style agent.
class Actor:
    """Policy network mapping states to continuous actions in (-1, 1)."""

    def __init__(self, state_shape, action_shape, lr):
        """Build the policy model and its optimizer.

        Args:
            state_shape: shape of the state input (without the batch dim).
            action_shape: action shape; action_shape[0] is the number of
                output units (one per action dimension).
            lr: learning rate for the Adam optimizer.
        """
        state_input = Input(shape=state_shape)
        i = Dense(400, activation='relu', use_bias=True, kernel_initializer='he_uniform')(state_input)
        i = Dense(300, activation='relu', use_bias=True, kernel_initializer='he_uniform')(i)
        # Output layer -- must be tanh so actions lie in (-1, 1)!
        output = Dense(action_shape[0], activation='tanh', use_bias=True, kernel_initializer='glorot_uniform')(i)
        # Build the model
        self.model = Model(inputs=state_input, outputs=output)
        # Optimizer for the custom training loop in train().
        # NOTE: model.compile(..., loss='mse') was removed -- training goes
        # exclusively through tf.GradientTape below, so a compiled loss was
        # dead configuration and misleading about how the policy is trained.
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
        self.model.summary()

    @tf.function
    def train(self, X_train, criticNet):
        """One policy-gradient step: maximize the critic's Q for our actions.

        Args:
            X_train: batch of states.
            criticNet: critic model taking [states, actions] and returning
                Q-value estimates.

        Returns:
            The actor loss (negated mean Q) as a scalar tensor.
        """
        with tf.GradientTape() as tape:
            y_pred = self.model(X_train, training=True)
            q_pred = criticNet([X_train, y_pred])
            # Gradient ascent on Q is implemented as descent on -mean(Q).
            loss_a = -tf.reduce_mean(q_pred)
        actor_grads = tape.gradient(loss_a, self.model.trainable_variables)
        self.optimizer.apply_gradients(zip(actor_grads, self.model.trainable_variables))
        return loss_a

    def save(self):
        """Render the architecture diagram to 'model_A.png'.

        NOTE(review): despite the name, this does NOT persist weights --
        it only plots the model graph.
        """
        plot_model(self.model, to_file='model_A.png')