-
Notifications
You must be signed in to change notification settings - Fork 24
/
sell_order_agent.py
70 lines (56 loc) · 2.57 KB
/
sell_order_agent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import math
from agent import Agent
from model import OrderModel
class SellOrderAgent(Agent):
    """Agent that decides the sell price for an open position.

    The agent observes the state of day T-1 and places its sell order on
    day T. In training mode the exact reward of every possible action is
    known, so the model is fitted on all actions at once; in evaluation
    mode the realised profit/return is recorded instead.
    """

    # Most recent state vector, set in process_next_state and consumed by
    # fit_all_actions when training the model.
    state = None
    # Declared alongside `state` ("save the action needed to pass to fit
    # method" per the original comment) but never written in this file —
    # NOTE(review): confirm whether any external caller relies on it.
    action = None

    def __init__(self, environment):
        super().__init__(environment)
        # Order model: 7 input features (technical indicators, 4*8 per the
        # original note), one hidden layer of 32 units, 50 training steps.
        self.model = OrderModel(7, [32], 50, str(self.__class__.__name__))

    def process_action(self, action, date):
        """Place a sell order on `date` derived from `action`.

        The sell order agent considers state on T-1 and places the order
        on day T. Returns False when market data or indicators are missing
        (episode terminated), True otherwise.
        """
        market_data = self.environment.get_market_data_by_date(date)
        if market_data is None:
            # No data for this date — signal termination to the caller.
            return False

        ma5 = market_data['ma5']
        high = market_data['High']
        close = market_data['Close']
        if ma5 is None or high is None:
            # Required indicators unavailable — terminate.
            return False

        # Candidate sell price: the 5-day moving average adjusted by
        # `action` percent.
        sp = ma5 + action / 100 * ma5
        if sp - high > 0:
            # Target price was never reached intraday; fall back to close.
            sp = close

        if not self.environment.get_evaluation_mode():
            # Training: rewards are well defined, fit on every action.
            self.fit_all_actions(ma5, high)
        else:
            buy_price = self.environment.get_buy_price()
            # Net proceeds after transaction cost, minus the entry price.
            profit = (1 - self.environment.transaction_cost) * sp - buy_price
            # Return is the same quantity relative to the entry price
            # (reuses `profit` instead of re-deriving the expression).
            pf_return = profit / buy_price
            # 'return' is a Python keyword, so the kwargs must go through
            # a dict rather than keyword syntax.
            record = {'sp': sp, 'date': date, 'profit': profit,
                      'return': pf_return}
            self.environment.record(**record)

        # `sp` is passed twice by the original design —
        # NOTE(review): confirm the invoke_buy_signal_agent signature.
        self.environment.invoke_buy_signal_agent(
            sp, date, self.environment.get_buy_price(), sp)
        return True

    def process_next_state(self, date):
        """Advance one step: build the T-1 state, choose an action, act on T.

        `date` is already the next day; the state must come from the same
        day the sell-signal agent used, i.e. the previous trading day.
        """
        prev_date = self.environment.get_prev_day(date)
        self.state = self.environment.get_sell_order_states_by_date(prev_date)
        action = self.get_action(self.state)
        if not self.process_action(action, date):
            # Market data ran out — close the epoch.
            self.environment.process_epoch_end(None, True)

    def fit_all_actions(self, ma5, high):
        """Fit the model on every possible action with its exact reward.

        Rewards are well defined here, so instead of fitting only the
        taken action we generate the reward for each action in the model's
        action map and fit them all.
        """
        for action in OrderModel.action_map.keys():
            sp = ma5 + action / 100 * ma5
            d = sp - high
            # Reachable prices (sp <= high) earn a reward that grows as sp
            # approaches the intraday high; unreachable prices earn 0.
            reward = math.exp(100 * d / high) if d <= 0 else 0
            self.model.fit(self.state.value, reward, action)