-
Notifications
You must be signed in to change notification settings - Fork 0
/
gpsettings
87 lines (61 loc) · 1.65 KB
/
gpsettings
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# ---------------------------------------------------------------
# Problem parameters (context and bandit settings)
# ---------------------------------------------------------------
const context_dim = 3
const context_mean = 0
const context_sd = 1
# Set to false for GP.
const context_constant = false
const obs_sd = 5
const bandit_count = 5
const bandit_prior_mean = 0
const bandit_prior_sd = 10

# ---------------------------------------------------------------
# MCMC parameters
# ---------------------------------------------------------------
# Vector of length `context_dim` with every entry equal to `bandit_prior_mean`.
prior_mean = fill(bandit_prior_mean, context_dim)
# `context_dim` x `context_dim` matrix with `bandit_prior_sd^2` on the
# diagonal and zeros elsewhere.
prior_cov = diagm(fill(bandit_prior_sd^2, context_dim))
# NOTE(review): `prior_mean` and `prior_cov` are not declared `const`, unlike
# the scalar settings around them -- confirm whether they are rebound
# elsewhere before changing that.
const proposal_sd = 0.1
const n_burn = 200
const n_dup = 10
# Multi-action setting
const multi_count = 10

# Simulation horizon
const T = 100

# Number of global simulation episodes (per index job)
const n_episodes = 1

# Discount parameter
const discount = 1.0

# Parameters for the epsilon-greedy policy
const epsilon = 0.4
const decreasing = true
# Parameters for all rollout methods
const rollout_length = 50  # 20
const n_rollouts = 20_000  # 100000

# Parameters for the SPSA optimization method
const n_opt_rollouts = 20_000  # 100000
const n_spsa_iter = 10_000

# Parameters for the grid optimization method
const n_grid_iter = 7
const grid_ratio = 2
const grid_num = 6
const int_length = 2
const n_grid_rollouts = 50

# Non-adaptive grid (n_opt_rollouts is used for the optimization rollouts).
# Alternative grids kept for reference:
#   grid_margin_1 = [0, .5, 1, 2, 4]
#   grid_margin_2 = [0, .5, 1, 2]
#   grid_margin_3 = [.5, 1, 2]
grid_margin_1 = [0.0, 1.0]
grid_margin_2 = [0.0, 1.0]
grid_margin_3 = [1.0, 2.0]
# Non-stationary parameters
const delta = 1.0

# GP parameters
const kernel_scale = 10
const kernel_bandwidth = 10

# MAB parameters
const a_beta_prior = 1
const b_beta_prior = 1

# VB parameters
const vb_rollout_mult = 5
const vb_rollout_tol = 1e-5
const vb_policy_tol = 0.01

# Adaptive parameters
const expected_regret_thresh = 5e-4
const action_expected_regret_thresh = 1e-5