# config_adapt.yml
# Model directory used by the run (relative path).
# NOTE(review): presumably where checkpoints and summaries are written — confirm
# against the training tool's documentation.
model_dir: model

# Parallel corpus files and vocabularies.
# "features" are the source side (*.src), "labels" the target side (*.tgt).
data:
  train_features_file: train.src
  train_labels_file: train.tgt
  eval_features_file: dev.src
  eval_labels_file: dev.tgt
  source_words_vocabulary: vocab.src
  target_words_vocabulary: vocab.tgt
# Model, decoding, and optimization parameters.
params:
  beam_width: 5
  maximum_iterations: 250
  average_loss_in_time: true
  label_smoothing: 0.1
  length_penalty: 0.6

  optimizer: LazyAdamOptimizer
  optimizer_params:
    beta1: 0.9
    beta2: 0.998

  # The learning rate scale constant.
  learning_rate: 2.0
  decay_type: noam_decay
  decay_rate: 512
  # Number of training steps per decay step. Was 2 (1 decay step = 2 training
  # steps); changed to 8 for training on a single GPU.
  decay_step_duration: 8
  # Warmup steps, expressed in decay steps. The earlier note "= 16000 training
  # steps" matched decay_step_duration: 2; with 8 this presumably corresponds
  # to 64000 training steps — TODO confirm.
  decay_steps: 8000
  # Alternative value noted by the author: 16000.
  start_decay_steps: 0

  # (optional) Freeze sub-components of the network (transformer model).
  # NOTE(review): nesting of `freeze` under `params` was reconstructed from a
  # flattened copy of this file — confirm it is not meant to be top-level.
  freeze:
    encoder: false
    decoder: false
    # Currently unnecessary options:
    # shared_embs:       # used only if enc-dec is trained with shared embs
    # src_embs: true     # if encoder is trainable
    # tgt_embs: false    # if decoder is trainable
    # projection: false  # output_layer before softmax
    # nlayer_encoder: 1
    # nlayer_decoder: 6

  # (optional) Load sub-components of the network; use together with
  # --checkpoint.
  # NOTE(review): nesting of `load_weights` under `params` was reconstructed
  # from a flattened copy of this file — confirm it is not meant to be
  # top-level.
  load_weights:
    encoder: true
    decoder: true
    shared_embs: false  # only if enc-dec shared embs is used
    src_embs: true
    tgt_embs: true
    projection: true
    optim: false
    global_step: false
    words_per_sec: false  # not in lstm models
# Training loop settings.
train:
  # Batch size is counted in tokens (see batch_type), not sentences.
  batch_size: 4096
  batch_type: tokens
  bucket_width: 5
  save_checkpoints_steps: 1000  # update accordingly
  keep_checkpoint_max: 10
  save_summary_steps: 50
  # Author's guidance: small models 60000 steps; 200k training examples
  # 100000 steps.
  train_steps: 500000
  sample_buffer_size: 5000000
  maximum_features_length: 100
  maximum_labels_length: 100
# Evaluation settings.
eval:
  batch_size: 32
  num_threads: 10  # default 1
  bucket_width: 5
  prefetch_buffer_size: 1
  n_best: 1
  with_scores: false
  with_alignments: null
  # Options listed by the author: sacreBLEU, BLEU, BLEU-detok, ROUGE.
  external_evaluators: BLEU
  exporters: last
  steps: 100  # default 100
  start_delay_secs: 1  # default 120
  # Evaluate every 1/2 hour (1800 s). Alternative noted by the author:
  # throttle_secs: 60 (default 600).
  eval_delay: 1800
# Inference settings.
infer:
  batch_size: 32