-
Notifications
You must be signed in to change notification settings - Fork 56
/
ser_layoutxlm_xfund_zh.yaml
131 lines (121 loc) · 3.45 KB
/
ser_layoutxlm_xfund_zh.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# Global runtime settings.
system:
  # 0 for graph mode, 1 for pynative mode in MindSpore
  mode: 0
  distribute: false
  amp_level: 'O0'
  seed: 42
  log_interval: 10
  val_while_train: true
  drop_overflow_update: false
# Network definition: a `kie` model made of a backbone and a head.
model:
  type: kie
  transform: null
  backbone:
    name: layoutxlm
    pretrained: true
    # Anchor: the head below and the loss section alias this class count.
    num_classes: &num_classes 7
    use_visual_backbone: true
    use_float16: true
  head:
    name: TokenClassificationHead
    # Aliased to the backbone value so the two cannot drift apart.
    num_classes: *num_classes
    use_visual_backbone: true
    use_float16: true
  # Left unset (explicit null instead of a bare empty value).
  pretrained: null
# Post-processing applied to the model output.
postprocess:
  name: VQASerTokenLayoutLMPostProcess
  # Anchor: the VQATokenLabelEncode transforms in train/eval alias this path.
  class_path: &class_path path/to/class_list_xfun.txt
# Evaluation metric and the indicator reported as the main score.
metric:
  name: VQASerTokenMetric
  main_indicator: hmean
# Training loss.
loss:
  name: VQASerTokenLayoutLMLoss
  # Alias of the class count anchored at model.backbone.num_classes.
  num_classes: *num_classes
# Learning-rate schedule settings.
scheduler:
  scheduler: polynomial_decay
  lr: 5.0e-5
  min_lr: 2.0e-7
  num_epochs: 200
  warmup_epochs: 2
# Optimizer settings.
optimizer:
  opt: adam
  filter_bias_and_bn: false
  weight_decay: 0.0005
# Training stage: checkpoint directory, dataset/transform pipeline, loader.
train:
  ckpt_save_dir: './tmp_kie_ser'
  dataset_sink_mode: false
  dataset:
    type: KieDataset
    dataset_root: path/to/train_data/
    data_dir: XFUND/zh_train/image
    label_file: XFUND/zh_train/train.json
    sample_ratio: 1.0
    # Each list item is a single-key mapping: transform name -> its arguments.
    transform_pipeline:
      - DecodeImage:
          img_mode: RGB
          to_float32: false
      - VQATokenLabelEncode:
          contains_re: false
          # Anchor: the eval pipeline aliases this value.
          algorithm: &algorithm LayoutXLM
          class_path: *class_path
      - VQATokenPad:
          # Anchor: reused by VQASerTokenChunk here and by the eval pipeline.
          max_seq_len: &max_seq_len 512
          return_attention_mask: true
      - VQASerTokenChunk:
          max_seq_len: *max_seq_len
      - LayoutResize:
          size: [224, 224]
      - NormalizeImage:
          bgr_to_rgb: false
          is_hwc: true
          mean: imagenet
          std: imagenet
      # No arguments; explicit null instead of a bare empty value.
      - ToCHWImage: null
    # the order of the dataloader list, matching the network input and the
    # input labels for the loss function, and optional data for
    # debug/visualize
    output_columns:
      ['input_ids', 'bbox', 'attention_mask', 'token_type_ids', 'image', 'labels']
    # input indices for network forward func in output_columns
    net_input_column_index: [0, 1, 2, 3, 4]
    # input indices marked as label
    # (positions 2 and 5 in output_columns: 'attention_mask' and 'labels')
    label_column_index: [2, 5]
  loader:
    shuffle: true
    batch_size: 8
    drop_remainder: true
    num_workers: 8
# Evaluation stage: checkpoint to load, dataset/transform pipeline, loader.
eval:
  # Located inside the directory configured as train.ckpt_save_dir.
  ckpt_load_path: './tmp_kie_ser/best.ckpt'
  dataset_sink_mode: false
  dataset:
    type: KieDataset
    dataset_root: path/to/train_data/
    data_dir: XFUND/zh_val/image
    label_file: XFUND/zh_val/val.json
    sample_ratio: 1.0
    shuffle: false
    # Each list item is a single-key mapping: transform name -> its arguments.
    # `*algorithm`, `*class_path` and `*max_seq_len` alias anchors defined in
    # the postprocess and train sections.
    transform_pipeline:
      - DecodeImage:
          img_mode: RGB
          to_float32: false
      - VQATokenLabelEncode:
          contains_re: false
          algorithm: *algorithm
          class_path: *class_path
      - VQATokenPad:
          max_seq_len: *max_seq_len
          return_attention_mask: true
      - VQASerTokenChunk:
          max_seq_len: *max_seq_len
      - LayoutResize:
          size: [224, 224]
      - NormalizeImage:
          bgr_to_rgb: false
          is_hwc: true
          mean: imagenet
          std: imagenet
      # No arguments; explicit null instead of a bare empty value.
      - ToCHWImage: null
    # the order of the dataloader list, matching the network input and the
    # labels for evaluation
    output_columns:
      ['input_ids', 'bbox', 'attention_mask', 'token_type_ids', 'image', 'labels']
    # input indices for network forward func in output_columns
    net_input_column_index: [0, 1, 2, 3, 4]
    # input indices marked as label
    # (positions 2 and 5 in output_columns: 'attention_mask' and 'labels')
    label_column_index: [2, 5]
  loader:
    shuffle: false
    batch_size: 1
    drop_remainder: false
    num_workers: 1