RucocoAncor_rubertb_a150_s20_sw04.log
2022-07-08 11:12:18,849 - INFO - allennlp.common.params - random_seed = 13370
2022-07-08 11:12:18,849 - INFO - allennlp.common.params - numpy_seed = 1337
2022-07-08 11:12:18,849 - INFO - allennlp.common.params - pytorch_seed = 133
2022-07-08 11:12:18,850 - INFO - allennlp.common.checks - Pytorch version: 1.9.0+cu111
2022-07-08 11:12:18,850 - INFO - allennlp.common.params - type = default
2022-07-08 11:12:18,851 - INFO - allennlp.common.params - dataset_reader.type = rucoref
2022-07-08 11:12:18,851 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2022-07-08 11:12:18,851 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2022-07-08 11:12:18,851 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2022-07-08 11:12:18,851 - INFO - allennlp.common.params - dataset_reader.max_span_width = 20
2022-07-08 11:12:18,851 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.type = pretrained_transformer_mismatched
2022-07-08 11:12:18,851 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.token_min_padding_length = 0
2022-07-08 11:12:18,851 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.model_name = DeepPavlov/rubert-base-cased
2022-07-08 11:12:18,851 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.namespace = tags
2022-07-08 11:12:18,851 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.max_length = 128
2022-07-08 11:12:18,852 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.tokenizer_kwargs = None
2022-07-08 11:12:25,052 - INFO - allennlp.common.params - dataset_reader.wordpiece_modeling_tokenizer = None
2022-07-08 11:12:25,052 - INFO - allennlp.common.params - dataset_reader.max_sentences = None
2022-07-08 11:12:25,052 - INFO - allennlp.common.params - dataset_reader.remove_singleton_clusters = False
2022-07-08 11:12:25,052 - INFO - allennlp.common.params - train_data_path = /root/datacrunch_training/data/train.conll
2022-07-08 11:12:25,053 - INFO - allennlp.common.params - vocabulary = <allennlp.common.lazy.Lazy object at 0x7f4228708af0>
2022-07-08 11:12:25,053 - INFO - allennlp.common.params - datasets_for_vocab_creation = None
2022-07-08 11:12:25,053 - INFO - allennlp.common.params - validation_dataset_reader = None
2022-07-08 11:12:25,053 - INFO - allennlp.common.params - validation_data_path = /root/datacrunch_training/data/dev.conll
2022-07-08 11:12:25,053 - INFO - allennlp.common.params - validation_data_loader = None
2022-07-08 11:12:25,053 - INFO - allennlp.common.params - test_data_path = /root/datacrunch_training/data/test.conll
2022-07-08 11:12:25,053 - INFO - allennlp.common.params - evaluate_on_test = False
2022-07-08 11:12:25,053 - INFO - allennlp.common.params - batch_weight_key =
2022-07-08 11:12:25,053 - INFO - allennlp.common.params - data_loader.type = multiprocess
2022-07-08 11:12:25,054 - INFO - allennlp.common.params - data_loader.batch_size = None
2022-07-08 11:12:25,054 - INFO - allennlp.common.params - data_loader.drop_last = False
2022-07-08 11:12:25,054 - INFO - allennlp.common.params - data_loader.shuffle = False
2022-07-08 11:12:25,054 - INFO - allennlp.common.params - data_loader.batch_sampler.type = bucket
2022-07-08 11:12:25,054 - INFO - allennlp.common.params - data_loader.batch_sampler.batch_size = 1
2022-07-08 11:12:25,054 - INFO - allennlp.common.params - data_loader.batch_sampler.sorting_keys = ['text']
2022-07-08 11:12:25,054 - INFO - allennlp.common.params - data_loader.batch_sampler.padding_noise = 0
2022-07-08 11:12:25,054 - INFO - allennlp.common.params - data_loader.batch_sampler.drop_last = False
2022-07-08 11:12:25,054 - INFO - allennlp.common.params - data_loader.batches_per_epoch = None
2022-07-08 11:12:25,054 - INFO - allennlp.common.params - data_loader.num_workers = 0
2022-07-08 11:12:25,054 - INFO - allennlp.common.params - data_loader.max_instances_in_memory = None
2022-07-08 11:12:25,054 - INFO - allennlp.common.params - data_loader.start_method = fork
2022-07-08 11:12:25,055 - INFO - allennlp.common.params - data_loader.cuda_device = None
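The dataset_reader.* and data_loader.* parameters logged above come from the experiment's AllenNLP config: a custom "rucoref" reader with a max span width of 20 and a mismatched RuBERT wordpiece indexer, feeding a bucketing loader that puts one document per batch. Below is a minimal Python sketch, reconstructed from the logged values rather than taken from the original config file; the "rucoref" reader itself is project-specific and is only referenced by name here.

from allennlp.data.token_indexers import PretrainedTransformerMismatchedIndexer
from allennlp.data.samplers import BucketBatchSampler

# Indexer matching dataset_reader.token_indexers.tokens.* above: wordpieces from
# DeepPavlov/rubert-base-cased, folded back onto the original tokens, in segments of 128.
token_indexers = {
    "tokens": PretrainedTransformerMismatchedIndexer(
        model_name="DeepPavlov/rubert-base-cased",
        namespace="tags",
        max_length=128,
    )
}

# Sampler matching data_loader.batch_sampler.* above: one instance per batch,
# bucketed by the length of the "text" field, with no padding noise.
batch_sampler = BucketBatchSampler(batch_size=1, sorting_keys=["text"], padding_noise=0.0)

# The reader registered as "rucoref" is assumed to be defined in this repository;
# with it available, the loader logged above would be built roughly as:
#   MultiProcessDataLoader(reader, "/root/datacrunch_training/data/train.conll",
#                          batch_sampler=batch_sampler)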
2022-07-08 11:12:25,055 - INFO - tqdm - loading instances: 0it [00:00, ?it/s]
2022-07-08 11:12:35,139 - INFO - tqdm - loading instances: 546it [00:10, 38.78it/s]
2022-07-08 11:12:45,255 - INFO - tqdm - loading instances: 1165it [00:20, 56.93it/s]
2022-07-08 11:12:55,259 - INFO - tqdm - loading instances: 1740it [00:30, 29.88it/s]
2022-07-08 11:13:05,354 - INFO - tqdm - loading instances: 2400it [00:40, 81.77it/s]
2022-07-08 11:13:15,550 - INFO - tqdm - loading instances: 2834it [00:50, 33.46it/s]
2022-07-08 11:13:15,856 - INFO - allennlp.common.params - data_loader.type = multiprocess
2022-07-08 11:13:15,856 - INFO - allennlp.common.params - data_loader.batch_size = None
2022-07-08 11:13:15,856 - INFO - allennlp.common.params - data_loader.drop_last = False
2022-07-08 11:13:15,856 - INFO - allennlp.common.params - data_loader.shuffle = False
2022-07-08 11:13:15,856 - INFO - allennlp.common.params - data_loader.batch_sampler.type = bucket
2022-07-08 11:13:15,856 - INFO - allennlp.common.params - data_loader.batch_sampler.batch_size = 1
2022-07-08 11:13:15,856 - INFO - allennlp.common.params - data_loader.batch_sampler.sorting_keys = ['text']
2022-07-08 11:13:15,856 - INFO - allennlp.common.params - data_loader.batch_sampler.padding_noise = 0
2022-07-08 11:13:15,856 - INFO - allennlp.common.params - data_loader.batch_sampler.drop_last = False
2022-07-08 11:13:15,857 - INFO - allennlp.common.params - data_loader.batches_per_epoch = None
2022-07-08 11:13:15,857 - INFO - allennlp.common.params - data_loader.num_workers = 0
2022-07-08 11:13:15,857 - INFO - allennlp.common.params - data_loader.max_instances_in_memory = None
2022-07-08 11:13:15,857 - INFO - allennlp.common.params - data_loader.start_method = fork
2022-07-08 11:13:15,857 - INFO - allennlp.common.params - data_loader.cuda_device = None
2022-07-08 11:13:15,857 - INFO - tqdm - loading instances: 0it [00:00, ?it/s]
2022-07-08 11:13:23,678 - INFO - allennlp.common.params - data_loader.type = multiprocess
2022-07-08 11:13:23,678 - INFO - allennlp.common.params - data_loader.batch_size = None
2022-07-08 11:13:23,679 - INFO - allennlp.common.params - data_loader.drop_last = False
2022-07-08 11:13:23,679 - INFO - allennlp.common.params - data_loader.shuffle = False
2022-07-08 11:13:23,679 - INFO - allennlp.common.params - data_loader.batch_sampler.type = bucket
2022-07-08 11:13:23,679 - INFO - allennlp.common.params - data_loader.batch_sampler.batch_size = 1
2022-07-08 11:13:23,679 - INFO - allennlp.common.params - data_loader.batch_sampler.sorting_keys = ['text']
2022-07-08 11:13:23,679 - INFO - allennlp.common.params - data_loader.batch_sampler.padding_noise = 0
2022-07-08 11:13:23,679 - INFO - allennlp.common.params - data_loader.batch_sampler.drop_last = False
2022-07-08 11:13:23,679 - INFO - allennlp.common.params - data_loader.batches_per_epoch = None
2022-07-08 11:13:23,679 - INFO - allennlp.common.params - data_loader.num_workers = 0
2022-07-08 11:13:23,679 - INFO - allennlp.common.params - data_loader.max_instances_in_memory = None
2022-07-08 11:13:23,679 - INFO - allennlp.common.params - data_loader.start_method = fork
2022-07-08 11:13:23,679 - INFO - allennlp.common.params - data_loader.cuda_device = None
2022-07-08 11:13:23,680 - INFO - tqdm - loading instances: 0it [00:00, ?it/s]
2022-07-08 11:13:27,979 - INFO - allennlp.common.params - type = from_instances
2022-07-08 11:13:27,979 - INFO - allennlp.common.params - min_count = None
2022-07-08 11:13:27,979 - INFO - allennlp.common.params - max_vocab_size = None
2022-07-08 11:13:27,979 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2022-07-08 11:13:27,979 - INFO - allennlp.common.params - pretrained_files = None
2022-07-08 11:13:27,980 - INFO - allennlp.common.params - only_include_pretrained_words = False
2022-07-08 11:13:27,980 - INFO - allennlp.common.params - tokens_to_add = None
2022-07-08 11:13:27,980 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2022-07-08 11:13:27,980 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2022-07-08 11:13:27,980 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2022-07-08 11:13:27,980 - INFO - allennlp.data.vocabulary - Fitting token dictionary from dataset.
2022-07-08 11:13:27,980 - INFO - tqdm - building vocab: 0it [00:00, ?it/s]
2022-07-08 11:13:30,295 - INFO - allennlp.common.params - model.type = coref
2022-07-08 11:13:30,295 - INFO - allennlp.common.params - model.regularizer = None
2022-07-08 11:13:30,295 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2022-07-08 11:13:30,295 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = pretrained_transformer_mismatched
2022-07-08 11:13:30,295 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.model_name = DeepPavlov/rubert-base-cased
2022-07-08 11:13:30,296 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.max_length = 128
2022-07-08 11:13:30,296 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.train_parameters = True
2022-07-08 11:13:30,296 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.last_layer_only = True
2022-07-08 11:13:30,296 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.gradient_checkpointing = None
2022-07-08 11:13:30,296 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.tokenizer_kwargs = None
2022-07-08 11:13:30,296 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.transformer_kwargs = None
2022-07-08 11:13:33,834 - INFO - allennlp.common.params - model.context_layer.type = lstm
2022-07-08 11:13:33,835 - INFO - allennlp.common.params - model.context_layer.input_size = 768
2022-07-08 11:13:33,835 - INFO - allennlp.common.params - model.context_layer.hidden_size = 200
2022-07-08 11:13:33,835 - INFO - allennlp.common.params - model.context_layer.num_layers = 1
2022-07-08 11:13:33,835 - INFO - allennlp.common.params - model.context_layer.bias = True
2022-07-08 11:13:33,835 - INFO - allennlp.common.params - model.context_layer.dropout = 0.0
2022-07-08 11:13:33,835 - INFO - allennlp.common.params - model.context_layer.bidirectional = True
2022-07-08 11:13:33,836 - INFO - allennlp.common.params - model.context_layer.stateful = False
2022-07-08 11:13:33,853 - INFO - allennlp.common.params - model.mention_feedforward.input_dim = 1588
2022-07-08 11:13:33,853 - INFO - allennlp.common.params - model.mention_feedforward.num_layers = 2
2022-07-08 11:13:33,853 - INFO - allennlp.common.params - model.mention_feedforward.hidden_dims = 150
2022-07-08 11:13:33,853 - INFO - allennlp.common.params - model.mention_feedforward.activations = relu
2022-07-08 11:13:33,853 - INFO - allennlp.common.params - type = relu
2022-07-08 11:13:33,853 - INFO - allennlp.common.params - model.mention_feedforward.dropout = 0.2
2022-07-08 11:13:33,855 - INFO - allennlp.common.params - model.antecedent_feedforward.input_dim = 4784
2022-07-08 11:13:33,855 - INFO - allennlp.common.params - model.antecedent_feedforward.num_layers = 2
2022-07-08 11:13:33,855 - INFO - allennlp.common.params - model.antecedent_feedforward.hidden_dims = 150
2022-07-08 11:13:33,855 - INFO - allennlp.common.params - model.antecedent_feedforward.activations = relu
2022-07-08 11:13:33,855 - INFO - allennlp.common.params - type = relu
2022-07-08 11:13:33,855 - INFO - allennlp.common.params - model.antecedent_feedforward.dropout = 0.2
2022-07-08 11:13:33,860 - INFO - allennlp.common.params - model.feature_size = 20
2022-07-08 11:13:33,860 - INFO - allennlp.common.params - model.max_span_width = 20
2022-07-08 11:13:33,860 - INFO - allennlp.common.params - model.spans_per_word = 0.4
2022-07-08 11:13:33,860 - INFO - allennlp.common.params - model.max_antecedents = 150
2022-07-08 11:13:33,860 - INFO - allennlp.common.params - model.coarse_to_fine = False
2022-07-08 11:13:33,860 - INFO - allennlp.common.params - model.inference_order = 1
2022-07-08 11:13:33,860 - INFO - allennlp.common.params - model.lexical_dropout = 0.5
2022-07-08 11:13:33,860 - INFO - allennlp.common.params - model.initializer.regexes.0.1.type = xavier_normal
2022-07-08 11:13:33,861 - INFO - allennlp.common.params - model.initializer.regexes.0.1.gain = 1.0
2022-07-08 11:13:33,861 - INFO - allennlp.common.params - model.initializer.regexes.1.1.type = xavier_normal
2022-07-08 11:13:33,861 - INFO - allennlp.common.params - model.initializer.regexes.1.1.gain = 1.0
2022-07-08 11:13:33,861 - INFO - allennlp.common.params - model.initializer.regexes.2.1.type = xavier_normal
2022-07-08 11:13:33,861 - INFO - allennlp.common.params - model.initializer.regexes.2.1.gain = 1.0
2022-07-08 11:13:33,861 - INFO - allennlp.common.params - model.initializer.regexes.3.1.type = xavier_normal
2022-07-08 11:13:33,861 - INFO - allennlp.common.params - model.initializer.regexes.3.1.gain = 1.0
2022-07-08 11:13:33,862 - INFO - allennlp.common.params - model.initializer.regexes.4.1.type = xavier_normal
2022-07-08 11:13:33,862 - INFO - allennlp.common.params - model.initializer.regexes.4.1.gain = 1.0
2022-07-08 11:13:33,862 - INFO - allennlp.common.params - model.initializer.regexes.5.1.type = orthogonal
2022-07-08 11:13:33,862 - INFO - allennlp.common.params - model.initializer.regexes.5.1.gain = 1.0
2022-07-08 11:13:33,862 - INFO - allennlp.common.params - model.initializer.prevent_regexes = None
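The model.* block above is the span-ranking coreference resolver ("coref" in allennlp-models) over mismatched RuBERT embeddings, with a 200-unit BiLSTM context layer and two-layer ReLU feedforward scorers. A hedged Params sketch reconstructed from these logged values (not copied from the original config file) follows; the feedforward input sizes follow from the architecture: 1588 = 2·400 (BiLSTM endpoint representations) + 20 (span-width embedding) + 768 (attentive span over RuBERT), and 4784 = 3·1588 + 20 (antecedent-distance embedding).

from allennlp.common import Params

# Model section reconstructed from the model.* lines above (values copied verbatim).
model_params = Params({
    "type": "coref",
    "text_field_embedder": {
        "token_embedders": {
            "tokens": {
                "type": "pretrained_transformer_mismatched",
                "model_name": "DeepPavlov/rubert-base-cased",
                "max_length": 128,
            }
        }
    },
    "context_layer": {
        "type": "lstm",
        "input_size": 768,
        "hidden_size": 200,
        "num_layers": 1,
        "bidirectional": True,
    },
    "mention_feedforward": {"input_dim": 1588, "num_layers": 2, "hidden_dims": 150,
                            "activations": "relu", "dropout": 0.2},
    "antecedent_feedforward": {"input_dim": 4784, "num_layers": 2, "hidden_dims": 150,
                               "activations": "relu", "dropout": 0.2},
    "feature_size": 20,
    "max_span_width": 20,
    "spans_per_word": 0.4,
    "max_antecedents": 150,
    "coarse_to_fine": False,
    "inference_order": 1,
    "lexical_dropout": 0.5,
    # The initializer.regexes entries logged above (five xavier_normal, one
    # orthogonal) are omitted here for brevity.
})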
2022-07-08 11:13:33,863 - INFO - allennlp.nn.initializers - Initializing parameters
2022-07-08 11:13:33,870 - INFO - allennlp.nn.initializers - Initializing _context_layer._module.weight_ih_l0 using _context_layer._module.weight_ih.* initializer
2022-07-08 11:13:33,873 - INFO - allennlp.nn.initializers - Initializing _context_layer._module.weight_hh_l0 using _context_layer._module.weight_hh.* initializer
2022-07-08 11:13:34,133 - INFO - allennlp.nn.initializers - Initializing _context_layer._module.weight_ih_l0_reverse using _context_layer._module.weight_ih.* initializer
2022-07-08 11:13:34,137 - INFO - allennlp.nn.initializers - Initializing _context_layer._module.weight_hh_l0_reverse using _context_layer._module.weight_hh.* initializer
2022-07-08 11:13:34,176 - INFO - allennlp.nn.initializers - Initializing _mention_feedforward._module._linear_layers.0.weight using .*linear_layers.*weight initializer
2022-07-08 11:13:34,178 - INFO - allennlp.nn.initializers - Initializing _mention_feedforward._module._linear_layers.1.weight using .*linear_layers.*weight initializer
2022-07-08 11:13:34,179 - INFO - allennlp.nn.initializers - Initializing _mention_scorer._module.weight using .*scorer._module.weight initializer
2022-07-08 11:13:34,180 - INFO - allennlp.nn.initializers - Initializing _antecedent_feedforward._module._linear_layers.0.weight using .*linear_layers.*weight initializer
2022-07-08 11:13:34,186 - INFO - allennlp.nn.initializers - Initializing _antecedent_feedforward._module._linear_layers.1.weight using .*linear_layers.*weight initializer
2022-07-08 11:13:34,186 - INFO - allennlp.nn.initializers - Initializing _antecedent_scorer._module.weight using .*scorer._module.weight initializer
2022-07-08 11:13:34,188 - INFO - allennlp.nn.initializers - Initializing _endpoint_span_extractor._span_width_embedding.weight using _span_width_embedding.weight initializer
2022-07-08 11:13:34,188 - INFO - allennlp.nn.initializers - Initializing _distance_embedding.weight using _distance_embedding.weight initializer
2022-07-08 11:13:34,189 - INFO - allennlp.nn.initializers - Done initializing parameters; the following parameters are using their default initialization from their code
2022-07-08 11:13:34,189 - INFO - allennlp.nn.initializers - _antecedent_feedforward._module._linear_layers.0.bias
2022-07-08 11:13:34,189 - INFO - allennlp.nn.initializers - _antecedent_feedforward._module._linear_layers.1.bias
2022-07-08 11:13:34,189 - INFO - allennlp.nn.initializers - _antecedent_scorer._module.bias
2022-07-08 11:13:34,189 - INFO - allennlp.nn.initializers - _attentive_span_extractor._global_attention._module.bias
2022-07-08 11:13:34,189 - INFO - allennlp.nn.initializers - _attentive_span_extractor._global_attention._module.weight
2022-07-08 11:13:34,189 - INFO - allennlp.nn.initializers - _context_layer._module.bias_hh_l0
2022-07-08 11:13:34,189 - INFO - allennlp.nn.initializers - _context_layer._module.bias_hh_l0_reverse
2022-07-08 11:13:34,189 - INFO - allennlp.nn.initializers - _context_layer._module.bias_ih_l0
2022-07-08 11:13:34,189 - INFO - allennlp.nn.initializers - _context_layer._module.bias_ih_l0_reverse
2022-07-08 11:13:34,189 - INFO - allennlp.nn.initializers - _mention_feedforward._module._linear_layers.0.bias
2022-07-08 11:13:34,189 - INFO - allennlp.nn.initializers - _mention_feedforward._module._linear_layers.1.bias
2022-07-08 11:13:34,189 - INFO - allennlp.nn.initializers - _mention_scorer._module.bias
2022-07-08 11:13:34,189 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.embeddings.LayerNorm.bias
2022-07-08 11:13:34,189 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.embeddings.LayerNorm.weight
2022-07-08 11:13:34,189 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.embeddings.position_embeddings.weight
2022-07-08 11:13:34,189 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.embeddings.token_type_embeddings.weight
2022-07-08 11:13:34,189 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.embeddings.word_embeddings.weight
2022-07-08 11:13:34,189 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.bias
2022-07-08 11:13:34,189 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.weight
2022-07-08 11:13:34,189 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.bias
2022-07-08 11:13:34,189 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.weight
2022-07-08 11:13:34,189 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.bias
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.weight
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.bias
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.weight
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.bias
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.weight
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.bias
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.weight
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.bias
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.weight
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.output.dense.bias
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.output.dense.weight
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.bias
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.weight
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.bias
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.weight
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.bias
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.weight
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.bias
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.weight
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.bias
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.weight
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.bias
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.weight
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.bias
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.weight
2022-07-08 11:13:34,190 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.output.dense.bias
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.output.dense.weight
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.bias
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.weight
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.bias
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.weight
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.bias
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.weight
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.bias
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.weight
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.bias
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.weight
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.bias
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.weight
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.bias
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.weight
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.output.dense.bias
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.output.dense.weight
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.bias
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.weight
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.bias
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.weight
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.bias
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.weight
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.bias
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.weight
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.bias
2022-07-08 11:13:34,191 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.weight
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.bias
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.weight
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.bias
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.weight
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.output.dense.bias
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.output.dense.weight
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.bias
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.weight
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.bias
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.weight
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.bias
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.weight
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.bias
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.weight
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.bias
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.weight
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.bias
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.weight
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.bias
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.weight
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.output.dense.bias
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.output.dense.weight
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.bias
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.weight
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.bias
2022-07-08 11:13:34,192 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.weight
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.bias
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.weight
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.bias
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.weight
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.bias
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.weight
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.bias
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.weight
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.bias
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.weight
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.output.dense.bias
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.output.dense.weight
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.bias
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.weight
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.bias
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.weight
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.bias
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.weight
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.bias
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.weight
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.bias
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.weight
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.bias
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.weight
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.bias
2022-07-08 11:13:34,193 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.weight
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.output.dense.bias
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.output.dense.weight
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.bias
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.weight
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.bias
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.weight
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.bias
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.weight
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.bias
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.weight
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.bias
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.weight
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.bias
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.weight
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.bias
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.weight
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.output.dense.bias
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.output.dense.weight
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.bias
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.weight
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.bias
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.weight
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.bias
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.weight
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.bias
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.weight
2022-07-08 11:13:34,194 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.bias
2022-07-08 11:13:34,195 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.weight
2022-07-08 11:13:34,195 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.bias
2022-07-08 11:13:34,195 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.weight
2022-07-08 11:13:34,195 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.bias
2022-07-08 11:13:34,195 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.weight
2022-07-08 11:13:34,195 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.output.dense.bias
2022-07-08 11:13:34,195 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.output.dense.weight
2022-07-08 11:13:34,195 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.bias
2022-07-08 11:13:34,195 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.weight
2022-07-08 11:13:34,195 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.bias
2022-07-08 11:13:34,195 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.weight
2022-07-08 11:13:34,195 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.bias
2022-07-08 11:13:34,195 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.weight
2022-07-08 11:13:34,195 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.bias
2022-07-08 11:13:34,195 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.weight
2022-07-08 11:13:34,195 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.bias
2022-07-08 11:13:34,195 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.weight
2022-07-08 11:13:34,195 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.bias
2022-07-08 11:13:34,195 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.weight
2022-07-08 11:13:34,195 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.bias
2022-07-08 11:13:34,195 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.weight
2022-07-08 11:13:34,195 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.output.dense.bias
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.output.dense.weight
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.bias
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.weight
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.bias
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.weight
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.bias
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.weight
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.bias
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.weight
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.bias
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.weight
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.bias
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.weight
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.bias
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.weight
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.output.dense.bias
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.output.dense.weight
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.bias
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.weight
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.bias
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.weight
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.bias
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.weight
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.bias
2022-07-08 11:13:34,196 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.weight
2022-07-08 11:13:34,197 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.bias
2022-07-08 11:13:34,197 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.weight
2022-07-08 11:13:34,197 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.bias
2022-07-08 11:13:34,197 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.weight
2022-07-08 11:13:34,197 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.bias
2022-07-08 11:13:34,197 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.weight
2022-07-08 11:13:34,197 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.output.dense.bias
2022-07-08 11:13:34,197 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.output.dense.weight
2022-07-08 11:13:34,197 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.pooler.dense.bias
2022-07-08 11:13:34,197 - INFO - allennlp.nn.initializers - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.pooler.dense.weight
2022-07-08 11:14:48,207 - INFO - allennlp.common.params - trainer.type = gradient_descent
2022-07-08 11:14:48,208 - INFO - allennlp.common.params - trainer.patience = 10
2022-07-08 11:14:48,208 - INFO - allennlp.common.params - trainer.validation_metric = +coref_f1
2022-07-08 11:14:48,208 - INFO - allennlp.common.params - trainer.num_epochs = 150
2022-07-08 11:14:48,208 - INFO - allennlp.common.params - trainer.cuda_device = None
2022-07-08 11:14:48,208 - INFO - allennlp.common.params - trainer.grad_norm = 5
2022-07-08 11:14:48,208 - INFO - allennlp.common.params - trainer.grad_clipping = None
2022-07-08 11:14:48,208 - INFO - allennlp.common.params - trainer.distributed = False
2022-07-08 11:14:48,208 - INFO - allennlp.common.params - trainer.world_size = 1
2022-07-08 11:14:48,209 - INFO - allennlp.common.params - trainer.num_gradient_accumulation_steps = 1
2022-07-08 11:14:48,209 - INFO - allennlp.common.params - trainer.use_amp = False
2022-07-08 11:14:48,209 - INFO - allennlp.common.params - trainer.no_grad = None
2022-07-08 11:14:48,209 - INFO - allennlp.common.params - trainer.momentum_scheduler = None
2022-07-08 11:14:48,209 - INFO - allennlp.common.params - trainer.moving_average = None
2022-07-08 11:14:48,209 - INFO - allennlp.common.params - trainer.checkpointer = <allennlp.common.lazy.Lazy object at 0x7f42286e8a60>
2022-07-08 11:14:48,209 - INFO - allennlp.common.params - trainer.callbacks = None
2022-07-08 11:14:48,209 - INFO - allennlp.common.params - trainer.enable_default_callbacks = True
2022-07-08 11:14:51,366 - INFO - allennlp.common.params - trainer.optimizer.type = huggingface_adamw
2022-07-08 11:14:51,367 - INFO - allennlp.common.params - trainer.optimizer.lr = 0.001
2022-07-08 11:14:51,367 - INFO - allennlp.common.params - trainer.optimizer.betas = (0.9, 0.999)
2022-07-08 11:14:51,367 - INFO - allennlp.common.params - trainer.optimizer.eps = 1e-08
2022-07-08 11:14:51,367 - INFO - allennlp.common.params - trainer.optimizer.weight_decay = 0.01
2022-07-08 11:14:51,367 - INFO - allennlp.common.params - trainer.optimizer.correct_bias = True
2022-07-08 11:14:51,368 - INFO - allennlp.training.optimizers - Done constructing parameter groups.
2022-07-08 11:14:51,368 - INFO - allennlp.training.optimizers - Group 0: ['_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.output.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.output.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.output.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.output.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.weight', 
'_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.output.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.output.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.embeddings.position_embeddings.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.output.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.output.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.weight', 
'_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.pooler.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.embeddings.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.output.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.output.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.embeddings.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.output.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.output.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.output.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.bias', 
'_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.pooler.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.output.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.output.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.embeddings.word_embeddings.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.bias', 
'_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.output.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.output.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.bias', 
'_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.output.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.output.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.output.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.output.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.output.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.weight', 
'_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.embeddings.token_type_embeddings.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.output.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.output.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.bias', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.bias', 
'_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.weight', '_text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.bias'], {'lr': 1e-05}
2022-07-08 11:14:51,369 - INFO - allennlp.training.optimizers - Group 1: ['_context_layer._module.bias_ih_l0', '_context_layer._module.weight_ih_l0', '_mention_feedforward._module._linear_layers.1.bias', '_mention_feedforward._module._linear_layers.0.bias', '_mention_scorer._module.weight', '_mention_feedforward._module._linear_layers.1.weight', '_context_layer._module.weight_hh_l0', '_context_layer._module.bias_hh_l0', '_distance_embedding.weight', '_antecedent_scorer._module.weight', '_context_layer._module.weight_ih_l0_reverse', '_mention_feedforward._module._linear_layers.0.weight', '_antecedent_feedforward._module._linear_layers.1.bias', '_context_layer._module.weight_hh_l0_reverse', '_antecedent_feedforward._module._linear_layers.1.weight', '_context_layer._module.bias_ih_l0_reverse', '_antecedent_feedforward._module._linear_layers.0.bias', '_endpoint_span_extractor._span_width_embedding.weight', '_context_layer._module.bias_hh_l0_reverse', '_antecedent_feedforward._module._linear_layers.0.weight', '_antecedent_scorer._module.bias', '_attentive_span_extractor._global_attention._module.bias', '_attentive_span_extractor._global_attention._module.weight', '_mention_scorer._module.bias'], {}
2022-07-08 11:14:51,369 - INFO - allennlp.training.optimizers - Number of trainable parameters: 180408511
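For readability, the trainer settings echoed in the lines above correspond roughly to the following AllenNLP jsonnet fragment. This is a minimal sketch reconstructed from the logged values, not the actual experiment config; in particular, the parameter-group regex (".*transformer_model.*") is illustrative, since the log only records the resulting group memberships (Group 0: transformer weights at lr 1e-05; Group 1: the coreference head at the base lr), not the pattern that produced them.

    "trainer": {
      "type": "gradient_descent",          // logged trainer.type
      "num_epochs": 150,
      "patience": 10,
      "validation_metric": "+coref_f1",
      "grad_norm": 5.0,
      "optimizer": {
        "type": "huggingface_adamw",
        "lr": 1e-3,                        // base lr, applied to Group 1 (coref head)
        "betas": [0.9, 0.999],
        "eps": 1e-8,
        "weight_decay": 0.01,
        "correct_bias": true,
        // Assumed regex; routes all RuBERT encoder/embedding weights into Group 0 at a lower lr.
        "parameter_groups": [
          [[".*transformer_model.*"], {"lr": 1e-5}]
        ]
      }
    }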
2022-07-08 11:14:51,369 - INFO - allennlp.common.util - The following parameters are Frozen (without gradient):
2022-07-08 11:14:51,370 - INFO - allennlp.common.util - The following parameters are Tunable (with gradient):
2022-07-08 11:14:51,370 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.embeddings.word_embeddings.weight
2022-07-08 11:14:51,370 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.embeddings.position_embeddings.weight
2022-07-08 11:14:51,370 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.embeddings.token_type_embeddings.weight
2022-07-08 11:14:51,370 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.embeddings.LayerNorm.weight
2022-07-08 11:14:51,370 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.embeddings.LayerNorm.bias
2022-07-08 11:14:51,370 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.weight
2022-07-08 11:14:51,370 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.bias
2022-07-08 11:14:51,370 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.weight
2022-07-08 11:14:51,370 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.bias
2022-07-08 11:14:51,370 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.weight
2022-07-08 11:14:51,371 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.bias
2022-07-08 11:14:51,371 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.weight
2022-07-08 11:14:51,371 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.bias
2022-07-08 11:14:51,371 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.weight
2022-07-08 11:14:51,371 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.bias
2022-07-08 11:14:51,371 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.weight
2022-07-08 11:14:51,371 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.bias
2022-07-08 11:14:51,371 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.output.dense.weight
2022-07-08 11:14:51,371 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.output.dense.bias
2022-07-08 11:14:51,371 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.weight
2022-07-08 11:14:51,371 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.bias
2022-07-08 11:14:51,371 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.weight
2022-07-08 11:14:51,371 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.bias
2022-07-08 11:14:51,371 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.weight
2022-07-08 11:14:51,372 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.bias
2022-07-08 11:14:51,372 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.weight
2022-07-08 11:14:51,372 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.bias
2022-07-08 11:14:51,372 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.weight
2022-07-08 11:14:51,372 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.bias
2022-07-08 11:14:51,372 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.weight
2022-07-08 11:14:51,372 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.bias
2022-07-08 11:14:51,372 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.weight
2022-07-08 11:14:51,372 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.bias
2022-07-08 11:14:51,372 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.output.dense.weight
2022-07-08 11:14:51,372 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.output.dense.bias
2022-07-08 11:14:51,372 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.weight
2022-07-08 11:14:51,372 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.bias
2022-07-08 11:14:51,372 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.weight
2022-07-08 11:14:51,373 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.bias
2022-07-08 11:14:51,373 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.weight
2022-07-08 11:14:51,373 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.bias
2022-07-08 11:14:51,373 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.weight
2022-07-08 11:14:51,373 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.bias
2022-07-08 11:14:51,373 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.weight
2022-07-08 11:14:51,373 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.bias
2022-07-08 11:14:51,373 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.weight
2022-07-08 11:14:51,373 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.bias
2022-07-08 11:14:51,373 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.weight
2022-07-08 11:14:51,373 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.bias
2022-07-08 11:14:51,373 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.output.dense.weight
2022-07-08 11:14:51,373 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.output.dense.bias
2022-07-08 11:14:51,373 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.weight
2022-07-08 11:14:51,373 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.bias
2022-07-08 11:14:51,374 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.weight
2022-07-08 11:14:51,374 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.bias
2022-07-08 11:14:51,374 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.weight
2022-07-08 11:14:51,374 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.bias
2022-07-08 11:14:51,374 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.weight
2022-07-08 11:14:51,374 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.bias
2022-07-08 11:14:51,374 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.weight
2022-07-08 11:14:51,374 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.bias
2022-07-08 11:14:51,374 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.weight
2022-07-08 11:14:51,374 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.bias
2022-07-08 11:14:51,374 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.weight
2022-07-08 11:14:51,374 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.bias
2022-07-08 11:14:51,374 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.output.dense.weight
2022-07-08 11:14:51,374 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.output.dense.bias
2022-07-08 11:14:51,375 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.weight
2022-07-08 11:14:51,375 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.bias
2022-07-08 11:14:51,375 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.weight
2022-07-08 11:14:51,375 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.bias
2022-07-08 11:14:51,375 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.weight
2022-07-08 11:14:51,375 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.bias
2022-07-08 11:14:51,375 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.weight
2022-07-08 11:14:51,375 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.bias
2022-07-08 11:14:51,375 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.weight
2022-07-08 11:14:51,375 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.bias
2022-07-08 11:14:51,375 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.weight
2022-07-08 11:14:51,375 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.bias
2022-07-08 11:14:51,375 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.weight
2022-07-08 11:14:51,375 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.bias
2022-07-08 11:14:51,376 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.output.dense.weight
2022-07-08 11:14:51,376 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.output.dense.bias
2022-07-08 11:14:51,376 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.weight
2022-07-08 11:14:51,376 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.bias
2022-07-08 11:14:51,376 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.weight
2022-07-08 11:14:51,376 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.bias
2022-07-08 11:14:51,376 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.weight
2022-07-08 11:14:51,376 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.bias
2022-07-08 11:14:51,376 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.weight
2022-07-08 11:14:51,376 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.bias
2022-07-08 11:14:51,376 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.weight
2022-07-08 11:14:51,376 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.bias
2022-07-08 11:14:51,376 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.weight
2022-07-08 11:14:51,376 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.bias
2022-07-08 11:14:51,377 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.weight
2022-07-08 11:14:51,377 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.bias
2022-07-08 11:14:51,377 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.output.dense.weight
2022-07-08 11:14:51,377 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.output.dense.bias
2022-07-08 11:14:51,377 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.weight
2022-07-08 11:14:51,377 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.bias
2022-07-08 11:14:51,377 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.weight
2022-07-08 11:14:51,377 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.bias
2022-07-08 11:14:51,377 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.weight
2022-07-08 11:14:51,377 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.bias
2022-07-08 11:14:51,377 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.weight
2022-07-08 11:14:51,377 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.bias
2022-07-08 11:14:51,377 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.weight
2022-07-08 11:14:51,377 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.bias
2022-07-08 11:14:51,377 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.weight
2022-07-08 11:14:51,378 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.bias
2022-07-08 11:14:51,378 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.weight
2022-07-08 11:14:51,378 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.bias
2022-07-08 11:14:51,378 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.output.dense.weight
2022-07-08 11:14:51,378 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.output.dense.bias
2022-07-08 11:14:51,378 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.weight
2022-07-08 11:14:51,378 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.bias
2022-07-08 11:14:51,378 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.weight
2022-07-08 11:14:51,378 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.bias
2022-07-08 11:14:51,378 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.weight
2022-07-08 11:14:51,378 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.bias
2022-07-08 11:14:51,378 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.weight
2022-07-08 11:14:51,378 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.bias
2022-07-08 11:14:51,378 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.weight
2022-07-08 11:14:51,378 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.bias
2022-07-08 11:14:51,379 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.weight
2022-07-08 11:14:51,379 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.bias
2022-07-08 11:14:51,379 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.weight
2022-07-08 11:14:51,379 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.bias
2022-07-08 11:14:51,379 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.output.dense.weight
2022-07-08 11:14:51,379 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.output.dense.bias
2022-07-08 11:14:51,379 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.weight
2022-07-08 11:14:51,379 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.bias
2022-07-08 11:14:51,379 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.weight
2022-07-08 11:14:51,379 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.bias
2022-07-08 11:14:51,379 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.weight
2022-07-08 11:14:51,379 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.bias
2022-07-08 11:14:51,379 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.weight
2022-07-08 11:14:51,379 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.bias
2022-07-08 11:14:51,380 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.weight
2022-07-08 11:14:51,380 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.bias
2022-07-08 11:14:51,380 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.weight
2022-07-08 11:14:51,380 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.bias
2022-07-08 11:14:51,380 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.weight
2022-07-08 11:14:51,380 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.bias
2022-07-08 11:14:51,380 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.output.dense.weight
2022-07-08 11:14:51,380 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.output.dense.bias
2022-07-08 11:14:51,380 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.weight
2022-07-08 11:14:51,380 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.bias
2022-07-08 11:14:51,380 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.weight
2022-07-08 11:14:51,380 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.bias
2022-07-08 11:14:51,380 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.weight
2022-07-08 11:14:51,380 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.bias
2022-07-08 11:14:51,380 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.weight
2022-07-08 11:14:51,381 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.bias
2022-07-08 11:14:51,381 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.weight
2022-07-08 11:14:51,381 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.bias
2022-07-08 11:14:51,381 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.weight
2022-07-08 11:14:51,381 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.bias
2022-07-08 11:14:51,381 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.weight
2022-07-08 11:14:51,381 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.bias
2022-07-08 11:14:51,381 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.output.dense.weight
2022-07-08 11:14:51,381 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.output.dense.bias
2022-07-08 11:14:51,381 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.weight
2022-07-08 11:14:51,381 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.bias
2022-07-08 11:14:51,381 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.weight
2022-07-08 11:14:51,381 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.bias
2022-07-08 11:14:51,381 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.weight
2022-07-08 11:14:51,381 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.bias
2022-07-08 11:14:51,382 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.weight
2022-07-08 11:14:51,382 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.bias
2022-07-08 11:14:51,382 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.weight
2022-07-08 11:14:51,382 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.bias
2022-07-08 11:14:51,382 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.weight
2022-07-08 11:14:51,382 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.bias
2022-07-08 11:14:51,382 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.weight
2022-07-08 11:14:51,382 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.bias
2022-07-08 11:14:51,382 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.output.dense.weight
2022-07-08 11:14:51,382 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.output.dense.bias
2022-07-08 11:14:51,382 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.weight
2022-07-08 11:14:51,382 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.bias
2022-07-08 11:14:51,382 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.weight
2022-07-08 11:14:51,382 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.bias
2022-07-08 11:14:51,383 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.weight
2022-07-08 11:14:51,383 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.bias
2022-07-08 11:14:51,383 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.weight
2022-07-08 11:14:51,383 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.bias
2022-07-08 11:14:51,383 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.weight
2022-07-08 11:14:51,383 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.bias
2022-07-08 11:14:51,383 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.weight
2022-07-08 11:14:51,383 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.bias
2022-07-08 11:14:51,383 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.weight
2022-07-08 11:14:51,383 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.bias
2022-07-08 11:14:51,383 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.output.dense.weight
2022-07-08 11:14:51,383 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.output.dense.bias
2022-07-08 11:14:51,383 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.weight
2022-07-08 11:14:51,383 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.bias
2022-07-08 11:14:51,383 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.pooler.dense.weight
2022-07-08 11:14:51,384 - INFO - allennlp.common.util - _text_field_embedder.token_embedder_tokens._matched_embedder.transformer_model.pooler.dense.bias
2022-07-08 11:14:51,384 - INFO - allennlp.common.util - _context_layer._module.weight_ih_l0
2022-07-08 11:14:51,384 - INFO - allennlp.common.util - _context_layer._module.weight_hh_l0
2022-07-08 11:14:51,384 - INFO - allennlp.common.util - _context_layer._module.bias_ih_l0
2022-07-08 11:14:51,384 - INFO - allennlp.common.util - _context_layer._module.bias_hh_l0
2022-07-08 11:14:51,384 - INFO - allennlp.common.util - _context_layer._module.weight_ih_l0_reverse
2022-07-08 11:14:51,384 - INFO - allennlp.common.util - _context_layer._module.weight_hh_l0_reverse
2022-07-08 11:14:51,384 - INFO - allennlp.common.util - _context_layer._module.bias_ih_l0_reverse
2022-07-08 11:14:51,384 - INFO - allennlp.common.util - _context_layer._module.bias_hh_l0_reverse
2022-07-08 11:14:51,384 - INFO - allennlp.common.util - _mention_feedforward._module._linear_layers.0.weight
2022-07-08 11:14:51,384 - INFO - allennlp.common.util - _mention_feedforward._module._linear_layers.0.bias
2022-07-08 11:14:51,384 - INFO - allennlp.common.util - _mention_feedforward._module._linear_layers.1.weight
2022-07-08 11:14:51,384 - INFO - allennlp.common.util - _mention_feedforward._module._linear_layers.1.bias
2022-07-08 11:14:51,384 - INFO - allennlp.common.util - _mention_scorer._module.weight
2022-07-08 11:14:51,385 - INFO - allennlp.common.util - _mention_scorer._module.bias
2022-07-08 11:14:51,385 - INFO - allennlp.common.util - _antecedent_feedforward._module._linear_layers.0.weight
2022-07-08 11:14:51,385 - INFO - allennlp.common.util - _antecedent_feedforward._module._linear_layers.0.bias
2022-07-08 11:14:51,385 - INFO - allennlp.common.util - _antecedent_feedforward._module._linear_layers.1.weight
2022-07-08 11:14:51,385 - INFO - allennlp.common.util - _antecedent_feedforward._module._linear_layers.1.bias
2022-07-08 11:14:51,385 - INFO - allennlp.common.util - _antecedent_scorer._module.weight
2022-07-08 11:14:51,385 - INFO - allennlp.common.util - _antecedent_scorer._module.bias
2022-07-08 11:14:51,385 - INFO - allennlp.common.util - _endpoint_span_extractor._span_width_embedding.weight
2022-07-08 11:14:51,385 - INFO - allennlp.common.util - _attentive_span_extractor._global_attention._module.weight
2022-07-08 11:14:51,385 - INFO - allennlp.common.util - _attentive_span_extractor._global_attention._module.bias
2022-07-08 11:14:51,385 - INFO - allennlp.common.util - _distance_embedding.weight
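[editor's note] The block above is the trainer's dump of every trainable parameter name, from the RuBERT transformer layers down to the coreference-specific heads (_context_layer, _mention_feedforward, _antecedent_scorer, span extractors, _distance_embedding). A minimal sketch of how such a listing is produced for any PyTorch module via named_parameters(); the tiny model below is a hypothetical stand-in, not the coreference model from this run.

    # Sketch: enumerate parameter names the way the log above lists them.
    # The model here is only an illustrative stand-in.
    import torch.nn as nn

    model = nn.Sequential(
        nn.Linear(768, 150),   # e.g. one feedforward scorer layer
        nn.ReLU(),
        nn.Linear(150, 1),
    )

    for name, param in model.named_parameters():
        print(name, tuple(param.shape), "requires_grad =", param.requires_grad)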
2022-07-08 11:14:51,385 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.type = reduce_on_plateau
2022-07-08 11:14:51,386 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.mode = max
2022-07-08 11:14:51,386 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.factor = 0.5
2022-07-08 11:14:51,386 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.patience = 2
2022-07-08 11:14:51,386 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.verbose = False
2022-07-08 11:14:51,386 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.threshold_mode = rel
2022-07-08 11:14:51,386 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.threshold = 0.0001
2022-07-08 11:14:51,386 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.cooldown = 0
2022-07-08 11:14:51,386 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.min_lr = 0
2022-07-08 11:14:51,386 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.eps = 1e-08
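[editor's note] The scheduler fields logged above mirror the arguments of torch.optim.lr_scheduler.ReduceLROnPlateau, which AllenNLP's "reduce_on_plateau" scheduler is built on. A minimal sketch with those exact values; the optimizer and its parameters are stand-ins, and the metric passed to step() is assumed to be the tracked validation score (coref_f1, since mode = max).

    # Sketch: the reduce_on_plateau settings from this run, expressed directly
    # as a torch ReduceLROnPlateau. Optimizer/parameters are placeholders.
    import torch

    params = [torch.nn.Parameter(torch.zeros(1))]     # hypothetical parameters
    optimizer = torch.optim.Adam(params, lr=1e-3)     # stand-in optimizer

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode="max",           # maximize the validation metric
        factor=0.5,           # halve the learning rate on a plateau
        patience=2,           # wait 2 epochs without improvement
        threshold=1e-4,
        threshold_mode="rel",
        cooldown=0,
        min_lr=0,
        eps=1e-8,
    )

    # After each validation epoch the tracked metric would be passed in:
    scheduler.step(0.726)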
2022-07-08 11:14:51,386 - INFO - allennlp.common.params - type = default
2022-07-08 11:14:51,387 - INFO - allennlp.common.params - keep_serialized_model_every_num_seconds = None
2022-07-08 11:14:51,387 - INFO - allennlp.common.params - num_serialized_models_to_keep = 2
2022-07-08 11:14:51,387 - INFO - allennlp.common.params - model_save_interval = None
2022-07-08 11:14:51,388 - INFO - allennlp.training.trainer - Beginning training.
2022-07-08 11:14:51,388 - INFO - allennlp.training.trainer - Epoch 0/149
2022-07-08 11:14:51,388 - INFO - allennlp.training.trainer - Worker 0 memory usage: 9.4G
2022-07-08 11:14:51,388 - INFO - allennlp.training.trainer - GPU 0 memory usage: 691M
2022-07-08 11:14:51,389 - INFO - allennlp.training.trainer - Training
2022-07-08 11:14:51,389 - INFO - tqdm - 0%| | 0/2851 [00:00<?, ?it/s]
2022-07-08 11:15:01,457 - INFO - tqdm - coref_precision: 0.0138, coref_recall: 0.0007, coref_f1: 0.0009, mention_recall: 0.1887, batch_loss: 113.1446, loss: 127.5528 ||: 2%|2 | 60/2851 [00:10<05:29, 8.46it/s]
2022-07-08 11:15:11,570 - INFO - tqdm - coref_precision: 0.3271, coref_recall: 0.0255, coref_f1: 0.0468, mention_recall: 0.4633, batch_loss: 241.9008, loss: 119.3468 ||: 5%|4 | 138/2851 [00:20<05:59, 7.55it/s]
2022-07-08 11:15:21,682 - INFO - tqdm - coref_precision: 0.4627, coref_recall: 0.0730, coref_f1: 0.1254, mention_recall: 0.5842, batch_loss: 92.9696, loss: 109.9484 ||: 8%|7 | 215/2851 [00:30<05:05, 8.62it/s]
2022-07-08 11:15:31,908 - INFO - tqdm - coref_precision: 0.5339, coref_recall: 0.1214, coref_f1: 0.1970, mention_recall: 0.6699, batch_loss: 152.5146, loss: 101.4454 ||: 10%|# | 296/2851 [00:40<05:42, 7.46it/s]
2022-07-08 11:15:42,017 - INFO - tqdm - coref_precision: 0.5643, coref_recall: 0.1631, coref_f1: 0.2521, mention_recall: 0.7204, batch_loss: 62.8987, loss: 95.8449 ||: 13%|#3 | 371/2851 [00:50<05:19, 7.77it/s]
2022-07-08 11:15:52,129 - INFO - tqdm - coref_precision: 0.5974, coref_recall: 0.2067, coref_f1: 0.3063, mention_recall: 0.7620, batch_loss: 60.4733, loss: 92.0225 ||: 16%|#5 | 450/2851 [01:00<05:22, 7.45it/s]
2022-07-08 11:16:02,277 - INFO - tqdm - coref_precision: 0.6037, coref_recall: 0.2384, coref_f1: 0.3411, mention_recall: 0.7882, batch_loss: 83.0853, loss: 91.0793 ||: 18%|#7 | 511/2851 [01:10<08:13, 4.74it/s]
2022-07-08 11:16:12,359 - INFO - tqdm - coref_precision: 0.6158, coref_recall: 0.2637, coref_f1: 0.3687, mention_recall: 0.8072, batch_loss: 42.0125, loss: 86.9116 ||: 21%|## | 585/2851 [01:20<04:45, 7.94it/s]
2022-07-08 11:16:22,605 - INFO - tqdm - coref_precision: 0.6346, coref_recall: 0.2918, coref_f1: 0.3992, mention_recall: 0.8250, batch_loss: 357.8776, loss: 83.1297 ||: 23%|##3 | 663/2851 [01:31<07:52, 4.63it/s]
2022-07-08 11:16:32,608 - INFO - tqdm - coref_precision: 0.6422, coref_recall: 0.3081, coref_f1: 0.4159, mention_recall: 0.8349, batch_loss: 14.2933, loss: 80.4165 ||: 25%|##5 | 720/2851 [01:41<03:29, 10.15it/s]
2022-07-08 11:16:42,833 - INFO - tqdm - coref_precision: 0.6537, coref_recall: 0.3294, coref_f1: 0.4376, mention_recall: 0.8477, batch_loss: 79.3069, loss: 77.5762 ||: 28%|##7 | 798/2851 [01:51<05:41, 6.00it/s]
2022-07-08 11:16:52,859 - INFO - tqdm - coref_precision: 0.6624, coref_recall: 0.3467, coref_f1: 0.4547, mention_recall: 0.8572, batch_loss: 19.1554, loss: 74.6864 ||: 31%|### | 879/2851 [02:01<04:08, 7.93it/s]
2022-07-08 11:17:03,014 - INFO - tqdm - coref_precision: 0.6712, coref_recall: 0.3641, coref_f1: 0.4716, mention_recall: 0.8669, batch_loss: 32.5517, loss: 72.9589 ||: 33%|###3 | 951/2851 [02:11<04:39, 6.79it/s]
2022-07-08 11:17:13,069 - INFO - tqdm - coref_precision: 0.6796, coref_recall: 0.3801, coref_f1: 0.4871, mention_recall: 0.8742, batch_loss: 27.9380, loss: 70.8214 ||: 36%|###6 | 1030/2851 [02:21<04:00, 7.58it/s]
2022-07-08 11:17:23,228 - INFO - tqdm - coref_precision: 0.6842, coref_recall: 0.3917, coref_f1: 0.4978, mention_recall: 0.8798, batch_loss: 54.5995, loss: 68.3663 ||: 39%|###9 | 1118/2851 [02:31<03:28, 8.33it/s]
2022-07-08 11:17:33,249 - INFO - tqdm - coref_precision: 0.6881, coref_recall: 0.4008, coref_f1: 0.5062, mention_recall: 0.8848, batch_loss: 9.7995, loss: 67.5499 ||: 42%|####1 | 1191/2851 [02:41<03:49, 7.24it/s]
2022-07-08 11:17:43,537 - INFO - tqdm - coref_precision: 0.6937, coref_recall: 0.4115, coref_f1: 0.5162, mention_recall: 0.8902, batch_loss: 74.3690, loss: 66.3811 ||: 45%|####4 | 1269/2851 [02:52<04:51, 5.43it/s]
2022-07-08 11:17:53,594 - INFO - tqdm - coref_precision: 0.6983, coref_recall: 0.4220, coref_f1: 0.5257, mention_recall: 0.8943, batch_loss: 94.9507, loss: 65.0060 ||: 47%|####7 | 1347/2851 [03:02<03:43, 6.74it/s]
2022-07-08 11:18:03,623 - INFO - tqdm - coref_precision: 0.6999, coref_recall: 0.4303, coref_f1: 0.5326, mention_recall: 0.8990, batch_loss: 9.7258, loss: 64.5151 ||: 50%|####9 | 1418/2851 [03:12<02:53, 8.25it/s]
2022-07-08 11:18:13,778 - INFO - tqdm - coref_precision: 0.7021, coref_recall: 0.4370, coref_f1: 0.5384, mention_recall: 0.9023, batch_loss: 62.9414, loss: 64.1805 ||: 52%|#####1 | 1481/2851 [03:22<02:45, 8.27it/s]
2022-07-08 11:18:24,055 - INFO - tqdm - coref_precision: 0.7026, coref_recall: 0.4403, coref_f1: 0.5411, mention_recall: 0.9049, batch_loss: 845.9071, loss: 63.7275 ||: 55%|#####4 | 1554/2851 [03:32<05:31, 3.91it/s]
2022-07-08 11:18:34,162 - INFO - tqdm - coref_precision: 0.7067, coref_recall: 0.4471, coref_f1: 0.5474, mention_recall: 0.9075, batch_loss: 28.6537, loss: 62.4144 ||: 57%|#####7 | 1637/2851 [03:42<02:09, 9.38it/s]
2022-07-08 11:18:44,166 - INFO - tqdm - coref_precision: 0.7105, coref_recall: 0.4556, coref_f1: 0.5549, mention_recall: 0.9106, batch_loss: 50.8262, loss: 61.4662 ||: 60%|###### | 1714/2851 [03:52<02:11, 8.63it/s]
2022-07-08 11:18:54,466 - INFO - tqdm - coref_precision: 0.7092, coref_recall: 0.4602, coref_f1: 0.5579, mention_recall: 0.9128, batch_loss: 138.7786, loss: 61.5905 ||: 62%|######2 | 1773/2851 [04:03<02:33, 7.03it/s]
2022-07-08 11:19:04,474 - INFO - tqdm - coref_precision: 0.7114, coref_recall: 0.4646, coref_f1: 0.5618, mention_recall: 0.9143, batch_loss: 14.0020, loss: 61.0368 ||: 65%|######4 | 1851/2851 [04:13<01:59, 8.34it/s]
2022-07-08 11:19:14,484 - INFO - tqdm - coref_precision: 0.7142, coref_recall: 0.4701, coref_f1: 0.5667, mention_recall: 0.9165, batch_loss: 2.1111, loss: 60.0509 ||: 68%|######7 | 1932/2851 [04:23<01:33, 9.86it/s]
2022-07-08 11:19:24,744 - INFO - tqdm - coref_precision: 0.7153, coref_recall: 0.4738, coref_f1: 0.5698, mention_recall: 0.9184, batch_loss: 112.2957, loss: 59.7888 ||: 70%|####### | 2003/2851 [04:33<02:26, 5.77it/s]
2022-07-08 11:19:34,819 - INFO - tqdm - coref_precision: 0.7161, coref_recall: 0.4764, coref_f1: 0.5719, mention_recall: 0.9195, batch_loss: 27.2171, loss: 59.0403 ||: 73%|#######3 | 2082/2851 [04:43<01:45, 7.28it/s]
2022-07-08 11:19:44,884 - INFO - tqdm - coref_precision: 0.7187, coref_recall: 0.4809, coref_f1: 0.5759, mention_recall: 0.9210, batch_loss: 43.4294, loss: 58.7142 ||: 75%|#######5 | 2150/2851 [04:53<01:38, 7.14it/s]
2022-07-08 11:19:54,896 - INFO - tqdm - coref_precision: 0.7207, coref_recall: 0.4855, coref_f1: 0.5799, mention_recall: 0.9227, batch_loss: 34.8253, loss: 58.3983 ||: 78%|#######7 | 2222/2851 [05:03<01:25, 7.39it/s]
2022-07-08 11:20:04,927 - INFO - tqdm - coref_precision: 0.7215, coref_recall: 0.4892, coref_f1: 0.5828, mention_recall: 0.9238, batch_loss: 36.3739, loss: 57.9260 ||: 80%|######## | 2293/2851 [05:13<01:17, 7.24it/s]
2022-07-08 11:20:14,992 - INFO - tqdm - coref_precision: 0.7229, coref_recall: 0.4927, coref_f1: 0.5858, mention_recall: 0.9251, batch_loss: 23.9618, loss: 57.3928 ||: 83%|########3 | 2370/2851 [05:23<00:53, 8.91it/s]
2022-07-08 11:20:25,087 - INFO - tqdm - coref_precision: 0.7240, coref_recall: 0.4953, coref_f1: 0.5880, mention_recall: 0.9265, batch_loss: 45.0046, loss: 56.8132 ||: 86%|########5 | 2447/2851 [05:33<00:50, 8.02it/s]
2022-07-08 11:20:35,190 - INFO - tqdm - coref_precision: 0.7249, coref_recall: 0.4986, coref_f1: 0.5906, mention_recall: 0.9278, batch_loss: 11.7281, loss: 56.3373 ||: 89%|########8 | 2524/2851 [05:43<00:36, 8.90it/s]
2022-07-08 11:20:45,228 - INFO - tqdm - coref_precision: 0.7269, coref_recall: 0.5030, coref_f1: 0.5943, mention_recall: 0.9292, batch_loss: 39.7941, loss: 55.7422 ||: 91%|#########1| 2602/2851 [05:53<00:41, 6.06it/s]
2022-07-08 11:20:55,409 - INFO - tqdm - coref_precision: 0.7280, coref_recall: 0.5063, coref_f1: 0.5970, mention_recall: 0.9303, batch_loss: 97.5591, loss: 55.2749 ||: 94%|#########4| 2681/2851 [06:04<00:23, 7.24it/s]
2022-07-08 11:21:05,455 - INFO - tqdm - coref_precision: 0.7293, coref_recall: 0.5089, coref_f1: 0.5993, mention_recall: 0.9313, batch_loss: 25.4749, loss: 54.9314 ||: 97%|#########6| 2758/2851 [06:14<00:10, 8.51it/s]
2022-07-08 11:21:15,046 - INFO - tqdm - coref_precision: 0.7315, coref_recall: 0.5132, coref_f1: 0.6030, mention_recall: 0.9325, batch_loss: 15.0383, loss: 54.2836 ||: 100%|#########9| 2838/2851 [06:23<00:01, 9.51it/s]
2022-07-08 11:21:15,228 - INFO - tqdm - coref_precision: 0.7316, coref_recall: 0.5132, coref_f1: 0.6030, mention_recall: 0.9326, batch_loss: 5.5134, loss: 54.2508 ||: 100%|#########9| 2840/2851 [06:23<00:01, 9.96it/s]
2022-07-08 11:21:15,384 - INFO - tqdm - coref_precision: 0.7316, coref_recall: 0.5133, coref_f1: 0.6031, mention_recall: 0.9326, batch_loss: 70.7066, loss: 54.2566 ||: 100%|#########9| 2841/2851 [06:23<00:01, 8.98it/s]
2022-07-08 11:21:15,545 - INFO - tqdm - coref_precision: 0.7316, coref_recall: 0.5134, coref_f1: 0.6031, mention_recall: 0.9326, batch_loss: 32.6996, loss: 54.2490 ||: 100%|#########9| 2842/2851 [06:24<00:01, 8.17it/s]
2022-07-08 11:21:15,715 - INFO - tqdm - coref_precision: 0.7316, coref_recall: 0.5133, coref_f1: 0.6031, mention_recall: 0.9326, batch_loss: 44.7230, loss: 54.2335 ||: 100%|#########9| 2844/2851 [06:24<00:00, 9.27it/s]
2022-07-08 11:21:15,894 - INFO - tqdm - coref_precision: 0.7317, coref_recall: 0.5135, coref_f1: 0.6032, mention_recall: 0.9326, batch_loss: 48.8563, loss: 54.2316 ||: 100%|#########9| 2845/2851 [06:24<00:00, 8.11it/s]
2022-07-08 11:21:16,124 - INFO - tqdm - coref_precision: 0.7317, coref_recall: 0.5135, coref_f1: 0.6033, mention_recall: 0.9327, batch_loss: 62.4913, loss: 54.2182 ||: 100%|#########9| 2847/2851 [06:24<00:00, 8.33it/s]
2022-07-08 11:21:16,262 - INFO - tqdm - coref_precision: 0.7317, coref_recall: 0.5135, coref_f1: 0.6033, mention_recall: 0.9327, batch_loss: 66.4132, loss: 54.2225 ||: 100%|#########9| 2848/2851 [06:24<00:00, 8.06it/s]
2022-07-08 11:21:16,377 - INFO - tqdm - coref_precision: 0.7317, coref_recall: 0.5135, coref_f1: 0.6033, mention_recall: 0.9327, batch_loss: 68.1624, loss: 54.2273 ||: 100%|#########9| 2849/2851 [06:24<00:00, 8.21it/s]
2022-07-08 11:21:16,544 - INFO - tqdm - coref_precision: 0.7318, coref_recall: 0.5136, coref_f1: 0.6033, mention_recall: 0.9327, batch_loss: 8.1653, loss: 54.2011 ||: 100%|##########| 2851/2851 [06:25<00:00, 9.40it/s]
2022-07-08 11:21:16,545 - INFO - tqdm - coref_precision: 0.7318, coref_recall: 0.5136, coref_f1: 0.6033, mention_recall: 0.9327, batch_loss: 8.1653, loss: 54.2011 ||: 100%|##########| 2851/2851 [06:25<00:00, 7.40it/s]
2022-07-08 11:21:17,422 - INFO - allennlp.training.trainer - Validating
2022-07-08 11:21:17,423 - INFO - tqdm - 0%| | 0/397 [00:00<?, ?it/s]
2022-07-08 11:21:27,460 - INFO - tqdm - coref_precision: 0.7312, coref_recall: 0.6916, coref_f1: 0.7106, mention_recall: 0.9741, batch_loss: 26.1741, loss: 43.0083 ||: 37%|###6 | 145/397 [00:10<00:09, 25.99it/s]
2022-07-08 11:21:37,516 - INFO - tqdm - coref_precision: 0.7439, coref_recall: 0.7121, coref_f1: 0.7273, mention_recall: 0.9777, batch_loss: 28.9936, loss: 38.4336 ||: 91%|######### | 361/397 [00:20<00:01, 19.80it/s]
2022-07-08 11:21:39,180 - INFO - tqdm - coref_precision: 0.7419, coref_recall: 0.7102, coref_f1: 0.7255, mention_recall: 0.9770, batch_loss: 49.2966, loss: 38.9107 ||: 100%|#########9| 396/397 [00:21<00:00, 15.03it/s]
2022-07-08 11:21:39,196 - INFO - tqdm - coref_precision: 0.7419, coref_recall: 0.7102, coref_f1: 0.7255, mention_recall: 0.9770, batch_loss: 1.7542, loss: 38.8171 ||: 100%|##########| 397/397 [00:21<00:00, 18.23it/s]
2022-07-08 11:21:40,920 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'resulting_models/best.th'.
2022-07-08 11:21:41,235 - INFO - allennlp.training.callbacks.console_logger - Training | Validation
2022-07-08 11:21:41,235 - INFO - allennlp.training.callbacks.console_logger - coref_f1 | 0.603 | 0.726
2022-07-08 11:21:41,235 - INFO - allennlp.training.callbacks.console_logger - coref_precision | 0.732 | 0.742
2022-07-08 11:21:41,235 - INFO - allennlp.training.callbacks.console_logger - coref_recall | 0.514 | 0.710
2022-07-08 11:21:41,235 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 690.568 | N/A
2022-07-08 11:21:41,235 - INFO - allennlp.training.callbacks.console_logger - loss | 54.201 | 38.817
2022-07-08 11:21:41,235 - INFO - allennlp.training.callbacks.console_logger - mention_recall | 0.933 | 0.977
2022-07-08 11:21:41,235 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 9626.746 | N/A
2022-07-08 11:21:41,235 - INFO - allennlp.training.trainer - Epoch duration: 0:06:49.847021
2022-07-08 11:21:41,235 - INFO - allennlp.training.trainer - Estimated training time remaining: 16:57:47
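[editor's note] Quick sanity check on the epoch 0 summary above. In AllenNLP's coreference model the coref_* numbers come from the CoNLL scorer (an average over MUC, B-cubed and CEAF), so coref_f1 is not computed directly from the displayed precision/recall, but it should stay close to their harmonic mean. A rough check with the values copied from the table above:

    # Rough check: harmonic mean of the logged precision/recall vs. coref_f1.
    def harmonic_mean(p, r):
        return 2 * p * r / (p + r) if (p + r) else 0.0

    print("train f1 ~", round(harmonic_mean(0.732, 0.514), 3))  # compare with logged 0.603
    print("valid f1 ~", round(harmonic_mean(0.742, 0.710), 3))  # compare with logged 0.726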
2022-07-08 11:21:41,235 - INFO - allennlp.training.trainer - Epoch 1/149
2022-07-08 11:21:41,235 - INFO - allennlp.training.trainer - Worker 0 memory usage: 9.4G
2022-07-08 11:21:41,236 - INFO - allennlp.training.trainer - GPU 0 memory usage: 37G
2022-07-08 11:21:41,237 - INFO - allennlp.training.trainer - Training
2022-07-08 11:21:41,237 - INFO - tqdm - 0%| | 0/2851 [00:00<?, ?it/s]
2022-07-08 11:21:51,417 - INFO - tqdm - coref_precision: 0.7874, coref_recall: 0.6202, coref_f1: 0.6937, mention_recall: 0.9690, batch_loss: 66.5801, loss: 45.2540 ||: 2%|2 | 67/2851 [00:10<07:59, 5.81it/s]
2022-07-08 11:22:01,486 - INFO - tqdm - coref_precision: 0.7812, coref_recall: 0.6222, coref_f1: 0.6925, mention_recall: 0.9711, batch_loss: 40.6481, loss: 42.6784 ||: 5%|4 | 142/2851 [00:20<05:48, 7.78it/s]
2022-07-08 11:22:11,521 - INFO - tqdm - coref_precision: 0.7813, coref_recall: 0.6277, coref_f1: 0.6960, mention_recall: 0.9707, batch_loss: 50.0850, loss: 42.7402 ||: 7%|7 | 209/2851 [00:30<05:14, 8.41it/s]
2022-07-08 11:22:22,515 - INFO - tqdm - coref_precision: 0.7774, coref_recall: 0.6327, coref_f1: 0.6975, mention_recall: 0.9723, batch_loss: 239.5259, loss: 44.6640 ||: 10%|9 | 276/2851 [00:41<16:06, 2.66it/s]
2022-07-08 11:22:32,581 - INFO - tqdm - coref_precision: 0.7810, coref_recall: 0.6406, coref_f1: 0.7038, mention_recall: 0.9741, batch_loss: 80.0517, loss: 43.4288 ||: 12%|#2 | 350/2851 [00:51<06:04, 6.87it/s]
2022-07-08 11:22:42,659 - INFO - tqdm - coref_precision: 0.7838, coref_recall: 0.6483, coref_f1: 0.7095, mention_recall: 0.9740, batch_loss: 16.3223, loss: 40.5315 ||: 15%|#5 | 431/2851 [01:01<05:49, 6.92it/s]
2022-07-08 11:22:52,768 - INFO - tqdm - coref_precision: 0.7807, coref_recall: 0.6444, coref_f1: 0.7059, mention_recall: 0.9712, batch_loss: 61.5703, loss: 41.4229 ||: 18%|#7 | 504/2851 [01:11<05:01, 7.79it/s]
2022-07-08 11:23:02,848 - INFO - tqdm - coref_precision: 0.7843, coref_recall: 0.6481, coref_f1: 0.7096, mention_recall: 0.9724, batch_loss: 35.6306, loss: 40.6857 ||: 20%|## | 580/2851 [01:21<04:40, 8.09it/s]
2022-07-08 11:23:12,896 - INFO - tqdm - coref_precision: 0.7874, coref_recall: 0.6533, coref_f1: 0.7140, mention_recall: 0.9730, batch_loss: 7.4603, loss: 39.6420 ||: 23%|##3 | 661/2851 [01:31<04:38, 7.86it/s]
2022-07-08 11:23:22,944 - INFO - tqdm - coref_precision: 0.7892, coref_recall: 0.6553, coref_f1: 0.7160, mention_recall: 0.9730, batch_loss: 11.5554, loss: 38.9002 ||: 26%|##5 | 736/2851 [01:41<06:23, 5.52it/s]
2022-07-08 11:23:33,092 - INFO - tqdm - coref_precision: 0.7904, coref_recall: 0.6578, coref_f1: 0.7179, mention_recall: 0.9732, batch_loss: 24.3121, loss: 38.1302 ||: 29%|##8 | 815/2851 [01:51<03:37, 9.35it/s]
2022-07-08 11:23:43,265 - INFO - tqdm - coref_precision: 0.7934, coref_recall: 0.6594, coref_f1: 0.7201, mention_recall: 0.9725, batch_loss: 65.2845, loss: 36.9706 ||: 32%|###1 | 903/2851 [02:02<04:04, 7.96it/s]
2022-07-08 11:23:53,327 - INFO - tqdm - coref_precision: 0.7949, coref_recall: 0.6624, coref_f1: 0.7225, mention_recall: 0.9730, batch_loss: 19.0830, loss: 37.0296 ||: 34%|###4 | 972/2851 [02:12<03:50, 8.16it/s]
2022-07-08 11:24:03,441 - INFO - tqdm - coref_precision: 0.7937, coref_recall: 0.6622, coref_f1: 0.7219, mention_recall: 0.9737, batch_loss: 14.4340, loss: 37.3286 ||: 37%|###6 | 1044/2851 [02:22<04:15, 7.08it/s]
2022-07-08 11:24:13,466 - INFO - tqdm - coref_precision: 0.7945, coref_recall: 0.6633, coref_f1: 0.7229, mention_recall: 0.9736, batch_loss: 22.8258, loss: 36.8770 ||: 39%|###9 | 1120/2851 [02:32<04:15, 6.78it/s]
2022-07-08 11:24:23,685 - INFO - tqdm - coref_precision: 0.7927, coref_recall: 0.6634, coref_f1: 0.7222, mention_recall: 0.9737, batch_loss: 31.6780, loss: 36.7886 ||: 42%|####1 | 1196/2851 [02:42<03:47, 7.27it/s]
2022-07-08 11:24:33,773 - INFO - tqdm - coref_precision: 0.7936, coref_recall: 0.6622, coref_f1: 0.7219, mention_recall: 0.9739, batch_loss: 96.7174, loss: 37.0934 ||: 45%|####4 | 1270/2851 [02:52<04:04, 6.46it/s]
2022-07-08 11:24:43,810 - INFO - tqdm - coref_precision: 0.7941, coref_recall: 0.6629, coref_f1: 0.7225, mention_recall: 0.9738, batch_loss: 16.8027, loss: 36.5455 ||: 47%|####7 | 1354/2851 [03:02<02:35, 9.65it/s]
2022-07-08 11:24:53,926 - INFO - tqdm - coref_precision: 0.7937, coref_recall: 0.6613, coref_f1: 0.7214, mention_recall: 0.9730, batch_loss: 53.4845, loss: 36.4789 ||: 50%|##### | 1429/2851 [03:12<03:33, 6.67it/s]
2022-07-08 11:25:03,970 - INFO - tqdm - coref_precision: 0.7939, coref_recall: 0.6616, coref_f1: 0.7216, mention_recall: 0.9732, batch_loss: 18.6237, loss: 36.3148 ||: 53%|#####2 | 1511/2851 [03:22<02:29, 8.97it/s]
2022-07-08 11:25:14,026 - INFO - tqdm - coref_precision: 0.7945, coref_recall: 0.6626, coref_f1: 0.7225, mention_recall: 0.9731, batch_loss: 159.6927, loss: 36.3251 ||: 56%|#####5 | 1586/2851 [03:32<03:16, 6.44it/s]
2022-07-08 11:25:24,056 - INFO - tqdm - coref_precision: 0.7946, coref_recall: 0.6634, coref_f1: 0.7230, mention_recall: 0.9734, batch_loss: 41.0816, loss: 36.0823 ||: 59%|#####8 | 1670/2851 [03:42<02:56, 6.69it/s]
2022-07-08 11:25:34,117 - INFO - tqdm - coref_precision: 0.7963, coref_recall: 0.6650, coref_f1: 0.7246, mention_recall: 0.9740, batch_loss: 46.2213, loss: 36.0344 ||: 61%|######1 | 1747/2851 [03:52<02:12, 8.31it/s]
2022-07-08 11:25:44,353 - INFO - tqdm - coref_precision: 0.7960, coref_recall: 0.6627, coref_f1: 0.7231, mention_recall: 0.9742, batch_loss: 152.9556, loss: 36.4484 ||: 64%|######3 | 1820/2851 [04:03<02:38, 6.52it/s]
2022-07-08 11:25:55,466 - INFO - tqdm - coref_precision: 0.7964, coref_recall: 0.6633, coref_f1: 0.7237, mention_recall: 0.9741, batch_loss: 21.9010, loss: 36.2524 ||: 66%|######6 | 1894/2851 [04:14<11:29, 1.39it/s]
2022-07-08 11:26:06,250 - INFO - tqdm - coref_precision: 0.7959, coref_recall: 0.6627, coref_f1: 0.7231, mention_recall: 0.9741, batch_loss: 987.4395, loss: 36.6352 ||: 69%|######8 | 1967/2851 [04:25<06:28, 2.27it/s]
2022-07-08 11:26:16,279 - INFO - tqdm - coref_precision: 0.7964, coref_recall: 0.6631, coref_f1: 0.7236, mention_recall: 0.9742, batch_loss: 37.1745, loss: 36.4240 ||: 72%|#######1 | 2047/2851 [04:35<01:37, 8.25it/s]
2022-07-08 11:26:26,426 - INFO - tqdm - coref_precision: 0.7966, coref_recall: 0.6642, coref_f1: 0.7243, mention_recall: 0.9742, batch_loss: 18.0038, loss: 36.2528 ||: 75%|#######4 | 2129/2851 [04:45<01:13, 9.88it/s]
2022-07-08 11:26:36,555 - INFO - tqdm - coref_precision: 0.7974, coref_recall: 0.6657, coref_f1: 0.7255, mention_recall: 0.9746, batch_loss: 42.0794, loss: 35.9715 ||: 78%|#######7 | 2211/2851 [04:55<01:08, 9.35it/s]
2022-07-08 11:26:46,611 - INFO - tqdm - coref_precision: 0.7976, coref_recall: 0.6656, coref_f1: 0.7255, mention_recall: 0.9747, batch_loss: 6.4616, loss: 35.9938 ||: 80%|######## | 2283/2851 [05:05<01:01, 9.26it/s]
2022-07-08 11:26:56,743 - INFO - tqdm - coref_precision: 0.7957, coref_recall: 0.6652, coref_f1: 0.7245, mention_recall: 0.9743, batch_loss: 24.2400, loss: 36.3083 ||: 82%|########2 | 2346/2851 [05:15<00:55, 9.04it/s]
2022-07-08 11:27:06,825 - INFO - tqdm - coref_precision: 0.7955, coref_recall: 0.6652, coref_f1: 0.7244, mention_recall: 0.9742, batch_loss: 23.7093, loss: 36.2595 ||: 85%|########5 | 2425/2851 [05:25<00:55, 7.66it/s]
2022-07-08 11:27:16,846 - INFO - tqdm - coref_precision: 0.7953, coref_recall: 0.6653, coref_f1: 0.7244, mention_recall: 0.9742, batch_loss: 2.2523, loss: 36.2668 ||: 88%|########7 | 2502/2851 [05:35<00:35, 9.87it/s]
2022-07-08 11:27:26,868 - INFO - tqdm - coref_precision: 0.7957, coref_recall: 0.6657, coref_f1: 0.7248, mention_recall: 0.9743, batch_loss: 26.5574, loss: 36.2241 ||: 90%|######### | 2578/2851 [05:45<00:32, 8.42it/s]
2022-07-08 11:27:36,958 - INFO - tqdm - coref_precision: 0.7962, coref_recall: 0.6667, coref_f1: 0.7256, mention_recall: 0.9746, batch_loss: 32.1787, loss: 36.0257 ||: 93%|#########3| 2661/2851 [05:55<00:24, 7.68it/s]
2022-07-08 11:27:47,102 - INFO - tqdm - coref_precision: 0.7960, coref_recall: 0.6666, coref_f1: 0.7255, mention_recall: 0.9744, batch_loss: 47.4201, loss: 36.0996 ||: 96%|#########6| 2737/2851 [06:05<00:14, 7.86it/s]
2022-07-08 11:27:57,629 - INFO - tqdm - coref_precision: 0.7959, coref_recall: 0.6656, coref_f1: 0.7248, mention_recall: 0.9742, batch_loss: 245.7632, loss: 36.0548 ||: 99%|#########8| 2818/2851 [06:16<00:07, 4.15it/s]
2022-07-08 11:28:00,299 - INFO - tqdm - coref_precision: 0.7960, coref_recall: 0.6656, coref_f1: 0.7249, mention_recall: 0.9742, batch_loss: 18.8091, loss: 36.0302 ||: 100%|#########9| 2837/2851 [06:19<00:01, 7.19it/s]
2022-07-08 11:28:00,534 - INFO - tqdm - coref_precision: 0.7960, coref_recall: 0.6656, coref_f1: 0.7249, mention_recall: 0.9742, batch_loss: 37.9720, loss: 36.0206 ||: 100%|#########9| 2839/2851 [06:19<00:01, 7.60it/s]
2022-07-08 11:28:00,717 - INFO - tqdm - coref_precision: 0.7959, coref_recall: 0.6657, coref_f1: 0.7249, mention_recall: 0.9743, batch_loss: 62.7544, loss: 36.0300 ||: 100%|#########9| 2840/2851 [06:19<00:01, 7.05it/s]
2022-07-08 11:28:00,902 - INFO - tqdm - coref_precision: 0.7959, coref_recall: 0.6657, coref_f1: 0.7249, mention_recall: 0.9743, batch_loss: 17.6564, loss: 36.0230 ||: 100%|#########9| 2842/2851 [06:19<00:01, 8.07it/s]
2022-07-08 11:28:01,002 - INFO - tqdm - coref_precision: 0.7959, coref_recall: 0.6658, coref_f1: 0.7249, mention_recall: 0.9743, batch_loss: 28.1669, loss: 36.0202 ||: 100%|#########9| 2843/2851 [06:19<00:00, 8.40it/s]
2022-07-08 11:28:01,218 - INFO - tqdm - coref_precision: 0.7959, coref_recall: 0.6659, coref_f1: 0.7250, mention_recall: 0.9743, batch_loss: 35.2668, loss: 36.0100 ||: 100%|#########9| 2845/2851 [06:19<00:00, 8.70it/s]
2022-07-08 11:28:01,683 - INFO - tqdm - coref_precision: 0.7959, coref_recall: 0.6658, coref_f1: 0.7250, mention_recall: 0.9743, batch_loss: 215.0549, loss: 36.0729 ||: 100%|#########9| 2846/2851 [06:20<00:00, 5.32it/s]
2022-07-08 11:28:01,849 - INFO - tqdm - coref_precision: 0.7958, coref_recall: 0.6658, coref_f1: 0.7249, mention_recall: 0.9743, batch_loss: 54.6312, loss: 36.0794 ||: 100%|#########9| 2847/2851 [06:20<00:00, 5.46it/s]
2022-07-08 11:28:02,045 - INFO - tqdm - coref_precision: 0.7958, coref_recall: 0.6657, coref_f1: 0.7249, mention_recall: 0.9743, batch_loss: 24.6623, loss: 36.0654 ||: 100%|#########9| 2849/2851 [06:20<00:00, 6.70it/s]
2022-07-08 11:28:02,274 - INFO - tqdm - coref_precision: 0.7958, coref_recall: 0.6658, coref_f1: 0.7249, mention_recall: 0.9743, batch_loss: 79.5006, loss: 36.0806 ||: 100%|#########9| 2850/2851 [06:21<00:00, 5.99it/s]
2022-07-08 11:28:02,360 - INFO - tqdm - coref_precision: 0.7958, coref_recall: 0.6658, coref_f1: 0.7249, mention_recall: 0.9743, batch_loss: 9.8562, loss: 36.0714 ||: 100%|##########| 2851/2851 [06:21<00:00, 7.48it/s]
2022-07-08 11:28:03,231 - INFO - allennlp.training.trainer - Validating
2022-07-08 11:28:03,232 - INFO - tqdm - 0%| | 0/397 [00:00<?, ?it/s]
2022-07-08 11:28:13,353 - INFO - tqdm - coref_precision: 0.8061, coref_recall: 0.6925, coref_f1: 0.7449, mention_recall: 0.9748, batch_loss: 8.1655, loss: 30.9510 ||: 55%|#####5 | 219/397 [00:10<00:09, 19.46it/s]
2022-07-08 11:28:22,580 - INFO - tqdm - coref_precision: 0.7974, coref_recall: 0.6926, coref_f1: 0.7412, mention_recall: 0.9745, batch_loss: 9.8057, loss: 32.9178 ||: 100%|##########| 397/397 [00:19<00:00, 26.33it/s]
2022-07-08 11:28:22,580 - INFO - tqdm - coref_precision: 0.7974, coref_recall: 0.6926, coref_f1: 0.7412, mention_recall: 0.9745, batch_loss: 9.8057, loss: 32.9178 ||: 100%|##########| 397/397 [00:19<00:00, 20.52it/s]
2022-07-08 11:28:24,303 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'resulting_models/best.th'.
2022-07-08 11:28:25,327 - INFO - allennlp.training.callbacks.console_logger - Training | Validation
2022-07-08 11:28:25,327 - INFO - allennlp.training.callbacks.console_logger - coref_f1 | 0.725 | 0.741
2022-07-08 11:28:25,327 - INFO - allennlp.training.callbacks.console_logger - coref_precision | 0.796 | 0.797
2022-07-08 11:28:25,327 - INFO - allennlp.training.callbacks.console_logger - coref_recall | 0.666 | 0.693
2022-07-08 11:28:25,327 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 37455.492 | N/A
2022-07-08 11:28:25,327 - INFO - allennlp.training.callbacks.console_logger - loss | 36.071 | 32.918
2022-07-08 11:28:25,327 - INFO - allennlp.training.callbacks.console_logger - mention_recall | 0.974 | 0.974
2022-07-08 11:28:25,328 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 9626.746 | N/A
2022-07-08 11:28:25,328 - INFO - allennlp.training.trainer - Epoch duration: 0:06:44.092280
2022-07-08 11:28:25,328 - INFO - allennlp.training.trainer - Estimated training time remaining: 16:43:51
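[editor's note] The "Estimated training time remaining" values are consistent with a simple extrapolation: average epoch duration so far times the number of epochs left (148 of 150 after epoch 1). A small check under that assumption, using the two epoch durations logged above:

    # Check of the ETA after epoch 1, assuming
    # remaining = mean epoch duration * epochs left.
    import datetime

    epoch_durations = [409.85, 404.09]   # epochs 0 and 1, seconds (from the log)
    epochs_total, epochs_done = 150, 2
    remaining = sum(epoch_durations) / epochs_done * (epochs_total - epochs_done)
    print(datetime.timedelta(seconds=int(remaining)))   # ~16:43:51, cf. the logged estimate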
2022-07-08 11:28:25,328 - INFO - allennlp.training.trainer - Epoch 2/149
2022-07-08 11:28:25,328 - INFO - allennlp.training.trainer - Worker 0 memory usage: 9.4G
2022-07-08 11:28:25,329 - INFO - allennlp.training.trainer - GPU 0 memory usage: 37G
2022-07-08 11:28:25,330 - INFO - allennlp.training.trainer - Training
2022-07-08 11:28:25,330 - INFO - tqdm - 0%| | 0/2851 [00:00<?, ?it/s]
2022-07-08 11:28:35,367 - INFO - tqdm - coref_precision: 0.8286, coref_recall: 0.7067, coref_f1: 0.7627, mention_recall: 0.9848, batch_loss: 40.0873, loss: 27.1533 ||: 3%|2 | 81/2851 [00:10<05:29, 8.40it/s]
2022-07-08 11:28:45,426 - INFO - tqdm - coref_precision: 0.8316, coref_recall: 0.7161, coref_f1: 0.7695, mention_recall: 0.9808, batch_loss: 82.2419, loss: 27.6101 ||: 5%|5 | 147/2851 [00:20<15:11, 2.97it/s]
2022-07-08 11:28:55,462 - INFO - tqdm - coref_precision: 0.8310, coref_recall: 0.7124, coref_f1: 0.7670, mention_recall: 0.9799, batch_loss: 88.4922, loss: 27.4961 ||: 8%|7 | 222/2851 [00:30<07:59, 5.49it/s]
2022-07-08 11:29:05,636 - INFO - tqdm - coref_precision: 0.8288, coref_recall: 0.7120, coref_f1: 0.7658, mention_recall: 0.9803, batch_loss: 57.6760, loss: 26.5870 ||: 11%|# | 307/2851 [00:40<05:07, 8.27it/s]
2022-07-08 11:29:15,656 - INFO - tqdm - coref_precision: 0.8332, coref_recall: 0.7222, coref_f1: 0.7736, mention_recall: 0.9817, batch_loss: 64.7184, loss: 26.0443 ||: 14%|#3 | 386/2851 [00:50<05:50, 7.03it/s]
2022-07-08 11:29:25,781 - INFO - tqdm - coref_precision: 0.8328, coref_recall: 0.7270, coref_f1: 0.7762, mention_recall: 0.9818, batch_loss: 84.4399, loss: 26.1610 ||: 16%|#6 | 464/2851 [01:00<05:57, 6.68it/s]
2022-07-08 11:29:35,817 - INFO - tqdm - coref_precision: 0.8330, coref_recall: 0.7252, coref_f1: 0.7753, mention_recall: 0.9810, batch_loss: 26.3558, loss: 26.6756 ||: 19%|#8 | 537/2851 [01:10<05:33, 6.93it/s]
2022-07-08 11:29:45,967 - INFO - tqdm - coref_precision: 0.8327, coref_recall: 0.7227, coref_f1: 0.7737, mention_recall: 0.9811, batch_loss: 6.1265, loss: 27.1412 ||: 22%|##1 | 617/2851 [01:20<06:18, 5.90it/s]
2022-07-08 11:29:56,035 - INFO - tqdm - coref_precision: 0.8324, coref_recall: 0.7211, coref_f1: 0.7727, mention_recall: 0.9807, batch_loss: 27.2064, loss: 27.1216 ||: 24%|##4 | 697/2851 [01:30<04:01, 8.91it/s]
2022-07-08 11:30:06,146 - INFO - tqdm - coref_precision: 0.8314, coref_recall: 0.7182, coref_f1: 0.7706, mention_recall: 0.9804, batch_loss: 9.1089, loss: 27.5905 ||: 27%|##7 | 770/2851 [01:40<03:45, 9.24it/s]
2022-07-08 11:30:16,192 - INFO - tqdm - coref_precision: 0.8317, coref_recall: 0.7199, coref_f1: 0.7717, mention_recall: 0.9810, batch_loss: 3.3660, loss: 27.2830 ||: 29%|##9 | 833/2851 [01:50<13:05, 2.57it/s]
2022-07-08 11:30:26,221 - INFO - tqdm - coref_precision: 0.8305, coref_recall: 0.7157, coref_f1: 0.7687, mention_recall: 0.9792, batch_loss: 14.4528, loss: 26.9976 ||: 32%|###2 | 915/2851 [02:00<03:43, 8.67it/s]
2022-07-08 11:30:36,360 - INFO - tqdm - coref_precision: 0.8298, coref_recall: 0.7155, coref_f1: 0.7683, mention_recall: 0.9789, batch_loss: 23.6229, loss: 26.9149 ||: 35%|###4 | 996/2851 [02:11<04:07, 7.49it/s]
2022-07-08 11:30:46,424 - INFO - tqdm - coref_precision: 0.8303, coref_recall: 0.7160, coref_f1: 0.7688, mention_recall: 0.9789, batch_loss: 99.7042, loss: 26.7496 ||: 38%|###7 | 1080/2851 [02:21<04:12, 7.02it/s]
2022-07-08 11:30:56,608 - INFO - tqdm - coref_precision: 0.8286, coref_recall: 0.7140, coref_f1: 0.7669, mention_recall: 0.9788, batch_loss: 55.6358, loss: 27.3162 ||: 41%|#### | 1157/2851 [02:31<03:37, 7.81it/s]
2022-07-08 11:31:06,621 - INFO - tqdm - coref_precision: 0.8291, coref_recall: 0.7141, coref_f1: 0.7672, mention_recall: 0.9791, batch_loss: 38.5025, loss: 27.4666 ||: 43%|####3 | 1236/2851 [02:41<03:10, 8.47it/s]
2022-07-08 11:31:16,688 - INFO - tqdm - coref_precision: 0.8266, coref_recall: 0.7121, coref_f1: 0.7650, mention_recall: 0.9791, batch_loss: 94.2816, loss: 27.8721 ||: 46%|####5 | 1308/2851 [02:51<03:30, 7.33it/s]
2022-07-08 11:31:26,886 - INFO - tqdm - coref_precision: 0.8270, coref_recall: 0.7125, coref_f1: 0.7654, mention_recall: 0.9795, batch_loss: 27.4306, loss: 28.1574 ||: 48%|####8 | 1376/2851 [03:01<02:55, 8.39it/s]
2022-07-08 11:31:37,222 - INFO - tqdm - coref_precision: 0.8278, coref_recall: 0.7149, coref_f1: 0.7671, mention_recall: 0.9799, batch_loss: 85.2460, loss: 27.8449 ||: 51%|#####1 | 1459/2851 [03:11<03:13, 7.19it/s]
2022-07-08 11:31:47,277 - INFO - tqdm - coref_precision: 0.8266, coref_recall: 0.7151, coref_f1: 0.7667, mention_recall: 0.9799, batch_loss: 139.3119, loss: 28.1835 ||: 54%|#####3 | 1526/2851 [03:21<03:35, 6.14it/s]
2022-07-08 11:31:57,385 - INFO - tqdm - coref_precision: 0.8267, coref_recall: 0.7160, coref_f1: 0.7672, mention_recall: 0.9801, batch_loss: 17.0196, loss: 28.1376 ||: 56%|#####6 | 1605/2851 [03:32<02:38, 7.88it/s]
2022-07-08 11:32:07,519 - INFO - tqdm - coref_precision: 0.8261, coref_recall: 0.7156, coref_f1: 0.7667, mention_recall: 0.9801, batch_loss: 60.3528, loss: 28.1189 ||: 59%|#####9 | 1687/2851 [03:42<02:37, 7.41it/s]
2022-07-08 11:32:17,555 - INFO - tqdm - coref_precision: 0.8272, coref_recall: 0.7166, coref_f1: 0.7678, mention_recall: 0.9803, batch_loss: 11.1002, loss: 27.9964 ||: 62%|######1 | 1761/2851 [03:52<02:20, 7.73it/s]
2022-07-08 11:32:27,982 - INFO - tqdm - coref_precision: 0.8262, coref_recall: 0.7143, coref_f1: 0.7660, mention_recall: 0.9799, batch_loss: 278.2693, loss: 28.3165 ||: 64%|######4 | 1836/2851 [04:02<04:52, 3.47it/s]
2022-07-08 11:32:38,080 - INFO - tqdm - coref_precision: 0.8264, coref_recall: 0.7151, coref_f1: 0.7666, mention_recall: 0.9800, batch_loss: 44.8001, loss: 28.2183 ||: 67%|######7 | 1921/2851 [04:12<01:43, 8.96it/s]
2022-07-08 11:32:48,173 - INFO - tqdm - coref_precision: 0.8259, coref_recall: 0.7136, coref_f1: 0.7655, mention_recall: 0.9796, batch_loss: 99.0387, loss: 28.3592 ||: 70%|####### | 1996/2851 [04:22<01:54, 7.49it/s]
2022-07-08 11:32:58,188 - INFO - tqdm - coref_precision: 0.8261, coref_recall: 0.7117, coref_f1: 0.7645, mention_recall: 0.9793, batch_loss: 21.1375, loss: 28.7156 ||: 72%|#######2 | 2060/2851 [04:32<01:45, 7.50it/s]
2022-07-08 11:33:08,254 - INFO - tqdm - coref_precision: 0.8259, coref_recall: 0.7105, coref_f1: 0.7637, mention_recall: 0.9793, batch_loss: 6.9384, loss: 29.0344 ||: 75%|#######4 | 2127/2851 [04:42<01:59, 6.06it/s]
2022-07-08 11:33:18,353 - INFO - tqdm - coref_precision: 0.8259, coref_recall: 0.7093, coref_f1: 0.7630, mention_recall: 0.9790, batch_loss: 16.9151, loss: 29.1998 ||: 77%|#######7 | 2199/2851 [04:53<01:04, 10.03it/s]
2022-07-08 11:33:28,521 - INFO - tqdm - coref_precision: 0.8251, coref_recall: 0.7076, coref_f1: 0.7617, mention_recall: 0.9786, batch_loss: 41.9815, loss: 29.4347 ||: 80%|#######9 | 2270/2851 [05:03<01:54, 5.09it/s]
2022-07-08 11:33:38,657 - INFO - tqdm - coref_precision: 0.8256, coref_recall: 0.7089, coref_f1: 0.7626, mention_recall: 0.9786, batch_loss: 9.3741, loss: 29.3202 ||: 83%|########2 | 2353/2851 [05:13<00:54, 9.21it/s]
2022-07-08 11:33:48,818 - INFO - tqdm - coref_precision: 0.8259, coref_recall: 0.7088, coref_f1: 0.7627, mention_recall: 0.9783, batch_loss: 10.1856, loss: 29.2296 ||: 85%|########5 | 2434/2851 [05:23<00:43, 9.62it/s]
2022-07-08 11:33:58,848 - INFO - tqdm - coref_precision: 0.8250, coref_recall: 0.7072, coref_f1: 0.7615, mention_recall: 0.9780, batch_loss: 284.7780, loss: 29.5793 ||: 88%|########7 | 2506/2851 [05:33<01:09, 4.93it/s]
2022-07-08 11:34:08,960 - INFO - tqdm - coref_precision: 0.8242, coref_recall: 0.7055, coref_f1: 0.7601, mention_recall: 0.9775, batch_loss: 79.6790, loss: 29.9132 ||: 90%|######### | 2575/2851 [05:43<00:45, 6.10it/s]
2022-07-08 11:34:18,969 - INFO - tqdm - coref_precision: 0.8236, coref_recall: 0.7055, coref_f1: 0.7599, mention_recall: 0.9776, batch_loss: 21.5113, loss: 30.1379 ||: 93%|#########2| 2638/2851 [05:53<00:26, 7.90it/s]
2022-07-08 11:34:29,062 - INFO - tqdm - coref_precision: 0.8239, coref_recall: 0.7064, coref_f1: 0.7605, mention_recall: 0.9777, batch_loss: 0.9558, loss: 30.0104 ||: 95%|#########5| 2717/2851 [06:03<00:16, 7.95it/s]
2022-07-08 11:34:39,093 - INFO - tqdm - coref_precision: 0.8240, coref_recall: 0.7065, coref_f1: 0.7607, mention_recall: 0.9776, batch_loss: 55.4033, loss: 29.9205 ||: 98%|#########8| 2797/2851 [06:13<00:08, 6.19it/s]
2022-07-08 11:34:43,779 - INFO - tqdm - coref_precision: 0.8244, coref_recall: 0.7069, coref_f1: 0.7611, mention_recall: 0.9776, batch_loss: 64.2230, loss: 29.7751 ||: 100%|#########9| 2837/2851 [06:18<00:01, 7.55it/s]
2022-07-08 11:34:43,941 - INFO - tqdm - coref_precision: 0.8244, coref_recall: 0.7070, coref_f1: 0.7611, mention_recall: 0.9776, batch_loss: 59.0752, loss: 29.7854 ||: 100%|#########9| 2838/2851 [06:18<00:01, 7.14it/s]
2022-07-08 11:34:44,154 - INFO - tqdm - coref_precision: 0.8244, coref_recall: 0.7069, coref_f1: 0.7611, mention_recall: 0.9776, batch_loss: 13.5897, loss: 29.7850 ||: 100%|#########9| 2840/2851 [06:18<00:01, 7.95it/s]
2022-07-08 11:34:44,382 - INFO - tqdm - coref_precision: 0.8244, coref_recall: 0.7070, coref_f1: 0.7611, mention_recall: 0.9776, batch_loss: 45.4672, loss: 29.7826 ||: 100%|#########9| 2842/2851 [06:19<00:01, 8.24it/s]
2022-07-08 11:34:44,542 - INFO - tqdm - coref_precision: 0.8244, coref_recall: 0.7070, coref_f1: 0.7611, mention_recall: 0.9776, batch_loss: 35.4396, loss: 29.7846 ||: 100%|#########9| 2843/2851 [06:19<00:01, 7.72it/s]
2022-07-08 11:34:44,835 - INFO - tqdm - coref_precision: 0.8244, coref_recall: 0.7070, coref_f1: 0.7611, mention_recall: 0.9776, batch_loss: 79.3849, loss: 29.8020 ||: 100%|#########9| 2844/2851 [06:19<00:01, 5.97it/s]
2022-07-08 11:34:44,944 - INFO - tqdm - coref_precision: 0.8244, coref_recall: 0.7071, coref_f1: 0.7612, mention_recall: 0.9776, batch_loss: 10.3474, loss: 29.7952 ||: 100%|#########9| 2845/2851 [06:19<00:00, 6.53it/s]
2022-07-08 11:34:45,089 - INFO - tqdm - coref_precision: 0.8245, coref_recall: 0.7072, coref_f1: 0.7612, mention_recall: 0.9776, batch_loss: 18.0059, loss: 29.7910 ||: 100%|#########9| 2846/2851 [06:19<00:00, 6.63it/s]
2022-07-08 11:34:45,279 - INFO - tqdm - coref_precision: 0.8245, coref_recall: 0.7071, coref_f1: 0.7612, mention_recall: 0.9776, batch_loss: 23.6136, loss: 29.7837 ||: 100%|#########9| 2848/2851 [06:19<00:00, 7.88it/s]
2022-07-08 11:34:45,409 - INFO - tqdm - coref_precision: 0.8245, coref_recall: 0.7071, coref_f1: 0.7612, mention_recall: 0.9776, batch_loss: 30.4442, loss: 29.7840 ||: 100%|#########9| 2849/2851 [06:20<00:00, 7.83it/s]
2022-07-08 11:34:45,549 - INFO - tqdm - coref_precision: 0.8245, coref_recall: 0.7071, coref_f1: 0.7612, mention_recall: 0.9776, batch_loss: 40.2772, loss: 29.7876 ||: 100%|#########9| 2850/2851 [06:20<00:00, 7.64it/s]
2022-07-08 11:34:45,686 - INFO - tqdm - coref_precision: 0.8246, coref_recall: 0.7071, coref_f1: 0.7612, mention_recall: 0.9776, batch_loss: 11.2484, loss: 29.7811 ||: 100%|##########| 2851/2851 [06:20<00:00, 7.56it/s]
2022-07-08 11:34:45,687 - INFO - tqdm - coref_precision: 0.8246, coref_recall: 0.7071, coref_f1: 0.7612, mention_recall: 0.9776, batch_loss: 11.2484, loss: 29.7811 ||: 100%|##########| 2851/2851 [06:20<00:00, 7.50it/s]
2022-07-08 11:34:46,558 - INFO - allennlp.training.trainer - Validating
2022-07-08 11:34:46,559 - INFO - tqdm - 0%| | 0/397 [00:00<?, ?it/s]
2022-07-08 11:34:56,654 - INFO - tqdm - coref_precision: 0.8008, coref_recall: 0.6906, coref_f1: 0.7416, mention_recall: 0.9649, batch_loss: 25.8156, loss: 31.3685 ||: 39%|###9 | 155/397 [00:10<00:12, 19.96it/s]
2022-07-08 11:35:06,685 - INFO - tqdm - coref_precision: 0.8027, coref_recall: 0.6945, coref_f1: 0.7446, mention_recall: 0.9672, batch_loss: 15.3370, loss: 31.4689 ||: 90%|######### | 358/397 [00:20<00:01, 22.57it/s]
2022-07-08 11:35:08,256 - INFO - tqdm - coref_precision: 0.8062, coref_recall: 0.6991, coref_f1: 0.7488, mention_recall: 0.9678, batch_loss: 11.2156, loss: 30.7955 ||: 100%|##########| 397/397 [00:21<00:00, 27.05it/s]
2022-07-08 11:35:08,257 - INFO - tqdm - coref_precision: 0.8062, coref_recall: 0.6991, coref_f1: 0.7488, mention_recall: 0.9678, batch_loss: 11.2156, loss: 30.7955 ||: 100%|##########| 397/397 [00:21<00:00, 18.30it/s]
2022-07-08 11:35:09,979 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'resulting_models/best.th'.
2022-07-08 11:35:11,347 - INFO - allennlp.training.callbacks.console_logger - Training | Validation
2022-07-08 11:35:11,347 - INFO - allennlp.training.callbacks.console_logger - coref_f1 | 0.761 | 0.749
2022-07-08 11:35:11,347 - INFO - allennlp.training.callbacks.console_logger - coref_precision | 0.825 | 0.806
2022-07-08 11:35:11,347 - INFO - allennlp.training.callbacks.console_logger - coref_recall | 0.707 | 0.699
2022-07-08 11:35:11,347 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 37455.492 | N/A
2022-07-08 11:35:11,347 - INFO - allennlp.training.callbacks.console_logger - loss | 29.781 | 30.796
2022-07-08 11:35:11,347 - INFO - allennlp.training.callbacks.console_logger - mention_recall | 0.978 | 0.968
2022-07-08 11:35:11,347 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 9626.746 | N/A
2022-07-08 11:35:11,347 - INFO - allennlp.training.trainer - Epoch duration: 0:06:46.019423
2022-07-08 11:35:11,347 - INFO - allennlp.training.trainer - Estimated training time remaining: 16:36:18
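[editor's note] Each time validation improves, the checkpointer copies the weights to 'resulting_models/best.th' (the "Best validation performance so far" lines above). In this AllenNLP setup best.th holds the model's state dict, so it can be inspected or reloaded later; the sketch below assumes the file exists at that path and that a model of the same architecture is available for load_state_dict.

    # Sketch: inspect the best checkpoint written by the trainer above.
    import torch

    state_dict = torch.load("resulting_models/best.th", map_location="cpu")
    print(len(state_dict), "tensors")
    for name in list(state_dict)[:5]:
        print(name, tuple(state_dict[name].shape))

    # Restoring it would require an instantiated model of the same architecture:
    # model.load_state_dict(state_dict)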
2022-07-08 11:35:11,347 - INFO - allennlp.training.trainer - Epoch 3/149
2022-07-08 11:35:11,347 - INFO - allennlp.training.trainer - Worker 0 memory usage: 9.4G
2022-07-08 11:35:11,348 - INFO - allennlp.training.trainer - GPU 0 memory usage: 37G
2022-07-08 11:35:11,349 - INFO - allennlp.training.trainer - Training
2022-07-08 11:35:11,349 - INFO - tqdm - 0%| | 0/2851 [00:00<?, ?it/s]
2022-07-08 11:35:21,408 - INFO - tqdm - coref_precision: 0.8603, coref_recall: 0.7209, coref_f1: 0.7842, mention_recall: 0.9766, batch_loss: 21.3463, loss: 26.3518 ||: 3%|2 | 74/2851 [00:10<06:53, 6.71it/s]
2022-07-08 11:35:31,562 - INFO - tqdm - coref_precision: 0.8671, coref_recall: 0.7455, coref_f1: 0.8015, mention_recall: 0.9767, batch_loss: 69.2001, loss: 22.0856 ||: 5%|5 | 155/2851 [00:20<06:03, 7.42it/s]
2022-07-08 11:35:41,684 - INFO - tqdm - coref_precision: 0.8653, coref_recall: 0.7571, coref_f1: 0.8073, mention_recall: 0.9770, batch_loss: 4.9128, loss: 21.0592 ||: 8%|8 | 239/2851 [00:30<05:49, 7.48it/s]
2022-07-08 11:35:51,728 - INFO - tqdm - coref_precision: 0.8617, coref_recall: 0.7587, coref_f1: 0.8067, mention_recall: 0.9786, batch_loss: 24.3618, loss: 21.5298 ||: 11%|# | 312/2851 [00:40<05:18, 7.98it/s]
2022-07-08 11:36:01,797 - INFO - tqdm - coref_precision: 0.8598, coref_recall: 0.7580, coref_f1: 0.8055, mention_recall: 0.9787, batch_loss: 10.8996, loss: 21.5519 ||: 14%|#3 | 393/2851 [00:50<05:40, 7.22it/s]
2022-07-08 11:36:11,880 - INFO - tqdm - coref_precision: 0.8597, coref_recall: 0.7557, coref_f1: 0.8042, mention_recall: 0.9793, batch_loss: 52.1849, loss: 22.2844 ||: 16%|#6 | 469/2851 [01:00<04:35, 8.65it/s]
2022-07-08 11:36:21,886 - INFO - tqdm - coref_precision: 0.8584, coref_recall: 0.7552, coref_f1: 0.8034, mention_recall: 0.9790, batch_loss: 41.1378, loss: 22.3064 ||: 19%|#9 | 550/2851 [01:10<05:57, 6.43it/s]
2022-07-08 11:36:32,006 - INFO - tqdm - coref_precision: 0.8544, coref_recall: 0.7504, coref_f1: 0.7989, mention_recall: 0.9789, batch_loss: 11.7711, loss: 23.5639 ||: 22%|##1 | 624/2851 [01:20<03:55, 9.46it/s]
2022-07-08 11:36:42,496 - INFO - tqdm - coref_precision: 0.8517, coref_recall: 0.7489, coref_f1: 0.7968, mention_recall: 0.9795, batch_loss: 161.5757, loss: 24.1700 ||: 25%|##4 | 700/2851 [01:31<06:56, 5.16it/s]
2022-07-08 11:36:52,707 - INFO - tqdm - coref_precision: 0.8496, coref_recall: 0.7470, coref_f1: 0.7948, mention_recall: 0.9791, batch_loss: 51.7975, loss: 24.4838 ||: 27%|##7 | 774/2851 [01:41<04:16, 8.09it/s]
2022-07-08 11:37:02,769 - INFO - tqdm - coref_precision: 0.8502, coref_recall: 0.7472, coref_f1: 0.7952, mention_recall: 0.9791, batch_loss: 25.8111, loss: 24.2650 ||: 30%|##9 | 855/2851 [01:51<03:40, 9.04it/s]
2022-07-08 11:37:12,918 - INFO - tqdm - coref_precision: 0.8494, coref_recall: 0.7470, coref_f1: 0.7948, mention_recall: 0.9790, batch_loss: 37.7874, loss: 24.3755 ||: 33%|###2 | 931/2851 [02:01<03:47, 8.44it/s]
2022-07-08 11:37:23,063 - INFO - tqdm - coref_precision: 0.8485, coref_recall: 0.7468, coref_f1: 0.7942, mention_recall: 0.9792, batch_loss: 15.2542, loss: 24.2909 ||: 35%|###5 | 1004/2851 [02:11<03:20, 9.22it/s]
2022-07-08 11:37:33,199 - INFO - tqdm - coref_precision: 0.8493, coref_recall: 0.7480, coref_f1: 0.7953, mention_recall: 0.9791, batch_loss: 26.9646, loss: 24.0755 ||: 38%|###8 | 1086/2851 [02:21<03:30, 8.40it/s]
2022-07-08 11:37:43,273 - INFO - tqdm - coref_precision: 0.8492, coref_recall: 0.7476, coref_f1: 0.7950, mention_recall: 0.9791, batch_loss: 27.5563, loss: 24.2065 ||: 41%|#### | 1162/2851 [02:31<03:55, 7.16it/s]
2022-07-08 11:37:53,357 - INFO - tqdm - coref_precision: 0.8488, coref_recall: 0.7476, coref_f1: 0.7948, mention_recall: 0.9793, batch_loss: 1.8524, loss: 23.9742 ||: 44%|####3 | 1248/2851 [02:42<02:53, 9.25it/s]
2022-07-08 11:38:03,524 - INFO - tqdm - coref_precision: 0.8474, coref_recall: 0.7450, coref_f1: 0.7927, mention_recall: 0.9792, batch_loss: 30.8934, loss: 24.2577 ||: 46%|####6 | 1324/2851 [02:52<03:09, 8.05it/s]
2022-07-08 11:38:13,604 - INFO - tqdm - coref_precision: 0.8473, coref_recall: 0.7464, coref_f1: 0.7935, mention_recall: 0.9795, batch_loss: 1.4536, loss: 24.2237 ||: 49%|####9 | 1399/2851 [03:02<03:11, 7.57it/s]
2022-07-08 11:38:23,636 - INFO - tqdm - coref_precision: 0.8454, coref_recall: 0.7437, coref_f1: 0.7912, mention_recall: 0.9787, batch_loss: 15.2695, loss: 24.6405 ||: 52%|#####1 | 1470/2851 [03:12<02:44, 8.41it/s]
2022-07-08 11:38:33,750 - INFO - tqdm - coref_precision: 0.8451, coref_recall: 0.7434, coref_f1: 0.7909, mention_recall: 0.9787, batch_loss: 26.2711, loss: 24.6390 ||: 54%|#####4 | 1548/2851 [03:22<03:01, 7.17it/s]
2022-07-08 11:38:43,815 - INFO - tqdm - coref_precision: 0.8452, coref_recall: 0.7425, coref_f1: 0.7904, mention_recall: 0.9786, batch_loss: 23.5708, loss: 24.9195 ||: 57%|#####6 | 1619/2851 [03:32<03:14, 6.33it/s]
2022-07-08 11:38:53,856 - INFO - tqdm - coref_precision: 0.8444, coref_recall: 0.7413, coref_f1: 0.7894, mention_recall: 0.9786, batch_loss: 24.9847, loss: 25.4811 ||: 59%|#####9 | 1689/2851 [03:42<02:36, 7.40it/s]
2022-07-08 11:39:03,947 - INFO - tqdm - coref_precision: 0.8450, coref_recall: 0.7421, coref_f1: 0.7901, mention_recall: 0.9789, batch_loss: 62.7251, loss: 25.2918 ||: 62%|######2 | 1771/2851 [03:52<02:19, 7.73it/s]
2022-07-08 11:39:13,983 - INFO - tqdm - coref_precision: 0.8443, coref_recall: 0.7408, coref_f1: 0.7890, mention_recall: 0.9789, batch_loss: 20.5979, loss: 25.5196 ||: 65%|######4 | 1843/2851 [04:02<02:10, 7.73it/s]
2022-07-08 11:39:24,103 - INFO - tqdm - coref_precision: 0.8449, coref_recall: 0.7429, coref_f1: 0.7905, mention_recall: 0.9792, batch_loss: 56.5674, loss: 25.2774 ||: 67%|######7 | 1924/2851 [04:12<02:28, 6.24it/s]
2022-07-08 11:39:34,184 - INFO - tqdm - coref_precision: 0.8450, coref_recall: 0.7423, coref_f1: 0.7902, mention_recall: 0.9791, batch_loss: 48.3291, loss: 25.1954 ||: 70%|####### | 2005/2851 [04:22<01:43, 8.16it/s]
2022-07-08 11:39:44,542 - INFO - tqdm - coref_precision: 0.8449, coref_recall: 0.7416, coref_f1: 0.7898, mention_recall: 0.9789, batch_loss: 202.4464, loss: 25.1396 ||: 72%|#######2 | 2066/2851 [04:33<02:28, 5.27it/s]
2022-07-08 11:39:54,732 - INFO - tqdm - coref_precision: 0.8450, coref_recall: 0.7412, coref_f1: 0.7895, mention_recall: 0.9790, batch_loss: 198.4191, loss: 25.0977 ||: 75%|#######5 | 2144/2851 [04:43<02:04, 5.67it/s]
2022-07-08 11:40:04,760 - INFO - tqdm - coref_precision: 0.8455, coref_recall: 0.7427, coref_f1: 0.7906, mention_recall: 0.9789, batch_loss: 11.6296, loss: 24.9186 ||: 78%|#######8 | 2226/2851 [04:53<01:43, 6.05it/s]
2022-07-08 11:40:14,935 - INFO - tqdm - coref_precision: 0.8462, coref_recall: 0.7437, coref_f1: 0.7915, mention_recall: 0.9792, batch_loss: 45.1180, loss: 24.8541 ||: 81%|######## | 2303/2851 [05:03<01:10, 7.76it/s]
2022-07-08 11:40:25,120 - INFO - tqdm - coref_precision: 0.8457, coref_recall: 0.7424, coref_f1: 0.7905, mention_recall: 0.9792, batch_loss: 12.4429, loss: 24.8797 ||: 83%|########3 | 2377/2851 [05:13<00:55, 8.53it/s]
2022-07-08 11:40:36,871 - INFO - tqdm - coref_precision: 0.8439, coref_recall: 0.7416, coref_f1: 0.7893, mention_recall: 0.9791, batch_loss: 833.1605, loss: 25.2384 ||: 86%|########5 | 2451/2851 [05:25<03:59, 1.67it/s]
2022-07-08 11:40:46,903 - INFO - tqdm - coref_precision: 0.8446, coref_recall: 0.7409, coref_f1: 0.7892, mention_recall: 0.9789, batch_loss: 27.2669, loss: 25.2836 ||: 88%|########8 | 2523/2851 [05:35<00:53, 6.17it/s]
2022-07-08 11:40:56,920 - INFO - tqdm - coref_precision: 0.8439, coref_recall: 0.7399, coref_f1: 0.7883, mention_recall: 0.9788, batch_loss: 8.8587, loss: 25.4650 ||: 91%|######### | 2592/2851 [05:45<00:35, 7.39it/s]
2022-07-08 11:41:07,006 - INFO - tqdm - coref_precision: 0.8439, coref_recall: 0.7394, coref_f1: 0.7880, mention_recall: 0.9785, batch_loss: 28.0594, loss: 25.5840 ||: 93%|#########3| 2659/2851 [05:55<00:22, 8.57it/s]
2022-07-08 11:41:17,048 - INFO - tqdm - coref_precision: 0.8437, coref_recall: 0.7399, coref_f1: 0.7882, mention_recall: 0.9784, batch_loss: 3.8206, loss: 25.5808 ||: 96%|#########5| 2735/2851 [06:05<00:14, 7.98it/s]
2022-07-08 11:41:27,188 - INFO - tqdm - coref_precision: 0.8435, coref_recall: 0.7399, coref_f1: 0.7881, mention_recall: 0.9781, batch_loss: 6.0471, loss: 25.6757 ||: 98%|#########8| 2807/2851 [06:15<00:06, 6.57it/s]
2022-07-08 11:41:30,750 - INFO - tqdm - coref_precision: 0.8437, coref_recall: 0.7400, coref_f1: 0.7883, mention_recall: 0.9782, batch_loss: 3.7795, loss: 25.6146 ||: 100%|#########9| 2837/2851 [06:19<00:01, 8.86it/s]
2022-07-08 11:41:30,929 - INFO - tqdm - coref_precision: 0.8438, coref_recall: 0.7400, coref_f1: 0.7884, mention_recall: 0.9782, batch_loss: 5.1570, loss: 25.6048 ||: 100%|#########9| 2839/2851 [06:19<00:01, 9.68it/s]
2022-07-08 11:41:31,168 - INFO - tqdm - coref_precision: 0.8438, coref_recall: 0.7401, coref_f1: 0.7884, mention_recall: 0.9782, batch_loss: 5.3816, loss: 25.5893 ||: 100%|#########9| 2841/2851 [06:19<00:01, 9.15it/s]
2022-07-08 11:41:31,364 - INFO - tqdm - coref_precision: 0.8439, coref_recall: 0.7402, coref_f1: 0.7885, mention_recall: 0.9782, batch_loss: 1.7994, loss: 25.5720 ||: 100%|#########9| 2843/2851 [06:20<00:00, 9.48it/s]
2022-07-08 11:41:31,680 - INFO - tqdm - coref_precision: 0.8437, coref_recall: 0.7398, coref_f1: 0.7882, mention_recall: 0.9782, batch_loss: 178.4339, loss: 25.6258 ||: 100%|#########9| 2844/2851 [06:20<00:01, 6.81it/s]
2022-07-08 11:41:31,793 - INFO - tqdm - coref_precision: 0.8437, coref_recall: 0.7398, coref_f1: 0.7882, mention_recall: 0.9782, batch_loss: 13.3408, loss: 25.6214 ||: 100%|#########9| 2845/2851 [06:20<00:00, 7.17it/s]
2022-07-08 11:41:31,907 - INFO - tqdm - coref_precision: 0.8437, coref_recall: 0.7398, coref_f1: 0.7882, mention_recall: 0.9782, batch_loss: 13.2531, loss: 25.6171 ||: 100%|#########9| 2846/2851 [06:20<00:00, 7.49it/s]
2022-07-08 11:41:32,120 - INFO - tqdm - coref_precision: 0.8438, coref_recall: 0.7399, coref_f1: 0.7883, mention_recall: 0.9782, batch_loss: 21.3783, loss: 25.6071 ||: 100%|#########9| 2848/2851 [06:20<00:00, 8.16it/s]
2022-07-08 11:41:32,370 - INFO - tqdm - coref_precision: 0.8437, coref_recall: 0.7399, coref_f1: 0.7883, mention_recall: 0.9782, batch_loss: 98.4275, loss: 25.6326 ||: 100%|#########9| 2849/2851 [06:21<00:00, 6.62it/s]
2022-07-08 11:41:32,538 - INFO - tqdm - coref_precision: 0.8438, coref_recall: 0.7399, coref_f1: 0.7883, mention_recall: 0.9782, batch_loss: 47.5648, loss: 25.6403 ||: 100%|#########9| 2850/2851 [06:21<00:00, 6.44it/s]
2022-07-08 11:41:32,713 - INFO - tqdm - coref_precision: 0.8438, coref_recall: 0.7399, coref_f1: 0.7883, mention_recall: 0.9782, batch_loss: 57.3925, loss: 25.6514 ||: 100%|##########| 2851/2851 [06:21<00:00, 6.24it/s]
2022-07-08 11:41:33,673 - INFO - allennlp.training.trainer - Validating
2022-07-08 11:41:33,674 - INFO - tqdm - 0%| | 0/397 [00:00<?, ?it/s]
2022-07-08 11:41:43,716 - INFO - tqdm - coref_precision: 0.7955, coref_recall: 0.7382, coref_f1: 0.7657, mention_recall: 0.9756, batch_loss: 30.7657, loss: 34.9632 ||: 52%|#####2 | 208/397 [00:10<00:06, 27.17it/s]
2022-07-08 11:41:53,228 - INFO - tqdm - coref_precision: 0.7875, coref_recall: 0.7147, coref_f1: 0.7493, mention_recall: 0.9722, batch_loss: 32.0841, loss: 36.5335 ||: 100%|##########| 397/397 [00:19<00:00, 20.30it/s]
2022-07-08 11:41:54,948 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'resulting_models/best.th'.
2022-07-08 11:41:56,336 - INFO - allennlp.training.callbacks.console_logger - Training | Validation
2022-07-08 11:41:56,337 - INFO - allennlp.training.callbacks.console_logger - coref_f1 | 0.788 | 0.749
2022-07-08 11:41:56,337 - INFO - allennlp.training.callbacks.console_logger - coref_precision | 0.844 | 0.787
2022-07-08 11:41:56,337 - INFO - allennlp.training.callbacks.console_logger - coref_recall | 0.740 | 0.715
2022-07-08 11:41:56,337 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 37455.680 | N/A
2022-07-08 11:41:56,337 - INFO - allennlp.training.callbacks.console_logger - loss | 25.651 | 36.534
2022-07-08 11:41:56,337 - INFO - allennlp.training.callbacks.console_logger - mention_recall | 0.978 | 0.972
2022-07-08 11:41:56,337 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 9626.746 | N/A
2022-07-08 11:41:56,337 - INFO - allennlp.training.trainer - Epoch duration: 0:06:44.989519
2022-07-08 11:41:56,337 - INFO - allennlp.training.trainer - Estimated training time remaining: 16:28:30
2022-07-08 11:41:56,337 - INFO - allennlp.training.trainer - Epoch 4/149
2022-07-08 11:41:56,337 - INFO - allennlp.training.trainer - Worker 0 memory usage: 9.4G
2022-07-08 11:41:56,337 - INFO - allennlp.training.trainer - GPU 0 memory usage: 37G
2022-07-08 11:41:56,338 - INFO - allennlp.training.trainer - Training
2022-07-08 11:41:56,339 - INFO - tqdm - 0%| | 0/2851 [00:00<?, ?it/s]
2022-07-08 11:42:06,457 - INFO - tqdm - coref_precision: 0.8817, coref_recall: 0.7745, coref_f1: 0.8244, mention_recall: 0.9828, batch_loss: 1.9276, loss: 19.1376 ||: 3%|2 | 78/2851 [00:10<06:42, 6.89it/s]
2022-07-08 11:42:16,540 - INFO - tqdm - coref_precision: 0.8703, coref_recall: 0.7774, coref_f1: 0.8211, mention_recall: 0.9799, batch_loss: 53.7265, loss: 20.5729 ||: 5%|5 | 154/2851 [00:20<05:56, 7.56it/s]
2022-07-08 11:42:26,566 - INFO - tqdm - coref_precision: 0.8475, coref_recall: 0.7510, coref_f1: 0.7963, mention_recall: 0.9790, batch_loss: 63.7421, loss: 25.8757 ||: 8%|7 | 214/2851 [00:30<06:14, 7.03it/s]
2022-07-08 11:42:36,659 - INFO - tqdm - coref_precision: 0.8434, coref_recall: 0.7349, coref_f1: 0.7853, mention_recall: 0.9761, batch_loss: 10.4182, loss: 27.8449 ||: 10%|9 | 282/2851 [00:40<05:26, 7.87it/s]
2022-07-08 11:42:46,733 - INFO - tqdm - coref_precision: 0.8411, coref_recall: 0.7307, coref_f1: 0.7819, mention_recall: 0.9761, batch_loss: 1.3874, loss: 28.6812 ||: 12%|#2 | 349/2851 [00:50<07:36, 5.48it/s]
2022-07-08 11:42:56,785 - INFO - tqdm - coref_precision: 0.8410, coref_recall: 0.7270, coref_f1: 0.7797, mention_recall: 0.9759, batch_loss: 1.8620, loss: 29.2747 ||: 15%|#4 | 423/2851 [01:00<03:55, 10.29it/s]
2022-07-08 11:43:06,853 - INFO - tqdm - coref_precision: 0.8444, coref_recall: 0.7286, coref_f1: 0.7821, mention_recall: 0.9768, batch_loss: 16.1034, loss: 28.2336 ||: 18%|#7 | 499/2851 [01:10<05:10, 7.58it/s]
2022-07-08 11:43:16,856 - INFO - tqdm - coref_precision: 0.8487, coref_recall: 0.7386, coref_f1: 0.7897, mention_recall: 0.9779, batch_loss: 0.4163, loss: 26.4759 ||: 20%|## | 582/2851 [01:20<04:16, 8.85it/s]
2022-07-08 11:43:26,968 - INFO - tqdm - coref_precision: 0.8510, coref_recall: 0.7436, coref_f1: 0.7936, mention_recall: 0.9782, batch_loss: 53.3564, loss: 25.9728 ||: 23%|##3 | 660/2851 [01:30<05:05, 7.17it/s]
2022-07-08 11:43:37,020 - INFO - tqdm - coref_precision: 0.8523, coref_recall: 0.7448, coref_f1: 0.7948, mention_recall: 0.9783, batch_loss: 4.6817, loss: 25.7676 ||: 26%|##5 | 730/2851 [01:40<05:39, 6.24it/s]
2022-07-08 11:43:47,195 - INFO - tqdm - coref_precision: 0.8546, coref_recall: 0.7493, coref_f1: 0.7983, mention_recall: 0.9786, batch_loss: 17.7737, loss: 24.6217 ||: 28%|##8 | 803/2851 [01:50<03:16, 10.42it/s]
2022-07-08 11:43:57,409 - INFO - tqdm - coref_precision: 0.8555, coref_recall: 0.7506, coref_f1: 0.7995, mention_recall: 0.9782, batch_loss: 62.1159, loss: 24.3758 ||: 31%|### | 881/2851 [02:01<05:22, 6.11it/s]
2022-07-08 11:44:07,492 - INFO - tqdm - coref_precision: 0.8557, coref_recall: 0.7532, coref_f1: 0.8010, mention_recall: 0.9788, batch_loss: 6.7704, loss: 24.0535 ||: 34%|###3 | 963/2851 [02:11<03:42, 8.49it/s]
2022-07-08 11:44:17,753 - INFO - tqdm - coref_precision: 0.8553, coref_recall: 0.7545, coref_f1: 0.8016, mention_recall: 0.9785, batch_loss: 205.5602, loss: 23.8193 ||: 37%|###6 | 1042/2851 [02:21<05:14, 5.75it/s]
2022-07-08 11:44:27,813 - INFO - tqdm - coref_precision: 0.8577, coref_recall: 0.7566, coref_f1: 0.8038, mention_recall: 0.9788, batch_loss: 56.3863, loss: 23.3574 ||: 39%|###9 | 1122/2851 [02:31<03:56, 7.31it/s]
2022-07-08 11:44:37,917 - INFO - tqdm - coref_precision: 0.8573, coref_recall: 0.7549, coref_f1: 0.8027, mention_recall: 0.9790, batch_loss: 59.6040, loss: 23.6492 ||: 42%|####1 | 1196/2851 [02:41<04:24, 6.25it/s]
2022-07-08 11:44:47,935 - INFO - tqdm - coref_precision: 0.8582, coref_recall: 0.7584, coref_f1: 0.8050, mention_recall: 0.9796, batch_loss: 7.2600, loss: 23.1746 ||: 45%|####5 | 1283/2851 [02:51<03:00, 8.70it/s]
2022-07-08 11:44:58,098 - INFO - tqdm - coref_precision: 0.8557, coref_recall: 0.7581, coref_f1: 0.8038, mention_recall: 0.9796, batch_loss: 82.7667, loss: 23.4580 ||: 47%|####7 | 1354/2851 [03:01<02:52, 8.66it/s]
2022-07-08 11:45:08,098 - INFO - tqdm - coref_precision: 0.8565, coref_recall: 0.7587, coref_f1: 0.8045, mention_recall: 0.9801, batch_loss: 76.6576, loss: 23.2912 ||: 50%|##### | 1430/2851 [03:11<03:40, 6.44it/s]
2022-07-08 11:45:18,159 - INFO - tqdm - coref_precision: 0.8579, coref_recall: 0.7608, coref_f1: 0.8063, mention_recall: 0.9804, batch_loss: 11.9600, loss: 23.0931 ||: 53%|#####2 | 1506/2851 [03:21<03:21, 6.67it/s]
2022-07-08 11:45:28,190 - INFO - tqdm - coref_precision: 0.8576, coref_recall: 0.7609, coref_f1: 0.8062, mention_recall: 0.9805, batch_loss: 29.2761, loss: 23.1612 ||: 55%|#####5 | 1579/2851 [03:31<03:19, 6.39it/s]
2022-07-08 11:45:38,324 - INFO - tqdm - coref_precision: 0.8584, coref_recall: 0.7615, coref_f1: 0.8068, mention_recall: 0.9804, batch_loss: 108.3255, loss: 22.9257 ||: 58%|#####8 | 1660/2851 [03:41<03:43, 5.34it/s]
2022-07-08 11:45:48,354 - INFO - tqdm - coref_precision: 0.8591, coref_recall: 0.7622, coref_f1: 0.8076, mention_recall: 0.9806, batch_loss: 13.4211, loss: 22.7487 ||: 61%|###### | 1739/2851 [03:52<02:37, 7.05it/s]
2022-07-08 11:45:58,564 - INFO - tqdm - coref_precision: 0.8597, coref_recall: 0.7632, coref_f1: 0.8084, mention_recall: 0.9806, batch_loss: 72.6438, loss: 22.7396 ||: 64%|######3 | 1814/2851 [04:02<02:40, 6.45it/s]
2022-07-08 11:46:08,830 - INFO - tqdm - coref_precision: 0.8590, coref_recall: 0.7610, coref_f1: 0.8069, mention_recall: 0.9805, batch_loss: 81.7495, loss: 23.1809 ||: 66%|######6 | 1883/2851 [04:12<02:12, 7.30it/s]
2022-07-08 11:46:18,920 - INFO - tqdm - coref_precision: 0.8592, coref_recall: 0.7623, coref_f1: 0.8077, mention_recall: 0.9808, batch_loss: 35.6256, loss: 23.0136 ||: 69%|######8 | 1959/2851 [04:22<01:41, 8.80it/s]
2022-07-08 11:46:28,943 - INFO - tqdm - coref_precision: 0.8594, coref_recall: 0.7626, coref_f1: 0.8079, mention_recall: 0.9807, batch_loss: 29.4589, loss: 22.9234 ||: 71%|#######1 | 2035/2851 [04:32<01:46, 7.63it/s]
2022-07-08 11:46:39,443 - INFO - tqdm - coref_precision: 0.8600, coref_recall: 0.7627, coref_f1: 0.8082, mention_recall: 0.9807, batch_loss: 420.2042, loss: 22.9233 ||: 74%|#######4 | 2111/2851 [04:43<03:54, 3.15it/s]
2022-07-08 11:46:49,575 - INFO - tqdm - coref_precision: 0.8602, coref_recall: 0.7630, coref_f1: 0.8085, mention_recall: 0.9807, batch_loss: 0.0039, loss: 22.7524 ||: 77%|#######6 | 2194/2851 [04:53<01:07, 9.67it/s]
2022-07-08 11:46:59,696 - INFO - tqdm - coref_precision: 0.8597, coref_recall: 0.7627, coref_f1: 0.8081, mention_recall: 0.9808, batch_loss: 107.1108, loss: 22.8476 ||: 80%|#######9 | 2270/2851 [05:03<01:49, 5.33it/s]
2022-07-08 11:47:09,891 - INFO - tqdm - coref_precision: 0.8603, coref_recall: 0.7631, coref_f1: 0.8085, mention_recall: 0.9808, batch_loss: 48.4600, loss: 22.8005 ||: 82%|########2 | 2348/2851 [05:13<01:18, 6.39it/s]
2022-07-08 11:47:19,975 - INFO - tqdm - coref_precision: 0.8607, coref_recall: 0.7635, coref_f1: 0.8090, mention_recall: 0.9809, batch_loss: 5.3224, loss: 22.7023 ||: 85%|########4 | 2421/2851 [05:23<00:54, 7.87it/s]
2022-07-08 11:47:30,156 - INFO - tqdm - coref_precision: 0.8605, coref_recall: 0.7638, coref_f1: 0.8091, mention_recall: 0.9809, batch_loss: 46.0964, loss: 22.6988 ||: 87%|########7 | 2493/2851 [05:33<00:46, 7.76it/s]
2022-07-08 11:47:40,256 - INFO - tqdm - coref_precision: 0.8606, coref_recall: 0.7643, coref_f1: 0.8094, mention_recall: 0.9809, batch_loss: 0.9504, loss: 22.6704 ||: 90%|######### | 2570/2851 [05:43<00:44, 6.26it/s]
2022-07-08 11:47:51,164 - INFO - tqdm - coref_precision: 0.8605, coref_recall: 0.7640, coref_f1: 0.8092, mention_recall: 0.9808, batch_loss: 190.0669, loss: 22.6735 ||: 93%|#########2| 2643/2851 [05:54<00:52, 3.95it/s]
2022-07-08 11:48:01,204 - INFO - tqdm - coref_precision: 0.8606, coref_recall: 0.7640, coref_f1: 0.8092, mention_recall: 0.9808, batch_loss: 12.9834, loss: 22.6741 ||: 95%|#########5| 2713/2851 [06:04<00:19, 7.26it/s]
2022-07-08 11:48:11,222 - INFO - tqdm - coref_precision: 0.8606, coref_recall: 0.7642, coref_f1: 0.8093, mention_recall: 0.9808, batch_loss: 95.9621, loss: 22.6921 ||: 98%|#########7| 2784/2851 [06:14<00:09, 7.36it/s]
2022-07-08 11:48:17,919 - INFO - tqdm - coref_precision: 0.8609, coref_recall: 0.7649, coref_f1: 0.8098, mention_recall: 0.9810, batch_loss: 14.6832, loss: 22.6142 ||: 100%|#########9| 2837/2851 [06:21<00:01, 8.44it/s]
2022-07-08 11:48:18,099 - INFO - tqdm - coref_precision: 0.8608, coref_recall: 0.7649, coref_f1: 0.8098, mention_recall: 0.9810, batch_loss: 101.1863, loss: 22.6419 ||: 100%|#########9| 2838/2851 [06:21<00:01, 7.56it/s]
2022-07-08 11:48:18,249 - INFO - tqdm - coref_precision: 0.8608, coref_recall: 0.7648, coref_f1: 0.8098, mention_recall: 0.9810, batch_loss: 0.0028, loss: 22.6262 ||: 100%|#########9| 2840/2851 [06:21<00:01, 9.11it/s]
2022-07-08 11:48:18,363 - INFO - tqdm - coref_precision: 0.8608, coref_recall: 0.7648, coref_f1: 0.8098, mention_recall: 0.9810, batch_loss: 20.9301, loss: 22.6256 ||: 100%|#########9| 2841/2851 [06:22<00:01, 9.03it/s]
2022-07-08 11:48:18,562 - INFO - tqdm - coref_precision: 0.8608, coref_recall: 0.7648, coref_f1: 0.8097, mention_recall: 0.9810, batch_loss: 46.1141, loss: 22.6338 ||: 100%|#########9| 2842/2851 [06:22<00:01, 7.59it/s]
2022-07-08 11:48:18,666 - INFO - tqdm - coref_precision: 0.8607, coref_recall: 0.7648, coref_f1: 0.8097, mention_recall: 0.9810, batch_loss: 18.7981, loss: 22.6325 ||: 100%|#########9| 2843/2851 [06:22<00:00, 8.03it/s]
2022-07-08 11:48:18,767 - INFO - tqdm - coref_precision: 0.8608, coref_recall: 0.7648, coref_f1: 0.8097, mention_recall: 0.9810, batch_loss: 8.1688, loss: 22.6274 ||: 100%|#########9| 2844/2851 [06:22<00:00, 8.44it/s]
2022-07-08 11:48:18,928 - INFO - tqdm - coref_precision: 0.8608, coref_recall: 0.7648, coref_f1: 0.8098, mention_recall: 0.9810, batch_loss: 13.4392, loss: 22.6242 ||: 100%|#########9| 2845/2851 [06:22<00:00, 7.69it/s]
2022-07-08 11:48:19,056 - INFO - tqdm - coref_precision: 0.8607, coref_recall: 0.7647, coref_f1: 0.8097, mention_recall: 0.9810, batch_loss: 55.4046, loss: 22.6357 ||: 100%|#########9| 2846/2851 [06:22<00:00, 7.72it/s]
2022-07-08 11:48:19,200 - INFO - tqdm - coref_precision: 0.8607, coref_recall: 0.7647, coref_f1: 0.8097, mention_recall: 0.9810, batch_loss: 0.0991, loss: 22.6198 ||: 100%|#########9| 2848/2851 [06:22<00:00, 9.63it/s]
2022-07-08 11:48:19,319 - INFO - tqdm - coref_precision: 0.8608, coref_recall: 0.7648, coref_f1: 0.8097, mention_recall: 0.9810, batch_loss: 3.4207, loss: 22.6131 ||: 100%|#########9| 2849/2851 [06:22<00:00, 9.30it/s]
2022-07-08 11:48:19,452 - INFO - tqdm - coref_precision: 0.8608, coref_recall: 0.7648, coref_f1: 0.8097, mention_recall: 0.9810, batch_loss: 18.2044, loss: 22.6115 ||: 100%|#########9| 2850/2851 [06:23<00:00, 8.76it/s]
2022-07-08 11:48:19,612 - INFO - tqdm - coref_precision: 0.8607, coref_recall: 0.7648, coref_f1: 0.8097, mention_recall: 0.9810, batch_loss: 23.5295, loss: 22.6119 ||: 100%|##########| 2851/2851 [06:23<00:00, 7.92it/s]
2022-07-08 11:48:20,484 - INFO - allennlp.training.trainer - Validating
2022-07-08 11:48:20,485 - INFO - tqdm - 0%| | 0/397 [00:00<?, ?it/s]
2022-07-08 11:48:32,342 - INFO - tqdm - coref_precision: 0.8004, coref_recall: 0.7117, coref_f1: 0.7534, mention_recall: 0.9692, batch_loss: 372.2585, loss: 34.4092 ||: 48%|####7 | 190/397 [00:11<01:10, 2.95it/s]
2022-07-08 11:48:42,269 - INFO - tqdm - coref_precision: 0.7971, coref_recall: 0.7097, coref_f1: 0.7508, mention_recall: 0.9714, batch_loss: 26.1008, loss: 34.2500 ||: 100%|##########| 397/397 [00:21<00:00, 23.40it/s]
2022-07-08 11:48:43,993 - INFO - allennlp.training.checkpointer - Best validation performance so far. Copying weights to 'resulting_models/best.th'.
2022-07-08 11:48:45,198 - INFO - allennlp.training.callbacks.console_logger - Training | Validation
2022-07-08 11:48:45,198 - INFO - allennlp.training.callbacks.console_logger - coref_f1 | 0.810 | 0.751
2022-07-08 11:48:45,199 - INFO - allennlp.training.callbacks.console_logger - coref_precision | 0.861 | 0.797
2022-07-08 11:48:45,199 - INFO - allennlp.training.callbacks.console_logger - coref_recall | 0.765 | 0.710
2022-07-08 11:48:45,199 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 37455.680 | N/A
2022-07-08 11:48:45,199 - INFO - allennlp.training.callbacks.console_logger - loss | 22.612 | 34.250
2022-07-08 11:48:45,199 - INFO - allennlp.training.callbacks.console_logger - mention_recall | 0.981 | 0.971
2022-07-08 11:48:45,199 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 9626.746 | N/A
2022-07-08 11:48:45,199 - INFO - allennlp.training.trainer - Epoch duration: 0:06:48.861685
2022-07-08 11:48:45,199 - INFO - allennlp.training.trainer - Estimated training time remaining: 16:23:00
2022-07-08 11:48:45,199 - INFO - allennlp.training.trainer - Epoch 5/149
2022-07-08 11:48:45,199 - INFO - allennlp.training.trainer - Worker 0 memory usage: 9.4G
2022-07-08 11:48:45,199 - INFO - allennlp.training.trainer - GPU 0 memory usage: 37G
2022-07-08 11:48:45,200 - INFO - allennlp.training.trainer - Training
2022-07-08 11:48:45,200 - INFO - tqdm - 0%| | 0/2851 [00:00<?, ?it/s]
2022-07-08 11:48:55,215 - INFO - tqdm - coref_precision: 0.8865, coref_recall: 0.7908, coref_f1: 0.8355, mention_recall: 0.9864, batch_loss: 14.9319, loss: 17.3562 ||: 3%|2 | 75/2851 [00:10<06:56, 6.66it/s]
2022-07-08 11:49:05,253 - INFO - tqdm - coref_precision: 0.8882, coref_recall: 0.7982, coref_f1: 0.8403, mention_recall: 0.9869, batch_loss: 10.0869, loss: 18.2874 ||: 5%|5 | 146/2851 [00:20<09:00, 5.00it/s]
2022-07-08 11:49:15,253 - INFO - tqdm - coref_precision: 0.8827, coref_recall: 0.7965, coref_f1: 0.8369, mention_recall: 0.9864, batch_loss: 13.2016, loss: 18.8314 ||: 8%|7 | 219/2851 [00:30<06:09, 7.12it/s]
2022-07-08 11:49:25,301 - INFO - tqdm - coref_precision: 0.8815, coref_recall: 0.8021, coref_f1: 0.8395, mention_recall: 0.9871, batch_loss: 3.7503, loss: 18.5739 ||: 10%|# | 294/2851 [00:40<06:12, 6.86it/s]
2022-07-08 11:49:35,346 - INFO - tqdm - coref_precision: 0.8796, coref_recall: 0.7957, coref_f1: 0.8352, mention_recall: 0.9852, batch_loss: 5.6011, loss: 18.9233 ||: 13%|#2 | 367/2851 [00:50<05:12, 7.96it/s]
2022-07-08 11:49:45,463 - INFO - tqdm - coref_precision: 0.8799, coref_recall: 0.7928, coref_f1: 0.8337, mention_recall: 0.9847, batch_loss: 51.5642, loss: 19.1033 ||: 16%|#5 | 447/2851 [01:00<07:08, 5.61it/s]
2022-07-08 11:49:55,537 - INFO - tqdm - coref_precision: 0.8816, coref_recall: 0.7923, coref_f1: 0.8343, mention_recall: 0.9841, batch_loss: 0.5120, loss: 18.6530 ||: 18%|#8 | 527/2851 [01:10<05:28, 7.08it/s]
2022-07-08 11:50:05,713 - INFO - tqdm - coref_precision: 0.8832, coref_recall: 0.7944, coref_f1: 0.8361, mention_recall: 0.9837, batch_loss: 8.6386, loss: 18.3202 ||: 21%|##1 | 600/2851 [01:20<04:39, 8.05it/s]
2022-07-08 11:50:15,726 - INFO - tqdm - coref_precision: 0.8839, coref_recall: 0.7971, coref_f1: 0.8380, mention_recall: 0.9834, batch_loss: 0.7458, loss: 17.9161 ||: 24%|##3 | 683/2851 [01:30<03:43, 9.71it/s]
2022-07-08 11:50:25,751 - INFO - tqdm - coref_precision: 0.8848, coref_recall: 0.7998, coref_f1: 0.8398, mention_recall: 0.9836, batch_loss: 5.9150, loss: 17.7561 ||: 27%|##6 | 760/2851 [01:40<04:26, 7.85it/s]