-
Notifications
You must be signed in to change notification settings - Fork 4
/
utils.py
137 lines (100 loc) · 4.34 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import numpy as np
class ModelUtils:
    """Static helpers for building model inputs and scoring ranked answers.

    The metric methods share one input convention: ``q_list`` is a sequence of
    per-question label sequences (``1`` marks a correct candidate), and
    ``y_pred`` holds the candidate scores laid out question after question in
    the same order, so each question owns the next ``len(q)`` entries of
    ``y_pred``. ``y_pred`` must support slicing and ``.flatten()``.
    """

    @staticmethod
    def build_text_image(word2vec, words, embedding_size=300):
        """Look up a vector for every word and wrap the result in a list.

        Args:
            word2vec: Object exposing ``get_word_vector(word)``; a ``None``
                return value is treated as "word not found".
            words: Iterable of words to look up.
            embedding_size: Length of the zero vector substituted for words
                that have no vector.

        Returns:
            A one-element list whose only item is the list of word vectors,
            in the same order as ``words``.
        """
        word_vectors = []
        for word in words:
            vector = word2vec.get_word_vector(word)
            if vector is None:
                # Unknown word: keep the position, fill it with zeros.
                vector = np.zeros(embedding_size)
            word_vectors.append(vector)
        return [word_vectors]

    @staticmethod
    def _ranked_labels(labels, y_pred, index_begin):
        """Return ``labels`` reordered by descending predicted score.

        The scores for this question are ``y_pred[index_begin:index_begin +
        len(labels)]``. The sort is stable, so tied scores keep their
        original relative order.
        """
        index_end = index_begin + len(labels)
        scores = y_pred[index_begin:index_end].flatten().tolist()
        ranked = sorted(zip(labels, scores), key=lambda pair: pair[1], reverse=True)
        return [label for label, _ in ranked]

    @staticmethod
    def mrr(q_list, y_true, y_pred):
        """Compute the mean reciprocal rank, as a percentage.

        For each question the candidates are ranked by descending score and
        the reciprocal of the rank of the first candidate labelled ``1`` is
        accumulated; questions without such a candidate contribute zero.

        Args:
            q_list: Per-question label sequences.
            y_true: Not used by the computation; labels are read from
                ``q_list``.
            y_pred: Candidate scores, concatenated in ``q_list`` order.

        Returns:
            The mean reciprocal rank over all questions times 100, or ``0.0``
            when ``q_list`` is empty.

        Raises:
            ValueError: If the number of labels in ``q_list`` differs from
                ``len(y_pred)``.
        """
        expected = sum(len(q) for q in q_list)
        if expected != len(y_pred):
            raise ValueError(
                'q_list holds %d labels but y_pred holds %d predictions'
                % (expected, len(y_pred))
            )

        num_questions = len(q_list)
        if num_questions == 0:
            # Nothing to average over; avoid dividing by zero.
            return 0.0

        reciprocal_rank_sum = 0.0
        index_begin = 0
        for q in q_list:
            ranked = ModelUtils._ranked_labels(q, y_pred, index_begin)
            for rank, label in enumerate(ranked, start=1):
                if label == 1:
                    reciprocal_rank_sum += 1.0 / rank
                    break
            index_begin += len(q)
        return reciprocal_rank_sum / num_questions * 100

    @staticmethod
    def map(q_list, y_true, y_pred):
        """Compute the mean average precision, as a percentage.

        For each question the candidates are ranked by descending score; the
        precision at every rank holding a candidate labelled ``1`` is summed
        and divided by the number of such candidates. Questions without any
        correct candidate contribute zero but still count in the mean.

        Args:
            q_list: Per-question label sequences.
            y_true: Not used by the computation; labels are read from
                ``q_list``.
            y_pred: Candidate scores, concatenated in ``q_list`` order.

        Returns:
            The mean of the per-question average precisions times 100, or
            ``0.0`` when ``q_list`` is empty.
        """
        num_questions = len(q_list)
        if num_questions == 0:
            # Nothing to average over; avoid dividing by zero.
            return 0.0

        avg_prec_sum = 0.0
        index_begin = 0
        for q in q_list:
            total_correct = sum(1 for label in q if label == 1)
            if total_correct > 0:
                ranked = ModelUtils._ranked_labels(q, y_pred, index_begin)
                hits = 0
                precision_sum = 0.0
                for rank, label in enumerate(ranked, start=1):
                    if label != 1:
                        continue
                    hits += 1
                    # Precision at this rank: correct answers seen so far
                    # over candidates seen so far.
                    precision_sum += float(hits) / rank
                    if hits == total_correct:
                        break
                avg_prec_sum += precision_sum / total_correct
            index_begin += len(q)
        return avg_prec_sum / num_questions * 100

    @staticmethod
    def precision_recall_f1(q_list, y_true, y_pred, thresholds=None):
        """Evaluate precision, recall and F1 over a sweep of score thresholds.

        Args:
            q_list: Per-question label sequences.
            y_true: Passed through to the per-threshold computation, which
                does not use it.
            y_pred: Candidate scores, concatenated in ``q_list`` order.
            thresholds: Optional iterable of thresholds to evaluate. When
                omitted, 0.01 through 0.14 in steps of 0.01 are used.

        Returns:
            A list with one tuple per threshold:
            ``(summary_string, threshold, precision, recall, f1)``.
        """
        if thresholds is None:
            thresholds = [step / 100.0 for step in range(1, 15)]

        results = []
        for thr in thresholds:
            p, r, f1 = ModelUtils._precision_recall_f1_threshold(q_list, y_true, y_pred, thr)
            summary = 'thre: %.2f, prec: %.2f, rec: %.2f, f1: %.2f' % (thr, p, r, f1)
            results.append((summary, thr, p, r, f1))
        return results

    @staticmethod
    def _precision_recall_f1_threshold(q_list, y_true, y_pred, thre):
        """Compute question-level precision, recall and F1 for one threshold.

        A question is "predicted" when its highest score is strictly greater
        than ``thre``; the prediction is correct when that top-scored
        candidate is labelled ``1``. Precision divides correct predictions by
        predicted questions, recall divides them by the number of questions
        that have at least one candidate labelled ``1``.

        Args:
            q_list: Per-question label sequences.
            y_true: Not used by the computation.
            y_pred: Candidate scores, concatenated in ``q_list`` order.
            thre: Score threshold the top candidate must exceed.

        Returns:
            ``(precision, recall, f1)`` as percentages, or ``(0., 0., 0.)``
            when no question was predicted correctly.
        """
        questions_with_answers = 0.
        predicted_questions = 0.
        correctly_predicted_questions = 0.

        index_begin = 0
        for q in q_list:
            if len(q) == 0:
                # No candidates: nothing to predict and no gold answer, and
                # np.argmax would raise on the empty slice.
                continue

            index_end = index_begin + len(q)
            scores = y_pred[index_begin:index_end].flatten()
            index_begin = index_end

            gold_answer_ids = [pos for pos, label in enumerate(q) if label == 1]
            if gold_answer_ids:
                questions_with_answers += 1

            best = np.argmax(scores)
            if scores[best] > thre:
                predicted_questions += 1
                if best in gold_answer_ids:
                    correctly_predicted_questions += 1

        # A correct prediction implies at least one predicted question and at
        # least one question with answers, so both divisions below are safe.
        if correctly_predicted_questions == 0:
            return 0., 0., 0.

        precision = correctly_predicted_questions / predicted_questions * 100
        recall = correctly_predicted_questions / questions_with_answers * 100
        f1 = (2 * precision * recall) / (precision + recall)
        return precision, recall, f1