metric.py
from pathlib import Path
import json

import numpy as np


class Metric:
    """MAP@K evaluation helpers for course and topic predictions."""

    def __init__(self, domain, data_dir='data/model_data'):
        self.domain = domain
        self.data_dir = data_dir
    def apk(self, actual, predicted, k=50):
        """
        Computes the average precision at k.

        This function computes the average precision at k between two lists
        of items.

        Parameters
        ----------
        actual : list
            A list of elements that are to be predicted (order doesn't matter)
        predicted : list
            A list of predicted elements (order does matter)
        k : int, optional
            The maximum number of predicted elements

        Returns
        -------
        score : double
            The average precision at k over the input lists
        """
        if not actual:
            return 0.0
        if len(predicted) > k:
            predicted = predicted[:k]
        score = 0.0
        num_hits = 0.0
        for i, p in enumerate(predicted):
            # Count p as a hit only on its first occurrence in the ranking,
            # and accumulate the precision at that rank.
            if p in actual and p not in predicted[:i]:
                num_hits += 1.0
                score += num_hits / (i + 1.0)
        return score / min(len(actual), k)
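    # Worked example (illustrative, not in the original file): with
    # actual = [1, 2, 3] and predicted = [1, 4, 2] at k = 3, item 1 is a
    # hit at rank 1 (precision 1/1) and item 2 is a hit at rank 3
    # (precision 2/3), so apk = (1/1 + 2/3) / min(3, 3) ~= 0.5556.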
    def mapk(self, actual, predicted, k=50):
        """
        Computes the mean average precision at k.

        This function computes the mean average precision at k between two
        lists of lists of items.

        Parameters
        ----------
        actual : list
            A list of lists of elements that are to be predicted
            (order doesn't matter in the lists)
        predicted : list
            A list of lists of predicted elements
            (order matters in the lists)
        k : int, optional
            The maximum number of predicted elements

        Returns
        -------
        score : double
            The mean average precision at k over the input lists
        """
        # Average apk over all (actual, predicted) pairs; note that zip
        # truncates to the shorter of the two lists.
        return np.mean([self.apk(a, p, k) for a, p in zip(actual, predicted)])
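    # Illustrative check (not in the original file):
    # mapk([[1, 2], [3]], [[1], [4]]) averages apk([1, 2], [1]) = 0.5 and
    # apk([3], [4]) = 0.0, giving 0.25.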
    def mapk_train_course(self, pred):
        samples = json.loads(Path(f'{self.data_dir}/train.json').read_text())
        actual = [sample['course'] for sample in samples]
        return self.mapk(actual, pred)

    def mapk_train_topic(self, pred):
        samples = json.loads(Path(f'{self.data_dir}/train.json').read_text())
        actual = [sample['topic'] for sample in samples]
        return self.mapk(actual, pred)

    def mapk_valseen_course(self, pred):
        samples = json.loads(Path(f'{self.data_dir}/valseen.json').read_text())
        actual = [sample['course'] for sample in samples]
        return self.mapk(actual, pred)

    def mapk_valseen_topic(self, pred):
        samples = json.loads(Path(f'{self.data_dir}/valseen.json').read_text())
        actual = [sample['topic'] for sample in samples]
        return self.mapk(actual, pred)

    def mapk_valunseen_course(self, pred):
        samples = json.loads(Path(f'{self.data_dir}/valunseen.json').read_text())
        actual = [sample['course'] for sample in samples]
        return self.mapk(actual, pred)

    def mapk_valunseen_topic(self, pred):
        samples = json.loads(Path(f'{self.data_dir}/valunseen.json').read_text())
        actual = [sample['topic'] for sample in samples]
        return self.mapk(actual, pred)
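    # Note: each split file (e.g. {data_dir}/train.json) is assumed to be a
    # JSON array of objects carrying 'course' and 'topic' list fields, i.e.
    # [{"course": [...], "topic": [...]}, ...]. This format is inferred from
    # the lookups above, not documented in the original file.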
    def train_metric(self, pred):
        # Course predictions are scored against the course labels for both
        # the seen and unseen domains; everything else is scored as topics.
        if self.domain in ('seen_course', 'unseen_course'):
            return self.mapk_train_course(pred)
        return self.mapk_train_topic(pred)

    def val_metric(self, pred):
        # Dispatch to the validation split and target matching self.domain.
        if self.domain == 'seen_course':
            return self.mapk_valseen_course(pred)
        elif self.domain == 'seen_topic':
            return self.mapk_valseen_topic(pred)
        elif self.domain == 'unseen_course':
            return self.mapk_valunseen_course(pred)
        else:
            return self.mapk_valunseen_topic(pred)
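

# Minimal usage sketch (not part of the original file). The prediction
# lists below are placeholders; the split JSON files under data/model_data/
# are assumed to exist in the format noted above.
if __name__ == '__main__':
    metric = Metric(domain='seen_course')
    # apk/mapk can be exercised without any data files:
    print(metric.apk([1, 2, 3], [1, 4, 2], k=3))   # ~0.5556
    print(metric.mapk([[1, 2], [3]], [[1], [4]]))  # 0.25
    # With real ranked predictions (one list per sample in valseen.json):
    # score = metric.val_metric(pred)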