-
Notifications
You must be signed in to change notification settings - Fork 0
/
E2_clf_synthetic.py
77 lines (59 loc) · 2.25 KB
/
E2_clf_synthetic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
"""
E2 - classification for synthetic streams
"""
import numpy as np
from sklearn import clone
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from tqdm import tqdm
from sklearn.metrics import balanced_accuracy_score
# Fix NumPy's global RNG so the per-stream shuffling done later is repeatable.
np.random.seed(1233)

# Names of the measure groups; each one maps to a 'results/<name>.npy' file.
measures = [
    "clustering",
    "complexity",
    "concept",
    "general",
    "info-theory",
    "itemset",
    "landmarking",
    "model-based",
    "statistical",
]
# Single seed shared by every base classifier that accepts `random_state`.
_CLF_SEED = 11313

# Unfitted prototypes; the evaluation loop clones one per fold before fitting.
base_clfs = [
    GaussianNB(),
    KNeighborsClassifier(),
    SVC(random_state=_CLF_SEED),
    DecisionTreeClassifier(random_state=_CLF_SEED),
    MLPClassifier(random_state=_CLF_SEED),
]
# Cross-validation protocol: n_splits folds, repeated n_repeats times.
n_splits, n_repeats = 2, 5

# Sizes of the drift-type and stream-replication axes of the results array.
n_drift_types, stream_reps = 3, 5
# One balanced-accuracy score per
# (measure group, drift type, stream replication, CV fold, base classifier).
n_folds = n_splits * n_repeats
clf_res = np.zeros(
    (len(measures), n_drift_types, stream_reps, n_folds, len(base_clfs))
)

# The splitter is loop-invariant: with an integer random_state every call to
# split() produces the same sequence of folds, so it is built only once.
rskf = RepeatedStratifiedKFold(
    n_splits=n_splits, n_repeats=n_repeats, random_state=3242
)

# Using the bar as a context manager closes it even if an iteration raises.
with tqdm(total=clf_res.size) as pbar:
    for m_id, m in enumerate(measures):
        res = np.load('results/%s.npy' % m)

        for d_id, res_drift in enumerate(res):
            for r_id, res_rep in enumerate(res_drift):
                # Shuffle the rows. Fancy indexing returns a copy, so the
                # in-place clean-up below never modifies `res`.
                p = np.random.permutation(res_rep.shape[0])
                res_rep = res_rep[p]

                # Rows are chunks; columns are the measures followed by the
                # label in the last column.
                X = res_rep[:, :-1]
                y = res_rep[:, -1]

                # Replace NaN and +/-inf with 1 so every classifier can fit.
                X[~np.isfinite(X)] = 1

                for fold, (train, test) in enumerate(rskf.split(X, y)):
                    for base_id, base_c in enumerate(base_clfs):
                        # Clone so that no fitted state leaks between folds.
                        clf = clone(base_c)
                        pred = clf.fit(X[train], y[train]).predict(X[test])
                        acc = balanced_accuracy_score(y[test], pred)
                        clf_res[m_id, d_id, r_id, fold, base_id] = acc
                        pbar.update(1)

                # Mean score of each classifier over the folds of this stream.
                print(m, np.mean(clf_res[m_id, d_id, r_id], axis=0))

np.save('results/clf.npy', clf_res)