-
Notifications
You must be signed in to change notification settings - Fork 0
/
E2_clf_semi.py
81 lines (61 loc) · 2.45 KB
/
E2_clf_semi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
"""
E2 - classification for semi-synthetic streams
"""
import numpy as np
from sklearn import clone
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from tqdm import tqdm
from sklearn.metrics import balanced_accuracy_score
# Seed NumPy's global generator so every later np.random draw in this script
# is reproducible from run to run.
np.random.seed(1233)

# One entry per input file: each name is substituted into
# 'results/semi_%s.npy' when the results are loaded.
# NOTE(review): these look like meta-feature group names — confirm against
# the script that produced the .npy files.
measures = [
    "clustering",
    "complexity",
    "concept",
    "general",
    "info-theory",
    "itemset",
    "landmarking",
    "model-based",
    "statistical",
]
# Prototype estimators; the evaluation loop clones one of these for every fit,
# so the instances stored here are never trained themselves.
# The first two are built with default arguments only.
base_clfs = [
    GaussianNB(),
    KNeighborsClassifier(),
]
# The remaining three all receive the same fixed random_state.
base_clfs += [
    seeded_estimator(random_state=11313)
    for seeded_estimator in (SVC, DecisionTreeClassifier, MLPClassifier)
]
# Experiment dimensions.
origial_datasets = 6   # size of the first axis of every loaded result array
n_drift_types = 2      # size of the second axis of every loaded result array
n_splits = 2           # folds per repetition of the stratified k-fold
n_repeats = 5          # repetitions of the stratified k-fold

# One score slot per (measure, dataset, drift type, fold, classifier).
clf_res = np.zeros(
    (
        len(measures),
        origial_datasets,
        n_drift_types,
        n_splits * n_repeats,
        len(base_clfs),
    )
)

# The bar advances once per stored score, so its total is simply the number
# of cells in the result array.
pbar = tqdm(total=clf_res.size)
# Evaluate every base classifier on every (measure, dataset, drift type)
# combination with repeated stratified k-fold cross-validation and store the
# balanced accuracy of each fold in clf_res.

# Number of rows kept for each (dataset, drift type) pair when reshaping.
# NOTE(review): 5000 is carried over from the original hard-coded reshape —
# presumably repetitions x chunks of the stream; confirm with the generator.
rows_per_stream = 5000

# Built once: the splitter's configuration is identical for every stream.
rskf = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=n_repeats, random_state=3242)

for m_id, m in enumerate(measures):
    res = np.load('results/semi_%s.npy' % m)
    # Use the same constants that size clf_res so the two cannot drift apart.
    # Resulting axes: (dataset, drift type, row, features + label).
    res = res.reshape(origial_datasets, n_drift_types, rows_per_stream, -1)

    for origin_id, res_origin in enumerate(res):
        for d_id, res_drift in enumerate(res_origin):
            # res_drift: (row, features + label). Shuffle the rows; indexing
            # with the permutation also yields a copy, so the in-place
            # clean-up below does not touch the loaded array.
            p = np.random.permutation(res_drift.shape[0])
            res_drift = res_drift[p]

            # The last column is the label, everything before it is a feature.
            X = res_drift[:, :-1]
            y = res_drift[:, -1]

            # Replace NaN and +/-inf feature values with 1 in a single pass.
            X[~np.isfinite(X)] = 1

            for fold, (train, test) in enumerate(rskf.split(X, y)):
                for base_id, base_c in enumerate(base_clfs):
                    # Fit a fresh copy so no state leaks between folds.
                    clf = clone(base_c)
                    pred = clf.fit(X[train], y[train]).predict(X[test])
                    acc = balanced_accuracy_score(y[test], pred)

                    clf_res[m_id, origin_id, d_id, fold, base_id] = acc
                    pbar.update(1)

            # Mean over folds: one value per base classifier.
            print(m, np.mean(clf_res[m_id, origin_id, d_id], axis=0))

    # Checkpoint after each measure so partial results survive an interrupted
    # run; the last write contains the complete array.
    # NOTE(review): the original nesting of this save was lost with the
    # indentation — the final file is the same whichever level it sat at.
    np.save('results/semi_clf.npy', clf_res)

# Release the progress bar once all scores are stored.
pbar.close()