-
Notifications
You must be signed in to change notification settings - Fork 1
/
balancingData.py
53 lines (46 loc) · 1.55 KB
/
balancingData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import numpy as np
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
from imblearn.combine import SMOTEENN
import os
import cv2
# Maps the integer value of a 4-bit label (as computed in balance_data) back
# to the label vector that is written to the balanced file; 0 maps to the
# all-zero vector.
dic = {
    8: [1, 0, 0, 0],
    4: [0, 1, 0, 0],
    2: [0, 0, 1, 0],
    1: [0, 0, 0, 1],
    0: [0, 0, 0, 0],
}

# Number of the first recording to process.
file_num = 1

# Path of that recording without the '.npy' extension.
file_name = 'collectedData/collected_data-{}'.format(file_num)

# Over-sampler instance; nothing in the visible code uses it.
ros = RandomOverSampler()
def balance_data(file_name):
    """Under-sample one recording so that all label classes are equally sized.

    Loads ``file_name + '.npy'``, whose first row is read as
    ``[frames, labels]``: ``frames`` is a sequence of RGBA images and
    ``labels`` a sequence of bit sequences (e.g. ``[1, 0, 0, 0]``).  Every
    frame is converted to RGB and flattened, every label is parsed as a
    base-2 integer, and the samples are balanced with ``RandomUnderSampler``.
    The surviving samples are reshaped back to images, their labels mapped
    through ``dic``, and the result is saved to
    ``file_name + '_balanced.npy'`` with the same ``[0][0]`` / ``[0][1]``
    layout as the input.

    Args:
        file_name: Path of the recording without the ``.npy`` extension.

    Raises:
        KeyError: If a label's integer value is not a key of ``dic``.
        IndexError: If the recording contains no frames.
    """
    saved_file = file_name + '.npy'
    print(file_name)

    # The recording is a pickled object array; NumPy >= 1.16.3 refuses to
    # load those unless pickling is explicitly allowed.
    # NOTE(security): unpickling can execute arbitrary code - only load
    # recordings that come from a trusted source.
    collected_data = np.load(saved_file, allow_pickle=True)
    frames = collected_data[0][0]
    labels = collected_data[0][1]

    rgb_frames = [cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB) for frame in frames]

    # All frames are flattened using the first frame's shape
    # (assumes every frame has the same dimensions - TODO confirm).
    shape = rgb_frames[0].shape
    dim = shape[0] * shape[1] * shape[2]
    print(shape)
    print(dim)

    # The sampler needs 2-D features and scalar class labels: flatten each
    # image and read each bit sequence as one base-2 integer.
    X = np.array([frame.reshape(dim) for frame in rgb_frames])
    y = np.array(
        [int(''.join(str(bit) for bit in label), 2) for label in labels]
    )

    rus = RandomUnderSampler()
    # fit_sample() has been dropped from recent imbalanced-learn releases in
    # favour of fit_resample(); fall back only when running an old release.
    resample = getattr(rus, 'fit_resample', None)
    if resample is None:
        resample = rus.fit_sample
    X_resampled, y_resampled = resample(X, y)

    balanced_X = [
        sample.reshape(shape[0], shape[1], shape[2]) for sample in X_resampled
    ]
    balanced_y = [dic[label] for label in y_resampled]

    # [frames, labels] is ragged, and NumPy >= 1.24 raises instead of
    # silently building an object array from it, so the container is created
    # explicitly.  Indexing [0][0] / [0][1] on the saved file still yields
    # the frames and the labels.
    collected_data = np.empty((1, 2), dtype=object)
    collected_data[0, 0] = balanced_X
    collected_data[0, 1] = balanced_y
    np.save(file_name + '_balanced.npy', collected_data)
# Balance consecutively numbered recordings, starting at the current
# file_num, and stop at the first number for which no '.npy' file exists.
file_name = "collectedData/collected_data-{}".format(file_num)
while os.path.isfile(file_name + '.npy'):
    balance_data(file_name)
    file_num += 1
    file_name = "collectedData/collected_data-{}".format(file_num)
print("balanced all data")