#!/usr/bin/python3
# Radial Basis Function Network - Classification
# A Gaussian function is used as the activation function.
# Class-wise centroids are found using K-Means clustering.
# A normalized spread (sigma) is calculated from these centroids and used
# as a common spread for all centroids.
# The pseudo-inverse (least-squares) method is used to obtain the output weights.
# These output weights are used to predict the class labels on the test data.
# A hold-out split is used to evaluate the model: 75% of the dataset is used
# for training and the remaining 25% for testing.
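#
# Each hidden neuron j with center c_j computes the Gaussian activation
#   phi_j(x) = exp(-||x - c_j||^2 / (2 * sigma^2)),
# which is what the design-matrix loops below implement.
#
# Expected input format (a sketch inferred from the loading code below;
# the filename 'xyz.tra' and the values are illustrative): each
# whitespace-separated row holds the input features followed by an integer
# class label starting at 1, e.g.
#   0.2  1.3  4.1  1
#   0.7  0.9  3.5  2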
import numpy as np
from math import sqrt, exp
# Convert class labels (1..numClass) into a coded target matrix:
# row i holds +1 in the column of its class and -1 everywhere else.
def coded_conversion(metrix, inp_size):
    coded_metrix = np.zeros(shape=(inp_size, len(np.unique(metrix))))
    coded_metrix += -1
    for i in range(inp_size):
        # Labels are loaded as floats, so cast before indexing.
        coded_metrix[i][int(metrix[i][0]) - 1] = 1
    return coded_metrix
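# Example (a sketch; labels illustrative):
#   >>> coded_conversion(np.array([[1], [3], [2]]), 3)
#   array([[ 1., -1., -1.],
#          [-1., -1.,  1.],
#          [-1.,  1., -1.]])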
# Count how many inputs are assigned to a particular centroid (center).
def get_dims(memberships, centroid_num):
    m = memberships.shape[0]
    row_dim = 0
    for i in range(m):
        if memberships[i][0] == centroid_num:
            row_dim += 1
    return row_dim
# Recompute each centroid as the mean of the inputs assigned to it.
def computeCentroids(x, prev_centroids, memberships, k):
    m, n = x.shape
    centroids = np.zeros(shape=(k, n))
    for i in range(k):
        if not np.any(memberships == i):
            # Empty cluster: keep the previous centroid.
            centroids[i, :] = prev_centroids[i, :]
        else:
            divisor = get_dims(memberships, i)
            prices = np.zeros(shape=(m, n))
            for j in range(m):
                if memberships[j][0] == i:
                    prices[j, :] = x[j, :]
                else:
                    prices[j, :] = 0
            centroids[i, :] = np.sum(prices, axis=0) / divisor
    return centroids
# Build the membership matrix: for each input, the index of the
# closest centroid.
def findClosestCentroids(x, centroids):
    k = centroids.shape[0]
    m = x.shape[0]
    memberships = np.zeros(shape=(m, 1))
    distances = np.zeros(shape=(m, k))
    for i in range(k):
        diffs = np.zeros(shape=(m, x.shape[1]))
        for j in range(m):
            diffs[j, :] = x[j, :] - centroids[i, :]
        sqrdDiffs = diffs ** 2
        temp = np.array([np.sum(sqrdDiffs, axis=1)]).T
        for t in range(m):
            distances[t][i] = temp[t][0]
    for i in range(m):
        # argmin over the row gives the index of the nearest centroid.
        memberships[i][0] = np.argmin(distances[i, :])
    return memberships
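# Example (a sketch; points illustrative):
#   >>> x = np.array([[0.0, 0.0], [10.0, 10.0]])
#   >>> c = np.array([[1.0, 1.0], [9.0, 9.0]])
#   >>> findClosestCentroids(x, c)
#   array([[0.],
#          [1.]])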
# Initialize the centroids by picking k distinct random input points.
def KMeansInitCentroids(x, k):
    randidx = np.random.permutation(x.shape[0])
    centroids = x[randidx[0:k], :]
    return centroids
# K-Means clustering: alternate assignment and update steps until the
# centroids stop changing or max_iters is reached.
def KMeans(x, initial_centroids, max_iters):
    k = initial_centroids.shape[0]
    centroids = initial_centroids
    prevCentroids = centroids
    for i in range(max_iters):
        memberships = findClosestCentroids(x, centroids)
        centroids = computeCentroids(x, centroids, memberships, k)
        if (prevCentroids == centroids).all():
            break
        prevCentroids = centroids
    return centroids, memberships
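# Usage sketch (random initialization; the main block below instead seeds
# K-Means with the first few points of each class):
#   init = KMeansInitCentroids(x, 5)
#   centroids, memberships = KMeans(x, init, 100)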
if __name__ == '__main__':
    # Load the file containing the dataset; outputs are in
    # class-label format.
    NTrain = np.loadtxt('xyz.tra', dtype=float)
    print("loadfile-shape:", NTrain.shape)
    m, n = NTrain.shape
    # The first 75% of the rows are used for training.
    NTD = (m * 3) // 4
    # inp = no. of input neurons, i.e. input features/dimensions.
    inp = n - 1
    print("Inp-features:", inp)
    numRBFNeuronsPerClass = 5
    x_train = NTrain[0:NTD, 0:inp]
    y_train = NTrain[0:NTD, inp:]
    numClass = len(np.unique(y_train))
    y_train_coded = coded_conversion(y_train, NTD)
    final_centroids = np.zeros(shape=(numRBFNeuronsPerClass * numClass, inp))
    counter = 0
    # Obtain class-wise centroids: run K-Means separately on the
    # training points of each class.
    for c in range(numClass):
        extract = []
        for i in range(NTD):
            if int(y_train[i][0]) == c + 1:
                extract.append(i)
        Xc = x_train[extract, :]
        init_centroids = Xc[0:numRBFNeuronsPerClass, :]
        centers, memberships = KMeans(Xc, init_centroids, 100)
        for y in range(numRBFNeuronsPerClass):
            final_centroids[counter, :] = centers[y, :]
            counter += 1
    numRBFNeurons = final_centroids.shape[0]
    centers = final_centroids
    # Obtain the normalized spread from the maximum squared distance
    # between any pair of centroids.
    maxi = 0
    for i in range(numRBFNeurons - 1):
        for j in range(i + 1, numRBFNeurons):
            dist = centers[i, :] - centers[j, :]
            sqrdist = dist ** 2
            temp = np.sum(sqrdist)
            if temp > maxi:
                maxi = temp
    sigma = maxi / sqrt(numRBFNeurons)
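    # Note: as written, sigma is the maximum *squared* inter-centroid
    # distance divided by sqrt(numRBFNeurons). A common alternative
    # heuristic (not used here) is sigma = d_max / sqrt(2 * numRBFNeurons),
    # with d_max the maximum unsquared distance between centroids.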
    # Obtain the output weights with the pseudo-inverse (least-squares) method.
    pseudo = np.zeros(shape=(NTD, numRBFNeurons + 1))
    pseudo[:, numRBFNeurons] = 1  # constant bias column
    for i in range(NTD):
        for j in range(numRBFNeurons):
            dist = x_train[i, :] - centers[j, :]
            sqrdist = dist ** 2
            dividend = np.sum(sqrdist)
            gauss = dividend / (2 * (sigma ** 2))
            pseudo[i][j] = exp(-gauss)
    weight = np.linalg.pinv(pseudo).dot(y_train_coded)
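    # This solves the linear least-squares problem min_W ||Phi W - T||^2,
    # where Phi is the design matrix ('pseudo') of Gaussian activations plus
    # the bias column and T is the coded target matrix: W = pinv(Phi) . T.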
    # Test the network on the remaining 25% of the data.
    x_test = NTrain[NTD:, 0:inp]
    y_test = NTrain[NTD:, inp:]
    NTD = m - NTD
    pseudo = np.zeros(shape=(NTD, numRBFNeurons + 1))
    pseudo[:, numRBFNeurons] = 1  # constant bias column
    for i in range(NTD):
        for j in range(numRBFNeurons):
            dist = x_test[i, :] - centers[j, :]
            sqrdist = dist ** 2
            dividend = np.sum(sqrdist)
            gauss = dividend / (2 * (sigma ** 2))
            pseudo[i][j] = exp(-gauss)
    y_predicted = pseudo.dot(weight)
    # Decode: the predicted class is the output with the largest activation.
    ca = []
    for z in range(NTD):
        ca.append(np.argmax(y_predicted[z, :]) + 1)
    y_predicted = np.array([ca]).T
    correctly_classified = 0
    for i in range(NTD):
        if int(y_predicted[i][0]) == int(y_test[i][0]):
            correctly_classified += 1
    print("Accuracy:", (correctly_classified / float(NTD)) * 100)
    err = y_test - y_predicted
    sumerr = np.sum(err ** 2)
    print("Sum squared error:", sumerr)
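
# Running the script (a sketch; requires 'xyz.tra' in the working directory):
#   $ python3 RBFN_Classification.py
#   loadfile-shape: (..., ...)
#   Inp-features: ...
#   Accuracy: ...
#   Sum squared error: ...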