# -*- coding: utf-8 -*-
"""
Created on Fri Aug 7 13:03:24 2020
@author: Sayanta Chowdhury
"""
import pandas as pd
import numpy as np
#Import Data Set
dataSet = pd.read_csv("diabetes.csv")
#print(dataSet.head())
# Check for null values in each column
print("Null values per column:")
print(dataSet.isnull().sum())
## print(dataSet.isnull().values.any())
# Check each column's data type
print(dataSet.dtypes)
# dataSet.corr()
# Diabetes outcome counts (1 = diabetic, 0 = non-diabetic)
diabetes_true_count = len(dataSet.loc[dataSet['Outcome'] == 1])
diabetes_false_count = len(dataSet.loc[dataSet['Outcome'] == 0])
print("True Outcome: {0}, False Outcome: {1}".format(diabetes_true_count, diabetes_false_count))
# Count zero values in each feature column (excluding Outcome)
print("Zero values per feature column:")
print(dataSet.iloc[:, 0:8].eq(0).sum())
# Feature columns (independent variables)
X = dataSet.iloc[:,:-1].values
# Dependent column (target class)
y = dataSet.iloc[:,8].values
# Replace zeros in columns 1-7 (Glucose through Age, where zero means missing) with the column mean
from sklearn.impute import SimpleImputer
fill_values = SimpleImputer(missing_values=0, strategy='mean')
X[:, 1:8] = fill_values.fit_transform(X[:, 1:8])
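# Optional sanity check (an addition, not part of the original assignment):
# after imputation, the Glucose-to-Age columns should contain no zeros.
print("Zeros remaining in imputed columns:", (X[:, 1:8] == 0).sum())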
# Train/Test Split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 0)
#Feature Scaling
from sklearn.preprocessing import StandardScaler
scale_X = StandardScaler()
X_train = scale_X.fit_transform(X_train)
X_test = scale_X.transform(X_test)
#KNN Classifier Algorithm
from sklearn.neighbors import KNeighborsClassifier
KNNClassifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p=2)
KNNClassifier.fit(X_train, y_train)
KNN_y_pred = KNNClassifier.predict(X_test)
# Train/Test Split Accuracy
from sklearn.metrics import confusion_matrix, accuracy_score
cm_KNNclass = confusion_matrix(y_test, KNN_y_pred)
ac_KNNclass = accuracy_score(y_test,KNN_y_pred)
print("Confusion Matrix :")
print(cm_KNNclass)
print("KNN Train/test Split Accurary: ", ac_KNNclass)
# K-Fold Cross Validation with KNN
from sklearn.model_selection import cross_val_score
#cross_test_View = cross_val_score(KNNClassifier, X, y, cv= 10)
#print(cross_test_View)
KNNscores = cross_val_score(KNNClassifier, X, y, cv= 10).mean()
print("Accuracy of K fold cross validation using KNN:", KNNscores)
# Stratified K-Fold
from sklearn.model_selection import StratifiedKFold
accuracy = []
skf = StratifiedKFold(n_splits=10, random_state = None)
skf.get_n_splits(X, y)
#print("Confusion Matrix of K fold Cross validation ")
for train_index, test_index in skf.split(X, y):
    # print("Train:", train_index, "Validation:", test_index)
    X1_train, X1_test = X[train_index], X[test_index]
    y1_train, y1_test = y[train_index], y[test_index]
    KNNClassifier.fit(X1_train, y1_train)
    prediction = KNNClassifier.predict(X1_test)
    # print(confusion_matrix(y1_test, prediction))
    score = accuracy_score(y1_test, prediction)
    accuracy.append(score)
# print(accuracy)
# Mean accuracy across the stratified folds
print("Accuracy of Stratified K-fold cross validation using KNN:", np.array(accuracy).mean())