-
Notifications
You must be signed in to change notification settings - Fork 1
/
clf.py
91 lines (58 loc) · 1.58 KB
/
clf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#loading the modules
import pandas as pd
import matplotlib.pyplot as plt
import pickle
from sklearn import model_selection
from sklearn.preprocessing import LabelEncoder
from sklearn import ensemble
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler
class classifier(object):
    """Pipeline that loads a CSV, label-encodes it, and fits an XGBRegressor.

    Every pipeline step is a ``@classmethod``, so the working state (``df``,
    ``X``, ``Y``, ``clf``) is stored on the class itself and is therefore
    shared by all instances. The steps build on each other and are meant to
    be called in this order: ``load_data`` -> ``encoding`` -> ``load_x_y`` ->
    ``scale`` -> ``model`` -> ``save_model``.
    """

    def __init__(self, path):
        """Remember the dataset path on the instance.

        Args:
            path: Location of the CSV file. It is only stored here;
                ``load_data`` receives its own ``path`` argument.
        """
        self.path = path

    @classmethod
    def load_data(cls, path):
        """Read the CSV at ``path`` into ``cls.df`` and fill missing values.

        Args:
            path: Path of a comma-separated file readable by ``pd.read_csv``.

        Returns:
            The loaded DataFrame (also stored as ``cls.df``).
        """
        cls.df = pd.read_csv(path, sep=",", engine="python")
        # NOTE: missing cells are replaced with the *string* '0', not the
        # number 0.
        cls.df = cls.df.fillna('0')
        return cls.df

    @classmethod
    def encoding(cls):
        """Label-encode every object-dtype column of ``cls.df`` in place.

        Requires ``load_data`` to have been called first.

        Returns:
            The DataFrame with object columns replaced by integer codes.
        """
        encoder = LabelEncoder()
        # Iterate over the class's own frame. The previous implementation
        # read the length from an unrelated module-level ``df``, which only
        # existed when the file was run as a script.
        for column in cls.df.columns:
            if cls.df[column].dtypes == 'O':
                # The encoder is refit for each column, so codes are
                # independent between columns.
                cls.df[column] = encoder.fit_transform(
                    cls.df[column].astype(str)
                )
        return cls.df

    @classmethod
    def load_x_y(cls):
        """Split ``cls.df`` into input features and the output column.

        All columns except the last become ``cls.X``; the last column
        becomes ``cls.Y``.

        Returns:
            Tuple ``(X, Y)`` of NumPy arrays.
        """
        cls.X = cls.df.iloc[:, :-1].values
        cls.Y = cls.df.iloc[:, -1].values
        return cls.X, cls.Y

    @classmethod
    def scale(cls):
        """Standardize ``cls.X`` with a freshly fitted ``StandardScaler``.

        The fitted scaler is not kept, only the transformed array.

        Returns:
            The scaled feature array (also stored as ``cls.X``).
        """
        scaler = StandardScaler()
        cls.X = scaler.fit_transform(cls.X)
        return cls.X

    @classmethod
    def model(cls):
        """Fit an ``XGBRegressor`` with default settings on ``cls.X``/``cls.Y``.

        Returns:
            The fitted regressor (also stored as ``cls.clf``).
        """
        cls.clf = XGBRegressor()
        cls.clf.fit(cls.X, cls.Y)
        return cls.clf

    @classmethod
    def save_model(cls):
        """Pickle ``cls.clf`` to ``classifier/clf.pkl``.

        The path is relative to the current working directory and the
        ``classifier`` directory must already exist.

        Returns:
            None.
        """
        # The context manager guarantees the file is flushed and closed even
        # if pickling fails.
        with open("classifier/clf.pkl", "wb") as model_file:
            pickle.dump(cls.clf, model_file)
if __name__ == "__main__":
    # Script entry point: run the full training pipeline on the bundled
    # training set and persist the fitted model.
    path = 'Dataset/train.csv'
    obj = classifier(path)

    # Load, then label-encode; `df` ends up bound to the encoded frame.
    df = obj.load_data(path)
    df = obj.encoding()

    # Split into features / target before the remaining steps.
    X, Y = obj.load_x_y()

    # Scale the features, fit the regressor, and write it to disk, in order.
    for step in (obj.scale, obj.model, obj.save_model):
        step()