-
Notifications
You must be signed in to change notification settings - Fork 0
/
simple-linear-regression.txt
65 lines (51 loc) · 1.85 KB
/
simple-linear-regression.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import matplotlib.pyplot as plt
import pandas as pd
import pylab as pl
import numpy as np
# Load the fuel-consumption ratings table and print a quick overview of it.
df = pd.read_csv('Fuel Consumption Ratings.csv', engine='python')
print(df.head())
print(df.describe())

# Narrow the table to the columns explored below, then show a histogram of each.
_selected_columns = [
    'ENGINE SIZE',
    'CYLINDERS',
    'FUEL CONSUMPTION COMB (mpg)',
    'CO2 EMISSIONS (g/km)',
]
cdf = df[_selected_columns]
print(cdf.head(9))
cdf.hist()
plt.show()

# One scatter plot per candidate predictor against CO2 emissions.
# Each entry is (column in df, x-axis label, marker color).
for _column, _x_label, _color in (
    ('ENGINE SIZE', 'ENGINE SIZE', 'red'),
    ('CYLINDERS', 'CYLINDERS', 'green'),
    ('FUEL CONSUMPTION COMB (mpg)', 'FUEL CONSUMPTION COMB', 'yellow'),
):
    plt.scatter(df[_column], df['CO2 EMISSIONS (g/km)'], color=_color, s=7)
    plt.xlabel(_x_label)
    plt.ylabel('CO2 EMISSIONS')
    plt.show()
# Train/test split: draw one uniform random number per row; rows whose draw is
# below 0.8 go to the training set, the remaining rows form the test set.
msk = np.random.rand(len(df)) < 0.8
train, test = cdf[msk], cdf[np.logical_not(msk)]

# Show how the training rows are distributed (engine size vs. CO2 emissions).
_train_engine = train['ENGINE SIZE']
_train_co2 = train['CO2 EMISSIONS (g/km)']
plt.scatter(_train_engine, _train_co2, color='purple', s=7)
plt.xlabel('ENGINE SIZE')
plt.ylabel('CO2 EMISSIONS')
plt.show()
#modeling
# Fit a single-feature linear regression (CO2 emissions as a function of engine
# size) on the training rows, plot the fitted line, then score it on the test rows.
from sklearn import linear_model
from sklearn.metrics import r2_score

regr = linear_model.LinearRegression()

# Use the actual column values as 2-D (n_rows, 1) arrays. np.isnan() would
# instead produce boolean "is missing" masks, so the model would be fit on
# True/False flags rather than on engine sizes and emissions.
# Rows missing the feature or the target are dropped first, because
# LinearRegression rejects NaN input.
train_fit = train[['ENGINE SIZE', 'CO2 EMISSIONS (g/km)']].dropna()
train_x = np.asanyarray(train_fit[['ENGINE SIZE']])
train_y = np.asanyarray(train_fit[['CO2 EMISSIONS (g/km)']])
regr.fit(train_x, train_y)
print('Coefficients: ',regr.coef_)
print('Intercept: ', regr.intercept_)

# Training points with the fitted line drawn over them. The target is 2-D, so
# coef_ is indexed [0][0] and intercept_ [0] to get the scalar slope/offset.
plt.scatter(train['ENGINE SIZE'], train['CO2 EMISSIONS (g/km)'], color='pink', s=7)
plt.plot(train_x, regr.coef_[0][0] * train_x + regr.intercept_[0], '-r')
plt.xlabel('ENGINE SIZE')
plt.ylabel('CO2 EMISSIONS')
plt.show()

# Evaluate on the held-out rows, prepared the same way as the training data.
test_eval = test[['ENGINE SIZE', 'CO2 EMISSIONS (g/km)']].dropna()
test_x = np.asanyarray(test_eval[['ENGINE SIZE']])
test_y = np.asanyarray(test_eval[['CO2 EMISSIONS (g/km)']])
test_y_ = regr.predict(test_x)
print('Mean absolute error: %.2f' % np.mean(np.absolute(test_y_ - test_y)))
# MSE is the mean of the *squared* residuals; without squaring, positive and
# negative errors cancel and the figure is only the mean signed error.
print('Residual sum of sqaures (MSE): %.2f' % np.mean((test_y_ - test_y) ** 2))
# r2_score takes (y_true, y_pred) in that order; the metric is not symmetric.
print('R2-score: %.2f'% r2_score(test_y, test_y_))