# main.py
# Clusters the imported data on its Rainfall, Sunshine and MaxWindSpeed
# columns with k-means and shows the clusters in a 3-D scatter plot.
from sklearn.preprocessing import StandardScaler
from helper_functions import import_data, fix_units, normalise_sun
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from mpl_toolkits.mplot3d import Axes3D
def main():
    """Cluster the data with k-means and display the result as a 3-D scatter.

    Steps:
      1. Load the dataframe via ``import_data`` and pass it through
         ``fix_units`` and ``normalise_sun``.
      2. Clamp ``Sunshine`` to [0, 1] and ``MaxWindSpeed`` to [0, 75], then
         drop every row that still contains a missing value.
      3. Standardise the three clustering columns and fit a 5-cluster
         k-means model (``random_state=0``).
      4. Plot the unscaled values in 3-D, coloured by cluster label.

    The function blocks in ``plt.show()`` until the plot window is closed.
    """
    # Columns used both as clustering features and as the plot axes.
    keys = ['Rainfall', 'Sunshine', 'MaxWindSpeed']

    # Import dataframe
    df = import_data()
    # Fix the units. The return value is not used, so the helper is
    # presumably expected to modify ``df`` in place -- TODO confirm.
    fix_units(df)
    # Transform the sunlight column (same in-place assumption as above).
    normalise_sun(df)

    # Clamp out-of-range values, then discard rows with any missing value.
    df['Sunshine'] = df['Sunshine'].clip(0.0, 1.0)
    df['MaxWindSpeed'] = df['MaxWindSpeed'].clip(0.0, 75.0)
    df.dropna(inplace=True)

    # Standardise only the columns that are clustered. StandardScaler works
    # column by column, so this yields the same values as scaling the whole
    # frame while not failing on unrelated non-numeric columns.
    scaled = StandardScaler().fit_transform(df[keys])

    # Cluster and plot
    kmeans = KMeans(n_clusters=5, random_state=0).fit(scaled)

    fig = plt.figure()
    # Create the 3-D axes through the figure so they are actually attached to
    # it. ``Axes3D(fig)`` relied on auto-attachment, which Matplotlib 3.4
    # deprecated and later releases dropped, leaving an empty figure. The
    # file-level ``Axes3D`` import still registers the '3d' projection on
    # older Matplotlib versions.
    ax = fig.add_subplot(111, projection='3d')
    # Plot the original (unscaled) values so the axes keep their real units.
    ax.scatter(*(df[key] for key in keys), c=kmeans.labels_)
    ax.set_xlabel(keys[0])
    ax.set_ylabel(keys[1])
    ax.set_zlabel(keys[2])
    plt.show()


if __name__ == "__main__":
    main()