-
Notifications
You must be signed in to change notification settings - Fork 102
/
p4 momentum SGD.py
146 lines (127 loc) · 3.66 KB
/
p4 momentum SGD.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import random
# 本代码是一个最简单的线形回归问题,优化函数为minibatch SGD
rate = 0.1 # learning rate
def da(y,y_p,x):
return (y-y_p)*(-x)
def db(y,y_p):
return (y-y_p)*(-1)
def calc_loss(a,b,x,y):
tmp = y - (a * x + b)
tmp = tmp ** 2 # 对矩阵内的每一个元素平方
SSE = sum(tmp) / (2 * len(x))
return SSE
def draw_hill(x,y):
a = np.linspace(-20,20,100)
print(a)
b = np.linspace(-20,20,100)
x = np.array(x)
y = np.array(y)
allSSE = np.zeros(shape=(len(a),len(b)))
for ai in range(0,len(a)):
for bi in range(0,len(b)):
a0 = a[ai]
b0 = b[bi]
SSE = calc_loss(a=a0,b=b0,x=x,y=y)
allSSE[ai][bi] = SSE
a,b = np.meshgrid(a, b)
return [a,b,allSSE]
def shuffle_data(x,y):
# 随机打乱x,y的数据,并且保持x和y一一对应
seed = random.random()
random.seed(seed)
random.shuffle(x)
random.seed(seed)
random.shuffle(y)
def get_batch_data(x,y,batch=3):
shuffle_data(x,y)
x_new = x[0:batch]
y_new = y[0:batch]
return [x_new,y_new]
# 模拟数据
x = [30 ,35,37, 59, 70, 76, 88, 100]
y = [1100, 1423, 1377, 1800, 2304, 2588, 3495, 4839]
# 数据归一化
x_max = max(x)
x_min = min(x)
y_max = max(y)
y_min = min(y)
for i in range(0,len(x)):
x[i] = (x[i] - x_min)/(x_max - x_min)
y[i] = (y[i] - y_min)/(y_max - y_min)
[ha,hb,hallSSE] = draw_hill(x,y)
hallSSE = hallSSE.T# 重要,将所有的losses做一个转置。原因是矩阵是以左上角至右下角顺序排列元素,而绘图是以左下角为原点。
# 初始化a,b值
a = 10.0
b = -20.0
fig = plt.figure(1, figsize=(12, 8))
fig.suptitle('learning rate: %.2f method:momentum SGD'%(rate), fontsize=15)
# 绘制图1的曲面
ax = fig.add_subplot(2, 2, 1, projection='3d')
ax.set_top_view()
ax.plot_surface(ha, hb, hallSSE, rstride=2, cstride=2, cmap='rainbow')
# 绘制图2的等高线图
plt.subplot(2,2,2)
ta = np.linspace(-20, 20, 100)
tb = np.linspace(-20, 20, 100)
plt.contourf(ha,hb,hallSSE,15,alpha=0.5,cmap=plt.cm.hot)
C = plt.contour(ha,hb,hallSSE,15,colors='black')
plt.clabel(C,inline=True)
plt.xlabel('a')
plt.ylabel('b')
plt.ion() # iteration on
all_loss = []
all_step = []
last_a = a
last_b = b
va = 0
vb = 0
gamma = 0.9
for step in range(1,100):
loss = 0
all_da = 0
all_db = 0
shuffle_data(x,y)
[x_new,y_new] = get_batch_data(x,y,batch=4)
for i in range(0,len(x_new)):
y_p = a*x_new[i] + b
loss = loss + (y_new[i] - y_p)*(y_new[i] - y_p)/2
all_da = all_da + da(y_new[i],y_p,x_new[i])
all_db = all_db + db(y_new[i],y_p)
#loss_ = calc_loss(a = a,b=b,x=np.array(x),y=np.array(y))
loss = loss/len(x_new)
# 绘制图1中的loss点
ax.scatter(a, b, loss, color='black')
# 绘制图2中的loss点
plt.subplot(2,2,2)
plt.scatter(a,b,s=5,color='blue')
plt.plot([last_a,a],[last_b,b],color='aqua')
# 绘制图3中的回归直线
plt.subplot(2, 2, 3)
plt.plot(x, y)
plt.plot(x, y, 'o')
x_ = np.linspace(0, 1, 2)
y_draw = a * x_ + b
plt.plot(x_, y_draw)
# 绘制图4的loss更新曲线
all_loss.append(loss)
all_step.append(step)
plt.subplot(2,2,4)
plt.plot(all_step,all_loss,color='orange')
plt.xlabel("step")
plt.ylabel("loss")
# print('a = %.3f,b = %.3f' % (a,b))
last_a = a
last_b = b
va = gamma*va + rate*all_da
vb = gamma*vb + rate*all_db
a = a - va
b = b - vb
if step%1 == 0:
print("step: ", step, " loss: ", loss)
plt.show()
plt.pause(0.01)
plt.show()
plt.pause(99999999999)