import numpy as np
from matplotlib import pyplot as plt
# Generate the weights and biases. layers_dim gives the number of neurons in each layer;
# e.g. [2, 3, 1] describes a three-layer network (counting the input): 2 inputs, 3 hidden units, 1 output.
def init_parameters(layers_dim):
    L = len(layers_dim)
    parameters = {}
    for i in range(1, L):
        parameters["w" + str(i)] = np.random.random([layers_dim[i], layers_dim[i - 1]])
        parameters["b" + str(i)] = np.zeros((layers_dim[i], 1))
    return parameters
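# For example, init_parameters([1, 25, 1]) (the configuration used below) produces
# w1 with shape (25, 1), b1 with shape (25, 1), w2 with shape (1, 25) and b2 with shape (1, 1).
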
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))
# Derivative of sigmoid
def sigmoid_prime(z):
    return sigmoid(z) * (1 - sigmoid(z))
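# (The derivative above uses the identity d/dz sigmoid(z) = sigmoid(z) * (1 - sigmoid(z)),
# which follows from differentiating 1 / (1 + exp(-z)).)
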
# Forward propagation. Takes the input x and all weights and biases, which are stored in
# the parameters dict. Returns caches holding every layer's a and z; a[layers] is the final output.
def forward(x, parameters):
    a = []
    z = []
    caches = {}
    a.append(x)
    z.append(x)  # placeholder so z and a share the same layer index
    layers = len(parameters) // 2
    # Every hidden layer uses sigmoid
    for i in range(1, layers):
        z_temp = parameters["w" + str(i)].dot(a[i - 1]) + parameters["b" + str(i)]
        z.append(z_temp)
        a.append(sigmoid(z_temp))
    # The last layer is linear (no sigmoid)
    z_temp = parameters["w" + str(layers)].dot(a[layers - 1]) + parameters["b" + str(layers)]
    z.append(z_temp)
    a.append(z_temp)
    caches["z"] = z
    caches["a"] = a
    return caches, a[layers]
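# With the setup used below (x of shape (1, 100) and layers_dim [1, 25, 1]), caches["a"]
# holds arrays of shape (1, 100), (25, 100) and (1, 100), and the second return value is
# the (1, 100) network output.
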
# Backward propagation. parameters stores every layer's weights and biases; caches stores
# every layer's a and z. al is the last layer's output from the forward pass, y is the
# ground truth. The returned grades dict holds the derivatives of the loss w.r.t. every w and b.
def backward(parameters, caches, al, y):
    layers = len(parameters) // 2
    grades = {}
    m = y.shape[1]
    # The last layer has no activation function;
    # these are the formulas from the figure above.
    grades["dz" + str(layers)] = al - y
    grades["dw" + str(layers)] = grades["dz" + str(layers)].dot(caches["a"][layers - 1].T) / m
    grades["db" + str(layers)] = np.sum(grades["dz" + str(layers)], axis=1, keepdims=True) / m
    # All earlier layers use the sigmoid activation
    for i in reversed(range(1, layers)):
        grades["dz" + str(i)] = parameters["w" + str(i + 1)].T.dot(grades["dz" + str(i + 1)]) * sigmoid_prime(caches["z"][i])
        grades["dw" + str(i)] = grades["dz" + str(i)].dot(caches["a"][i - 1].T) / m
        grades["db" + str(i)] = np.sum(grades["dz" + str(i)], axis=1, keepdims=True) / m
    return grades
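# A minimal numerical gradient check, added here as a sketch (it is not part of the
# original code). It nudges a single entry of one parameter and compares the
# finite-difference slope of the implied loss, (1/(2*m)) * sum((al - y)**2) (which is what
# dz = al - y corresponds to), against the analytic gradient from backward(). The function
# name, the default entry (0, 0) and eps are choices made here purely for illustration.
def gradient_check(parameters, x, y, name="w1", idx=(0, 0), eps=1e-6):
    caches, al = forward(x, parameters)
    grades = backward(parameters, caches, al, y)
    analytic = grades["d" + name][idx]
    m = y.shape[1]
    original = parameters[name][idx]
    # Evaluate the loss with the chosen entry nudged up and down.
    parameters[name][idx] = original + eps
    _, al_plus = forward(x, parameters)
    loss_plus = np.sum(np.square(al_plus - y)) / (2 * m)
    parameters[name][idx] = original - eps
    _, al_minus = forward(x, parameters)
    loss_minus = np.sum(np.square(al_minus - y)) / (2 * m)
    parameters[name][idx] = original  # restore the original value
    numeric = (loss_plus - loss_minus) / (2 * eps)
    return analytic, numeric
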
# Update all weights and biases by one gradient-descent step
def update_grades(parameters, grades, learning_rate):
    layers = len(parameters) // 2
    for i in range(1, layers + 1):
        parameters["w" + str(i)] -= learning_rate * grades["dw" + str(i)]
        parameters["b" + str(i)] -= learning_rate * grades["db" + str(i)]
    return parameters
# Compute the loss
def compute_loss(al, y):
    return np.mean(np.square(al - y))
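# Note: the reported value is the mean squared error. backward() uses dz = al - y, which
# corresponds to minimising (1/(2*m)) * sum((al - y)**2), so the printed loss is twice the
# objective actually being optimised; the difference only rescales the effective learning rate.
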
# Load the data
def load_data():
    """
    Generate the dataset: 100 points of y = 20 * sin(2 * pi * x) for x in [0, 1).
    """
    x = np.arange(0.0, 1.0, 0.01)
    y = 20 * np.sin(2 * np.pi * x)
    # Visualise the data
    plt.scatter(x, y)
    return x, y
# Test run: train the network and plot the fit
x, y = load_data()
x = x.reshape(1, 100)
y = y.reshape(1, 100)
plt.scatter(x, y)
parameters = init_parameters([1, 25, 1])
al = 0
for i in range(4000):
    caches, al = forward(x, parameters)
    grades = backward(parameters, caches, al, y)
    parameters = update_grades(parameters, grades, learning_rate=0.3)
    if i % 100 == 0:
        print(compute_loss(al, y))
plt.scatter(x, al)
plt.show()
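
# A small usage sketch (not part of the original code): evaluate the trained network on a
# denser grid to visualise the fitted curve next to the training points.
x_test = np.linspace(0.0, 1.0, 200).reshape(1, -1)
_, y_pred = forward(x_test, parameters)
plt.scatter(x, y)
plt.plot(x_test.ravel(), y_pred.ravel(), color="red")
plt.show()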