简述:使用两种数据集,多种方法,多向对比
- 分类任务使用手写数字数据集,小批量梯度下降法,全连接神经网络的输入层为784个神经元,隐藏层为100个神经元,输出层10个神经元。损失函数为交叉熵代价函数,激活函数为sigmoid函数。
- 回归任务使用自构随机数数据集,全连接神经网络的输入层为1000个神经元,隐藏层为100个神经元,输出层10个神经元。损失函数为均方误差代价函数,隐藏层激活函数为ReLU,输出层不加激活(即恒等函数y=x)。
- numpy实现
import numpy as np
import torch  # used by the PyTorch sections later in this file

# Two-layer network trained by plain gradient descent, implemented with
# raw NumPy: 1000 inputs -> 100 hidden (ReLU) -> 10 outputs, sum-of-squares loss.
x = np.random.randn(64, 1000)    # 64 samples, standard-normal features
y = np.random.randn(10)          # targets, broadcast against the (64, 10) predictions
w1 = np.random.randn(1000, 100)  # input -> hidden weights
w2 = np.random.randn(100, 10)    # hidden -> output weights
lr = 0.000001                    # learning rate
epochs = 500                     # fix: was misspelled "epoxhs" and never used (loop hardcoded 500)

for i in range(epochs):
    # forward pass
    h = x.dot(w1)
    h_relu = np.maximum(h, 0)    # ReLU activation
    y_pred = h_relu.dot(w2)
    # loss: sum of squared errors over the whole batch
    loss = np.square(y_pred - y).sum()
    print(i, loss)
    # backward pass: hand-derived gradients of the loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)   # gradient w.r.t. w2
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0                     # ReLU passes gradient only where h > 0
    grad_w1 = x.T.dot(grad_h)             # gradient w.r.t. w1
    # gradient-descent parameter update
    w1 -= lr * grad_w1
    w2 -= lr * grad_w2
执行结果:
······
494 2.309283325916261e-05
495 2.2157265382087743e-05
496 2.1259504455147672e-05
497 2.0398317193778056e-05
498 1.9572198422014806e-05
499 1.8779967141503274e-05
- pytorch实现
pytorch的张量(Tensor)接口在设计上与numpy的ndarray非常相似,所以很多属性和方法的用法同numpy相近。
# Same two-layer regression network, but using PyTorch autograd for the
# backward pass instead of hand-derived gradients.
x = torch.randn(64, 1000)
y = torch.randn(10)  # broadcast against the (64, 10) predictions
w1 = torch.randn(1000, 100, requires_grad=True)  # track gradients for the weights
w2 = torch.randn(100, 10, requires_grad=True)
lr = 0.000001
epochs = 500

for i in range(epochs):
    # forward pass
    h = x.mm(w1)
    h_relu = h.clamp(min=0)  # ReLU activation
    y_pred = h_relu.mm(w2)
    # sum-of-squares loss
    loss = (y_pred - y).pow(2).sum()
    print(i, loss)
    # backward: autograd fills w1.grad / w2.grad
    loss.backward()
    # Update parameters without recording the ops in the autograd graph.
    # Fix: replaces the deprecated `w.data.add_(...)` pattern, and drops the
    # large block of commented-out manual-gradient code.
    with torch.no_grad():
        w1 -= lr * w1.grad
        w2 -= lr * w2.grad
        # reset gradients so they do not accumulate across iterations
        w1.grad.zero_()
        w2.grad.zero_()
执行结果(注:此为上面回归示例的输出;二、分类任务使用手写数字数据集的小批量梯度下降实现见下文 nn.Module 部分):
······
492 tensor(1.7772e-05, grad_fn=)
493 tensor(1.7666e-05, grad_fn=)
494 tensor(1.7485e-05, grad_fn=)
495 tensor(1.7217e-05, grad_fn=)
496 tensor(1.7055e-05, grad_fn=)
497 tensor(1.6938e-05, grad_fn=)
498 tensor(1.6785e-05, grad_fn=)
499 tensor(1.6566e-05, grad_fn=)
- 使用 nn.Module 重构
相比于原来,封装整个框架
import torch
import numpy as np
class Mnist_Logistic(torch.nn.Module):
    """Two-layer fully connected classifier (784 -> 100 -> 10).

    Weights and biases are held as explicit ``nn.Parameter`` tensors
    (standard-normal initialised) rather than ``nn.Linear`` layers; the
    hidden layer uses a sigmoid activation and the output is raw logits.
    """

    def __init__(self):
        super().__init__()
        # Parameter creation order matters for RNG reproducibility.
        self.weights1 = torch.nn.Parameter(torch.randn(784, 100))  # layer-1 weights
        self.weights2 = torch.nn.Parameter(torch.randn(100, 10))   # layer-2 weights
        self.bias1 = torch.nn.Parameter(torch.randn(100))          # layer-1 bias
        self.bias2 = torch.nn.Parameter(torch.randn(10))           # layer-2 bias

    def forward(self, x):
        # `@` is matrix multiplication; the bias broadcasts over the batch.
        hidden = torch.sigmoid(x @ self.weights1 + self.bias1)
        return hidden @ self.weights2 + self.bias2

    def fit(self, x, y, lr, epoxhs, bs):
        """Train with mini-batch gradient descent.

        Args:
            x, y: training inputs and integer class labels.
            lr: learning rate.
            epoxhs: number of epochs (name kept for caller compatibility).
            bs: batch size; a trailing partial batch is skipped.

        Returns:
            The cross-entropy loss of the last processed batch.
        """
        sample_count = x.shape[0]
        for _ in range(epoxhs):
            # Walk the data in full batches of `bs`; leftovers are dropped,
            # exactly like the original start/end while-loop.
            for begin in range(0, sample_count - bs + 1, bs):
                xb = x[begin:begin + bs]
                yb = y[begin:begin + bs]
                loss = torch.nn.functional.cross_entropy(self(xb), yb)
                loss.backward()
                with torch.no_grad():
                    # Manual SGD step over every parameter.
                    for param in self.parameters():
                        param -= param.grad * lr
                    self.zero_grad()
        return loss
# Shared setup; not repeated in the sections below.
lr = 0.0001#learning rate
bs = 64 #samples per mini-batch
epoxhs = 30 #number of epochs (NOTE(review): typo for "epochs"; kept because later calls use this name)
# NOTE(review): assumes ./mnist.npy holds (train_X, test_X, train_y, test_y)
# as a pickled object array — confirm against how the file was produced.
train_X, test_X, train_y, test_y = np.load('./mnist.npy', allow_pickle=True)
x_train = train_X.reshape(60000, 28*28).astype(np.float32)
x_test = test_X.reshape(10000, 28*28).astype(np.float32)
x_train, y_train, x_test, y_test = map(torch.tensor, (x_train, train_y, x_test, test_y))#convert ndarray -> tensor
x,y = x_train, y_train
#instantiate the model and train it
model = Mnist_Logistic()
model.fit(x,y,lr,epoxhs,bs)
执行结果:
tensor(5.3542, grad_fn=)
- 使用 nn.Linear 重构
PyTorch 的 nn.Linear 类建立一个线性层,以替代手动定义和初始化 self.weights 和 self.bias、计算 xb @ self.weights + self.bias 等工作。
import torch
class Mnist_Logistic(torch.nn.Module):
    """Two-layer MLP built from ``nn.Linear`` layers.

    Architecture: in -> hidden (sigmoid) -> out logits. Replaces the
    manually initialised Parameters of the previous version.
    """

    def __init__(self, in_feartures, hid_feartures, out_feartures):
        super().__init__()
        # Fix: the file only runs `import torch`, so the bare name
        # `nn.Linear` raised NameError; use the fully qualified name.
        self.layer1 = torch.nn.Linear(in_feartures, hid_feartures)  # first linear layer
        self.layer2 = torch.nn.Linear(hid_feartures, out_feartures)

    def forward(self, x):
        # forward pass: linear -> sigmoid -> linear (logits)
        x = self.layer1(x)
        x = torch.sigmoid(x)
        return self.layer2(x)

    # Same training loop as the nn.Module version above.
    def fit(self, x, y, lr, epoxhs, bs):
        """Train with mini-batch gradient descent.

        Args:
            x, y: training inputs and integer class labels.
            lr: learning rate.
            epoxhs: number of epochs (typo kept for caller compatibility).
            bs: batch size; a trailing partial batch is skipped.

        Returns:
            The cross-entropy loss of the last processed batch.
        """
        for i in range(epoxhs):
            start = 0
            end = bs
            while end <= x.shape[0]:
                xb = x[start:end]
                yb = y[start:end]
                start = end
                end += bs
                pred = self(xb)
                # classification task: cross-entropy on logits
                loss = torch.nn.functional.cross_entropy(pred, yb)
                loss.backward()
                with torch.no_grad():
                    # manual SGD update of every parameter
                    for p in self.parameters():
                        p -= p.grad * lr
                    self.zero_grad()
        return loss
# NOTE(review): "modle" is a typo for "model"; harmless here since the
# name is never referenced again.
modle = Mnist_Logistic(784,100,10)
modle.fit(x,y,lr,epoxhs,bs)
执行结果:
tensor(0.8863, grad_fn=)
- 使用 optim 重构
我们可以使用优化器的 step 方法自动完成一次参数更新,而不必手动遍历并更新每个参数
import torch
class Mnist_Logistic(torch.nn.Module):
    """Two-layer MLP (``nn.Linear``) trained via ``torch.optim.SGD``.

    Same network as the previous version, but the manual parameter-update
    loop is replaced by an optimizer's ``step``/``zero_grad``.
    """

    def __init__(self, in_feartures, hid_feartures, out_feartures):
        super().__init__()
        # Fix: bare `nn.Linear` raised NameError (file only has `import torch`).
        self.layer1 = torch.nn.Linear(in_feartures, hid_feartures)
        self.layer2 = torch.nn.Linear(hid_feartures, out_feartures)

    def forward(self, x):
        # forward pass: linear -> sigmoid -> linear (logits)
        x = self.layer1(x)
        x = torch.sigmoid(x)
        return self.layer2(x)

    def fit(self, x, y, lr, epoxhs, bs):
        """Train with mini-batch SGD via an optimizer.

        Args:
            x, y: training inputs and integer class labels.
            lr: learning rate.
            epoxhs: number of epochs (typo kept for caller compatibility).
            bs: batch size; a trailing partial batch is skipped.

        Returns:
            The cross-entropy loss of the last processed batch.
        """
        # changed part: the optimizer owns the update rule
        opt = torch.optim.SGD(self.parameters(), lr=lr)
        # Fix: original `range(epoxhs, bs)` counted from 30 up to 64
        # instead of running `epoxhs` epochs.
        for i in range(epoxhs):
            start = 0
            end = bs
            while end <= x.shape[0]:
                xb = x[start:end]
                yb = y[start:end]
                start = end
                end += bs
                pred = self(xb)
                # classification task: cross-entropy on logits
                loss = torch.nn.functional.cross_entropy(pred, yb)
                loss.backward()
                # changed part: one SGD step, then clear gradients
                opt.step()
                opt.zero_grad()
        return loss
# NOTE(review): "modle" is a typo for "model"; harmless here since the
# name is never referenced again.
modle = Mnist_Logistic(784,100,10)
modle.fit(x,y,lr,epoxhs,bs)
执行结果:
tensor(0.8116, grad_fn=)
可以看出每一次重构都是在前一次的基础上进行简化步骤,所以一般线性层重构,并使用step来自动更新参数。至于优化模型的方法,还可以引入正则化项(L1,L2)和提前终止策略。



