简述:使用两种数据集,多种方法,多向对比
- 分类任务使用手写数字数据集,小批量梯度下降法,全连接神经网络的输入层为784个神经元,隐藏层为100个神经元,输出层10个神经元。损失函数为交叉熵代价函数,激活函数为sigmoid函数。
- 回归任务使用自构随机数数据集,全连接神经网络的输入层为1000个神经元,隐藏层为100个神经元,输出层10个神经元。损失函数为均方误差代价函数,隐藏层激活函数为ReLU,输出层不加激活(即恒等函数y=x)。
- numpy实现
import numpy as np
import torch  # used by the PyTorch sections later in this file

# Two-layer network trained by plain gradient descent, implemented with
# raw NumPy: 1000 inputs -> 100 hidden (ReLU) -> 10 outputs, sum-of-squares loss.
x = np.random.randn(64, 1000)    # 64 samples, standard-normal features
y = np.random.randn(10)          # targets, broadcast against the (64, 10) predictions
w1 = np.random.randn(1000, 100)  # input -> hidden weights
w2 = np.random.randn(100, 10)    # hidden -> output weights
lr = 0.000001                    # learning rate
epochs = 500                     # fix: was misspelled "epoxhs" and never used (loop hardcoded 500)

for i in range(epochs):
    # forward pass
    h = x.dot(w1)
    h_relu = np.maximum(h, 0)    # ReLU activation
    y_pred = h_relu.dot(w2)
    # loss: sum of squared errors over the whole batch
    loss = np.square(y_pred - y).sum()
    print(i, loss)
    # backward pass: hand-derived gradients of the loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)   # gradient w.r.t. w2
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0                     # ReLU passes gradient only where h > 0
    grad_w1 = x.T.dot(grad_h)             # gradient w.r.t. w1
    # gradient-descent parameter update
    w1 -= lr * grad_w1
    w2 -= lr * grad_w2
执行结果:
······
494 2.309283325916261e-05
495 2.2157265382087743e-05
496 2.1259504455147672e-05
497 2.0398317193778056e-05
498 1.9572198422014806e-05
499 1.8779967141503274e-05
- pytorch实现
pytorch的张量(Tensor)接口在设计上与numpy的ndarray非常相似,所以很多属性和方法的用法同numpy相近。
# Same two-layer regression network, but using PyTorch autograd for the
# backward pass instead of hand-derived gradients.
x = torch.randn(64, 1000)
y = torch.randn(10)  # broadcast against the (64, 10) predictions
w1 = torch.randn(1000, 100, requires_grad=True)  # track gradients for the weights
w2 = torch.randn(100, 10, requires_grad=True)
lr = 0.000001
epochs = 500

for i in range(epochs):
    # forward pass
    h = x.mm(w1)
    h_relu = h.clamp(min=0)  # ReLU activation
    y_pred = h_relu.mm(w2)
    # sum-of-squares loss
    loss = (y_pred - y).pow(2).sum()
    print(i, loss)
    # backward: autograd fills w1.grad / w2.grad
    loss.backward()
    # Update parameters without recording the ops in the autograd graph.
    # Fix: replaces the deprecated `w.data.add_(...)` pattern, and drops the
    # large block of commented-out manual-gradient code.
    with torch.no_grad():
        w1 -= lr * w1.grad
        w2 -= lr * w2.grad
        # reset gradients so they do not accumulate across iterations
        w1.grad.zero_()
        w2.grad.zero_()
执行结果(注:此为上面回归示例的输出;二、分类任务使用手写数字数据集的小批量梯度下降实现见下文 nn.Module 部分):
······
492 tensor(1.7772e-05, grad_fn=)
493 tensor(1.7666e-05, grad_fn=)
494 tensor(1.7485e-05, grad_fn=)
495 tensor(1.7217e-05, grad_fn=)
496 tensor(1.7055e-05, grad_fn=)
497 tensor(1.6938e-05, grad_fn=)
498 tensor(1.6785e-05, grad_fn=)
499 tensor(1.6566e-05, grad_fn=)
- 使用 nn.Module 重构
相比于原来,封装整个框架
import torch
import numpy as np
class Mnist_Logistic(torch.nn.Module):
    """Two-layer fully connected classifier (784 -> 100 -> 10).

    Weights and biases are held as explicit ``nn.Parameter`` tensors
    (standard-normal initialised) rather than ``nn.Linear`` layers; the
    hidden layer uses a sigmoid activation and the output is raw logits.
    """

    def __init__(self):
        super().__init__()
        # Parameter creation order matters for RNG reproducibility.
        self.weights1 = torch.nn.Parameter(torch.randn(784, 100))  # layer-1 weights
        self.weights2 = torch.nn.Parameter(torch.randn(100, 10))   # layer-2 weights
        self.bias1 = torch.nn.Parameter(torch.randn(100))          # layer-1 bias
        self.bias2 = torch.nn.Parameter(torch.randn(10))           # layer-2 bias

    def forward(self, x):
        # `@` is matrix multiplication; the bias broadcasts over the batch.
        hidden = torch.sigmoid(x @ self.weights1 + self.bias1)
        return hidden @ self.weights2 + self.bias2

    def fit(self, x, y, lr, epoxhs, bs):
        """Train with mini-batch gradient descent.

        Args:
            x, y: training inputs and integer class labels.
            lr: learning rate.
            epoxhs: number of epochs (name kept for caller compatibility).
            bs: batch size; a trailing partial batch is skipped.

        Returns:
            The cross-entropy loss of the last processed batch.
        """
        sample_count = x.shape[0]
        for _ in range(epoxhs):
            # Walk the data in full batches of `bs`; leftovers are dropped,
            # exactly like the original start/end while-loop.
            for begin in range(0, sample_count - bs + 1, bs):
                xb = x[begin:begin + bs]
                yb = y[begin:begin + bs]
                loss = torch.nn.functional.cross_entropy(self(xb), yb)
                loss.backward()
                with torch.no_grad():
                    # Manual SGD step over every parameter.
                    for param in self.parameters():
                        param -= param.grad * lr
                    self.zero_grad()
        return loss
# Shared setup; not repeated in the sections below.
lr = 0.0001#learning rate
bs = 64 #samples per mini-batch
epoxhs = 30 #number of epochs (NOTE(review): typo for "epochs"; kept because later calls use this name)
# NOTE(review): assumes ./mnist.npy holds (train_X, test_X, train_y, test_y)
# as a pickled object array — confirm against how the file was produced.
train_X, test_X, train_y, test_y = np.load('./mnist.npy', allow_pickle=True)
x_train = train_X.reshape(60000, 28*28).astype(np.float32)
x_test = test_X.reshape(10000, 28*28).astype(np.float32)
x_train, y_train, x_test, y_test = map(torch.tensor, (x_train, train_y, x_test, test_y))#convert ndarray -> tensor
x,y = x_train, y_train
#instantiate the model and train it
model = Mnist_Logistic()
model.fit(x,y,lr,epoxhs,bs)
执行结果:
tensor(5.3542, grad_fn=)
- 使用 nn.Linear 重构
PyTorch 的 nn.Linear 类建立一个线性层,以替代手动定义和初始化 self.weights 和 self.bias、计算 xb @ self.weights + self.bias 等工作。
import torch
class Mnist_Logistic(torch.nn.Module):
    """Two-layer MLP built from ``nn.Linear`` layers.

    Architecture: in -> hidden (sigmoid) -> out logits. Replaces the
    manually initialised Parameters of the previous version.
    """

    def __init__(self, in_feartures, hid_feartures, out_feartures):
        super().__init__()
        # Fix: the file only runs `import torch`, so the bare name
        # `nn.Linear` raised NameError; use the fully qualified name.
        self.layer1 = torch.nn.Linear(in_feartures, hid_feartures)  # first linear layer
        self.layer2 = torch.nn.Linear(hid_feartures, out_feartures)

    def forward(self, x):
        # forward pass: linear -> sigmoid -> linear (logits)
        x = self.layer1(x)
        x = torch.sigmoid(x)
        return self.layer2(x)

    # Same training loop as the nn.Module version above.
    def fit(self, x, y, lr, epoxhs, bs):
        """Train with mini-batch gradient descent.

        Args:
            x, y: training inputs and integer class labels.
            lr: learning rate.
            epoxhs: number of epochs (typo kept for caller compatibility).
            bs: batch size; a trailing partial batch is skipped.

        Returns:
            The cross-entropy loss of the last processed batch.
        """
        for i in range(epoxhs):
            start = 0
            end = bs
            while end <= x.shape[0]:
                xb = x[start:end]
                yb = y[start:end]
                start = end
                end += bs
                pred = self(xb)
                # classification task: cross-entropy on logits
                loss = torch.nn.functional.cross_entropy(pred, yb)
                loss.backward()
                with torch.no_grad():
                    # manual SGD update of every parameter
                    for p in self.parameters():
                        p -= p.grad * lr
                    self.zero_grad()
        return loss
# NOTE(review): "modle" is a typo for "model"; harmless here since the
# name is never referenced again.
modle = Mnist_Logistic(784,100,10)
modle.fit(x,y,lr,epoxhs,bs)
执行结果:
tensor(0.8863, grad_fn=)
- 使用 optim 重构
我们可以使用优化器的 step 方法自动完成一次参数更新,而不必手动遍历并更新每个参数
import torch
class Mnist_Logistic(torch.nn.Module):
    """Two-layer MLP (``nn.Linear``) trained via ``torch.optim.SGD``.

    Same network as the previous version, but the manual parameter-update
    loop is replaced by an optimizer's ``step``/``zero_grad``.
    """

    def __init__(self, in_feartures, hid_feartures, out_feartures):
        super().__init__()
        # Fix: bare `nn.Linear` raised NameError (file only has `import torch`).
        self.layer1 = torch.nn.Linear(in_feartures, hid_feartures)
        self.layer2 = torch.nn.Linear(hid_feartures, out_feartures)

    def forward(self, x):
        # forward pass: linear -> sigmoid -> linear (logits)
        x = self.layer1(x)
        x = torch.sigmoid(x)
        return self.layer2(x)

    def fit(self, x, y, lr, epoxhs, bs):
        """Train with mini-batch SGD via an optimizer.

        Args:
            x, y: training inputs and integer class labels.
            lr: learning rate.
            epoxhs: number of epochs (typo kept for caller compatibility).
            bs: batch size; a trailing partial batch is skipped.

        Returns:
            The cross-entropy loss of the last processed batch.
        """
        # changed part: the optimizer owns the update rule
        opt = torch.optim.SGD(self.parameters(), lr=lr)
        # Fix: original `range(epoxhs, bs)` counted from 30 up to 64
        # instead of running `epoxhs` epochs.
        for i in range(epoxhs):
            start = 0
            end = bs
            while end <= x.shape[0]:
                xb = x[start:end]
                yb = y[start:end]
                start = end
                end += bs
                pred = self(xb)
                # classification task: cross-entropy on logits
                loss = torch.nn.functional.cross_entropy(pred, yb)
                loss.backward()
                # changed part: one SGD step, then clear gradients
                opt.step()
                opt.zero_grad()
        return loss
# NOTE(review): "modle" is a typo for "model"; harmless here since the
# name is never referenced again.
modle = Mnist_Logistic(784,100,10)
modle.fit(x,y,lr,epoxhs,bs)
执行结果:
tensor(0.8116, grad_fn=)
可以看出每一次重构都是在前一次的基础上进行简化步骤,所以一般线性层重构,并使用step来自动更新参数。至于优化模型的方法,还可以引入正则化项(L1,L2)和提前终止策略。



