手写一层神经网络来拟合函数y = x1^2 + x2 + 100

手写神经网络来拟合函数 y = x1^2 + x2 + 100

摘要

本文首先生成数据，然后搭建包含一个隐藏层的神经网络并用这些数据进行训练，最后利用测试数据评价模型的拟合效果。本文不讲解神经网络的原理和推导过程，适合已经了解神经网络原理、想要动手实现的读者参考。

数据生成

import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D


# Target function: y = x1^2 + x2 + 100
# Generate the data set used for training and testing.
def generate_data(size=1000, seed_num=7):
    """Sample ``size`` random points of y = x1**2 + x2 + 100.

    x1 is drawn uniformly from [10, 20) and x2 uniformly from [100, 400).
    The two inputs are drawn independently of each other: if they moved in
    lockstep, every sample would lie on one curve of the input plane and a
    model trained on them would never see the rest of the domain.

    Args:
        size: Number of samples (rows) to generate.
        seed_num: Seed of the private random generator; the same seed always
            yields the same data.

    Returns:
        A float array of shape ``(size, 3)`` whose columns are x1, x2, y.
    """
    # A private generator keeps the draw reproducible without reseeding
    # NumPy's global random state.
    rng = np.random.RandomState(seed_num)
    x1 = rng.uniform(10, 20, size)
    x2 = rng.uniform(100, 400, size)
    y = x1 ** 2 + x2 + 100
    # Optional 3-D preview of the target surface on a regular grid:
    # figure = plt.figure()
    # ax = figure.add_subplot(projection="3d")
    # xx1, xx2 = np.meshgrid(np.linspace(10, 20, 50), np.linspace(100, 400, 50))
    # yy = xx1 ** 2 + xx2 + 100
    # ax.plot_surface(xx1, xx2, yy, cmap="rainbow")
    # plt.show()
    # One row per sample: (x1, x2, y).
    return np.column_stack((x1, x2, y))

生成1000组(x1, x2, y)样本，其中x1取值于[10, 20]、x2取值于[100, 400]，y = x1^2 + x2 + 100，其3D分布图如下所示：

搭建网络模型

本文将搭建 2-2-1 型神经网络（2 个输入节点、2 个隐藏层节点、1 个输出节点），来学习 y = x1^2 + x2 + 100，其示意图如下：

网络使用均方误差作为损失函数，分批次训练。代码如下：

class Network(object):
    """Fully connected 2-2-1 network: 2 inputs, 2 hidden units, 1 output.

    Neither layer applies an activation function, so the output is an affine
    function of the input. The parameters are fitted with mini-batch gradient
    descent on the mean squared error.
    """

    def __init__(self, seed_num=7):
        """Initialize all weights and biases from a standard normal draw.

        Args:
            seed_num: Seed passed to ``np.random.seed``. Note that this
                reseeds NumPy's global random state as a side effect.
        """
        np.random.seed(seed_num)
        self.w1 = np.random.randn(2, 2)  # input -> hidden weights
        self.b1 = np.random.randn(1, 2)  # hidden-layer bias
        self.w2 = np.random.randn(2, 1)  # hidden -> output weights
        self.b2 = np.random.randn(1, 1)  # output bias

    def forward(self, x):
        """Run the forward pass and cache the layer outputs.

        The cached ``self.x2`` (hidden layer) and ``self.x3`` (output) are
        what ``loss`` and ``update`` read afterwards.

        Args:
            x: Input array of shape ``(n, 2)``.

        Returns:
            The network output, shape ``(n, 1)``.
        """
        self.x2 = np.dot(x, self.w1) + self.b1
        self.x3 = np.dot(self.x2, self.w2) + self.b2
        return self.x3

    def loss(self, y):
        """Return the mean squared error of the last forward pass against ``y``.

        Args:
            y: Targets of shape ``(n, 1)`` matching the last ``forward`` call.
        """
        error = self.x3 - y
        return np.mean(np.power(error, 2))

    def update(self, x, y, learn_rate):
        """Take one gradient-descent step on the mean squared error.

        ``forward(x)`` must have been called with the same ``x`` right before,
        because the gradients are built from the cached ``self.x2``/``self.x3``.

        Args:
            x: Input batch of shape ``(n, 2)``.
            y: Target batch of shape ``(n, 1)``.
            learn_rate: Step size applied to every gradient.
        """
        n_samples = x.shape[0]
        # dL/d(output) per sample for L = mean((output - y) ** 2).
        delta = 2 * (self.x3 - y)

        # Output layer: average over the batch of delta * hidden activation.
        gradient_w2 = np.dot(self.x2.T, delta) / n_samples
        gradient_b2 = np.mean(delta)

        # Push the error back through w2 (still the pre-update value) to get
        # the per-sample error at the hidden layer.
        hidden_delta = np.dot(delta, self.w2.T)
        gradient_w1 = np.dot(x.T, hidden_delta) / n_samples
        gradient_b1 = np.mean(hidden_delta, axis=0, keepdims=True)

        # All gradients are computed before any parameter changes.
        self.w1 -= learn_rate * gradient_w1
        self.b1 -= learn_rate * gradient_b1
        self.w2 -= learn_rate * gradient_w2
        self.b2 -= learn_rate * gradient_b2

    def train(self, x, y, iterations=50, learn_rate=0.01, batch_size=50):
        """Train with mini-batch gradient descent.

        Args:
            x: Inputs of shape ``(n, 2)``.
            y: Targets of shape ``(n, 1)``.
            iterations: Number of passes over the whole data set.
            learn_rate: Step size of every update.
            batch_size: Number of consecutive rows per mini-batch; the last
                batch of a pass may be smaller.

        Returns:
            A list with the loss of every mini-batch, measured before the
            update computed from that batch is applied.

        Raises:
            ValueError: If ``batch_size`` is not positive.
        """
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")
        losses = []
        data_size = x.shape[0]
        for _ in range(iterations):
            for start in range(0, data_size, batch_size):
                mini_x = x[start:start + batch_size, :]
                mini_y = y[start:start + batch_size, :]
                self.forward(mini_x)
                losses.append(self.loss(mini_y))
                self.update(mini_x, mini_y, learn_rate)
        return losses

训练并画出损失值下降图

下面代码中用到的 train_data、test_data、maximums 和 minimums 来自数据归一化与训练/测试集划分步骤，具体见文末的“完整代码”。

# Build the model: every column but the last is an input feature; the last
# column is the target (sliced with -1: so it stays two-dimensional).
train_x, train_y = train_data[:, :-1], train_data[:, -1:]
network = Network()
# Make 20 passes over the training set.
iterations, learn_rate = 20, 0.01
losses = network.train(train_x, train_y, iterations=iterations, learn_rate=learn_rate)

# Plot the recorded losses against their position in the training run.
plot_y = np.array(losses)
plot_x = np.arange(plot_y.shape[0])
plt.plot(plot_x, plot_y)
plt.show()

测试集测试

# Evaluate on the held-out test set.
test_x = test_data[:, :-1]
test_y = test_data[:, -1:]
predict_y = network.forward(test_x)
# Flatten and map the normalized values back to the original scale of y.
scale_factor = maximums[-1] - minimums[-1]
test_y = test_y.reshape(-1) * scale_factor + minimums[-1]
predict_y = predict_y.reshape(-1) * scale_factor + minimums[-1]
# Order the samples by their true value and apply that same order to the
# predictions, so every x position compares a prediction with its own target.
# Sorting the two arrays independently would pair up unrelated samples.
order = np.argsort(test_y)
fig, ax = plt.subplots()
plot_test_x = np.arange(test_x.shape[0])
ax.plot(plot_test_x, test_y[order], label='real')
ax.plot(plot_test_x, predict_y[order], label='predict')
ax.legend()
plt.show()

模型预测与实际值的对比图如下：

存在的问题

模型的预测并不准确且泛化性不好，可能是因为各层都没有使用激活函数：没有激活函数时，整个网络只是输入的线性函数，无法表示 x1^2 这样的非线性项。

# Prediction helper: takes raw x1, x2 and returns the predicted y.
def predict_function(x1, x2):
    """Predict y for a single raw (un-normalized) input pair.

    Reads the module-level ``network`` together with the per-column
    ``minimums`` / ``maximums`` used to min-max normalize the data.

    Args:
        x1: First input, on its original scale.
        x2: Second input, on its original scale.

    Returns:
        The predicted y, mapped back to the original scale.
    """
    # Force a float array: with integer arguments NumPy would build an integer
    # array, and writing the normalized fractions back into it would truncate
    # them (e.g. 0.5 -> 0).
    input_x = np.array([x1, x2], dtype=float).reshape(1, 2)
    # Min-max normalize each feature with the statistics of its column.
    for i in range(2):
        input_x[:, i] = (input_x[:, i] - minimums[i]) / (maximums[i] - minimums[i])
    output_y = network.forward(input_x)
    # Undo the normalization of y (the last column).
    scale_factor = maximums[-1] - minimums[-1]
    return output_y.reshape(-1)[0] * scale_factor + minimums[-1]

print(predict_function(20, 400))
# The article recorded "288.02699335793426 != 300" for this call.
# NOTE(review): 300 is the true value at (x1, x2) = (10, 100); at (20, 400)
# the target is 20**2 + 400 + 100 = 900, so the recorded figures may come
# from a different input -- confirm by re-running.

完整代码

import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D


# Target function: y = x1^2 + x2 + 100
# Generate the data set used for training and testing.
def generate_data(size=1000, seed_num=7):
    """Sample ``size`` random points of y = x1**2 + x2 + 100.

    x1 is drawn uniformly from [10, 20) and x2 uniformly from [100, 400).
    The two inputs are drawn independently of each other: if they moved in
    lockstep, every sample would lie on one curve of the input plane and a
    model trained on them would never see the rest of the domain.

    Args:
        size: Number of samples (rows) to generate.
        seed_num: Seed of the private random generator; the same seed always
            yields the same data.

    Returns:
        A float array of shape ``(size, 3)`` whose columns are x1, x2, y.
    """
    # A private generator keeps the draw reproducible without reseeding
    # NumPy's global random state.
    rng = np.random.RandomState(seed_num)
    x1 = rng.uniform(10, 20, size)
    x2 = rng.uniform(100, 400, size)
    y = x1 ** 2 + x2 + 100
    # Optional 3-D preview of the target surface on a regular grid:
    # figure = plt.figure()
    # ax = figure.add_subplot(projection="3d")
    # xx1, xx2 = np.meshgrid(np.linspace(10, 20, 50), np.linspace(100, 400, 50))
    # yy = xx1 ** 2 + xx2 + 100
    # ax.plot_surface(xx1, xx2, yy, cmap="rainbow")
    # plt.show()
    # One row per sample: (x1, x2, y).
    return np.column_stack((x1, x2, y))


class Network(object):
    """Fully connected 2-2-1 network: 2 inputs, 2 hidden units, 1 output.

    Neither layer applies an activation function, so the output is an affine
    function of the input. The parameters are fitted with mini-batch gradient
    descent on the mean squared error.
    """

    def __init__(self, seed_num=7):
        """Initialize all weights and biases from a standard normal draw.

        Args:
            seed_num: Seed passed to ``np.random.seed``. Note that this
                reseeds NumPy's global random state as a side effect.
        """
        np.random.seed(seed_num)
        self.w1 = np.random.randn(2, 2)  # input -> hidden weights
        self.b1 = np.random.randn(1, 2)  # hidden-layer bias
        self.w2 = np.random.randn(2, 1)  # hidden -> output weights
        self.b2 = np.random.randn(1, 1)  # output bias

    def forward(self, x):
        """Run the forward pass and cache the layer outputs.

        The cached ``self.x2`` (hidden layer) and ``self.x3`` (output) are
        what ``loss`` and ``update`` read afterwards.

        Args:
            x: Input array of shape ``(n, 2)``.

        Returns:
            The network output, shape ``(n, 1)``.
        """
        self.x2 = np.dot(x, self.w1) + self.b1
        self.x3 = np.dot(self.x2, self.w2) + self.b2
        return self.x3

    def loss(self, y):
        """Return the mean squared error of the last forward pass against ``y``.

        Args:
            y: Targets of shape ``(n, 1)`` matching the last ``forward`` call.
        """
        error = self.x3 - y
        return np.mean(np.power(error, 2))

    def update(self, x, y, learn_rate):
        """Take one gradient-descent step on the mean squared error.

        ``forward(x)`` must have been called with the same ``x`` right before,
        because the gradients are built from the cached ``self.x2``/``self.x3``.

        Args:
            x: Input batch of shape ``(n, 2)``.
            y: Target batch of shape ``(n, 1)``.
            learn_rate: Step size applied to every gradient.
        """
        n_samples = x.shape[0]
        # dL/d(output) per sample for L = mean((output - y) ** 2).
        delta = 2 * (self.x3 - y)

        # Output layer: average over the batch of delta * hidden activation.
        gradient_w2 = np.dot(self.x2.T, delta) / n_samples
        gradient_b2 = np.mean(delta)

        # Push the error back through w2 (still the pre-update value) to get
        # the per-sample error at the hidden layer.
        hidden_delta = np.dot(delta, self.w2.T)
        gradient_w1 = np.dot(x.T, hidden_delta) / n_samples
        gradient_b1 = np.mean(hidden_delta, axis=0, keepdims=True)

        # All gradients are computed before any parameter changes.
        self.w1 -= learn_rate * gradient_w1
        self.b1 -= learn_rate * gradient_b1
        self.w2 -= learn_rate * gradient_w2
        self.b2 -= learn_rate * gradient_b2

    def train(self, x, y, iterations=50, learn_rate=0.01, batch_size=50):
        """Train with mini-batch gradient descent.

        Args:
            x: Inputs of shape ``(n, 2)``.
            y: Targets of shape ``(n, 1)``.
            iterations: Number of passes over the whole data set.
            learn_rate: Step size of every update.
            batch_size: Number of consecutive rows per mini-batch; the last
                batch of a pass may be smaller.

        Returns:
            A list with the loss of every mini-batch, measured before the
            update computed from that batch is applied.

        Raises:
            ValueError: If ``batch_size`` is not positive.
        """
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")
        losses = []
        data_size = x.shape[0]
        for _ in range(iterations):
            for start in range(0, data_size, batch_size):
                mini_x = x[start:start + batch_size, :]
                mini_y = y[start:start + batch_size, :]
                self.forward(mini_x)
                losses.append(self.loss(mini_y))
                self.update(mini_x, mini_y, learn_rate)
        return losses


if __name__ == '__main__':
    # Generate the data set, shape = (1000, 3): columns x1, x2, y.
    data = generate_data(1000)
    # Min-max normalize every column to [0, 1]; the per-column extremes are
    # kept so inputs and outputs can be converted back later.
    maximums = np.max(data, axis=0)
    minimums = np.min(data, axis=0)
    data = (data - minimums) / (maximums - minimums)
    # Shuffle the rows; the first 100 become the test set, the rest the
    # training set.
    np.random.shuffle(data)
    test_data = data[:100, :]
    train_data = data[100:, :]

    # Build and train the model: the last column is the target, kept 2-D.
    train_x = train_data[:, :-1]
    train_y = train_data[:, -1:]
    network = Network()
    iterations = 20
    learn_rate = 0.01
    losses = network.train(train_x, train_y, iterations, learn_rate)

    # Optional: plot the recorded losses.
    # plot_x = np.arange(len(losses))
    # plot_y = np.array(losses)
    # plt.plot(plot_x, plot_y)
    # plt.show()

    # Evaluate on the held-out test set.
    test_x = test_data[:, :-1]
    test_y = test_data[:, -1:]
    predict_y = network.forward(test_x)
    # Flatten and map the normalized values back to the original scale of y.
    scale_factor = maximums[-1] - minimums[-1]
    test_y = test_y.reshape(-1) * scale_factor + minimums[-1]
    predict_y = predict_y.reshape(-1) * scale_factor + minimums[-1]
    # Order the samples by their true value and apply that same order to the
    # predictions, so every x position compares a prediction with its own
    # target. Sorting the two arrays independently would pair up unrelated
    # samples.
    order = np.argsort(test_y)
    fig, ax = plt.subplots()
    plot_test_x = np.arange(test_x.shape[0])
    ax.plot(plot_test_x, test_y[order], label='real')
    ax.plot(plot_test_x, predict_y[order], label='predict')
    ax.legend()
    plt.show()

    # Prediction helper: takes raw x1, x2 and returns the predicted y.
    def predict_function(x1, x2):
        """Predict y for a single raw (un-normalized) input pair."""
        # Force a float array: with integer arguments NumPy would build an
        # integer array, and writing the normalized fractions back into it
        # would truncate them (e.g. 0.5 -> 0).
        input_x = np.array([x1, x2], dtype=float).reshape(1, 2)
        # Min-max normalize each feature with the statistics of its column.
        for i in range(2):
            input_x[:, i] = (input_x[:, i] - minimums[i]) / (maximums[i] - minimums[i])
        output_y = network.forward(input_x)
        # Undo the normalization of y (the last column).
        scale_factor = maximums[-1] - minimums[-1]
        return output_y.reshape(-1)[0] * scale_factor + minimums[-1]

    print(predict_function(20, 400))
    # The article recorded "288.02699335793426 != 300" for this call.
    # NOTE(review): 300 is the true value at (x1, x2) = (10, 100); at
    # (20, 400) the target is 20**2 + 400 + 100 = 900, so the recorded
    # figures may come from a different input -- confirm by re-running.

参考链接

用python和numpy实现神经网络

手写一层神经网络来拟合函数y = x1^2 + x2 + 100

Python相关栏目本月热门文章