- Experimental content
- Experimental results
- Gradient descent method
- Self-organised method
- random
- k-means
- Training data sets are provided
- First method
- Second method
- Third method
- Experimental analysis
- Conclusions
-
Experiment One: complete the coding for RBFNN
-
Experiment Two: complete the coding for RBFNN by using the following method of calculating mu and sigma.
-
Training data sets are provided
There are three data sets: xor data, moon data and circle data. You can use one of the provided data to train and test your RBFNN, or you can conduct experiments on all the three data sets.
# -*- coding: utf-8 -*-
# @Time : 2022/5/6 8:22
# @Author : sido
# @FileName: RBF5.py
# @Software: PyCharm
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
# 设置matplotlib正常显示中文和负号
matplotlib.rcParams['font.sans-serif']=['SimHei'] # 用黑体显示中文
matplotlib.rcParams['axes.unicode_minus']=False # 正常显示负号
np.random.seed(20)
class RBF:
    """RBF network where the centers (mu), widths (sigma) and output
    weights (w) are all trained jointly by per-sample gradient descent."""

    def __init__(self, input_dim, hidden_node, out_dim, threshold=1.8, a=0.1, K=10):
        self.input_dim = input_dim      # dimensionality of the input layer
        self.hidden_node = hidden_node  # number of hidden (RBF) units
        self.out_dim = out_dim          # dimensionality of the output layer
        self.threshold = threshold      # decision threshold mapping raw output to label {1, 2}
        self.a = a                      # learning rate
        self.Sigma = np.random.randn(hidden_node)         # RBF widths (sigma), random init
        self.U = np.random.randn(hidden_node, input_dim)  # RBF centers (mu), random init
        self.W = np.random.randn(hidden_node)             # hidden-to-output weights
        # BUG FIX: was hard-coded `self.K = 10`, silently ignoring the K argument.
        self.K = K                      # number of training epochs

    def _basisfunc(self, c, d, ci):
        """Gaussian activation exp(-||c - d||^2 / (2 * sigma_ci^2))."""
        return np.exp(-0.5 * (1 / self.Sigma[ci] ** 2) * np.linalg.norm(c - d) ** 2)

    def _calcAct(self, X):
        """Hidden-layer activation vector for ONE sample X (shape (input_dim,))."""
        G = np.zeros(self.hidden_node, dtype=np.float64)
        for ci, c in enumerate(self.U):
            G[ci] = self._basisfunc(c, X, ci)
        return G

    def train(self, X, Y):
        """Run K epochs of per-sample gradient descent on mu, sigma and w.

        NOTE(review): the error is computed against the *thresholded*
        prediction (1 or 2), not the raw network output — preserved as the
        original author's design.
        """
        for k in range(self.K):
            for i in range(len(X)):
                G = self._calcAct(X[i])
                y_pred = G @ self.W
                y_pred = 2 if y_pred > self.threshold else 1
                error = Y[i] - y_pred
                for j in range(self.hidden_node):
                    # Gradients of the squared error w.r.t. mu_j, sigma_j and w_j.
                    diff = X[i] - self.U[j]
                    u = -self.W[j] / self.Sigma[j] ** 2 * error * G[j] * diff
                    sigma = -self.W[j] / self.Sigma[j] ** 3 * error * G[j] * np.dot(diff, diff.T)
                    w = -error * G[j]
                    self.U[j] = self.U[j] - self.a * u
                    self.Sigma[j] = self.Sigma[j] - self.a * sigma
                    self.W[j] = self.W[j] - self.a * w
            # BUG FIX: the separators printed as literal "n----" — the
            # backslashes of "\n" had been lost.  Also dropped the always-true
            # `if k % 1 == 0` guard: this prints once per epoch.
            print("\n-------- k: ", k + 1,
                  "\n-- centers: ", self.U,
                  "\n---- Sigma: ", self.Sigma,
                  "\n-------- W: ", self.W,
                  "\n")

    def get_loss(self, Y_real, Y_pred):
        """Mean squared error between true labels and predicted labels."""
        assert len(Y_real) == len(Y_pred), "标签Shape 不等于 预测Shape"
        loss = (Y_real - Y_pred) @ (Y_real - Y_pred).T / len(Y_real)
        return loss

    def predict(self, X):
        """Predict a label (1 or 2) for every row of X (shape (n, input_dim))."""
        G = np.zeros((X.shape[0], self.hidden_node), dtype=np.float64)
        for ci, c in enumerate(self.U):
            for xi, x in enumerate(X):
                G[xi, ci] = self._basisfunc(c, x, ci)
        Y = G @ self.W
        for i in range(len(Y)):
            Y[i] = 1 if Y[i] < self.threshold else 2
        return Y
if __name__ == '__main__':
    # ---------- load the data ----------
    x_path = "./Data/moon data/X.txt"
    y_path = "./Data/moon data/y.txt"
    with open(x_path) as fx, open(y_path) as fy:
        x_lines = fx.readlines()
        y_lines = fy.readlines()
    X = np.array([[float(v) for v in line.split()] for line in x_lines])
    Y = np.array([float(v) for v in y_lines])
    # ---------- train the model and predict ----------
    rbf = RBF(2, 5, 1)        # instantiate the network
    rbf.train(X, Y)           # fit on the whole data set
    z = rbf.predict(X)        # predicted labels
    loss = rbf.get_loss(Y, z)
    print("loss: ", loss)
    # ---------- visualise the results ----------
    plt.figure(figsize=(6, 8))
    plt.suptitle("RBFNN", fontsize=28, color='r')
    ax = plt.subplot(211)
    ax.set_title("原始", loc="left", fontsize=18)
    plt.scatter(X[:, 0], X[:, 1], c=Y)
    ax = plt.subplot(212)
    ax.set_title("预测", loc="left", fontsize=18)
    plt.scatter(X[:, 0], X[:, 1], c=z)
    plt.show()
Self-organised method
random
# -*- coding: utf-8 -*-
# @Time : 2022/5/5 23:16
# @Author : sido
# @FileName: RBF4.py
# @Software: PyCharm
import numpy
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
# 设置matplotlib正常显示中文和负号
matplotlib.rcParams['font.sans-serif']=['SimHei'] # 用黑体显示中文
matplotlib.rcParams['axes.unicode_minus']=False # 正常显示负号
np.random.seed(20)
class RBF:
    """RBF network whose centers are picked as a random subset of the
    training samples; the output weights come from a pseudo-inverse solve."""

    def __init__(self, input_dim, num_centers, out_dim, threshold=1.8):
        self.input_dim = input_dim
        self.num_centers = num_centers
        self.out_dim = out_dim
        self.beta = np.zeros(num_centers)   # per-center width coefficients
        self.centers = [np.random.uniform(-1, 1, input_dim) for _ in range(num_centers)]
        self.W = np.random.random((num_centers, out_dim))
        self.threshold = threshold          # decision threshold mapping output to label {1, 2}

    def _basisfunc(self, c, d, ci):
        """Gaussian activation exp(-beta_ci * ||c - d||^2)."""
        return np.exp(-self.beta[ci] * np.linalg.norm(c - d) ** 2)

    def _calcAct(self, X):
        """Activation matrix G of shape (n_samples, num_centers)."""
        G = np.zeros((X.shape[0], self.num_centers), dtype=np.float64)
        for row, sample in enumerate(X):
            for col, center in enumerate(self.centers):
                G[row, col] = self._basisfunc(center, sample, col)
        return G

    def train(self, X, Y):
        """Pick random training samples as centers, derive the widths from
        inter-center distances, then solve for W with the pseudo-inverse."""
        chosen = np.random.permutation(X.shape[0])[:self.num_centers]
        self.centers = [X[idx, :] for idx in chosen]
        denom = pow(2 * self.num_centers, 0.5)
        for i in range(self.num_centers):
            for j in range(self.num_centers):
                dist = np.linalg.norm(self.centers[i] - self.centers[j])
                # Sequential update: the raw distance is compared against the
                # already-scaled running value (kept exactly as the original).
                if dist > self.beta[i]:
                    self.beta[i] = dist / denom
        G = self._calcAct(X)
        self.W = np.linalg.pinv(G) @ Y

    def predict(self, X):
        """Label each sample 1 (below threshold) or 2 (at/above threshold)."""
        raw = self._calcAct(X) @ self.W
        for i in range(len(raw)):
            raw[i] = 1 if raw[i] < self.threshold else 2
        return raw

    def get_loss(self, Y_real, Y_pred):
        """Mean squared error between true and predicted labels."""
        assert len(Y_real) == len(Y_pred), "标签Shape 不等于 预测Shape"
        diff = Y_real - Y_pred
        return diff @ diff.T / len(Y_real)
if __name__ == '__main__':
    # ---------- load the data ----------
    path_x = "./Data/moon data/X.txt"
    path_y = "./Data/moon data/y.txt"
    with open(path_x) as fx, open(path_y) as fy:
        feature_lines = fx.readlines()
        label_lines = fy.readlines()
    X = np.array([list(map(float, row.split())) for row in feature_lines])
    Y = np.array(list(map(float, label_lines)))
    # ---------- train the model and predict ----------
    model = RBF(2, 100, 1)      # instantiate with 100 random centers
    model.train(X, Y)
    z = model.predict(X)
    loss = model.get_loss(Y, z)
    print("loss: ", loss)
    # ---------- visualise the results ----------
    plt.figure(figsize=(6, 8))
    plt.suptitle("RBFNN", fontsize=28, color='r')
    ax = plt.subplot(211)
    ax.set_title("原始", loc="left", fontsize=18)
    plt.scatter(X[:, 0], X[:, 1], c=Y)
    ax = plt.subplot(212)
    ax.set_title("预测", loc="left", fontsize=18)
    plt.scatter(X[:, 0], X[:, 1], c=z)
    plt.show()
k-means
# -*- coding: utf-8 -*-
# @Time : 2022/5/5 23:16
# @Author : sido
# @FileName: RBF4.py
# @Software: PyCharm
import numpy
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn.cluster import KMeans
# 设置matplotlib正常显示中文和负号
matplotlib.rcParams['font.sans-serif']=['SimHei'] # 用黑体显示中文
matplotlib.rcParams['axes.unicode_minus']=False # 正常显示负号
np.random.seed(20)
class RBF:
    """RBF network whose centers come from k-means clustering of the
    training data; the output weights come from a pseudo-inverse solve."""

    def __init__(self, input_dim, num_centers, out_dim, threshold=1.5):
        self.input_dim = input_dim
        self.num_centers = num_centers
        self.out_dim = out_dim
        self.beta = np.zeros(num_centers)   # per-center width coefficients
        self.centers = [np.random.uniform(-1, 1, input_dim) for _ in range(num_centers)]
        self.W = np.random.random((num_centers, out_dim))
        self.threshold = threshold          # decision threshold mapping output to label {1, 2}

    def _basisfunc(self, c, d, ci):
        """Gaussian activation exp(-beta_ci * ||c - d||^2)."""
        return np.exp(-self.beta[ci] * np.linalg.norm(c - d) ** 2)

    def _calcAct(self, X):
        """Activation matrix of shape (n_samples, num_centers)."""
        G = np.zeros((X.shape[0], self.num_centers), dtype=np.float64)
        for ci, center in enumerate(self.centers):
            for xi, sample in enumerate(X):
                G[xi, ci] = self._basisfunc(center, sample, ci)
        return G

    def train(self, X, Y):
        """Cluster X with k-means to place the centers, derive the widths
        from inter-center distances, then solve for W via the pseudo-inverse."""
        self.centers = KMeans(n_clusters=self.num_centers, random_state=0).fit(X).cluster_centers_
        scale = pow(2 * self.num_centers, 0.5)
        for i in range(self.num_centers):
            for j in range(self.num_centers):
                # Sequential update comparing the raw distance against the
                # already-scaled running value (kept exactly as the original).
                d = np.linalg.norm(self.centers[i] - self.centers[j])
                if d > self.beta[i]:
                    self.beta[i] = d / scale
        self.W = np.linalg.pinv(self._calcAct(X)) @ Y

    def predict(self, X):
        """Label each sample 1 (below threshold) or 2 (at/above threshold)."""
        out = self._calcAct(X) @ self.W
        for k in range(len(out)):
            out[k] = 1 if out[k] < self.threshold else 2
        return out

    def get_loss(self, Y_real, Y_pred):
        """Mean squared error between true and predicted labels."""
        assert len(Y_real) == len(Y_pred), "标签Shape 不等于 预测Shape"
        residual = Y_real - Y_pred
        return residual @ residual.T / len(Y_real)
if __name__ == '__main__':
    # ---------- load the data ----------
    fx_name = "./Data/moon data/X.txt"
    fy_name = "./Data/moon data/y.txt"
    with open(fx_name) as fx, open(fy_name) as fy:
        rows = fx.readlines()
        labels = fy.readlines()
    X = np.array([[float(tok) for tok in r.split()] for r in rows])
    Y = np.array([float(lbl) for lbl in labels])
    # ---------- train the model and predict ----------
    net = RBF(2, 50, 1)     # instantiate with 50 k-means centers
    net.train(X, Y)
    z = net.predict(X)
    loss = net.get_loss(Y, z)
    print("loss: ", loss)
    # ---------- visualise the results ----------
    plt.figure(figsize=(6, 8))
    plt.suptitle("RBFNN", fontsize=28, color='r')
    ax = plt.subplot(211)
    ax.set_title("原始", loc="left", fontsize=18)
    plt.scatter(X[:, 0], X[:, 1], c=Y)
    ax = plt.subplot(212)
    ax.set_title("预测", loc="left", fontsize=18)
    plt.scatter(X[:, 0], X[:, 1], c=z)
    plt.show()
Training data sets are provided
First method
- moon data
- circle data
- xor data
- moon data
- circle data
- xor data
- moon data
- circle data
- xor data
- 从实验的结果来看,在所给数据集下,第三种方法优于第二种方法,第二种方法优于第一种方法。
- 三种方法在前两种类型的数据的效果比较不错
- 第一种方法在moon data和circle data上可以快速地收敛,只需要训练四到五轮就可以达到较好的预测结果,但在xor data上表现不佳。第一种方法需要调节四个参数:学习率a、退出条件K、阈值threshold和神经元个数hidden_node。在拟合第三种数据时,调节良久都未调节出一个好的预测结果,我认为有以下三个原因:
- 一是需要调节的参数过多,难以调节
- 二是没有实战经验,对参数调节没有直觉上的认识
- 三是有可能这种方法不适合这种类型的数据(可能是我太菜了)
- 第二种方法对第三种数据的拟合效果优于第一种方法,但我仅仅通过调节神经元个数没法进一步提高拟合的效果,仅调节阈值反而可能会使效果更差。同时调节二者也难以找到一个平衡位置来使分类效果最好。
- 第三种方法在三种数据上都有较好的性能,相较于第二种方法开销会更大,需要使用k-Means算法来找到mu。
- 实验实现起来整体比较困难,无论是梯度下降中的前向传播和反向传播,公式的推导和矩阵等的运算都极其容易出错
虽然勉强完成了实验的内容,但是实验还有很多的缺陷。由于个人能力的问题,可能在公式推导、代码实现上还有很大的问题。如在梯度下降中没有计算训练过程中的损失,也没有可视化训练的损失。代码比较冗余,三种方法分别采用了三个python文件来实现,其中还有很多共用的代码。可以采用继承和方法重写等方法来减少重复的代码量。
收获:
- 在自己不断参考前人的博客,笔记,老师的ppt后,经过一下午不断的写代码,调试代码,终于基本实现的实验的内容,这是一个很大的收获。
- 本来自己很不习惯用类,用面向对象的方法来写代码的,鉴于前面做实验都没有使用面向对象的方法来写代码,所以强制自己学并使用面向对象的方法来完成实验,期间也在b站,博客上学习了许多与面向对象相关的视频和文章,对面向对象的方法有了更深的理解。
- 在代码的编写和调试的过程中也收获了许多,一是提高了代码的编写能力,二是尝试用pytharm中的debug来调试代码,减少了在需要查看的地方写print语句,导致代码量上升,代码也不美观。
- 最后也在写代码的过程中有一些感悟:尽量不要去修改已有代码来适应输入的变化,尽量修改输入的样式来适应代码,不然可能导致代码越来越乱,越改越错。



