The three-layer BP neural network is shown in the figure above, where $x_i$ denotes the input of the $i$-th input-layer node (which is also its output), $z_j$ denotes the output of the $j$-th hidden-layer node, $y_k$ denotes the output of the $k$-th output-layer node, $v_{ij}$ denotes the weight from the $i$-th input-layer node to the $j$-th hidden-layer node, $w_{jk}$ denotes the weight from the $j$-th hidden-layer node to the $k$-th output-layer node, $\theta_j$ is the threshold of the $j$-th hidden-layer node, and $\gamma_k$ is the threshold of the $k$-th output-layer node. The activation function is the Sigmoid function:

$$f(x)=\frac{1}{1+e^{-x}}$$
The derivative of the Sigmoid function:

$$f'(x)=f(x)\big(1-f(x)\big)$$
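This identity can be verified in one line:

$$f'(x)=\frac{e^{-x}}{(1+e^{-x})^2}=\frac{1}{1+e^{-x}}\cdot\frac{e^{-x}}{1+e^{-x}}=f(x)\big(1-f(x)\big)$$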
Input to the $j$-th hidden-layer node:

$$\alpha_j=\sum_{i}v_{ij}x_i$$
Output of the $j$-th hidden-layer node:

$$z_j=f\left(\alpha_j-\theta_j\right)$$
Input to the $k$-th output-layer node:

$$\beta_k=\sum_{j}w_{jk}z_j$$
Output of the $k$-th output-layer node:

$$y_k=f\left(\beta_k-\gamma_k\right)$$
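These four formulas are the whole forward pass. A minimal C sketch for a single sample follows; the constants NUM_IN, NUM_HID, NUM_OUT and the array names are illustrative only, not part of the library below (the full version is bpnn_ForwardPropagation):

#include <math.h>

#define NUM_IN  3
#define NUM_HID 4
#define NUM_OUT 1

static double sigmoid(double x) { return 1.0/(1.0+exp(-x)); }

/* forward pass for one sample: x -> z -> y */
static void forward(const double x[NUM_IN],
                    const double v[NUM_IN][NUM_HID], const double theta[NUM_HID],
                    const double w[NUM_HID][NUM_OUT], const double gamma_[NUM_OUT],
                    double z[NUM_HID], double y[NUM_OUT])
{
    int i, j, k;
    for (j = 0; j < NUM_HID; j++) {            /* z_j = f(alpha_j - theta_j) */
        double alpha = 0.0;
        for (i = 0; i < NUM_IN; i++)
            alpha += v[i][j] * x[i];
        z[j] = sigmoid(alpha - theta[j]);
    }
    for (k = 0; k < NUM_OUT; k++) {            /* y_k = f(beta_k - gamma_k)  */
        double beta = 0.0;
        for (j = 0; j < NUM_HID; j++)
            beta += w[j][k] * z[j];
        y[k] = sigmoid(beta - gamma_[k]);
    }
}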
Error function at the output layer:

$$E=\frac{1}{2}\sum_{k}\left(y_k-t_k\right)^2$$

(the factor $\frac{1}{2}$ keeps the gradients below free of a stray factor of 2, which is also why the code accumulates 0.5*error*error)
where $t_k$ is the ground-truth label from the training set. The connection weights are adjusted according to the back-propagated error: each weight is corrected in the direction opposite to the gradient of the error function. With $\eta$ denoting the learning rate, $\Delta w_{jk}$ is computed as:

$$\Delta w_{jk}=-\eta\frac{\partial E}{\partial w_{jk}}=-\eta\left(y_k-t_k\right)y_k(1-y_k)z_j$$
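Spelling out the chain rule behind the second equality:

$$\frac{\partial E}{\partial w_{jk}}
=\frac{\partial E}{\partial y_k}\cdot\frac{\partial y_k}{\partial \beta_k}\cdot\frac{\partial \beta_k}{\partial w_{jk}}
=(y_k-t_k)\cdot y_k(1-y_k)\cdot z_j$$

using $\partial y_k/\partial\beta_k=f'(\beta_k-\gamma_k)=y_k(1-y_k)$.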
Update rule for $w_{jk}$:

$$w_{jk}^{new}=w_{jk}+\Delta w_{jk}$$
$\Delta\gamma_k$ is computed as:

$$\Delta\gamma_k=-\eta\frac{\partial E}{\partial\gamma_k}=\eta\left(y_k-t_k\right)y_k(1-y_k)$$

The sign flips because $\gamma_k$ enters with a minus sign: $y_k=f(\beta_k-\gamma_k)$, so $\partial y_k/\partial\gamma_k=-y_k(1-y_k)$.
Update rule for $\gamma_k$:

$$\gamma_k^{new}=\gamma_k+\Delta\gamma_k$$
$\Delta v_{ij}$ is computed as:

$$\Delta v_{ij}=-\eta\frac{\partial E}{\partial v_{ij}}=-\eta\sum_k\big((y_k-t_k)y_k(1-y_k)w_{jk}\big)\,z_j(1-z_j)\,x_i$$
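Here the chain rule has to pass through every output node, because each $z_j$ feeds all of them:

$$\frac{\partial E}{\partial v_{ij}}
=\sum_k\frac{\partial E}{\partial y_k}\cdot\frac{\partial y_k}{\partial \beta_k}\cdot\frac{\partial \beta_k}{\partial z_j}\cdot\frac{\partial z_j}{\partial \alpha_j}\cdot\frac{\partial \alpha_j}{\partial v_{ij}}
=\sum_k\big((y_k-t_k)\,y_k(1-y_k)\,w_{jk}\big)\cdot z_j(1-z_j)\cdot x_i$$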
Update rule for $v_{ij}$:

$$v_{ij}^{new}=v_{ij}+\Delta v_{ij}$$
$\Delta\theta_j$ is computed as:

$$\Delta\theta_j=-\eta\frac{\partial E}{\partial\theta_j}=\eta\sum_k\big((y_k-t_k)y_k(1-y_k)w_{jk}\big)\,z_j(1-z_j)$$

(positive for the same reason as $\Delta\gamma_k$: $\theta_j$ enters $z_j=f(\alpha_j-\theta_j)$ with a minus sign)
Update rule for $\theta_j$:

$$\theta_j^{new}=\theta_j+\Delta\theta_j$$
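For a single training sample, the four update rules above translate almost line by line into C. A minimal sketch continuing the forward-pass example earlier (same illustrative constants and arrays; the full version in this post is bpnn_BackPropagation):

/* one gradient step for a single sample; eta is the learning rate,
   x, z, y are the forward-pass values and t is the target vector */
static void update(double eta,
                   const double x[NUM_IN], const double z[NUM_HID],
                   const double y[NUM_OUT], const double t[NUM_OUT],
                   double v[NUM_IN][NUM_HID], double theta[NUM_HID],
                   double w[NUM_HID][NUM_OUT], double gamma_[NUM_OUT])
{
    double g[NUM_OUT], e[NUM_HID];
    int i, j, k;
    for (k = 0; k < NUM_OUT; k++) {
        g[k] = (y[k] - t[k]) * y[k] * (1.0 - y[k]); /* output-layer gradient term */
        gamma_[k] += eta * g[k];                    /* Delta gamma_k = +eta*g_k   */
    }
    for (j = 0; j < NUM_HID; j++) {
        double s = 0.0;
        for (k = 0; k < NUM_OUT; k++)
            s += w[j][k] * g[k];                    /* pre-update weights, as in the formulas */
        e[j] = s * z[j] * (1.0 - z[j]);             /* hidden-layer gradient term */
        for (k = 0; k < NUM_OUT; k++)
            w[j][k] -= eta * g[k] * z[j];           /* Delta w_jk = -eta*g_k*z_j  */
        theta[j] += eta * e[j];                     /* Delta theta_j = +eta*e_j   */
        for (i = 0; i < NUM_IN; i++)
            v[i][j] -= eta * e[j] * x[i];           /* Delta v_ij = -eta*e_j*x_i  */
    }
}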
Data preprocessing uses the Z-score algorithm:

$$x_{new}=\frac{x-\mu}{\sigma}$$
where $\mu$ is the sample mean and $\sigma$ is the sample standard deviation; after this transformation every feature is rescaled to zero mean and unit standard deviation.
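A tiny self-contained example (the numbers are made up) that standardizes one feature column with the same population-variance formula that bpnn_Normalize uses below:

#include <math.h>
#include <stdio.h>

int main(void)
{
    double x[5] = {2.0, 4.0, 4.0, 4.0, 6.0};
    double sum = 0.0, sq = 0.0, mean, std;
    int i, n = 5;
    for (i = 0; i < n; i++) { sum += x[i]; sq += x[i]*x[i]; }
    mean = sum / n;                      /* mu = 4.0                     */
    std  = sqrt(sq / n - mean * mean);   /* population sigma ~= 1.265    */
    for (i = 0; i < n; i++)
        x[i] = (x[i] - mean) / std;      /* z-score, as in Zscore()      */
    for (i = 0; i < n; i++)
        printf("%.3f ", x[i]);
    return 0;
}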
The training procedure of the BP neural network is shown in the figure below.
// BPNN.h
#ifndef BPNN_H
#define BPNN_H
#define MAX_NUM_INPUT 260 // maximum nodes number of input layer
#define MAX_NUM_HIDDEN 100 // maximum nodes number of hidden layer
#define MAX_NUM_OUTPUT 1 // maximum nodes number of output layer
#define MAX_NUM_LAYER_OUT 260 // per-layer stride of the flattened layer_out buffer
typedef struct BPNN
{
int trained; // 0 untrained, 1 trained
int num_input; // nodes number of input layer
int num_hidden; // nodes number of hidden layer
int num_output; // nodes number of output layer
double rate; // learning rate
double weight_input_hidden[MAX_NUM_INPUT][MAX_NUM_HIDDEN]; // weight of the input layer to the hidden layer
double weight_hidden_output[MAX_NUM_HIDDEN][MAX_NUM_OUTPUT]; // weight of the hidden layer to the output layer
double threshold_hidden[MAX_NUM_HIDDEN]; // threshold of hidden layer
double threshold_output[MAX_NUM_OUTPUT]; // threshold of output layer
double error[MAX_NUM_OUTPUT]; // error of output of each node
double error_total; // total error
double mean_std[MAX_NUM_INPUT][2]; // mean and standard deviation of training data
}BPNN;
void bpnn_Init(BPNN *bpnn_ptr,int num_input,int num_hidden,int num_output,double learn_rate);
void bpnn_ForwardPropagation(BPNN *bpnn_ptr,const double *data,const double *label,double *layer_out);
void bpnn_BackPropagation(BPNN *bpnn_ptr,const double *layer_out);
void bpnn_Train(BPNN *bpnn_ptr,double *data,double *label,int num_sample,int num_input,int num_hidden,int num_output,double learn_rate,int num_iter);
void bpnn_Predict(BPNN *bpnn_ptr,double *data,double *label,int num_sample);
void bpnn_FileOutput(BPNN *bpnn_ptr,char *model);
void bpnn_LoadModel(BPNN *bpnn_ptr,char *model);
void bpnn_Normalize(BPNN *bpnn_ptr,double *x,int row,int col);
void Min_Max(double *x,int row,int col);
double Zscore(double x,double mean,double std);
double Sigmoid(double x);
#endif
//
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include "math.h"
#include "time.h"
#include "BPNN.h"
void bpnn_Init(BPNN *bpnn_ptr,int num_input,int num_hidden,int num_output,double learn_rate)
{
int i,j;
bpnn_ptr->trained = 0;
bpnn_ptr->num_input = num_input;
bpnn_ptr->num_hidden = num_hidden;
bpnn_ptr->num_output = num_output;
bpnn_ptr->rate = learn_rate;
bpnn_ptr->error_total = 0;
srand((unsigned)time(NULL));// set random number seed
    for(i=0;i<num_input;i++)
    {
        for(j=0;j<num_hidden;j++)
        {
bpnn_ptr->weight_input_hidden[i][j] = ((double)rand())/RAND_MAX-0.5; // init weight to [-0.5, 0.5]
}
}
    for(i=0;i<num_hidden;i++)
    {
bpnn_ptr->threshold_hidden[i] = 0; // init threshold of hidden layer to 0
        for(j=0;j<num_output;j++)
        {
bpnn_ptr->weight_hidden_output[i][j] = ((double)rand())/RAND_MAX-0.5; // init weight to [-0.5, 0.5]
}
}
    for(j=0;j<num_output;j++)
    {
bpnn_ptr->threshold_output[j] = 0; // init threshold of output layer to 0
bpnn_ptr->error[j] = 0; // init error of output to 0
}
}
void bpnn_ForwardPropagation(BPNN *bpnn_ptr,const double *data,const double *label,double *layer_out)
{
int i,j;
double temp;
    for(i=0;i<bpnn_ptr->num_input;i++) // calculate output of input layer
{
layer_out[i] = data[i];
}
    for(j=0;j<bpnn_ptr->num_hidden;j++) // calculate output of hidden layer
{
temp = -(bpnn_ptr->threshold_hidden[j]);
        for(i=0;i<bpnn_ptr->num_input;i++)
{
temp += (bpnn_ptr->weight_input_hidden[i][j])*layer_out[i];
}
layer_out[MAX_NUM_LAYER_OUT+j] = Sigmoid(temp);
}
bpnn_ptr->error_total = 0;
    for(j=0;j<bpnn_ptr->num_output;j++) // calculate output of output layer
{
temp = -(bpnn_ptr->threshold_output[j]);
        for(i=0;i<bpnn_ptr->num_hidden;i++)
{
temp += (bpnn_ptr->weight_hidden_output[i][j])*layer_out[MAX_NUM_LAYER_OUT+i];
}
layer_out[2*MAX_NUM_LAYER_OUT+j] = Sigmoid(temp);
bpnn_ptr->error[j] = layer_out[2*MAX_NUM_LAYER_OUT+j]-label[j];
bpnn_ptr->error_total += 0.5l*(bpnn_ptr->error[j])*(bpnn_ptr->error[j]);
}
}
void bpnn_BackPropagation(BPNN *bpnn_ptr,const double *layer_out)
{
double g[MAX_NUM_OUTPUT],e[MAX_NUM_HIDDEN],t;
double rate;
int i,j;
rate = (bpnn_ptr->rate);
    for(i=0;i<bpnn_ptr->num_output;i++)
{
g[i] = (bpnn_ptr->error[i])*(layer_out[2*MAX_NUM_LAYER_OUT+i])*(1-layer_out[2*MAX_NUM_LAYER_OUT+i]);
bpnn_ptr->threshold_output[i] += rate*g[i];
}
    for(i=0;i<bpnn_ptr->num_hidden;i++)
{
        for(j=0;j<bpnn_ptr->num_output;j++)
{
bpnn_ptr->weight_hidden_output[i][j] += -rate*g[j]*layer_out[MAX_NUM_LAYER_OUT+i];
}
}
    for(i=0;i<bpnn_ptr->num_hidden;i++)
{
t = 0;
        for(j=0;j<bpnn_ptr->num_output;j++)
{
t += (bpnn_ptr->weight_hidden_output[i][j])*g[j];
}
e[i] = t*layer_out[MAX_NUM_LAYER_OUT+i]*(1-layer_out[MAX_NUM_LAYER_OUT+i]);
bpnn_ptr->threshold_hidden[i] += rate*e[i];
}
    for(i=0;i<bpnn_ptr->num_input;i++)
{
        for(j=0;j<bpnn_ptr->num_hidden;j++)
{
bpnn_ptr->weight_input_hidden[i][j] += -rate*e[j]*layer_out[i];
}
}
}
void bpnn_Train(BPNN *bpnn_ptr,double *data,double *label,int num_sample,int num_input,int num_hidden,int num_output,double learn_rate,int num_iter)
{
int iter,sample,i;
double layer_out[3][MAX_NUM_LAYER_OUT]; // layer_out[i][j] output of node j in layer i, i = 0 input, i = 1 hidden, i = 2 output
printf("Training...rn");
bpnn_Init(bpnn_ptr,num_input,num_hidden,num_output,learn_rate);
bpnn_Normalize(bpnn_ptr,data,num_sample,num_input);
Min_Max(label,num_sample,num_output);
    for(iter=0;iter<num_iter;iter++)
    {
        for(sample=0;sample<num_sample;sample++)
        {
bpnn_ForwardPropagation(bpnn_ptr,&data[sample*num_input],&label[sample*num_output],&layer_out[0][0]);
bpnn_BackPropagation(bpnn_ptr,&layer_out[0][0]);
}
if(bpnn_ptr->error_total<0.0000001)
break;
}
bpnn_ptr->trained = 1;
printf("Training over!rnerror rate: %.4frniteration times: %drn",bpnn_ptr->error_total,iter);
}
void bpnn_Predict(BPNN *bpnn_ptr,double *data,double *label,int num_sample)
{
double layer_out[3][MAX_NUM_LAYER_OUT]; // layer_out[i][j] output of node j in layer i, i = 0 input, i = 1 hidden, i = 2 output
int i,j;
if(bpnn_ptr->trained == 0)
{
printf("Network untrained!");
return;
}
bpnn_Normalize(bpnn_ptr,data,num_sample,bpnn_ptr->num_input); // data have to be normalized
    for(i=0;i<num_sample;i++)
    {
bpnn_ForwardPropagation(bpnn_ptr,&data[i*(bpnn_ptr->num_input)],&label[i*(bpnn_ptr->num_output)],&layer_out[0][0]);
        for(j=0;j<bpnn_ptr->num_output;j++)
{
label[i*(bpnn_ptr->num_output)+j] = layer_out[2][j];
}
}
}
void bpnn_FileOutput(BPNN *bpnn_ptr,char *model)
{
FILE *file = NULL;
int i,j;
file = fopen("bpnn_out.txt","w");
if(file == NULL)
{
printf("Error!");
exit(1);
}
fprintf(file,"Number of nodes in input layer: %dn",bpnn_ptr->num_input);
fprintf(file,"Number of nodes in hidden layer: %dn",bpnn_ptr->num_hidden);
fprintf(file,"Number of nodes in output layer: %dn",bpnn_ptr->num_output);
fprintf(file,"nHidden layer threshold: ");
for(i=0;inum_hidden;i++)
{
fprintf(file," %.2lf ",(bpnn_ptr->threshold_hidden[i]));
}
fprintf(file,"nOutput layer threshold: ");
for(i=0;inum_output;i++)
{
fprintf(file," %.2lf ",(bpnn_ptr->threshold_output[i]));
}
fprintf(file,"nnWeight of input layer to hidden layer: ");
for(i=0;inum_input;i++)
{
fprintf(file,"n%d row: ",i);
for(j=0;jnum_hidden;j++)
{
fprintf(file," %.2lf ",(bpnn_ptr->weight_input_hidden[i][j]));
}
}
fprintf(file,"nnWeight of input layer to hidden layer: ");
for(i=0;inum_hidden;i++)
{
fprintf(file,"n%d row: ",i);
for(j=0;jnum_output;j++)
{
fprintf(file," %.3lf ",(bpnn_ptr->weight_hidden_output[i][j]));
}
}
fprintf(file,"nn"%s" is network model.",model);
fclose(file);
file = fopen(model,"wb");
if(file == NULL)
{
printf("Error!");
exit(1);
}
fwrite(bpnn_ptr,sizeof(BPNN),1,file);
fclose(file);
}
void bpnn_LoadModel(BPNN *bpnn_ptr,char *model)
{
FILE *file = NULL;
file = fopen(model,"rb");
if(file == NULL)
{
printf("Error!");
exit(1);
}
fread(bpnn_ptr,sizeof(BPNN),1,file);
fclose(file);
}
void bpnn_Normalize(BPNN *bpnn_ptr,double *x,int row,int col)
{
double sum1,sum2,mean,std;
int i,j;
if(bpnn_ptr->trained)
{
        for(j=0;j<col;j++)
        {
            for(i=0;i<row;i++)
            {
x[i*col+j] = Zscore(x[i*col+j],bpnn_ptr->mean_std[j][0],bpnn_ptr->mean_std[j][1]);
}
}
return;
}
    for(j=0;j<col;j++)
    {
sum1 = 0;
sum2 = 0;
        for(i=0;i<row;i++)
        {
sum1 += x[i*col+j];
sum2 += x[i*col+j]*x[i*col+j];
}
mean = sum1/row;
std = pow((sum2/row)-(mean*mean),0.5);
bpnn_ptr->mean_std[j][0] = mean; // mean value
bpnn_ptr->mean_std[j][1] = std; // standard deviation
        for(i=0;i<row;i++)
        {
x[i*col+j] = Zscore(x[i*col+j],mean,std);
}
}
}
void Min_Max(double *x,int row,int col)
{
double max,min,temp;
int i,j;
    for(j=0;j<col;j++)
    {
max = x[j];
min = x[j];
        for(i=0;i<row;i++) // first pass: find the column's max and min
        {
temp = x[i*col+j];
max = (temp>max)?temp:max;
            min = (temp<min)?temp:min;
        }
        for(i=0;i<row;i++) // second pass: rescale each value to [0, 1]
        {
temp = x[i*col+j];
x[i*col+j] = (temp-min)/(max-min);
}
}
}
double Zscore(double x,double mean,double std)
{
return (x-mean)/std;
}
double Sigmoid(double x)
{
return 1.0l/(1.0l+exp(-x));
}
All of the core code is posted above, and every function is commented. Because Chinese comments occasionally turn into garbled characters in VS Code, the comments were translated into English (machine-translated, so the wording may not be standard). To use the code, first read the training and test data sets into arrays (see the code comments for the array layout), then call bpnn_Train to train the network, and call bpnn_Predict to predict on the test data; see the comments for what the other functions do.
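A minimal usage sketch; the XOR-style sample data, layer sizes, learning rate and iteration count below are placeholders for illustration, not values recommended by this post:

#include <stdio.h>
#include "BPNN.h"

int main(void)
{
    static BPNN net;                         /* BPNN is large; keep it off the stack */
    static double train_data[4*2]  = {0,0, 0,1, 1,0, 1,1}; /* 4 samples x 2 inputs   */
    static double train_label[4*1] = {0, 1, 1, 0};         /* 4 samples x 1 output   */
    static double test_data[1*2]   = {0, 1};
    static double test_label[1*1]  = {0};    /* overwritten with the prediction       */

    /* 4 samples, 2 input nodes, 4 hidden nodes, 1 output node,
       learning rate 0.5, at most 10000 iterations                                    */
    bpnn_Train(&net, train_data, train_label, 4, 2, 4, 1, 0.5, 10000);
    bpnn_Predict(&net, test_data, test_label, 1);
    printf("prediction: %f\n", test_label[0]);

    bpnn_FileOutput(&net, "bpnn.model");     /* also writes a readable bpnn_out.txt   */
    return 0;
}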
The complete code can be downloaded here: Three-layer BP neural network in C (三层BP神经网络C语言代码).



