# 原文在这里 (blog artifact: "original article here" — commented out so the script can run)
'''
这部分代码是lwls.py文件,基于随机梯度下降方法,
这与CSDN大部分相关文章求解正规方程(中间需要求逆)的方法不同,
这里不需要求矩阵的逆。
但是,它带来的另外一个问题是如何调参,如何找到最好的参数…………
我暂时还没找到好方法……
欢迎留言评论
'''
# Locally Weighted Linear Regression is very inefficient because Parameters are calculated again for each test case
# But, it should give good results after tuning the hyper-parameter tau
import csv
import math
import numpy
def converge(t):
    """Return True when every component of the update vector t is within tolerance.

    Uses the module-level `epsilon` tolerance. Currently unused: the
    early-exit check in the SGD loop is commented out.
    """
    return all(abs(component) <= epsilon for component in t)
def stochastic_gradient_descent(w, theta):
    """Fit `theta` by weighted stochastic gradient descent and return it.

    Reads module-level globals: X_s / Y_s (training features and labels),
    alpha (learning rate) and max_n (number of epochs). w[i] is the locality
    weight of training sample i for the current query point. `theta` (a list)
    is updated in place and also returned.
    """
    for _ in range(max_n):
        for i in range(len(X_s)):
            x = numpy.array(X_s[i])
            # Hoisted out of the coordinate loop: the residual does not depend
            # on j, so the original recomputed the same dot product for every
            # coordinate (O(d^2) work per sample instead of O(d)).
            step = alpha * w[i] * (Y_s[i] - numpy.dot(numpy.array(theta), x))
            for j in range(len(theta)):
                theta[j] = theta[j] + step * x[j]
            # Early-exit on convergence was disabled in the original:
            # if converge([step * xj for xj in x]):
            #     return theta
    return theta
def get_data(name):
    """Load a CSV file into a list of float rows.

    Each inner list is one sample: feature values followed by the label (y)
    in the last position; no constant/bias term is included here.
    The original accumulated rows with `data = data + [row]`, which copies
    the whole list each iteration (O(n^2)); a comprehension builds it in O(n).
    """
    with open(name, 'r') as csv_file:
        return [[float(value) for value in row] for row in csv.reader(csv_file)]
def arrange_data(data):
    """Split samples into feature vectors and labels; return (Xs, Ys).

    Appends a constant 1 to each feature vector as the bias term; the label
    is taken from the last column. The original preallocated with
    `[[]]*len(data)`, which aliases a single shared list — harmless there
    because every slot was reassigned, but a classic pitfall; the
    comprehensions below avoid it entirely.
    """
    Xs = [row[:-1] + [1] for row in data]
    Ys = [row[-1] for row in data]
    return Xs, Ys
def weight(x_i, x):
    """Gaussian kernel weight between training feature vector x_i and query point x.

    Bandwidth comes from the module-level hyper-parameter `tau`.
    """
    diff = numpy.array(x_i) - numpy.array(x)
    sq_dist = numpy.dot(diff, diff)
    return math.exp(-1.0 * sq_dist / (2 * tau * tau))
def get_weights(Xs, x):
    """Kernel weights of query point x against every training sample in Xs."""
    # return [1]*len(Xs)  # uncomment for standard (unweighted) linear regression
    return [weight(x_train, x) for x_train in Xs]
def get_parameters(w, n):
    """Fit a length-n parameter vector for one query point, starting from zeros.

    Uses SGD rather than the closed-form normal equations, so no matrix
    inversion is needed — this is where the code differs from most write-ups.
    """
    initial = [0] * n
    return stochastic_gradient_descent(w, initial)
def get_prediction(w, x):
    """Predict y at feature vector x using parameters fitted with locality weights w."""
    fitted = get_parameters(w, len(x))
    return numpy.dot(numpy.array(fitted), numpy.array(x))
# --- Driver: evaluate locally weighted linear regression on a held-out test set ---
# NOTE(review): the generator section at the bottom of this file writes
# data_train.csv / data_test.csv, not the hw_-prefixed names read here —
# confirm which data files are intended.
data_train = get_data('hw_data_train.csv')
data_test = get_data('hw_data_test.csv')
# X_s and Y_s are read as module globals by stochastic_gradient_descent().
X_s,Y_s = arrange_data(data_train)
Xts,Yts = arrange_data(data_test)
# (HYPER-)PARAMETERS (read as globals by the functions above)
tau = 0.1 # Weight Parameter (kernel bandwidth used by weight())
alpha = 0.01 # Learning Rate
max_n = 1000 # Stochastic Gradient Descent Loops
epsilon = 0.0001 # Stochastic Gradient Descent Tolerance [not using here, though]
variance = float(0)  # accumulates squared error; becomes the mean squared error after the division below
for i in range(len(Xts)):
    x = Xts[i]
    y = Yts[i]
    # LWLR re-fits the parameters for every single test point — inherently slow.
    w = get_weights(X_s,x)
    prediction = get_prediction(w,x)
    print("Actual: " + str(y) + " Predicted: " + str(prediction))
    variance = variance + (prediction-y)**2
variance = variance/len(Xts)
print("Variance: ",variance)
# --- Data generation: write noiseless y = x^2 training and test samples as CSV ---
# NOTE(review): writes data_train.csv / data_test.csv, while the evaluation
# section above reads hw_data_train.csv / hw_data_test.csv — names do not match.
import numpy
num_train = 100
num_test = 20
# NOTE: append mode — re-running this script accumulates duplicate rows.
with open("data_train.csv", 'a') as file:
    x = -5
    for i in range(num_train):
        # The 0* factor disables the Gaussian noise; raise it above 0 for noisy data.
        y = (x)**2 + 0*numpy.random.normal(0, 1)
        # BUG FIX: the original wrote a literal "n" instead of a newline,
        # collapsing the whole file into one unparseable CSV line.
        file.write(str(x) + "," + str(y) + "\n")
        x = x + 10/num_train
with open("data_test.csv", 'a') as file:
    x = -5
    for i in range(num_test):
        y = (x)**2 + 0*numpy.random.normal(0, 1)
        file.write(str(x) + "," + str(y) + "\n")
        x = x + 10/num_test



