栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 软件开发 > 后端开发 > Python

K近邻法简单尝试

Python 更新时间: 发布时间: IT归档 最新发布 模块sitemap 名妆网 法律咨询 聚返吧 英语巴士网 伯小乐 网商动力

K近邻法简单尝试

还是不使用sklearn:

class KNN():
	"""k-nearest-neighbour classifier built on plain NumPy (no sklearn).

	Parameters
	----------
	X_train : array of shape (n_samples, n_features) — training points.
	Y_train : array of shape (n_samples,) — class labels, aligned with X_train.
	n_neighbors : int, number of neighbours used in the majority vote (default 3).
	p : int, order of the Minkowski norm (p=2 -> Euclidean distance, default).
	"""

	def __init__(self,X_train,Y_train,n_neighbors=3,p=2):
		self.n=n_neighbors # number of neighbours in the vote, default 3
		self.p=p # norm order; p=2 means Euclidean distance
		self.X_train=X_train
		self.Y_train=Y_train

	def prediction(self,X):
		"""Predict the label of a single sample X by majority vote among
		the k nearest training points."""
		# Clamp k so that asking for more neighbours than training points
		# does not IndexError in the seeding step below.
		k = min(self.n, len(self.X_train))
		# Seed the candidate list with the first k training points;
		# each entry is (distance to X, label).
		# linalg = linear + algebra; norm computes the vector norm.
		knn_list = [
			(np.linalg.norm(X - self.X_train[i], ord=self.p), self.Y_train[i])
			for i in range(k)
		]
		# Scan the remaining points: whenever one is closer than the current
		# farthest candidate, replace that candidate, so knn_list always
		# holds the k smallest distances seen so far.
		for i in range(k, len(self.X_train)):
			max_index = knn_list.index(max(knn_list, key=lambda t: t[0]))
			dist = np.linalg.norm(X - self.X_train[i], ord=self.p)
			if dist < knn_list[max_index][0]:
				knn_list[max_index] = (dist, self.Y_train[i])
		# Majority vote over the neighbours' labels decides the class.
		labels = [label for _, label in knn_list]
		return Counter(labels).most_common(1)[0][0]

	def score(self,X_test,Y_test):
		"""Return classification accuracy on (X_test, Y_test).

		Returns 0.0 for an empty test set instead of raising
		ZeroDivisionError."""
		if len(X_test) == 0:
			return 0.0
		right_counts = sum(
			1 for X, Y in zip(X_test, Y_test) if self.prediction(X) == Y
		)
		return right_counts / len(X_test)

sklearn还是更简单些:

# scikit-learn equivalent: default KNeighborsClassifier (k=5, Euclidean metric).
clf_sk = KNeighborsClassifier()
# NOTE(review): this snippet names the labels y_train (lowercase), while the
# full script below uses Y_train — same data, inconsistent spelling.
clf_sk.fit(X_train, y_train)

XD
完整代码:

from sklearn import datasets
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from collections import Counter
from sklearn.neighbors import KNeighborsClassifier

# Load the iris data set and hold out 20% of it as a shuffled test split.
iris = datasets.load_iris()
X_train, X_test, Y_train, Y_test = train_test_split(
	iris.data, iris.target, test_size=0.2, shuffle=True)

# Optional scatter plot of the first two features (disabled):
# plt.scatter(X_train[:50][:, 0], X_train[:50][:, 1], label=0)
# plt.scatter(X_train[50:100][:, 0], X_train[50:100][:, 1], label=1)
# plt.xlabel('sepal length')
# plt.ylabel('sepal width')
# plt.legend()
# plt.show()

# Keep only the first two features (sepal length / width) for both splits,
# and cap the test set at 20 samples.
X_train = X_train[:, 0:2]
Y_train = Y_train[:]
X_test = X_test[:20][:, 0:2]
Y_test = Y_test[:20]
class KNN():
	"""k-nearest-neighbour classifier built on plain NumPy (no sklearn).

	Parameters
	----------
	X_train : array of shape (n_samples, n_features) — training points.
	Y_train : array of shape (n_samples,) — class labels, aligned with X_train.
	n_neighbors : int, number of neighbours used in the majority vote (default 3).
	p : int, order of the Minkowski norm (p=2 -> Euclidean distance, default).
	"""

	def __init__(self,X_train,Y_train,n_neighbors=3,p=2):
		self.n=n_neighbors # number of neighbours in the vote, default 3
		self.p=p # norm order; p=2 means Euclidean distance
		self.X_train=X_train
		self.Y_train=Y_train

	def prediction(self,X):
		"""Predict the label of a single sample X by majority vote among
		the k nearest training points."""
		# Clamp k so that asking for more neighbours than training points
		# does not IndexError in the seeding step below.
		k = min(self.n, len(self.X_train))
		# Seed the candidate list with the first k training points;
		# each entry is (distance to X, label).
		# linalg = linear + algebra; norm computes the vector norm.
		knn_list = [
			(np.linalg.norm(X - self.X_train[i], ord=self.p), self.Y_train[i])
			for i in range(k)
		]
		# Scan the remaining points: whenever one is closer than the current
		# farthest candidate, replace that candidate, so knn_list always
		# holds the k smallest distances seen so far.
		for i in range(k, len(self.X_train)):
			max_index = knn_list.index(max(knn_list, key=lambda t: t[0]))
			dist = np.linalg.norm(X - self.X_train[i], ord=self.p)
			if dist < knn_list[max_index][0]:
				knn_list[max_index] = (dist, self.Y_train[i])
		# Majority vote over the neighbours' labels decides the class.
		labels = [label for _, label in knn_list]
		return Counter(labels).most_common(1)[0][0]

	def score(self,X_test,Y_test):
		"""Return classification accuracy on (X_test, Y_test).

		Returns 0.0 for an empty test set instead of raising
		ZeroDivisionError."""
		if len(X_test) == 0:
			return 0.0
		right_counts = sum(
			1 for X, Y in zip(X_test, Y_test) if self.prediction(X) == Y
		)
		return right_counts / len(X_test)

# --- evaluate the from-scratch classifier -------------------------------
Model = KNN(X_train, Y_train)
print(Model.score(X_test, Y_test))

# --- sanity-check against scikit-learn's reference implementation -------
clf_sk = KNeighborsClassifier()
clf_sk.fit(X_train, Y_train)
print(clf_sk.score(X_test, Y_test))

代码参考来自:https://github.com/fengdu78/lihang-code

转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/488242.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号