对于二分类问题,观察二维数据容易想到:存在一条直线能将两类数据最优地分开;观察三维数据,同样能想到存在一个平面。由此推广,对于 n 维数据集,存在一个超平面能将其分为两类。这就是 SVM 的背景。那么,怎样求这个超平面呢?
代码如下。对此仍然有一处疑惑:若不给 P 矩阵加上一个单位矩阵(的倍数),P 就不是正定矩阵,无法调用标准库求解。
用 LBP 处理数据:
import numpy as np
from matplotlib import colors, pyplot as plt
from numpy.core.fromnumeric import transpose
import random
from cvxopt import solvers
from numpy.lib.function_base import append
import qpsolvers
# Number of lines read from the .svm file and side length of every image.
NUM_SAMPLES = 2115
IMAGE_SIDE = 28

# (row offset, column offset, bit weight) of the eight neighbours of a pixel.
LBP_NEIGHBOURS = (
    (-1, -1, 1), (-1, 0, 2), (-1, 1, 4),
    (0, -1, 8), (0, 1, 16),
    (1, -1, 32), (1, 0, 64), (1, 1, 128),
)


def parse_svm_line(line, side=IMAGE_SIDE):
    """Parse one ``label idx:value idx:value ...`` line into (label, image).

    The line is split on single spaces.  The first token is the integer
    label.  Every token equal to the first or to the last token is skipped
    (with a trailing space before the newline, the last token is just the
    newline).  Each remaining ``idx:value`` token writes ``value`` at
    ``image[idx // side][idx % side]``.

    Returns:
        A tuple ``(label, image)`` where ``image`` is a ``side x side``
        integer array.
    """
    tokens = line.split(" ")
    label = int(tokens[0])
    image = np.zeros((side, side), dtype=int)
    skipped = (tokens[0], tokens[-1])
    for token in tokens:
        if token in skipped:
            continue
        fields = token.split(":")
        # NOTE(review): indices are used as 0-based flat positions; confirm
        # the data file is not 1-based.
        row, col = divmod(int(fields[0]), side)
        image[row][col] = int(fields[1])
    return label, image


def lbp_histogram(image):
    """Return the 256-bin histogram of 8-neighbour LBP codes of ``image``.

    For every pixel that is not on the border, a bit is set for each
    neighbour that is strictly smaller than the pixel (weights given by
    ``LBP_NEIGHBOURS``); the histogram counts how often each code occurs.
    """
    image = np.asarray(image)
    rows, cols = image.shape
    centre = image[1:-1, 1:-1]
    codes = np.zeros(centre.shape, dtype=int)
    for d_row, d_col, weight in LBP_NEIGHBOURS:
        # Same-shaped window shifted towards the neighbour being compared.
        neighbour = image[1 + d_row:rows - 1 + d_row,
                          1 + d_col:cols - 1 + d_col]
        codes += weight * (centre > neighbour)
    return np.bincount(codes.ravel(), minlength=256)


def build_lbp_dataset():
    """Read the raw test images, compute LBP histograms and save them.

    Each output row is the label followed by the 256 histogram counts.
    """
    labels = []
    histograms = []
    # The context manager closes the file even if a line fails to parse.
    with open('C:/Users/18110/Desktop/ML/lab5/data5/test-01-images.svm') as svm_file:
        for _ in range(NUM_SAMPLES):
            label, image = parse_svm_line(svm_file.readline())
            labels.append(label)
            histograms.append(lbp_histogram(image))
    inilabel = np.array(labels, dtype=int)
    prodata = np.array(histograms, dtype=int)
    np.savetxt("C:/Users/18110/Desktop/ML/lab5/data5/lab5-2testdata.txt",
               np.c_[inilabel, prodata])


if __name__ == "__main__":
    build_lbp_dataset()
lab5-1:
import numpy as np
from matplotlib import colors, pyplot as plt
from numpy.core.fromnumeric import transpose
import random
from cvxopt import solvers
import qpsolvers
def dual_qp_matrix(x, y, ridge):
    """Return P with P[i, j] = y[i] * y[j] * <x[i], x[j]>, plus ``ridge`` on the diagonal."""
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    return np.outer(y, y) * (x @ x.T) + ridge * np.identity(len(y))


def support_vector_bias(alpha, y, scores, upper, tol=1e-6):
    """Average ``y - scores`` over the support vectors to obtain the bias.

    Samples with ``tol < alpha < upper - tol`` are used; if there are none,
    every sample with ``alpha > tol`` is used, and as a last resort all
    samples.
    """
    alpha = np.asarray(alpha, dtype=float).ravel()
    residual = np.asarray(y, dtype=float) - np.asarray(scores, dtype=float)
    chosen = (alpha > tol) & (alpha < upper - tol)
    if not chosen.any():
        chosen = alpha > tol
    if not chosen.any():
        chosen = np.ones(alpha.shape, dtype=bool)
    return float(residual[chosen].mean())


def correct_mask(scores, labels):
    """Return a boolean array: True where the sign of ``scores`` matches the +1/-1 label."""
    scores = np.asarray(scores)
    labels = np.asarray(labels)
    return ((scores > 0) & (labels == 1)) | ((scores < 0) & (labels == -1))


def run_lab5_1():
    """Train a linear soft-margin SVM on the 2-D set, plot it and print test accuracy."""
    train = np.loadtxt('C:/Users/18110/Desktop/ML/lab5/data5/training_2.txt')
    test = np.loadtxt('C:/Users/18110/Desktop/ML/lab5/data5/test_2.txt')
    m = len(train)
    x = train[:, 0:2]
    y = train[:, 2].reshape(m)
    plt.scatter(test[:, 0], test[:, 1], s=4, c=test[:, 2])

    # Dual problem: minimise 1/2 a'Pa - 1'a  subject to 0 <= a <= 1, y'a = 0.
    # 10 is added to the diagonal of P so the solver sees a positive-definite matrix.
    P = dual_qp_matrix(x, y, 10)
    G = np.r_[np.identity(m), -np.identity(m)]
    h = np.r_[np.ones((m,)), np.zeros((m,))]
    q = -np.ones((m,))
    b = np.zeros((1,))
    # "quadprog" was the implicit default of older qpsolvers releases; newer
    # releases require the solver to be named.
    alfa = qpsolvers.solve_qp(P=P, q=q, G=G, h=h, A=y, b=b, solver="quadprog")
    if alfa is None:
        raise RuntimeError("QP solver did not return a solution")
    alfa = alfa.reshape(m)

    w = (alfa * y) @ x
    finalb = support_vector_bias(alfa, y, x @ w, upper=1.0)

    # Lines w.x + b = 1, w.x + b = -1 and the separating line w.x + b = 0.
    tx = np.arange(50, 160, 10)
    plt.plot(tx, -tx * w[0] / w[1] - (finalb - 1) / w[1])
    plt.plot(tx, -tx * w[0] / w[1] - (finalb + 1) / w[1])
    plt.plot(tx, -tx * w[0] / w[1] - finalb / w[1])

    # The bias is part of the decision function and must be included here.
    total = len(test)
    num = int(np.count_nonzero(correct_mask(test[:, 0:2] @ w + finalb, test[:, 2])))
    print(total, num)
    print(num / total)
    plt.show()


if __name__ == "__main__":
    run_lab5_1()
lab5-2:
import numpy as np
from matplotlib import colors, pyplot as plt
from numpy.core.fromnumeric import transpose
import random
from cvxopt import solvers
from numpy.core.numeric import tensordot
from numpy.lib.function_base import append
import qpsolvers
def dual_qp_matrix(x, y, ridge):
    """Return P with P[i, j] = y[i] * y[j] * <x[i], x[j]>, plus ``ridge`` on the diagonal."""
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    return np.outer(y, y) * (x @ x.T) + ridge * np.identity(len(y))


def support_vector_bias(alpha, y, scores, upper, tol=1e-6):
    """Average ``y - scores`` over the support vectors to obtain the bias.

    Samples with ``tol < alpha < upper - tol`` are used; if there are none,
    every sample with ``alpha > tol`` is used, and as a last resort all
    samples.
    """
    alpha = np.asarray(alpha, dtype=float).ravel()
    residual = np.asarray(y, dtype=float) - np.asarray(scores, dtype=float)
    chosen = (alpha > tol) & (alpha < upper - tol)
    if not chosen.any():
        chosen = alpha > tol
    if not chosen.any():
        chosen = np.ones(alpha.shape, dtype=bool)
    return float(residual[chosen].mean())


def correct_mask(scores, labels):
    """Return a boolean array: True where the sign of ``scores`` matches the +1/-1 label."""
    scores = np.asarray(scores)
    labels = np.asarray(labels)
    return ((scores > 0) & (labels == 1)) | ((scores < 0) & (labels == -1))


def run_lab5_2():
    """Train a linear SVM on the first 1000 LBP rows and print test/train accuracy."""
    f = np.loadtxt("C:/Users/18110/Desktop/ML/lab5/data5/lab5-2traindata.txt")
    # Column 0 is the label, columns 1..256 are the features; only the
    # first 1000 rows are used for training.
    y = f[0:1000, 0]
    x = f[0:1000, 1:257]
    m = len(y)

    # Dual problem: minimise 1/2 a'Pa - 1'a  subject to 0 <= a <= 0.05, y'a = 0.
    # 1 is added to the diagonal of P so the solver sees a positive-definite matrix.
    P = dual_qp_matrix(x, y, 1)
    G = np.r_[np.identity(m), -np.identity(m)]
    h = np.r_[0.05 * np.ones((m,)), np.zeros((m,))]
    q = -np.ones((m,))
    b = np.zeros((1,))
    # "quadprog" was the implicit default of older qpsolvers releases; newer
    # releases require the solver to be named.
    alfa = qpsolvers.solve_qp(P=P, q=q, G=G, h=h, A=y, b=b, solver="quadprog")
    if alfa is None:
        raise RuntimeError("QP solver did not return a solution")
    alfa = alfa.reshape(m)

    w = (alfa * y) @ x
    finalb = support_vector_bias(alfa, y, x @ w, upper=0.05)

    test = np.loadtxt("C:/Users/18110/Desktop/ML/lab5/data5/lab5-2testdata.txt")
    test_ok = correct_mask(test[:, 1:257] @ w + finalb, test[:, 0])
    # Debug trace kept from the original script: at every misclassified test
    # row, print how many rows were classified correctly before it.
    for running in np.cumsum(test_ok)[~test_ok]:
        print(running)
    num1 = int(np.count_nonzero(test_ok))

    # Training accuracy is measured on every row of the file, not only on
    # the 1000 rows used for fitting.
    num2 = int(np.count_nonzero(correct_mask(f[:, 1:257] @ w + finalb, f[:, 0])))
    print("测试数据的准确率为", num1 / len(test))
    print("训练数据的准确率为", num2 / len(f))


if __name__ == "__main__":
    run_lab5_2()
lab5-3:
import numpy as np
from matplotlib import colors, pyplot as plt
from numpy.core.fromnumeric import transpose
import random
from cvxopt import solvers
from numpy.core.numeric import tensordot
from numpy.lib.function_base import append
import qpsolvers
def knerl(a, b, gamma=100):
    """Gaussian (RBF) kernel: ``exp(-gamma * ||a - b||**2)``.

    Args:
        a, b: Points of equal length; arrays or plain sequences.
        gamma: Kernel width factor; the default 100 is the value that was
            previously hard-coded.

    Returns:
        The kernel value, 1.0 when ``a`` equals ``b``.
    """
    diff = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
    return np.exp(-gamma * np.linalg.norm(diff) ** 2)
def cal(alfa, b, y, x, nowx, nowy):
    """Return ``|sum_i alfa[i] * y[i] * knerl(x[i], (nowx, nowy)) + b|``.

    Only samples with ``alfa[i] > 0`` contribute.  The number of samples is
    taken from ``alfa`` instead of being fixed at 211.

    Args:
        alfa: Multipliers, one per training sample.
        b: Bias term added to the kernel sum.
        y: Labels of the training samples.
        x: Training points.
        nowx, nowy: Coordinates of the point being evaluated.
    """
    point = np.array([nowx, nowy])
    result = 0
    for i in range(len(alfa)):
        if alfa[i] > 0:
            result += alfa[i] * y[i] * knerl(x[i], point)
    return abs(result + b)
def svm(train, C=1, a=1):
    """Fit an RBF-kernel soft-margin SVM, plot its boundary, return training accuracy.

    Args:
        train: Array whose columns 0-1 are the point coordinates and whose
            column 2 is the +1/-1 label.
        C: Upper bound on every multiplier.
        a: Value added to the diagonal of the QP matrix so that the solver
            receives a positive-definite matrix.

    Returns:
        Fraction of training rows whose label matches the sign of the
        kernel decision value.  (The function used to return None; the
        accuracy count was computed from an all-zero ``w`` and discarded.)

    Raises:
        RuntimeError: If the QP solver returns no solution.
    """
    y = train[:, 2]
    x = train[:, 0:2]
    m = len(train)

    # Kernel matrix, evaluated once and reused for both P and the bias.
    gram = np.array([[knerl(x[i], x[j]) for j in range(m)] for i in range(m)])
    P = np.outer(y, y) * gram + a * np.identity(m)
    G = np.r_[np.identity(m), -np.identity(m)]
    h = np.r_[C * np.ones((m,)), np.zeros((m,))]
    q = -np.ones((m,))
    b = np.zeros((1,))
    # "quadprog" was the implicit default of older qpsolvers releases; newer
    # releases require the solver to be named.
    alfa = qpsolvers.solve_qp(P=P, q=q, G=G, h=h, A=y, b=b, solver="quadprog")
    if alfa is None:
        raise RuntimeError("QP solver did not return a solution")
    alfa = alfa.reshape((m, 1))

    # scores[i] = sum_j alfa[j] * y[j] * K(x[i], x[j]); the bias is the mean
    # of y - scores over all training rows, as in the original loop.
    # NOTE(review): averaging over every row rather than only the support
    # vectors is kept as written - confirm this is intended.
    scores = gram @ (alfa[:, 0] * y)
    finalb = float(np.mean(y - scores))
    print(finalb)

    # Collect grid points whose decision value is close to zero; together
    # they trace the decision boundary.
    tx = []
    ty = []
    for grid_x in np.arange(-0.6, 0.4, 0.01):
        print(grid_x)  # progress output, the grid scan is slow
        for grid_y in np.arange(-0.6, 0.6, 0.01):
            calnum = cal(alfa, finalb, y, x, grid_x, grid_y)
            if calnum < 0.1 and calnum != 0:
                tx.append(grid_x)
                ty.append(grid_y)
    plt.scatter(tx, ty)
    plt.show()

    decision = scores + finalb
    hits = ((decision > 0) & (y == 1)) | ((decision < 0) & (y == -1))
    num2 = int(np.count_nonzero(hits))
    return num2 / m
# Load the lab5-3 training set (columns: two coordinates, then the label),
# draw the labelled points and fit the kernel SVM with C=1 and ridge 0.01.
x = np.loadtxt('C:/Users/18110/Desktop/ML/lab5/data5/training_3.text')
u, v, co = x[:, 0], x[:, 1], x[:, 2]
plt.scatter(u, v, c=co)
a = svm(x, C=1, a=0.01)



