栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 软件开发 > 后端开发 > Python

pytorch搭建MLP实现wine数据集分类(内含源码)

Python 更新时间: 发布时间: IT归档 最新发布 模块sitemap 名妆网 法律咨询 聚返吧 英语巴士网 伯小乐 网商动力

pytorch搭建MLP实现wine数据集分类(内含源码)

数据集

wine数据集包含三种葡萄酒类别,总共178个样本,每个样本具有13个特征,样本数据格式如下图所示。

数据读取及预处理

从wine.data中读取数据(loadDateSet)并进行降维(LL)处理

def loadDateSet(filename):
    """Load the UCI wine dataset from a comma-separated file.

    Each data line is ``label,f1,...,f13`` where the label is 1-3.

    Args:
        filename: path to the wine.data file.

    Returns:
        (features, labels): numpy arrays of shape (n, 13) and (n,).
        Labels are shifted to 0-2 so they can be used directly as
        CrossEntropyLoss targets.
    """
    dataMat = []
    labelMat = []
    # BUG FIX: use a context manager so the file handle is always closed
    # (the original never called fr.close()).
    with open(filename) as fr:
        for line in fr:
            curLine = line.strip().split(',')
            if not curLine[0]:
                continue  # tolerate blank/trailing lines
            dataMat.append(list(map(float, curLine[1:])))
            # BUG FIX: wine.data labels are 1-3, but CrossEntropyLoss
            # expects class indices in [0, C-1]; label 3 raised
            # "Target 3 is out of bounds" with a 3-class output layer.
            labelMat.append(int(curLine[0]) - 1)
    return np.array(dataMat), np.array(labelMat)

def LL(x, y):
    """L2-normalize each sample of x, then project onto 2 LDA components.

    Args:
        x: feature matrix, one sample per row.
        y: class labels (LDA is supervised).

    Returns:
        The (n, 2) projected feature matrix.
    """
    normalized = preprocessing.normalize(x, norm='l2')
    projector = LinearDiscriminantAnalysis(n_components=2)
    return projector.fit_transform(normalized, y)
构建tensor格式训练集及测试集

构建tensor数据集函数(Data.TensorDataset)

# Load the raw wine data, reduce to 2 LDA components, shuffle, and wrap
# the result in train/test DataLoaders.
dataMat, labelMat = loadDateSet('wine.data')
dataMat = LL(dataMat, labelMat)
pindex = np.random.permutation(dataMat.shape[0])  # random row order
dataMat = dataMat[pindex, :]
labelMat = labelMat[pindex]
dataMat = torch.from_numpy(dataMat)
labelMat = torch.from_numpy(labelMat)

# Hold out the first 28 shuffled samples for testing; train on the rest.
# BUG FIX: the original tested on dataMat[0:27] while training on
# dataMat[28:], silently dropping sample 27 from BOTH splits.
torch_dataset = Data.TensorDataset(dataMat[28:], labelMat[28:])
loader = Data.DataLoader(
    dataset=torch_dataset,
    batch_size=15,
    shuffle=True,
    num_workers=2,  # NOTE(review): worker processes need a __main__ guard on spawn platforms
)
torch_testset = Data.TensorDataset(dataMat[0:28], labelMat[0:28])
loader2 = Data.DataLoader(
    dataset=torch_testset,
    batch_size=29,  # larger than the 28-sample test set -> one batch per pass
    shuffle=True,
    num_workers=2,
)
搭建MLP
class MLP(torch.nn.Module):
    """2 -> 5 -> 3 multilayer perceptron for the LDA-reduced wine data.

    forward() returns raw logits: the training loop uses
    CrossEntropyLoss, which applies log-softmax internally, and the
    accuracy argmax is unchanged by softmax.
    """

    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = torch.nn.Linear(2, 5)  # 2 LDA components -> 5 hidden units
        self.fc2 = torch.nn.Linear(5, 3)  # 5 hidden units -> 3 wine classes

    def forward(self, x):
        # torch.sigmoid replaces the deprecated F.sigmoid.
        h = torch.sigmoid(self.fc1(x))
        # BUG FIX: the original applied F.softmax here and then fed the
        # result to CrossEntropyLoss, which softmaxes again. The double
        # softmax is a correctness bug that badly flattens gradients;
        # return unnormalized logits instead.
        return self.fc2(h)


model = MLP()
训练模型(含可视化)
def train():
    """Train the global `model` for `n_epoch` epochs, then plot the
    per-epoch training loss and test accuracy curves.

    Uses SGD (lr=1) with cross-entropy loss over the global `loader`;
    accuracy per epoch comes from test().
    """
    epochs_axis = []
    loss_history = []
    acc_history = []
    criterion = torch.nn.CrossEntropyLoss()
    sgd = torch.optim.SGD(params=model.parameters(), lr=1)
    for epoch in range(n_epoch):
        running_loss = 0
        for features, targets in loader:
            sgd.zero_grad()
            predictions = model(features)
            batch_loss = criterion(predictions, targets)
            batch_loss.backward()
            sgd.step()
            # Weight by batch size so the epoch average is per-sample.
            running_loss += batch_loss.item() * features.size(0)
        epochs_axis.append(epoch + 1)
        loss_history.append(running_loss / len(loader.dataset))
        acc_history.append(test())
    plt.plot(epochs_axis, loss_history, ls="-.", lw=2, c="c", label="plot figure")
    plt.xlabel('num of train')
    plt.ylabel('loss')
    plt.grid()  # grid lines
    plt.show()
    plt.plot(epochs_axis, acc_history, ls="-.", lw=2, c="c", label="plot figure")
    plt.xlabel('num of train')
    plt.ylabel('acc')
    plt.grid()  # grid lines
    plt.show()
测试模型
def test():
    """Compute, print, and return classification accuracy (%) on the
    held-out test set.

    Returns:
        float: accuracy in percent over `loader2`.
    """
    correct = 0
    total = 0
    with torch.no_grad():  # inference only -- no gradient bookkeeping
        # BUG FIX: evaluate on the test loader (loader2). The original
        # iterated `loader`, so the reported "test" accuracy was
        # actually TRAINING accuracy.
        for batch_dataMat, batch_lableMat in loader2:
            output = model(batch_dataMat)
            _, p = torch.max(output.data, 1)  # predicted class index per row
            total += batch_lableMat.size(0)
            correct += (p == batch_lableMat).sum().item()
    print(100 * correct / total)
    return 100 * correct / total
测试结果


完整代码
import numpy as np
import  matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn.functional as F
import torch.utils.data as Data
import  torchvision.transforms as transaforms
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from  sklearn import preprocessing

# Default to float64 tensors so torch.from_numpy on float64 numpy arrays
# matches the dtype of the Linear layers created below.
torch.set_default_tensor_type(torch.DoubleTensor)
# Number of training epochs used by train().
n_epoch=10
def loadDateSet(filename):
    """Load the UCI wine dataset from a comma-separated file.

    Each data line is ``label,f1,...,f13`` where the label is 1-3.

    Args:
        filename: path to the wine.data file.

    Returns:
        (features, labels): numpy arrays of shape (n, 13) and (n,).
        Labels are shifted to 0-2 so they can be used directly as
        CrossEntropyLoss targets.
    """
    dataMat = []
    labelMat = []
    # BUG FIX: use a context manager so the file handle is always closed
    # (the original never called fr.close()).
    with open(filename) as fr:
        for line in fr:
            curLine = line.strip().split(',')
            if not curLine[0]:
                continue  # tolerate blank/trailing lines
            dataMat.append(list(map(float, curLine[1:])))
            # BUG FIX: wine.data labels are 1-3, but CrossEntropyLoss
            # expects class indices in [0, C-1]; label 3 raised
            # "Target 3 is out of bounds" with a 3-class output layer.
            labelMat.append(int(curLine[0]) - 1)
    return np.array(dataMat), np.array(labelMat)

def LL(x, y):
    """L2-normalize each sample of x, then project onto 2 LDA components.

    Args:
        x: feature matrix, one sample per row.
        y: class labels (LDA is supervised).

    Returns:
        The (n, 2) projected feature matrix.
    """
    normalized = preprocessing.normalize(x, norm='l2')
    projector = LinearDiscriminantAnalysis(n_components=2)
    return projector.fit_transform(normalized, y)

# Load the raw wine data, reduce to 2 LDA components, shuffle, and wrap
# the result in train/test DataLoaders.
dataMat, labelMat = loadDateSet('wine.data')
dataMat = LL(dataMat, labelMat)
pindex = np.random.permutation(dataMat.shape[0])  # random row order
dataMat = dataMat[pindex, :]
labelMat = labelMat[pindex]
dataMat = torch.from_numpy(dataMat)
labelMat = torch.from_numpy(labelMat)

# Hold out the first 28 shuffled samples for testing; train on the rest.
# BUG FIX: the original tested on dataMat[0:27] while training on
# dataMat[28:], silently dropping sample 27 from BOTH splits.
torch_dataset = Data.TensorDataset(dataMat[28:], labelMat[28:])
loader = Data.DataLoader(
    dataset=torch_dataset,
    batch_size=15,
    shuffle=True,
    num_workers=2,  # NOTE(review): worker processes need a __main__ guard on spawn platforms
)
torch_testset = Data.TensorDataset(dataMat[0:28], labelMat[0:28])
loader2 = Data.DataLoader(
    dataset=torch_testset,
    batch_size=29,  # larger than the 28-sample test set -> one batch per pass
    shuffle=True,
    num_workers=2,
)
class MLP(torch.nn.Module):
    """2 -> 5 -> 3 multilayer perceptron for the LDA-reduced wine data.

    forward() returns raw logits: the training loop uses
    CrossEntropyLoss, which applies log-softmax internally, and the
    accuracy argmax is unchanged by softmax.
    """

    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = torch.nn.Linear(2, 5)  # 2 LDA components -> 5 hidden units
        self.fc2 = torch.nn.Linear(5, 3)  # 5 hidden units -> 3 wine classes

    def forward(self, x):
        # torch.sigmoid replaces the deprecated F.sigmoid.
        h = torch.sigmoid(self.fc1(x))
        # BUG FIX: the original applied F.softmax here and then fed the
        # result to CrossEntropyLoss, which softmaxes again. The double
        # softmax is a correctness bug that badly flattens gradients;
        # return unnormalized logits instead.
        return self.fc2(h)


model = MLP()
def train():
    """Train the global `model` for `n_epoch` epochs, then plot the
    per-epoch training loss and test accuracy curves.

    Uses SGD (lr=1) with cross-entropy loss over the global `loader`;
    accuracy per epoch comes from test().
    """
    epochs_axis = []
    loss_history = []
    acc_history = []
    criterion = torch.nn.CrossEntropyLoss()
    sgd = torch.optim.SGD(params=model.parameters(), lr=1)
    for epoch in range(n_epoch):
        running_loss = 0
        for features, targets in loader:
            sgd.zero_grad()
            predictions = model(features)
            batch_loss = criterion(predictions, targets)
            batch_loss.backward()
            sgd.step()
            # Weight by batch size so the epoch average is per-sample.
            running_loss += batch_loss.item() * features.size(0)
        epochs_axis.append(epoch + 1)
        loss_history.append(running_loss / len(loader.dataset))
        acc_history.append(test())
    plt.plot(epochs_axis, loss_history, ls="-.", lw=2, c="c", label="plot figure")
    plt.xlabel('num of train')
    plt.ylabel('loss')
    plt.grid()  # grid lines
    plt.show()
    plt.plot(epochs_axis, acc_history, ls="-.", lw=2, c="c", label="plot figure")
    plt.xlabel('num of train')
    plt.ylabel('acc')
    plt.grid()  # grid lines
    plt.show()
def test():
    """Compute, print, and return classification accuracy (%) on the
    held-out test set.

    Returns:
        float: accuracy in percent over `loader2`.
    """
    correct = 0
    total = 0
    with torch.no_grad():  # inference only -- no gradient bookkeeping
        # BUG FIX: evaluate on the test loader (loader2). The original
        # iterated `loader`, so the reported "test" accuracy was
        # actually TRAINING accuracy.
        for batch_dataMat, batch_lableMat in loader2:
            output = model(batch_dataMat)
            _, p = torch.max(output.data, 1)  # predicted class index per row
            total += batch_lableMat.size(0)
            correct += (p == batch_lableMat).sum().item()
    print(100 * correct / total)
    return 100 * correct / total
def main():
    """Run training (which also plots curves) then a final evaluation."""
    train()
    test()


# Guard the entry point so importing this module -- or DataLoader worker
# spawning with num_workers > 0 on Windows/macOS -- does not re-run
# training.
if __name__ == "__main__":
    main()

转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/757267.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号