# Import the required packages.
import numpy as np
import pandas as pd
# Read the CSV file (GBK-encoded) into a DataFrame.
# NOTE(review): the path has no separators after the drive letter; the
# backslashes were presumably lost when this snippet was copied. The literal
# is left untouched here -- confirm the real location before running.
path = r"D:postgraduatestudydata_miningtestguanxinbing.csv"
data_datafram = pd.read_csv(path, encoding="gbk")
# pandas returns a DataFrame, so convert it to a NumPy array first.
data_array = np.array(data_datafram)
# Then convert the array to a plain list of rows.
data_list = data_array.tolist()
# Column names (the CSV header), in the same order as the values in each row.
name = data_datafram.columns.values
# Map the 0/1 flags of each row back to column names: every row becomes one
# transaction, i.e. the list of column names whose cell equals 1.
# All rows of data_list are processed (the row count used to be hard-coded as
# 356, which raised IndexError on shorter files and ignored any extra rows).
medicine = [
    [column for column, flag in zip(name, row) if flag == 1.0]
    for row in data_list
]
# Import the package and run the association-rule analysis.
from efficient_apriori import apriori

transactions = medicine
# Keep itemsets that occur in at least 10% of the transactions and rules
# whose confidence is at least 70%.
itemsets, rules = apriori(
    transactions,
    min_support=0.1,
    min_confidence=0.7,
)
# The second argument is a newline escape; its backslash had been stripped,
# which made the script print a literal "n" after the itemsets.
print(itemsets, '\n')
for rule in rules:
    print(rule)
# Install the dependency with: pip install efficient_apriori
# Output:



