源码中保留了两种计数实现:默认使用的 getLk() 不做置信度判定,速度较快,约 8 万条记录耗时 1 分钟左右;如需置信度判定,则需要进一步完善并改用 getLk0() 函数。
#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>

using namespace std;

// NOTE(review): this listing was rebuilt from a web extraction that dropped the
// #include targets, all template argument lists and a few statement fragments.
// Element types were inferred from usage (items are strings, transaction
// indices are ints); the lost output fragments are marked individually below.

/// Apriori frequent-itemset miner over a whitespace-separated transaction file.
///
/// Typical use: construct, call buildData(), call getL1(), then call getLk()
/// (or getLk0()) for k = 2, 3, ... until it returns an empty map.
class Apriori {
private:
    string FileName;                  // path of the transaction file
    float minSup;                     // minimum support; a value < 1 is a fraction until getL1() runs
    map<long, set<string>> Database;  // transaction index -> items of that transaction
    map<string, set<int>> mp;         // item -> indices of the transactions containing it

    // Set difference in place: removes every element of set2 from set1.
    void sub(set<string> &set1, const set<string> &set2);
    // Set union: returns set1 + set2.
    set<string> add(const set<string> &set1, const set<string> &set2);

public:
    Apriori(string FileName, float MinSup) : FileName(FileName), minSup(MinSup) {}

    bool buildData();
    map<string, int> getC1();
    map<set<string>, int> getL1();
    set<set<string>> keySet(map<set<string>, int> &Lk);
    set<set<string>> aprioriGen(int m, set<set<string>> &);
    map<set<string>, int> getLk(int k, set<set<string>>);
    map<set<string>, int> getLk0(int k, set<set<string>>);
    int printsetSet(set<set<string>> &);
};

// Number of transaction lines read by buildData(); getL1() uses it to turn a
// fractional minimum support into an absolute count.
int line_num;

/// Prints every itemset of `setSet` to stdout, one itemset per line with the
/// items separated by spaces.
/// @return the number of itemsets printed.
int Apriori::printsetSet(set<set<string>> &setSet)
{
    int cnt = 0;
    for (const set<string> &itemset : setSet) {
        for (const string &item : itemset) {
            cout << item << " ";
        }
        cout << endl;
        ++cnt;
    }
    return cnt;
}

/// Reads the transaction file into `Database` (index -> items) and builds the
/// inverted index `mp` (item -> indices).
///
/// The first line of the file is always discarded, so the file must start with
/// a spare line. Each following line is one transaction.
/// @return false when the file cannot be opened, true otherwise.
bool Apriori::buildData()
{
    ifstream inFile;
    inFile.open(FileName.c_str());
    if (!inFile) {
        // NOTE(review): the tail of this message was lost in extraction; wording reconstructed.
        cout << "文件" << FileName << "打开失败!" << endl;
        return false;
    }

    string textline;
    vector<string> lines_of_text;
    getline(inFile, textline);  // skip the first line (file-start marker)
    while (getline(inFile, textline)) {
        lines_of_text.push_back(textline);
    }

    // Build the database and the inverted index.
    const int total_lines = static_cast<int>(lines_of_text.size());
    for (line_num = 0; line_num != total_lines; ++line_num) {
        istringstream line(lines_of_text[line_num]);
        string word;
        while (line >> word) {
            // NOTE(review): tokens of length 1 are ignored, exactly as in the
            // original. Confirm this is intended: single-character items never
            // reach the database.
            if (word.size() > 1) {
                Database[line_num].insert(word);
                mp[word].insert(line_num);
            }
        }
    }

    // NOTE(review): the value printed after the label was lost in extraction;
    // the transaction line count is assumed.
    cout << "项目总数: " << line_num << endl;
    return true;
}

/// Builds the candidate 1-itemsets.
/// @return item -> number of transactions that contain it.
map<string, int> Apriori::getC1()
{
    map<string, int> C1;
    for (const auto &transaction : Database) {
        for (const string &item : transaction.second) {
            ++C1[item];
        }
    }
    return C1;
}

/// Builds the frequent 1-itemsets.
///
/// Side effect: when `minSup` is below 1 it is taken as a fraction of
/// `line_num` and replaced by the truncated absolute count, so buildData()
/// must have run first. The count is clamped to at least 1: with a threshold
/// of 0 every generated candidate, even one that never occurs, would pass as
/// frequent.
/// @return {item} -> support count for every item meeting the minimum support.
map<set<string>, int> Apriori::getL1()
{
    if (minSup < 1) {
        minSup *= line_num;
        minSup = static_cast<float>(static_cast<int>(minSup));  // truncation kept from the original
        if (minSup < 1) {
            minSup = 1;
        }
    }

    map<set<string>, int> L1;
    const map<string, int> C1 = getC1();
    for (const auto &entry : C1) {
        if (entry.second >= minSup) {
            // Wrap the item in a one-element set so L1 has the same key type as Lk.
            set<string> Key;
            Key.insert(entry.first);
            L1[Key] = entry.second;
        }
    }
    return L1;
}

/// Collects the keys (itemsets) of `Lk` into a set of itemsets.
set<set<string>> Apriori::keySet(map<set<string>, int> &Lk)
{
    set<set<string>> ans;
    for (const auto &entry : Lk) {
        ans.insert(entry.first);
    }
    return ans;
}

/// Set difference in place: set1 = set1 - set2.
void Apriori::sub(set<string> &set1, const set<string> &set2)
{
    for (const string &item : set2) {
        set1.erase(item);
        if (set1.empty()) {
            break;  // nothing left to remove
        }
    }
}

/// Set union: returns set1 + set2.
set<string> Apriori::add(const set<string> &set1, const set<string> &set2)
{
    set<string> ans(set1.begin(), set1.end());
    ans.insert(set2.begin(), set2.end());
    return ans;
}

/// Returns true when exactly one element of `a` is absent from `b`.
bool fun1(const set<string> &a, const set<string> &b)
{
    size_t only_in_a = 0;
    for (const string &item : a) {
        if (b.count(item) == 0 && ++only_in_a > 1) {
            return false;
        }
    }
    return only_in_a == 1;
}

/// Generates candidate (m+1)-itemsets from the frequent m-itemsets `Lk0`.
///
/// Two itemsets are joined when the first has exactly one element the second
/// lacks; the union is kept when it has m+1 elements and is not in `Lk0`.
set<set<string>> Apriori::aprioriGen(int m, set<set<string>> &Lk0)
{
    set<set<string>> Ck;
    const size_t wanted = static_cast<size_t>(m + 1);
    for (auto left = Lk0.begin(); left != Lk0.end(); ++left) {
        for (auto right = std::next(left); right != Lk0.end(); ++right) {
            if (!fun1(*left, *right)) {
                continue;
            }
            set<string> joined = add(*left, *right);
            if (joined.size() == wanted && Lk0.count(joined) == 0) {
                Ck.insert(joined);
            }
        }
    }
    // NOTE(review): the rest of this progress line was lost in extraction.
    cout << "完成" << endl;
    return Ck;
}

/// Returns the intersection of two sets of transaction indices.
set<int> intersec(const set<int> &a, const set<int> &b)
{
    set<int> t;
    set_intersection(a.begin(), a.end(),
                     b.begin(), b.end(),
                     inserter(t, t.begin()));
    return t;
}

/// Builds the frequent k-itemsets from the frequent (k-1)-itemsets `Lk0`.
///
/// The support of a candidate is the size of the intersection of the
/// transaction-index sets of its items, so the database is not rescanned.
/// @return itemset -> support count for every candidate meeting `minSup`.
map<set<string>, int> Apriori::getLk(int k, set<set<string>> Lk0)
{
    map<set<string>, int> Lk;
    const set<set<string>> CkSet = aprioriGen(k - 1, Lk0);
    for (const set<string> &candidate : CkSet) {
        if (candidate.empty()) {
            continue;
        }
        set<int> rows;
        bool first = true;
        for (const string &item : candidate) {
            // find() instead of operator[] so a lookup never inserts into mp.
            const auto found = mp.find(item);
            if (found == mp.end()) {
                rows.clear();
                break;
            }
            if (first) {
                rows = found->second;
                first = false;
            } else {
                rows = intersec(rows, found->second);
            }
            if (rows.empty()) {
                break;  // an empty intersection cannot grow again
            }
        }
        if (rows.size() >= minSup) {
            Lk[candidate] = static_cast<int>(rows.size());
        }
    }
    return Lk;
}

/// Builds the frequent k-itemsets from the frequent (k-1)-itemsets `Lk0` by
/// scanning every transaction and counting the candidates it contains.
/// @return itemset -> support count for every candidate meeting `minSup`.
map<set<string>, int> Apriori::getLk0(int k, set<set<string>> Lk0)
{
    map<set<string>, int> Lk, Ck;
    const set<set<string>> CkSet = aprioriGen(k - 1, Lk0);
    for (const auto &transaction : Database) {
        const set<string> &items = transaction.second;
        for (const set<string> &candidate : CkSet) {
            // Both ranges are sorted sets, so includes() is a direct subset test.
            if (includes(items.begin(), items.end(),
                         candidate.begin(), candidate.end())) {
                ++Ck[candidate];
            }
        }
    }
    for (const auto &entry : Ck) {
        if (entry.second >= minSup) {
            Lk[entry.first] = entry.second;
        }
    }
    return Lk;
}

/// Reads the minimum support from stdin, mines "test.txt" and writes the
/// frequent itemsets to stdout and to "res.txt".
int main()
{
    float min_sup = 0;
    cout << "请输入最小支持数/度:";
    cin >> min_sup;

    Apriori apriori("test.txt", min_sup);
    if (!apriori.buildData()) {
        return 1;  // the original carried on with an empty database
    }

    map<set<string>, int> L1 = apriori.getL1();
    set<set<string>> Set = apriori.keySet(L1);  // all frequent 1-itemsets
    map<int, set<set<string>>> L;               // k -> frequent k-itemsets
    L.insert(make_pair(1, Set));

    for (int k = 2;; k++) {
        cout << "k=" << k << endl;
        map<set<string>, int> setLk = apriori.getLk(k, Set);
        // map<set<string>, int> setLk = apriori.getLk0(k, Set);
        if (setLk.empty()) {
            break;
        }
        Set = apriori.keySet(setLk);
        L.insert(make_pair(k, Set));
    }

    ofstream fcout;
    fcout.open("res.txt");
    // fcout.open("res_0.01.txt");
    int cnt = 0;
    for (auto &level : L) {
        cout << "频繁" << level.first << "项集: " << endl;
        fcout << "频繁" << level.first << "项集: " << endl;
        cnt += apriori.printsetSet(level.second);
        for (const set<string> &itemset : level.second) {
            for (const string &item : itemset) {
                fcout << item << " ";
            }
            fcout << endl;
        }
    }
    // NOTE(review): the statements after the loop were lost in extraction;
    // this summary line is reconstructed from the otherwise unused counter.
    cout << "频繁项集总数: " << cnt << endl;
    return 0;
}
测试输入 test.txt 的内容(首行预留为空行,因为程序读取时会丢弃文件的第一行;另外程序只保留长度大于 1 的词,单字符的项会被忽略):
1 2 3 5 3 4 5 2 5 6 8 3 4 2 8 3 2 1 9
上一篇 Linux C语言实现DNS请求
下一篇 C++ 11 深度学习(十七)condition
版权所有 (c)2021-2022 MSHXW.COM
ICP备案号:晋ICP备2021003244-6号