美文网首页
python实现Apriori算法

python实现Apriori算法

作者: volition4_4 | 来源:发表于2018-11-18 20:07 被阅读17次
# coding: utf-8

# 利用python实现apriori算法

# In[1]:


#导入需要的库
from numpy import *


# In[2]:


def loadDataSet():
    """Return the small hard-coded transaction list used by the demos.

    Each inner list is one transaction; the integers are item identifiers.
    A new list object is built on every call.
    """
    transactions = [
        [1, 3, 4],
        [2, 3, 5],
        [1, 2, 3, 5],
        [2, 5],
    ]
    return transactions


# In[3]:


def createC1(dataSet):
    """Build the candidate 1-itemsets (C1) for a transaction data set.

    Parameters:
        dataSet: iterable of transactions, each an iterable of hashable,
            mutually orderable items.

    Returns:
        A list of single-item frozensets, one per distinct item, in
        ascending item order.
    """
    # A set removes duplicates in one pass instead of scanning a growing list.
    unique_items = {item for transaction in dataSet for item in transaction}
    # Return a real list rather than a bare map(): in Python 3 map() yields a
    # one-shot iterator, and callers walk the candidates once per transaction.
    return [frozenset([item]) for item in sorted(unique_items)]


# In[4]:


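# Count the support of every candidate in Ck over the data set D; return the candidates whose support is >= minSupport, together with the support of each counted candidate.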
def scanD(D,Ck,minSupport):
    """Count candidate supports over D and keep the frequent candidates.

    Parameters:
        D: sized collection of transactions (sets of items).
        Ck: iterable of candidate itemsets (frozensets).
        minSupport: minimum fraction of transactions a candidate must
            appear in to be kept.

    Returns:
        (retList, supportData) where retList holds the candidates whose
        support is >= minSupport and supportData maps every candidate that
        occurred in at least one transaction to its support.
    """
    # Materialize the candidates once: they are walked again for every
    # transaction, so a one-shot iterator (e.g. a Python 3 map object) would
    # be exhausted after the first transaction.
    Ck = list(Ck)
    ssCnt = {}
    for tid in D:
        for can in Ck:
            if can.issubset(tid):
                ssCnt[can] = ssCnt.get(can, 0) + 1
    numItems = float(len(D))
    # ssCnt is empty whenever D is empty, so no division by zero occurs here.
    supportData = {can: count / numItems for can, count in ssCnt.items()}
    retList = [can for can, support in supportData.items()
               if support >= minSupport]
    # Keep the historical ordering (last counted candidate first) without
    # paying for repeated insert(0, ...) calls.
    retList.reverse()
    return retList, supportData


# In[15]:


def aprioriGen(Lk,k):
    """Generate candidate k-itemsets by joining (k-1)-itemsets.

    Two itemsets are joined when their first k-2 items, taken in sorted
    order, are identical.

    Parameters:
        Lk: list of itemsets (frozensets), each expected to hold k-1 items.
        k: size of the candidate itemsets to generate.

    Returns:
        A list with the union of every joined pair.
    """
    # Sort BEFORE slicing: a frozenset has no defined iteration order, so
    # slicing the raw list first would compare arbitrary items and could miss
    # or duplicate candidates.  Prefixes are computed once per itemset.
    prefixes = [sorted(itemset)[:k - 2] for itemset in Lk]
    lenLk = len(Lk)
    retList = []
    for i in range(lenLk):
        for j in range(i + 1, lenLk):
            if prefixes[i] == prefixes[j]:
                retList.append(Lk[i] | Lk[j])
    return retList
        


# In[14]:


def apriori(dataSet, minSupport=0.5):
    """Find all frequent itemsets of dataSet level by level.

    Parameters:
        dataSet: list of transactions (iterables of items).
        minSupport: minimum support passed through to scanD.

    Returns:
        (L, supportData): L[i] is the list of frequent itemsets found at
        level i (L[0] comes from the 1-item candidates); supportData
        accumulates the support dictionaries returned by every scanD call.

    Side effect: prints the transactions converted to sets, prefixed 'D:'.
    """
    candidates = createC1(dataSet)
    D = [set(transaction) for transaction in dataSet]
    print('D:', D)
    level, supportData = scanD(D, candidates, minSupport)
    L = [level]
    size = 2
    # Keep growing while the most recent level produced frequent itemsets;
    # an empty level is never appended and ends the loop.
    while level:
        level, level_support = scanD(D, aprioriGen(level, size), minSupport)
        supportData.update(level_support)
        if level:
            L.append(level)
            size += 1
    return L, supportData


# In[19]:


def calConf(freqSet,H,supportData,brl,minConf=0.7):
    """Score the rules (freqSet - conseq) --> conseq for each conseq in H.

    Parameters:
        freqSet: frozenset the rules are derived from.
        H: iterable of candidate consequents (frozensets).
        supportData: mapping from itemset to support; must contain freqSet
            and every freqSet - conseq that is evaluated.
        brl: list that receives an (antecedent, consequent, confidence)
            tuple for each accepted rule (mutated in place).
        minConf: minimum confidence a rule needs to be accepted.

    Returns:
        The consequents whose rule reached minConf, in H order.  Each
        accepted rule is also printed.
    """
    survivors = []
    for conseq in H:
        antecedent = freqSet - conseq
        conf = supportData[freqSet] / supportData[antecedent]
        if conf >= minConf:
            print(antecedent, '-->', conseq, 'conf', conf)
            rule = (antecedent, conseq, conf)
            brl.append(rule)
            survivors.append(conseq)
    return survivors


# In[21]:


def rulesFromConseq(freqSet,H,supportData,brl,minConf=0.7):
    """Recursively derive rules from freqSet with ever larger consequents.

    The consequents in H are scored first; only those whose rule reached
    minConf are merged into consequents one item larger, which are then
    handled by the recursive call.  Scoring H before merging is what lets
    single-item consequents of itemsets with three or more items produce
    rules; merging first would skip them entirely.

    Parameters:
        freqSet: frozenset the rules are derived from.
        H: list of candidate consequents (frozensets of equal size).
        supportData: mapping from itemset to support, forwarded to calConf.
        brl: list that accumulates accepted rules (mutated in place).
        minConf: minimum confidence, forwarded to calConf.

    Returns:
        None; results are appended to brl.
    """
    # Merging can yield no candidates at all; nothing to evaluate then.
    if not H:
        return
    m=len(H[0])
    # The antecedent freqSet - conseq must stay non-empty.
    if len(freqSet) > m:
        Hmpl=calConf(freqSet,H,supportData,brl,minConf)
        print('Hmpl=',Hmpl)
        print('len(Hmpl)=',len(Hmpl),'len(freqSet)=',len(freqSet))
        # At least two surviving consequents are needed to merge a larger one.
        if len(Hmpl) > 1:
            rulesFromConseq(freqSet,aprioriGen(Hmpl,m+1),supportData,brl,minConf)


# In[9]:


def generateRules(L,supportData,minConf=0.7):
    """Collect association rules from the frequent itemsets in L.

    Parameters:
        L: list of levels of frequent itemsets; L[0] is skipped.
        supportData: mapping from itemset to support.
        minConf: minimum confidence forwarded to the rule evaluators.

    Returns:
        The list filled in by calConf / rulesFromConseq.
    """
    bigRuleList = []
    for depth, itemsets in enumerate(L[1:], start=1):
        for freqSet in itemsets:
            # Every single item of the itemset is a starting consequent.
            H1 = [frozenset((item,)) for item in freqSet]
            if depth == 1:
                # Itemsets from L[1] are scored directly.
                calConf(freqSet, H1, supportData, bigRuleList, minConf)
            else:
                rulesFromConseq(freqSet, H1, supportData, bigRuleList, minConf)
    return bigRuleList


# In[10]:


def testApriori():
    """Run apriori on the sample data at minSupport 0.7 and 0.5.

    Prints the data set, then for each threshold the frequent itemsets and
    the support dictionary returned by apriori.
    """
    dataSet=loadDataSet()
    print('dataSet:',dataSet)
    L1,supportData1=apriori(dataSet,minSupport=0.7)
    print('L(0.7):',L1)
    print('supportData(0.7):',supportData1)
    print('------------------------------------------')
    L2,supportData2=apriori(dataSet,minSupport=0.5)
    print('L(0.5):',L2)
    # Print the dictionary itself; the variable name used to be trapped
    # inside the string literal, so its contents were never shown.
    print('supportData(0.5):',supportData2)
    print('------------------------------------------')


# In[11]:


def testGenerateRules():
    """Mine the sample data at minSupport 0.2 and print the derived rules.

    Prints the frequent itemsets, the support dictionary and the rule list
    returned by generateRules.
    """
    dataSet=loadDataSet()
    L1,supportData1=apriori(dataSet,minSupport=0.2)
    print('L(0.2):',L1)
    # This value is the support dictionary, so label it as such.
    print('supportData(0.2):',supportData1)
    # Confidence is a ratio of supports and cannot exceed 1.0; the former
    # threshold of 1.1 rejected every rule.  0.7 matches the default used
    # by generateRules.
    rules=generateRules(L1,supportData1,minConf=0.7)
    print('Rules:',rules)


# In[12]:


def main():
    """Run both demo routines in order: testApriori, then testGenerateRules."""
    for demo in (testApriori, testGenerateRules):
        demo()


# In[22]:


# Run the demos only when this file is executed as a script, not on import.
if __name__=="__main__":
    main()

相关文章

网友评论

      本文标题:python实现Apriori算法

      本文链接:https://www.haomeiwen.com/subject/mlzgfqtx.html