美文网首页
朴素贝叶斯的实现

朴素贝叶斯的实现

作者: 付剑飞 | 来源:发表于2017-08-13 22:57 被阅读0次
    '''
    Created on 2017年8月10日
    
    @author: fujianfei
    '''
    import numpy as np
    
    def loadDataSet():
        """Return the hard-coded toy corpus and its class labels.

        Returns:
            A tuple ``(postingList, classVec)``: ``postingList`` is a list of
            six tokenised posts (lists of words) and ``classVec`` is the list
            of matching labels, where 1 marks an abusive post and 0 a normal
            one.
        """
        # Each label sits next to its post so the two cannot drift apart.
        labelled_posts = [
            (0, ['my', 'dog', 'has', 'flea', 'problems', 'help', 'please']),
            (1, ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid']),
            (0, ['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him']),
            (1, ['stop', 'posting', 'stupid', 'worthless', 'garbage']),
            (0, ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him']),
            (1, ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid']),
        ]
        postingList = [words for _, words in labelled_posts]
        classVec = [label for label, _ in labelled_posts]
        return postingList, classVec
    
    def createVocabList(dataSet):
        """Build the vocabulary: every distinct word in ``dataSet``, listed once.

        Words are returned in order of first appearance, so the same corpus
        always yields the same vocabulary ordering. (Converting a ``set`` of
        strings to a list gives an arbitrary order that can change from one
        interpreter run to the next.)

        Args:
            dataSet: iterable of documents, each an iterable of hashable words.

        Returns:
            A list containing each distinct word exactly once.
        """
        # dict keys are unique and keep insertion order (Python 3.7+), which
        # gives de-duplication and a stable ordering in a single pass.
        return list(dict.fromkeys(word for document in dataSet for word in document))
    
    def setOfWords2Vec(vocaList, inputSet):
        """Encode a document as a 0/1 presence vector over the vocabulary.

        Args:
            vocaList: sequence of vocabulary words; position ``i`` of the
                result corresponds to ``vocaList[i]``.
            inputSet: iterable of words making up the document.

        Returns:
            A list of ``len(vocaList)`` ints: 1 where the vocabulary word
            occurs in ``inputSet``, 0 otherwise. Words that are not in the
            vocabulary are ignored.
        """
        # Build the word -> index lookup once rather than scanning the
        # vocabulary twice (``in`` + ``.index``) for every token.
        # ``setdefault`` keeps the first position when a word is repeated,
        # matching what ``list.index`` returns.
        position = {}
        for idx, word in enumerate(vocaList):
            position.setdefault(word, idx)

        numVect = [0] * len(vocaList)
        for token in inputSet:
            idx = position.get(token)
            if idx is not None:
                numVect[idx] = 1
        return numVect
    
    def trainNB(trainMat, trainCategory):
        """Estimate naive Bayes parameters from document word vectors.

        The per-class word likelihoods are estimated with add-one (Laplace)
        smoothing and returned as natural logarithms. Smoothing keeps a word
        that never occurs in a class from getting probability zero, and the
        log form lets a classifier add the terms to a log prior instead of
        multiplying many small probabilities.

        Args:
            trainMat: 2-D array-like with one row per document and one column
                per vocabulary word (0/1 flags or word counts).
            trainCategory: 1-D array-like of labels, one per row of
                ``trainMat``. Entries equal to 1 are class 1; every other
                value is treated as class 0.

        Returns:
            A tuple ``(p0Vect, p1Vect, pc)`` where ``p0Vect`` and ``p1Vect``
            are 1-D arrays holding ``log P(word | class 0)`` and
            ``log P(word | class 1)``, and ``pc`` is the fraction of training
            documents labelled 1, i.e. the prior ``P(class 1)``.

        Raises:
            ValueError: if ``trainMat`` is not a non-empty 2-D matrix or the
                number of labels does not match the number of rows.
        """
        docs = np.asarray(trainMat, dtype=float)
        labels = np.asarray(trainCategory)
        if docs.ndim != 2 or docs.shape[0] == 0:
            raise ValueError("trainMat must be a non-empty 2-D matrix")
        numTrain, numWords = docs.shape
        if labels.shape != (numTrain,):
            raise ValueError("trainCategory must hold one label per row of trainMat")

        is_class1 = labels == 1
        pc = float(np.count_nonzero(is_class1)) / numTrain

        def _log_likelihood(rows):
            # Add-one smoothing: (count + 1) / (total words + vocabulary size),
            # so the estimates stay strictly positive and sum to one.
            word_counts = rows.sum(axis=0) + 1.0
            total = rows.sum() + numWords
            return np.log(word_counts / total)

        p1Vect = _log_likelihood(docs[is_class1])
        p0Vect = _log_likelihood(docs[~is_class1])
        return p0Vect, p1Vect, pc
    
    def classifyNB(vec2Classify, p0Vect, p1Vect, pClass1):
        """Pick the class with the larger score for one word vector.

        Each class score is the sum of the element-wise product of
        ``vec2Classify`` with that class's vector, plus the natural log of
        the class prior (``pClass1`` for class 1, ``1 - pClass1`` for
        class 0). The result is a log-posterior (up to a constant) only when
        ``p0Vect`` and ``p1Vect`` hold log-likelihoods.

        Both scores are printed, class 1 first, before the decision is
        returned.

        Returns:
            1 if the class-1 score is strictly greater than the class-0
            score, otherwise 0.
        """
        class1_terms = vec2Classify * p1Vect
        class1_score = np.sum(class1_terms) + np.log(pClass1)
        class0_terms = vec2Classify * p0Vect
        class0_score = np.sum(class0_terms) + np.log(1 - pClass1)
        print(class1_score,class0_score)
        # Ties (and NaN comparisons) fall through to class 0.
        return 1 if class1_score > class0_score else 0
    
    def testingNB():
        """Train on the toy corpus and print the predicted class of two sample posts.

        Loads the built-in data set, builds the vocabulary, converts every
        post to a word vector, trains with ``trainNB`` and then classifies
        the two sample entries, printing each entry with its predicted label.
        """
        posts, labels = loadDataSet()
        vocabulary = createVocabList(posts)
        doc_vectors = [setOfWords2Vec(vocabulary, post) for post in posts]
        p0V, p1V, pAb = trainNB(np.array(doc_vectors), np.array(labels))

        sample_entries = (
            ['love', 'my', 'dalmation'],
            ['stupid', 'garbage'],
        )
        for testEntry in sample_entries:
            thisDoc = np.array(setOfWords2Vec(vocabulary, testEntry))
            verdict = classifyNB(thisDoc, p0V, p1V, pAb)
            print (testEntry,'classified as: ',verdict)
        
    # Run the demo. There is no ``__main__`` guard, so this call also executes
    # whenever the file is imported as a module.
    testingNB()
    

    相关文章

      网友评论

          本文标题:朴素贝叶斯的实现

          本文链接:https://www.haomeiwen.com/subject/gthyrxtx.html