
Python Data Mining Test 5

Author: 在做算法的巨巨 | Published 2018-08-05 20:53

    Task: predict repayment delinquency from credit card holders' background information (age, education level, years at current job, years at current address, household income, debt-to-income ratio, credit card debt, other debt), building the prediction model with classification algorithms.


    Data import:

    import pandas as pd
    import numpy as np  # used below for averaging cross-validation scores

    # Load the training and test sets; the last column is the delinquency label
    data = pd.read_csv('C:\\Users\\dell\\Desktop\\datamining\\train__UnB.csv')
    train_data=data.iloc[:,0:-1]
    train_label=data.iloc[:,-1]
    data1=pd.read_csv('C:\\Users\\dell\\Desktop\\datamining\\test__UnB.csv')
    test_data=data1.iloc[:,0:-1]
    test_label=data1.iloc[:,-1]
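
    Before modeling, it helps to confirm that the splits look as expected. A minimal sanity-check sketch, using only the pandas objects created above:

    print(train_data.shape, test_data.shape)   # rows and feature columns in each split
    print(train_data.head())                   # preview of the cardholder background features
    print(train_label.head())                  # preview of the delinquency labels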
    

    Data source:



    Classification and prediction with machine learning

    train_label.unique()
    

    The label values show that this is a binary classification problem.
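
    Delinquency data is often imbalanced, so it is also worth checking how the two classes are distributed, not just which values occur. A minimal sketch using the train_label series from above (the 0/1 coding for on-time/delinquent is an assumption):

    print(train_label.value_counts())                 # absolute count of each class
    print(train_label.value_counts(normalize=True))   # class proportions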

    • KNN algorithm
    from sklearn.model_selection import cross_val_score
    from sklearn import neighbors
    # k-nearest-neighbours classifier with k=2 neighbours
    knnModel = neighbors.KNeighborsClassifier(n_neighbors=2)
    knnModel.fit(train_data,train_label)
    # mean accuracy over 5-fold cross-validation on the training set
    score=np.mean(
            cross_val_score(
            knnModel,
            train_data,train_label,cv=5
            )
    )
    result=knnModel.predict(test_data)
    ## Compute accuracy, precision, recall and F1 from the confusion-matrix counts
    TP=0;FP=0;FN=0;TN=0
    for i in range(len(test_label)):
        if test_label[i]==1 and result[i]==1:
            TP+=1
        elif test_label[i]==1 and result[i]==0:
            FN+=1
        elif test_label[i]==0 and result[i]==1:
            FP+=1
        elif test_label[i]==0 and result[i]==0:
            TN+=1
    accuracy=(TP+TN)/len(test_label)
    precision=TP/(TP+FP)
    recall=TP/(TP+FN)
    F1=2*precision*recall/(precision+recall)   
    print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' %(score,accuracy,precision,recall,F1))  
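
    The hand-written confusion-matrix loop is repeated for every model below; as a cross-check, the same metrics can be computed with scikit-learn's built-in functions. A minimal sketch, assuming the test_label and result arrays from the KNN block above:

    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
    print('accuracy: %f' % accuracy_score(test_label, result))
    print('precision: %f' % precision_score(test_label, result))
    print('recall: %f' % recall_score(test_label, result))
    print('F1: %f' % f1_score(test_label, result))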
    
    • Naive Bayes
    from sklearn.naive_bayes import MultinomialNB
    MNBModel = MultinomialNB()
    MNBModel.fit(train_data,train_label)
    score=np.mean(
            cross_val_score(
            MNBModel,
            train_data,train_label,cv=5
            )
    )
    result=MNBModel.predict(test_data)
    ## Compute accuracy, precision, recall and F1 from the confusion-matrix counts
    TP=0;FP=0;FN=0;TN=0
    for i in range(len(test_label)):
        if test_label[i]==1 and result[i]==1:
            TP+=1
        elif test_label[i]==1 and result[i]==0:
            FN+=1
        elif test_label[i]==0 and result[i]==1:
            FP+=1
        elif test_label[i]==0 and result[i]==0:
            TN+=1
    accuracy=(TP+TN)/len(test_label)
    precision=TP/(TP+FP)
    recall=TP/(TP+FN)
    F1=2*precision*recall/(precision+recall)   
    print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' %(score,accuracy,precision,recall,F1))  
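
    Note that MultinomialNB is designed for non-negative, count-like features; for continuous cardholder attributes such as age and income, GaussianNB is often the more natural Naive Bayes variant. A minimal alternative sketch, not part of the original post:

    from sklearn.naive_bayes import GaussianNB
    gnbModel = GaussianNB()
    gnbModel.fit(train_data, train_label)
    score = np.mean(cross_val_score(gnbModel, train_data, train_label, cv=5))
    result = gnbModel.predict(test_data)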
    
    • SVM
    from sklearn import svm
    svcModel=svm.SVC(kernel='rbf')
    svcModel.fit(train_data,train_label)
    score=np.mean(
            cross_val_score(
            svcModel,
            train_data,train_label,cv=5
            )
    )
    result = svcModel.predict(test_data)
    ## Compute accuracy, precision, recall and F1 from the confusion-matrix counts
    TP=0;FP=0;FN=0;TN=0
    for i in range(len(test_label)):
        if test_label[i]==1 and result[i]==1:
            TP+=1
        elif test_label[i]==1 and result[i]==0:
            FN+=1
        elif test_label[i]==0 and result[i]==1:
            FP+=1
        elif test_label[i]==0 and result[i]==0:
            TN+=1
    accuracy=(TP+TN)/len(test_label)
    precision=TP/(TP+FP)
    recall=TP/(TP+FN)
    F1=2*precision*recall/(precision+recall)   
    print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' %(score,accuracy,precision,recall,F1))  
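
    An RBF-kernel SVC is sensitive to feature scale, and these features (age, income, debt ratios) live on very different ranges, so standardizing before the SVM usually helps. A minimal sketch using a scikit-learn Pipeline; this is a suggested variation, not part of the original post:

    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    # scale each feature to zero mean / unit variance, then fit the RBF SVM
    svcScaled = make_pipeline(StandardScaler(), svm.SVC(kernel='rbf'))
    svcScaled.fit(train_data, train_label)
    score = np.mean(cross_val_score(svcScaled, train_data, train_label, cv=5))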
    
    • Decision tree algorithm
    from sklearn.tree import DecisionTreeClassifier
    dtModel = DecisionTreeClassifier(max_leaf_nodes=8)
    dtModel.fit(train_data, train_label)
    score=np.mean(
            cross_val_score(
            dtModel,
            train_data,train_label,cv=5
            )
    )
    result=dtModel.predict(test_data)
    ## Compute accuracy, precision, recall and F1 from the confusion-matrix counts
    TP=0;FP=0;FN=0;TN=0
    for i in range(len(test_label)):
        if test_label[i]==1 and result[i]==1:
            TP+=1
        elif test_label[i]==1 and result[i]==0:
            FN+=1
        elif test_label[i]==0 and result[i]==1:
            FP+=1
        elif test_label[i]==0 and result[i]==0:
            TN+=1
    accuracy=(TP+TN)/len(test_label)
    precision=TP/(TP+FP)
    recall=TP/(TP+FN)
    F1=2*precision*recall/(precision+recall)   
    print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' %(score,accuracy,precision,recall,F1))  
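
    With only 8 leaf nodes the fitted tree is small enough to print and inspect, which shows which background attributes the splits actually use. A minimal sketch, assuming train_data keeps its original column names:

    from sklearn.tree import export_text
    print(export_text(dtModel, feature_names=list(train_data.columns)))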
    
    • Random forest
    from sklearn.ensemble import RandomForestClassifier
    rfcModel = RandomForestClassifier(n_estimators=8, max_leaf_nodes=None)
    rfcModel.fit(train_data,train_label)
    score=np.mean(
            cross_val_score(
            rfcModel,
            train_data,train_label,cv=5
            )
    )
    result=rfcModel.predict(test_data)
    ## Compute accuracy, precision, recall and F1 from the confusion-matrix counts
    TP=0;FP=0;FN=0;TN=0
    for i in range(len(test_label)):
        if test_label[i]==1 and result[i]==1:
            TP+=1
        elif test_label[i]==1 and result[i]==0:
            FN+=1
        elif test_label[i]==0 and result[i]==1:
            FP+=1
        elif test_label[i]==0 and result[i]==0:
            TN+=1
    accuracy=(TP+TN)/len(test_label)
    precision=TP/(TP+FP)
    recall=TP/(TP+FN)
    F1=2*precision*recall/(precision+recall)   
    print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' %(score,accuracy,precision,recall,F1))  
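
    A random forest also exposes feature_importances_, which is useful for seeing which cardholder attributes drive the delinquency prediction. A minimal sketch, assuming the fitted rfcModel from above:

    importances = pd.Series(rfcModel.feature_importances_, index=train_data.columns)
    print(importances.sort_values(ascending=False))   # features ranked by importance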
    
