python tree

作者: 只有香如故 | 来源:发表于2018-04-19 15:55 被阅读342次

    决策树理论

    决策树
    ID3 信息增益
    C4.5 信息增益率
    CART 基尼系数
    前剪枝,后剪枝

    from math import log
    import operator
    import treePlotter
    import matplotlib.pyplot as plt

    sklearn决策树

    sklearn.tree

    插件graphviz和插件pydotplus安装

    在决策树中需要使用插件graphviz和插件pydotplus
    第一步是安装graphviz。下载地址在:http://www.graphviz.org/。无论是linux还是windows,装完后都要设置环境变量,将graphviz的bin目录加到PATH,比如我是windows,将C:/Program Files (x86)/Graphviz2.38/bin/加入了PATH


    import os

    # Append the Graphviz executables directory to PATH for this process so
    # that the `dot` binary can be located when rendering the tree.
    graphviz_bin = 'D:/Program Files (x86)/Graphviz2.38/bin/'
    os.environ["PATH"] = os.environ["PATH"] + os.pathsep + graphviz_bin


    第二步是安装python插件pydotplus, 在anaconda3的命令行执行以下命令
    conda install -c conda-forge pydotplus


    import pydotplus

    参数说明
    DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
    max_features=None, max_leaf_nodes=None,
    min_impurity_decrease=0.0, min_impurity_split=None,
    min_samples_leaf=1, min_samples_split=2,
    min_weight_fraction_leaf=0.0, presort=False, random_state=None,
    splitter='best')

    训练集和测试集选择:留出法划分(train_test_split;如需交叉验证可使用 GridSearchCV 或 cross_val_score)

    from sklearn.model_selection import train_test_split

    # Separate the label column from the feature columns.
    y = data['class']
    X = data.drop('class', axis=1)

    # Hold out 20% of the rows for testing; the fixed seed makes the split
    # reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=0
    )
    # Keep the feature names of the training frame for later use.
    columns = X_train.columns

    数据标准化

    # Min-max scaling: shift each column by its minimum and divide by its
    # range (max - min).
    col_min = data.min()
    col_range = data.max() - col_min
    sdata = (data - col_min) / col_range
    或者
    from sklearn.preprocessing import StandardScaler

    # Fit the scaler on the training features only, then apply the same
    # fitted transformation to the test features so that test-set statistics
    # do not influence training.
    ss_X = StandardScaler()
    X_train = ss_X.fit_transform(X_train)
    X_test = ss_X.transform(X_test)
    # No scaler is created for y: the target is a class label and is left
    # unscaled.

    tree

    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.model_selection import train_test_split

    from sklearn.model_selection import GridSearchCV

    from sklearn.metrics import accuracy_score
    from sklearn.metrics import roc_auc_score
    from sklearn import tree
    import pydotplus
    import os

    # Load the training table; the file is read with GBK encoding.
    dataSet = pd.read_csv("trainData.csv", encoding="gbk")

    # The "class" column is the prediction target.
    target = dataSet["class"]

    # Features are the contiguous column range "ele_ind" .. "alarm_ind"
    # (label-based slicing with .loc includes both end columns).
    # NOTE(review): assumes "class" lies outside this range, otherwise the
    # label would leak into the features -- confirm against the CSV header.
    data = dataSet.loc[:, "ele_ind":"alarm_ind"]

    数据标准化

    # Min-max scale every feature column.
    # NOTE(review): a constant column has max == min, so its scaled values
    # become NaN (0/0); drop or special-case such columns if they can occur.
    sdata = (data - data.min()) / (data.max() - data.min())

    # Hold out 20% of the rows for testing; the fixed seed makes the split
    # reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        sdata, target, random_state=0, test_size=0.2
    )
    columns = X_train.columns

    # Split on information gain (entropy). Calling DecisionTreeClassifier()
    # with no arguments would use its default criterion instead.
    mode = tree.DecisionTreeClassifier(criterion='entropy')
    mode = mode.fit(X_train, y_train)

    # Probability of the second class (column 1), thresholded at 0.5.
    # NOTE(review): assumes a binary target whose labels are 0 and 1 -- confirm.
    y_prob = mode.predict_proba(X_test)[:, 1]
    y_pred = np.where(y_prob > 0.5, 1, 0)

    # Accuracy must be measured against the held-out true labels; scoring
    # against y_pred would only compare the model with its own output.
    mode.score(X_test, y_test)

    import graphviz
    import pydotplus
    from sklearn import tree
    import os

    # Feature names come from the feature frame; class names are the distinct
    # target values converted from int to str.
    data_feature_name = data.columns[:]
    data_target_name = list(map(str, np.unique(dataSet["class"])))

    # Make the Graphviz executables discoverable for this process.
    os.environ["PATH"] += os.pathsep + 'D:/Program Files (x86)/Graphviz2.38/bin/'

    # Export the fitted tree as DOT source (returned as a string because
    # out_file is None), then render it to a PDF file.
    dot_tree = tree.export_graphviz(
        mode,
        out_file=None,
        feature_names=data_feature_name,
        class_names=data_target_name,
        filled=True,
        rounded=True,
        special_characters=True,
    )
    graph = pydotplus.graph_from_dot_data(dot_tree)
    graph.write_pdf("out.pdf")

    相关文章

      网友评论

        本文标题:python tree

        本文链接:https://www.haomeiwen.com/subject/ofbwkftx.html