从树到森林
示例文件下载地址:
http://archive.ics.uci.edu/ml/machine-learning-databases/adult/
一、代码
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import graphviz
from sklearn.tree import export_graphviz
from matplotlib.colors import ListedColormap
from sklearn import tree, datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
# Disabled demo: the triple-quoted string below is a bare expression that is
# never assigned or used, so none of the code inside it runs.
# The text inside loads the wine dataset, keeps its first two features, fits a
# RandomForestClassifier (n_estimators=6, random_state=3) and plots the
# predicted class regions with the samples drawn as a scatter plot on top.
'''
wine = datasets.load_wine()
X, y = wine.data[:, :2], wine.target
X_train, X_test, y_train, y_test = train_test_split(X, y)
forest = RandomForestClassifier(n_estimators=6, random_state=3)
forest.fit(X_train, y_train)
# 定义图像中分区的颜色和散点的颜色
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
# 分别用样本的两个特征值创建图像的横轴和纵轴
x_min, x_max = X_train[:, 0].min() -1, X_train[:, 0].max() + 1
y_min, y_max = X_train[:, 1].min() -1, X_train[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, .02),
np.arange(y_min, y_max, .02))
Z = forest.predict(np.c_[xx.ravel(), yy.ravel()])
# 给每个分类中的样本分配 不同的颜色
Z = Z.reshape(xx.shape)
plt.figure()
plt.pcolormesh(xx, yy, Z, cmap=cmap_light)
#用散点把样本表示出来
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold, edgecolors='k', s=20)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.title('Classifier: RandomForest')
plt.show()
'''
# Load the adult census data file. header=None makes pandas treat the first
# line as data, so the column names are supplied explicitly.
data = pd.read_csv('D:\\tmp\\adult.data', header=None, index_col=False,
                   names=['年龄', '单位性质', '权重', '学历', '受教育时长',
                          '婚姻状况', '职业', '家庭情况', '种族', '性别',
                          '资产所得', '资产损失', '周工作时长', '原籍', '收入'])
# Work with a reduced set of columns; '收入' (income) is the prediction target.
data_lite = data[['年龄', '单位性质', '学历', '性别', '周工作时长', '职业', '收入']]
# One-hot encode the text columns into '<column>_<value>' dummy columns.
data_dummies = pd.get_dummies(data_lite)
print('样本原始特征:', list(data_lite.columns))
print('虚拟变量特征:', list(data_dummies.columns))
# The income dummies encode the label itself. Leaving them in the feature
# matrix leaks the answer to the model and yields a meaningless perfect
# score, so every '收入_*' column is excluded from the features.
income_columns = [col for col in data_dummies.columns
                  if col.startswith('收入_')]
features = data_dummies.drop(columns=income_columns)
X = features.values
# Binary label: whether the income is above 50K (the raw values keep their
# leading space, hence the space inside the column name).
y = data_dummies['收入_ >50K'].values
print('特征形态: {} 标签形态: {}'.format(X.shape, y.shape))
# Fixed random_state keeps the train/test split reproducible between runs.
X_Train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
# max_depth=5 caps the depth of the fitted tree.
go_dating_tree = tree.DecisionTreeClassifier(max_depth=5)
go_dating_tree.fit(X_Train, y_train)
print('模型得分:{:.2f}'.format(go_dating_tree.score(X_test, y_test)))
二、效果
C:\Users\ccc\AppData\Local\Programs\Python\Python38\python.exe D:/Code/Metis-Org/app/service/time_series_detector/algorithm/ai_test.py
样本原始特征: ['年龄', '单位性质', '学历', '性别', '周工作时长', '职业', '收入']
虚拟变量特征: ['年龄', '周工作时长', '单位性质_ ?', '单位性质_ Federal-gov', '单位性质_ Local-gov', '单位性质_ Never-worked', '单位性质_ Private', '单位性质_ Self-emp-inc', '单位性质_ Self-emp-not-inc', '单位性质_ State-gov', '单位性质_ Without-pay', '学历_ 10th', '学历_ 11th', '学历_ 12th', '学历_ 1st-4th', '学历_ 5th-6th', '学历_ 7th-8th', '学历_ 9th', '学历_ Assoc-acdm', '学历_ Assoc-voc', '学历_ Bachelors', '学历_ Doctorate', '学历_ HS-grad', '学历_ Masters', '学历_ Preschool', '学历_ Prof-school', '学历_ Some-college', '性别_ Female', '性别_ Male', '职业_ ?', '职业_ Adm-clerical', '职业_ Armed-Forces', '职业_ Craft-repair', '职业_ Exec-managerial', '职业_ Farming-fishing', '职业_ Handlers-cleaners', '职业_ Machine-op-inspct', '职业_ Other-service', '职业_ Priv-house-serv', '职业_ Prof-specialty', '职业_ Protective-serv', '职业_ Sales', '职业_ Tech-support', '职业_ Transport-moving', '收入_ <=50K', '收入_ >50K']
特征形态: (32561, 46) 标签形态: (32561,)
模型得分:1.00
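Process finished with exit code 0
(注:上面输出中的 46 个特征里包含“收入_ <=50K”和“收入_ >50K”两列,它们本身就是标签,属于标签泄漏,因此模型得分 1.00 并不代表真实的预测能力;把这两列从特征中去掉后,得分才有参考意义。)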
2022-04-26 09_24_29-MessageCenterUI.png
2022-04-26 09_24_12-MessageCenterUI.png
2022-04-26 09_16_20-MessageCenterUI.png
网友评论