美文网首页
scipy层次聚类对种子特性数据的分析

scipy层次聚类对种子特性数据的分析

作者: 一路向后 | 来源:发表于2021-06-10 22:30 被阅读0次

    1.数据文件seeds-less-rows.csv

    area,perimeter,compactness,length,width,asymmetry_coefficient,groove_length,grain_variety
    14.88,14.57,0.8811,5.5539999999999985,3.333,1.018,4.956,Kama
    14.69,14.49,0.8799,5.563,3.259,3.5860000000000003,5.2189999999999985,Kama
    14.03,14.16,0.8796,5.438,3.201,1.717,5.001,Kama
    13.99,13.83,0.9183,5.119,3.383,5.234,4.7810000000000015,Kama
    14.11,14.26,0.8722,5.52,3.168,2.688,5.2189999999999985,Kama
    13.02,13.76,0.8641,5.395,3.026,3.373,4.825,Kama
    15.49,14.94,0.8724,5.7570000000000014,3.3710000000000004,3.412,5.228,Kama
    16.2,15.27,0.8734,5.8260000000000005,3.464,2.823,5.527,Kama
    13.5,13.85,0.8852,5.351,3.158,2.249,5.176,Kama
    15.36,14.76,0.8861,5.7010000000000005,3.393,1.367,5.1320000000000014,Kama
    15.78,14.91,0.8923,5.674,3.434,5.593,5.136,Kama
    14.46,14.35,0.8818,5.388,3.377,2.802,5.044,Kama
    11.23,12.63,0.884,4.902,2.879,2.269,4.703,Kama
    14.34,14.37,0.8726,5.63,3.19,1.3130000000000002,5.15,Kama
    16.84,15.67,0.8623,5.997999999999998,3.484,4.675,5.877000000000002,Rosa
    17.32,15.91,0.8599,6.064,3.403,3.824,5.9220000000000015,Rosa
    18.72,16.19,0.8977,6.006,3.857,5.324,5.879,Rosa
    18.88,16.26,0.8969,6.084,3.764,1.649,6.109,Rosa
    18.76,16.2,0.8984,6.1720000000000015,3.796,3.12,6.053,Rosa
    19.31,16.59,0.8815,6.341,3.81,3.477,6.238,Rosa
    17.99,15.86,0.8992,5.89,3.694,2.068,5.8370000000000015,Rosa
    18.85,16.17,0.9056,6.152,3.806,2.843,6.2,Rosa
    19.38,16.72,0.8716,6.303,3.791,3.678,5.965,Rosa
    18.96,16.2,0.9077,6.051,3.897,4.334,5.75,Rosa
    18.14,16.12,0.8772,6.059,3.563,3.619,6.011,Rosa
    18.65,16.41,0.8698,6.285,3.594,4.391,6.102,Rosa
    18.94,16.32,0.8942,6.144,3.825,2.908,5.949,Rosa
    17.36,15.76,0.8785,6.145,3.574,3.526,5.971,Rosa
    13.32,13.94,0.8613,5.541,3.073,7.035,5.44,Canadian
    11.43,13.13,0.8335,5.176,2.719,2.221,5.1320000000000014,Canadian
    12.01,13.52,0.8249,5.405,2.776,6.992000000000001,5.27,Canadian
    11.34,12.87,0.8596,5.053,2.849,3.347,5.003,Canadian
    12.02,13.33,0.8503,5.35,2.81,4.271,5.308,Canadian
    12.44,13.59,0.8462,5.319,2.897,4.924,5.27,Canadian
    11.55,13.1,0.8455,5.167000000000002,2.845,6.715,4.956,Canadian
    11.26,13.01,0.8355,5.186,2.71,5.335,5.092,Canadian
    12.46,13.41,0.8706,5.2360000000000015,3.017,4.987,5.147,Canadian
    11.81,13.45,0.8198,5.4129999999999985,2.716,4.898,5.352,Canadian
    11.27,12.86,0.8563,5.091,2.804,3.985,5.001,Canadian
    12.79,13.53,0.8786,5.224,3.054,5.483,4.958,Canadian
    12.67,13.32,0.8977,4.984,3.135,2.3,4.745,Canadian
    11.23,12.88,0.8511,5.14,2.795,4.325,5.003,Canadian
    

    2.源码实现

    import pandas as pd
    from scipy.cluster.hierarchy import linkage, dendrogram
    import matplotlib.pyplot as plt
    from matplotlib.ticker import MultipleLocator
    
    # Load the seed measurements from the CSV file next to the script.
    seeds_df = pd.read_csv('./seeds-less-rows.csv')

    # Quick look at the data: the first rows and how many samples each variety has.
    print(seeds_df.head())
    print(seeds_df['grain_variety'].value_counts())

    # Split off the label column (pop removes it from the frame in place),
    # leaving only the numeric feature columns to cluster on.
    varieties = seeds_df.pop('grain_variety').tolist()
    samples = seeds_df.to_numpy()

    # Agglomerative (hierarchical) clustering with complete linkage.
    mergings = linkage(samples, method='complete')

    # Draw the dendrogram, labelling each leaf with its grain variety.
    fig, ax = plt.subplots(figsize=(10, 8), dpi=80)
    dendrogram(
        mergings,
        labels=varieties,
        leaf_rotation=90,
        leaf_font_size=10,
        ax=ax,
    )
    # Minor ticks every 0.1 on the distance (y) axis.
    ax.yaxis.set_minor_locator(MultipleLocator(0.1))

    # Write the figure to disk.
    fig.savefig("1.png")
    

    3.运行及其结果

    $ python3 example.py
        area  perimeter  compactness  length  width  asymmetry_coefficient  \
    0  14.88      14.57       0.8811   5.554  3.333                  1.018   
    1  14.69      14.49       0.8799   5.563  3.259                  3.586   
    2  14.03      14.16       0.8796   5.438  3.201                  1.717   
    3  13.99      13.83       0.9183   5.119  3.383                  5.234   
    4  14.11      14.26       0.8722   5.520  3.168                  2.688   
    
       groove_length grain_variety  
    0          4.956          Kama  
    1          5.219          Kama  
    2          5.001          Kama  
    3          4.781          Kama  
    4          5.219          Kama  
    Rosa        14
    Canadian    14
    Kama        14
    Name: grain_variety, dtype: int64
    
    1.png

    相关文章

      网友评论

          本文标题:scipy层次聚类对种子特性数据的分析

          本文链接:https://www.haomeiwen.com/subject/tsnpeltx.html