美文网首页
泰坦尼克号海难幸存状况分析

泰坦尼克号海难幸存状况分析

作者: b485c88ab697 | 来源:发表于2017-09-10 20:05 被阅读185次

    matplotlib可视化练习

    %matplotlib inline
    import matplotlib as mpl
    from matplotlib import pyplot as plt
    import seaborn as sns
    import pandas as pd
    import numpy as np
    

    练习4:泰坦尼克号海难幸存状况分析

    • 不同舱位等级中幸存和遇难的乘客比例(堆积柱状图)
    • 不同性别的幸存比例(堆积柱状图)
    • 幸存和遇难乘客的票价分布(分类箱式图)
    • 幸存和遇难乘客的年龄分布(分类箱式图)
    • 不同上船港口的乘客舱位等级分布(分组柱状图)
    • 幸存和遇难乘客同船兄弟姐妹及配偶(sibsp)的数量分布(分类箱式图)
    • 幸存和遇难乘客同船父母及子女(parch)的数量分布(分类箱式图)
    • 单独乘船与否和幸存之间有没有联系(堆积柱状图或者分组柱状图)
    # Column meanings, in order: survived flag, passenger class, sex, age,
    # siblings/spouses aboard (sibsp), parents/children aboard (parch), fare,
    # embarkation port code, passenger class (as a label), person category,
    # adult-male flag, deck, embarkation town, alive flag, travelling-alone flag.
    data = sns.load_dataset("titanic")
    data.head(n=5)
    
    Paste_Image.png
    # Count passengers for every (survived, pclass) pair, then pivot the
    # `survived` level into columns so that each row is one passenger class.
    pair_counts = data[['survived', 'pclass']].groupby(['survived', 'pclass']).size()
    survived_pclass = pair_counts.unstack(level='survived')
    survived_pclass
    
    Paste_Image.png
    # Column 0 holds the not-survived count and column 1 the survived count.
    # Derive the per-class total and the share of each outcome.
    survived_pclass['total'] = survived_pclass[0].add(survived_pclass[1])
    survived_pclass['yes_prop'] = survived_pclass[1].div(survived_pclass['total'])
    survived_pclass['no_prop'] = survived_pclass[0].div(survived_pclass['total'])
    survived_pclass
    
    Paste_Image.png
    # 绘制堆积柱状图
    def stackedbarplot(x_data, y_data_list, y_data_names, colors, x_label, y_label, title):
        """Draw a stacked bar chart on a new figure.

        Every series is drawn on top of the running total of all the series
        before it, so any number of series stacks correctly (not just two).

        Args:
            x_data: Bar positions, one per bar.
            y_data_list: Sequence of series; each holds one height per position.
            y_data_names: Legend label for each series.
            colors: Bar colour for each series.
            x_label: Text for the x axis.
            y_label: Text for the y axis.
            title: Axes title.
        """
        _, ax = plt.subplots()
        # Height already occupied at each position; the next series starts here.
        stacked_so_far = np.zeros(len(x_data), dtype=float)
        for i, series in enumerate(y_data_list):
            heights = np.asarray(series, dtype=float)
            ax.bar(x_data, heights, color=colors[i], bottom=stacked_so_far,
                   align='center', label=y_data_names[i])
            stacked_so_far = stacked_so_far + heights
        ax.set_ylabel(y_label)
        ax.set_xlabel(x_label)
        ax.set_title(title)
        ax.legend(loc='upper right')  # legend position
    
    # 调用绘图函数
    # Stacked bars: share of survivors and non-survivors for each passenger class.
    pclass_shares = [survived_pclass['yes_prop'], survived_pclass['no_prop']]
    stackedbarplot(
        x_data=survived_pclass.index.values,
        y_data_list=pclass_shares,
        y_data_names=['Survived', 'Not survived'],
        colors=['#539caf', '#7663b0'],
        x_label='pclass',
        y_label='Survival rate',
        title='pclass and Survival Rate',
    )
    
    output_60_0.png

    不同性别幸存比例

    # Passenger counts per sex, with one column per `survived` value.
    sex_outcome_counts = data.groupby(['sex', 'survived']).size()
    sex_survived = sex_outcome_counts.unstack(level=-1)
    sex_survived
    
    Paste_Image.png
    # Column 0 holds the not-survived count and column 1 the survived count.
    # Derive the per-sex total and the share of each outcome.
    sex_survived['sum'] = sex_survived[0].add(sex_survived[1])
    sex_survived['yes_prop'] = sex_survived[1].div(sex_survived['sum'])
    sex_survived['no_prop'] = sex_survived[0].div(sex_survived['sum'])
    sex_survived
    
    Paste_Image.png
    # 调用绘图函数
    # Stacked bars: share of survivors and non-survivors for each sex.
    sex_positions = list(range(len(sex_survived.index)))
    stackedbarplot(
        x_data=sex_positions,
        y_data_list=[sex_survived['yes_prop'], sex_survived['no_prop']],
        y_data_names=['Survived', 'Not survived'],
        colors=['#539caf', '#7663b0'],
        x_label='Sex',
        y_label='Survival rate',
        title='Sex and Survival Rate',
    )
    # The bars sit at bare integer positions; label each tick with the matching
    # sex from the grouped index so the reader can tell the bars apart.
    sex_ax = plt.gca()
    sex_ax.set_xticks(sex_positions)
    sex_ax.set_xticklabels(sex_survived.index.values)
    
    output_64_0.png

    幸存和遇难乘客的票价分布(分类箱式图)

    def boxplot(x_data, y_data, base_color, median_color, x_label, y_label, title):
        """Draw one box per entry of ``y_data`` on a new figure.

        Args:
            x_data: Tick labels, one per box, in the same order as ``y_data``.
            y_data: Sequence of samples; each entry becomes one box.
            base_color: Colour of the median line, box outline and whisker caps.
            median_color: Colour of the box fill and the whiskers.
            x_label: Text for the x axis.
            y_label: Text for the y axis.
            title: Axes title.
        """
        # NOTE(review): despite its name, `median_color` is not applied to the
        # median line (that uses `base_color`) -- confirm this is intended.
        median_style = {'color': base_color}
        box_style = {'color': base_color, 'facecolor': median_color}  # edge / fill
        whisker_style = {'color': median_color}
        cap_style = {'color': base_color}

        _, axes = plt.subplots()
        axes.boxplot(
            y_data,
            patch_artist=True,  # draw boxes as filled patches
            medianprops=median_style,
            boxprops=box_style,
            whiskerprops=whisker_style,
            capprops=cap_style,
        )
        # Label the boxes with the caller-supplied categories.
        axes.set_xticklabels(x_data)
        axes.set_ylabel(y_label)
        axes.set_xlabel(x_label)
        axes.set_title(title)
    
    # One fare sample per survival outcome, in the order the outcomes first
    # appear in the data.
    survived = data['survived'].unique()
    bp_data = [
        data[data['survived'] == outcome]['fare']
        for outcome in (survived[0], survived[1])
    ]

    # Box plot of fare split by survival outcome.
    boxplot(
        x_data=survived,
        y_data=bp_data,
        base_color='b',
        median_color='r',
        x_label='survived',
        y_label='fare',
        title='Distribution of Fare By Survived',
    )
    
    output_67_0.png

    幸存和遇难乘客的年龄分布(分类箱式图)

    # Missing ages are dropped from each sample instead of being replaced by 0:
    # a fill value of 0 would add fictitious newborn passengers and pull the
    # quartiles down. Dropping also leaves `data` itself unmodified.
    survived = data['survived'].unique()
    bp_data = [
        data.loc[data['survived'] == outcome, 'age'].dropna()
        for outcome in (survived[0], survived[1])
    ]

    # Box plot of age split by survival outcome.
    boxplot(
        x_data=survived,
        y_data=bp_data,
        base_color='b',
        median_color='r',
        x_label='survived',
        y_label='age',
        title='Distribution of Age By Survived',
    )
    
    output_69_0.png

    不同上船港口的乘客舱位等级分布(分组柱状图)

    # Passenger counts per embarkation town (rows) and passenger class (columns);
    # combinations with no passengers become 0.
    town_class_counts = data.groupby(['embark_town', 'pclass']).size()
    embark_pclass = town_class_counts.unstack().fillna(0)
    embark_pclass
    
    Paste_Image.png
    # One series per passenger-class column (the first three columns), each
    # indexed by embarkation town.
    pclass_list = [embark_pclass.iloc[:, column] for column in (0, 1, 2)]
    pclass_list
    
    [embark_town
     Cherbourg       85
     Queenstown       2
     Southampton    127
     Name: 1, dtype: int64, embark_town
     Cherbourg       17
     Queenstown       3
     Southampton    164
     Name: 2, dtype: int64, embark_town
     Cherbourg       66
     Queenstown      72
     Southampton    353
     Name: 3, dtype: int64]
    
    # 绘制分组柱状图的函数
    def groupedbarplot(x_data, y_data_list, y_data_names, colors, x_label, y_label,title):
        """Draw a grouped (side-by-side) bar chart on a new figure.

        Each group is centred on one entry of ``x_data`` and is 0.8 wide in
        total; that width is shared equally between the series.

        Args:
            x_data: Numeric centre position of each group.
            y_data_list: Sequence of series; each holds one height per group.
            y_data_names: Legend label for each series.
            colors: Bar colour for each series.
            x_label: Text for the x axis.
            y_label: Text for the y axis.
            title: Axes title.
        """
        _, ax = plt.subplots()
        series_count = len(y_data_list)
        if series_count:  # nothing to draw (and no width to divide) when empty
            # Total width of one group of bars, and of each bar inside it.
            total_width = 0.8
            ind_width = total_width / series_count
            # Offset of each bar centre from the group centre. Computed from
            # integer steps so exactly `series_count` offsets are produced; a
            # float-step arange does not guarantee its length.
            offsets = (np.arange(series_count) - (series_count - 1) / 2.0) * ind_width
            # Coerce once so lists, ranges and arrays all support the shift.
            centers = np.asarray(x_data, dtype=float)

            for i, heights in enumerate(y_data_list):
                # Spread the series horizontally within each group.
                ax.bar(centers + offsets[i], heights, color=colors[i],
                       label=y_data_names[i], width=ind_width)
        ax.set_ylabel(y_label)
        ax.set_xlabel(x_label)
        ax.set_title(title)
        ax.legend(loc='upper right')
    
    
    # 调用绘图函数
    # Grouped bars: passenger-class counts for each embarkation town.
    groupedbarplot(
        x_data=range(3),
        y_data_list=pclass_list,
        y_data_names=embark_pclass.columns,
        colors=['#539caf', '#7663b0', '#00ff00'],
        x_label='embark_town',
        y_label='counts of pclass',
        title='Counts of Pclass vs Embark Town',
    )

    # Put one tick under each group and label it with the town name.
    ax = plt.gca()
    ax.set_xticks(range(3))
    ax.set_xticklabels(embark_pclass.index.values)
    
    output_74_1.png

    幸存和遇难乘客同船兄弟姐妹及配偶(sibsp)的数量分布(分类箱式图)

    # One `sibsp` sample per survival outcome, in the order the outcomes first
    # appear in the data.
    survived = data['survived'].unique()
    bp_data = [
        data[data['survived'] == outcome]['sibsp']
        for outcome in (survived[0], survived[1])
    ]
    # Box plot of sibsp split by survival outcome.
    boxplot(
        x_data=survived,
        y_data=bp_data,
        base_color='b',
        median_color='r',
        x_label='survived',
        y_label='sibsp',
        title='Distribution of Sibsp By Survived',
    )
    
    output_76_0.png

    幸存和遇难乘客父母子女的数量分布(分类箱式图)

    # One `parch` sample per survival outcome, in the order the outcomes first
    # appear in the data.
    survived = data['survived'].unique()
    bp_data = [
        data[data['survived'] == outcome]['parch']
        for outcome in (survived[0], survived[1])
    ]
    # Box plot of parch split by survival outcome.
    boxplot(
        x_data=survived,
        y_data=bp_data,
        base_color='b',
        median_color='r',
        x_label='survived',
        y_label='parch',
        title='Distribution of Parch By Survived',
    )
    
    output_78_0.png

    单独乘船与否和幸存之间有没有联系(堆积柱状图或者分组柱状图)

    # Passenger counts per `alone` value, with one column per `survived` value.
    alone_outcome_counts = data.groupby(['alone', 'survived']).size()
    alone_survived = alone_outcome_counts.unstack(level=-1)
    alone_survived
    
    Paste_Image.png
    # Grouped bars: not-survived vs survived counts for each `alone` value.
    _, ax = plt.subplots()
    width = 0.4
    index = alone_survived.index.values
    # Plot against plain integer positions instead of doing arithmetic on the
    # raw index values.
    positions = np.arange(len(index))
    # Alignment is stated explicitly so the bar centres are positions and
    # positions + width regardless of the matplotlib default.
    ax.bar(positions, alone_survived[0], color='#ff0000', label='Not survived',
           width=width, align='center')
    ax.bar(positions + width, alone_survived[1], color='#00ff00', label='Survived',
           width=width, align='center')

    ax.set_ylabel('numbers of People')
    ax.set_xlabel('alone')
    ax.set_title('People Survived vs Alone')
    ax.legend(loc='upper right')
    # Each pair of bars is centred halfway between the two bar centres, so the
    # tick goes at positions + width / 2 (not under the second bar).
    ax.set_xticks(positions + width / 2)
    ax.set_xticklabels(index)
    
    output_81_1.png

    相关文章

      网友评论

          本文标题:泰坦尼克号海难幸存状况分析

          本文链接:https://www.haomeiwen.com/subject/ldutsxtx.html