美文网首页
2022-06-08-特征处理

2022-06-08-特征处理

作者: 破阵子沙场秋点兵 | 来源:发表于2022-06-08 11:20 被阅读0次

    删除异常值

    def outliers_proc(data, col_name, scale=3, plot=True):
        """
        Remove rows whose value in ``col_name`` is a box-plot outlier.

        A value is an outlier when it lies below ``Q1 - scale * (Q3 - Q1)`` or
        above ``Q3 + scale * (Q3 - Q1)``, where Q1/Q3 are the 25% / 75%
        quantiles of the column. Summary statistics of the removed values are
        printed, and (optionally) box plots of the column before and after
        cleaning are drawn side by side.

        :param data: pandas DataFrame to clean; it is not modified.
        :param col_name: name of the column to inspect.
        :param scale: multiplier applied to the inter-quartile range
            (default 3).
        :param plot: when True (default) draw the before/after box plots with
            the module-level ``plt`` / ``sns``; pass False to skip plotting.
        :return: a copy of ``data`` without the outlier rows, re-indexed
            from 0.
        """

        def box_plot_outliers(data_ser, box_scale):
            """
            Flag box-plot outliers in a Series.

            :param data_ser: pandas Series to inspect.
            :param box_scale: multiplier applied to the inter-quartile range.
            :return: ``((rule_low, rule_up), (val_low, val_up))`` - boolean
                Series marking values below / above the bounds, and the
                bounds themselves.
            """
            # Compute each quantile once instead of twice.
            q1 = data_ser.quantile(0.25)
            q3 = data_ser.quantile(0.75)
            iqr = box_scale * (q3 - q1)
            val_low = q1 - iqr
            val_up = q3 + iqr
            rule_low = (data_ser < val_low)
            rule_up = (data_ser > val_up)
            return (rule_low, rule_up), (val_low, val_up)

        data_n = data.copy()
        data_series = data_n[col_name]
        (rule_low, rule_up), _ = box_plot_outliers(data_series, box_scale=scale)

        # Work with positional boolean masks. Dropping by integer position via
        # DataFrame.drop (which is label-based) removes the wrong rows or
        # raises KeyError whenever the index is not the default 0..n-1 range.
        low_mask = rule_low.to_numpy()
        up_mask = rule_up.to_numpy()
        outlier_mask = low_mask | up_mask

        print("Delete number is: {}".format(int(outlier_mask.sum())))
        data_n = data_n[~outlier_mask]
        data_n.reset_index(drop=True, inplace=True)
        # NOTE: the message says "column" but the value is the remaining row
        # count; the text is kept unchanged for output compatibility.
        print("Now column number is: {}".format(data_n.shape[0]))

        print("Description of data less than the lower bound is:")
        print(data_series[low_mask].describe())
        print("Description of data larger than the upper bound is:")
        print(data_series[up_mask].describe())

        if plot:
            # Left: original column; right: column after outlier removal.
            fig, ax = plt.subplots(1, 2, figsize=(10, 7))
            sns.boxplot(y=data[col_name], data=data, palette="Set1", ax=ax[0])
            sns.boxplot(y=data_n[col_name], data=data_n, palette="Set1", ax=ax[1])
        return data_n
    

    相关文章

      网友评论

          本文标题:2022-06-08-特征处理

          本文链接:https://www.haomeiwen.com/subject/ktlumrtx.html