python 实现样本抽样

作者: samYau | 来源:发表于2017-03-24 13:41 被阅读0次

单纯随机抽样


    def __randomSampling(self, df_index, scale):
        """Draw a simple random sample, without replacement, from ``df_index``.

        Args:
            df_index: Index-like collection of candidate labels.
            scale: Fraction of ``df_index`` to draw; the sample size is
                ``int(len(df_index) * scale)``.

        Returns:
            A NumPy array holding the selected labels, or ``None`` when
            sampling fails (for example when ``scale`` asks for more items
            than exist); the error is printed in that case.
        """
        try:
            sample_size = int(len(df_index) * scale)
            # Uniform weighting is NumPy's default, so no explicit ``p`` is
            # passed: building ``[1 / n] * n`` by hand raised
            # ZeroDivisionError for an empty index.
            return np.random.choice(df_index, sample_size, replace=False)
        except Exception as e:
            # Existing contract: report the failure and hand back None.
            print(e)
            return None


    def RandomSampling(self, scale):
        """Split ``self.df`` by simple random sampling without replacement.

        Args:
            scale: Fraction of rows to draw; forwarded to the sampler.

        Returns:
            A ``(df_choice, df_not_choice)`` tuple: the sampled rows and the
            rows of ``self.df`` whose index labels were not sampled.

        Raises:
            ValueError: If the sampler signals failure by returning ``None``.
        """
        chosen_labels = self.__randomSampling(self.df.index, scale)
        if chosen_labels is None:
            raise ValueError('random sampling failed for scale=%r' % (scale,))
        # The sampler picks from ``self.df.index``, i.e. it returns *labels*,
        # so rows are selected with ``.loc``. ``.iloc`` is positional and only
        # agreed with the labels for a default 0..n-1 index.
        df_choice = self.df.loc[chosen_labels, :]
        # Invert the membership mask with ``~``; NumPy rejects unary ``-`` on
        # boolean arrays.
        df_not_choice = self.df.loc[~self.df.index.isin(df_choice.index)]
        return (df_choice, df_not_choice)

重复随机抽样


    def __repetitionRandomSampling(self, df_index, scale):
        """Draw a random sample with replacement from ``df_index``.

        Args:
            df_index: Index-like collection that supports indexing with an
                integer array (e.g. a pandas Index or NumPy array).
            scale: Sample size as a fraction of ``len(df_index)``; the number
                of draws is ``int(len(df_index) * scale)``.

        Returns:
            The drawn labels (repeats possible), or ``None`` when sampling
            fails; the error is printed in that case.
        """
        try:
            population = len(df_index)
            sample_size = int(population * scale)
            if population == 0:
                # Nothing to draw from: return an empty selection.
                return df_index[:0]
            # ``randint``'s upper bound is exclusive, so it must be
            # ``population`` (not ``population - 1``) for the last element to
            # be reachable and for a one-element index to work at all.
            positions = np.random.randint(0, population, size=sample_size)
            return df_index[positions]
        except Exception as e:
            # Existing contract: report the failure and hand back None.
            print(e)
            return None

    def RepetitionRandomSampling(self, scale):
        """Split ``self.df`` by random sampling with replacement.

        Args:
            scale: Sample size as a fraction of the row count; forwarded to
                the sampler.

        Returns:
            A ``(df_choice, df_not_choice)`` tuple. ``df_choice`` may contain
            the same row more than once; ``df_not_choice`` holds the rows of
            ``self.df`` whose index labels were never drawn.

        Raises:
            ValueError: If the sampler signals failure by returning ``None``.
        """
        chosen_labels = self.__repetitionRandomSampling(self.df.index, scale)
        if chosen_labels is None:
            raise ValueError(
                'repetition random sampling failed for scale=%r' % (scale,))
        # The sampler returns index *labels*; ``.loc`` selects by label,
        # whereas ``.iloc`` is positional and only matched a default index.
        df_choice = self.df.loc[chosen_labels, :]
        # Invert the membership mask with ``~``; NumPy rejects unary ``-`` on
        # boolean arrays.
        df_not_choice = self.df.loc[~self.df.index.isin(df_choice.index)]
        return (df_choice, df_not_choice)

系统抽样


    def __systematicSampling(self, df_index, scale):
        """Pick evenly spaced entries from ``df_index``.

        ``int(len(df_index) * scale)`` entries are taken. The n-th pick sits
        at position ``int(n * (1 / scale))``, wrapped modulo the length of
        ``df_index``.

        Returns:
            A list of the selected entries, or ``None`` (after printing the
            error) if any step raises.
        """
        try:
            population = len(df_index)
            wanted = int(population * scale)
            step = 1/scale
            return [
                df_index[int(0 + n * step) % population]
                for n in range(wanted)
            ]
        except Exception as err:
            print (err)
            return None

    def SystematicSampling(self, scale):
        """Split ``self.df`` by systematic (evenly spaced) sampling.

        Args:
            scale: Fraction of rows to draw; forwarded to the sampler.

        Returns:
            A ``(df_choice, df_not_choice)`` tuple: the sampled rows and the
            rows of ``self.df`` whose index labels were not sampled.

        Raises:
            ValueError: If the sampler signals failure by returning ``None``.
        """
        chosen_labels = self.__systematicSampling(self.df.index, scale)
        if chosen_labels is None:
            raise ValueError(
                'systematic sampling failed for scale=%r' % (scale,))
        # The sampler returns index *labels*; ``.loc`` selects by label,
        # whereas ``.iloc`` is positional and only matched a default index.
        df_choice = self.df.loc[chosen_labels, :]
        # Invert the membership mask with ``~``; NumPy rejects unary ``-`` on
        # boolean arrays.
        df_not_choice = self.df.loc[~self.df.index.isin(df_choice.index)]
        return (df_choice, df_not_choice)

分层抽样
先按对观察指标影响较大的某种特征，将总体划分为若干个类别（即“层”），再从每一层内按上述任一抽样方法抽取一定比例的观察单位，最后把各层的抽样结果合并起来组成样本。


    def StratifiedSampling(self, sampling_type, scale):
        """Draw a stratified sample from ``self.df``.

        Rows are grouped by the distinct values found in column ``0`` of
        ``self.df_col``. The selected sampler is applied to each group's
        index, and the per-group selections are concatenated in order of each
        value's first appearance.

        Args:
            sampling_type: Sampler to use inside each stratum. Only ``'rs'``
                (simple random), ``'rrs'`` (random with replacement) and
                ``'ss'`` (systematic) are supported.
            scale: Fraction of each stratum to draw; intended range (0, 1).

        Returns:
            A ``(df_choice, df_not_choice)`` tuple: the sampled rows and the
            rows of ``self.df`` whose index labels were not sampled.

        Raises:
            ValueError: If ``sampling_type`` is not supported, or if the
                sampler signals failure by returning ``None``.
        """
        # Local import: ``DataFrame.append`` no longer exists in pandas 2.x,
        # so the pieces are combined with ``pd.concat`` instead.
        import pandas as pd

        samplers = {
            'rs': self.__randomSampling,
            'rrs': self.__repetitionRandomSampling,
            'ss': self.__systematicSampling,
        }
        # Validate up front so a bad type is reported even with no strata.
        if sampling_type not in samplers:
            raise ValueError('不支持的随机类型。')
        sampler = samplers[sampling_type]

        strata = self.df_col[0]
        parts = []
        for value in strata.unique():
            stratum_index = strata[strata == value].index
            chosen_labels = sampler(stratum_index, scale)
            if chosen_labels is None:
                raise ValueError(
                    'sampling failed for stratum %r (scale=%r)' % (value, scale))
            # Samplers return index *labels*, so select by label with
            # ``.loc`` rather than positionally with ``.iloc``.
            parts.append(self.df.loc[chosen_labels])

        # With no strata there is nothing to concatenate; use an empty frame
        # that keeps the columns of ``self.df``.
        df_choice = pd.concat(parts) if parts else self.df.iloc[0:0]
        # Invert the membership mask with ``~``; NumPy rejects unary ``-`` on
        # boolean arrays.
        df_not_choice = self.df.loc[~self.df.index.isin(df_choice.index)]
        return (df_choice, df_not_choice)

网友评论

本文标题：python 实现样本抽样

本文链接：https://www.haomeiwen.com/subject/qqqfottx.html

延伸阅读

深度阅读

您也可以注册成为美文阅读网的作者，发表您的原创作品、分享您的心情！

python 实现样本抽样

相关文章

网友评论

延伸阅读

深度阅读

栏目导航

热点阅读