美文网首页
python 实现样本抽样

python 实现样本抽样

作者: samYau | 来源:发表于2017-03-24 13:41 被阅读0次
    • 单纯随机抽样
    
        def __randomSampling(self, df_index, scale):
            """Draw a simple random sample, without replacement, from ``df_index``.

            Args:
                df_index: Sequence of index labels to sample from.
                scale: Fraction of ``df_index`` to draw; the sample size is
                    ``int(len(df_index) * scale)``.

            Returns:
                A NumPy array holding the selected labels (empty when the sample
                size is zero), or ``None`` when the request is invalid, e.g. a
                ``scale`` above 1 asks for more labels than exist.
            """
            try:
                sample_size = int(len(df_index) * scale)
                if sample_size == 0:
                    # Nothing to draw; skip the RNG so an empty index yields an
                    # empty sample instead of an error.
                    return np.asarray(df_index)[:0]
                # np.random.choice is uniform by default, so no explicit
                # probability vector is needed (building one as 1/len divides by
                # zero on an empty index).
                return np.random.choice(df_index, sample_size, replace=False)
            except (TypeError, ValueError) as e:
                # Kept for backward compatibility: invalid requests are reported
                # and signalled with None rather than propagated.
                print(e)
                return None
    
    
        def RandomSampling(self, scale):
            """Split ``self.df`` into a simple random sample and the remaining rows.

            Args:
                scale: Fraction of rows to draw; forwarded to the sampler.

            Returns:
                Tuple ``(df_choice, df_not_choice)``: the sampled rows, and the
                rows whose index label was not sampled.

            Raises:
                ValueError: If the sampler could not produce a sample.
            """
            chosen_labels = self.__randomSampling(self.df.index, scale)
            if chosen_labels is None:
                raise ValueError('sampling failed; check that scale is valid')
            # The sampler draws from self.df.index, i.e. it returns labels, so
            # rows are selected by label (.loc) rather than by position (.iloc).
            df_choice = self.df.loc[chosen_labels, :]
            # Boolean masks are inverted with ~; NumPy rejects unary minus on
            # boolean arrays.
            chosen_mask = self.df.index.isin(df_choice.index)
            df_not_choice = self.df.loc[~chosen_mask]
            return (df_choice, df_not_choice)
    
    • 重复随机抽样
    
        def __repetitionRandomSampling(self, df_index, scale):
            """Draw a random sample with replacement from ``df_index``.

            Args:
                df_index: Index-like object supporting ``len`` and array indexing
                    (it is indexed with a NumPy integer array).
                scale: Fraction of ``df_index`` to draw; the sample size is
                    ``int(len(df_index) * scale)``.

            Returns:
                The selected labels (``df_index`` indexed by the drawn positions,
                possibly containing repeats), or ``None`` when the request is
                invalid.
            """
            try:
                population = len(df_index)
                sample_size = int(population * scale)
                if sample_size == 0:
                    # Nothing to draw; also avoids calling randint with an empty
                    # range when the index itself is empty.
                    return df_index[:0]
                # randint's upper bound is exclusive: passing the full length
                # makes every position, including the last one, selectable.
                positions = np.random.randint(0, population, size=sample_size)
                return df_index[positions]
            except (TypeError, ValueError, IndexError) as e:
                # Kept for backward compatibility: invalid requests are reported
                # and signalled with None rather than propagated.
                print(e)
                return None
    
        def RepetitionRandomSampling(self, scale):
            """Split ``self.df`` into a with-replacement random sample and the rest.

            Args:
                scale: Fraction of rows to draw; forwarded to the sampler.

            Returns:
                Tuple ``(df_choice, df_not_choice)``: the sampled rows (a row may
                appear more than once), and the rows whose index label was never
                sampled.

            Raises:
                ValueError: If the sampler could not produce a sample.
            """
            chosen_labels = self.__repetitionRandomSampling(self.df.index, scale)
            if chosen_labels is None:
                raise ValueError('sampling failed; check that scale is valid')
            # The sampler draws from self.df.index, i.e. it returns labels, so
            # rows are selected by label (.loc) rather than by position (.iloc).
            df_choice = self.df.loc[chosen_labels, :]
            # Boolean masks are inverted with ~; NumPy rejects unary minus on
            # boolean arrays.
            chosen_mask = self.df.index.isin(df_choice.index)
            df_not_choice = self.df.loc[~chosen_mask]
            return (df_choice, df_not_choice)
    
    • 系统抽样
    
        def __systematicSampling(self, df_index, scale):
            """Pick evenly spaced entries of ``df_index``, starting at position 0.

            Args:
                df_index: Sequence of index labels supporting ``len`` and integer
                    indexing.
                scale: Fraction of ``df_index`` to select; ``int(len * scale)``
                    entries are taken at a spacing of ``1 / scale`` positions.

            Returns:
                A list of the selected labels, or ``None`` if any error occurred
                (the error is printed), e.g. when ``scale`` is 0.
            """
            try:
                population = len(df_index)
                wanted = int(population * scale)
                step = 1 / scale
                # The i-th pick sits at position floor(i * step), wrapped into
                # the valid range by the modulo.
                return [
                    df_index[int(pick * step) % population]
                    for pick in range(wanted)
                ]
            except Exception as err:
                print(err)
                return None
    
        def SystematicSampling(self, scale):
            """Split ``self.df`` into a systematic (evenly spaced) sample and the rest.

            Args:
                scale: Fraction of rows to select; forwarded to the sampler.

            Returns:
                Tuple ``(df_choice, df_not_choice)``: the sampled rows, and the
                rows whose index label was not sampled.

            Raises:
                ValueError: If the sampler could not produce a sample.
            """
            chosen_labels = self.__systematicSampling(self.df.index, scale)
            if chosen_labels is None:
                raise ValueError('sampling failed; check that scale is valid')
            # The sampler picks entries of self.df.index, i.e. it returns labels,
            # so rows are selected by label (.loc) rather than by position (.iloc).
            df_choice = self.df.loc[chosen_labels, :]
            # Boolean masks are inverted with ~; NumPy rejects unary minus on
            # boolean arrays.
            chosen_mask = self.df.index.isin(df_choice.index)
            df_not_choice = self.df.loc[~chosen_mask]
            return (df_choice, df_not_choice)
    
    • 分层抽样
      先按对观察指标影响较大的某种特征,将总体分为若干个类别(即"层"),再从每一层内按上述任一抽样方法(单纯随机抽样、重复随机抽样或系统抽样)抽取一定比例的观察单位,合起来组成样本。
    
        def StratifiedSampling(self, sampling_type, scale):
            """Draw a stratified sample: sample each stratum, then combine the parts.

            The strata are the distinct values found in ``self.df_col[0]``. For
            each one, the index labels of the matching ``self.df_col`` rows are
            handed to the chosen sampler and the selected rows of ``self.df`` are
            collected.

            Args:
                sampling_type: Which sampler to use within each stratum. Only
                    ``'rs'`` (simple random), ``'rrs'`` (random with replacement)
                    and ``'ss'`` (systematic) are supported.
                scale: Fraction of each stratum to sample, in the range (0, 1).

            Returns:
                Tuple ``(df_choice, df_not_choice)``: the sampled rows, and the
                rows of ``self.df`` whose index label was not sampled.

            Raises:
                ValueError: If ``sampling_type`` is not supported, or a sampler
                    could not produce a sample for some stratum.
            """
            # Local import keeps this method self-contained; pandas.concat
            # replaces DataFrame.append, which pandas 2.0 removed.
            import pandas as pd

            samplers = {
                'rs': self.__randomSampling,
                'rrs': self.__repetitionRandomSampling,
                'ss': self.__systematicSampling,
            }
            # Validate before looping so a bad type is reported even when there
            # are no strata to iterate over.
            if sampling_type not in samplers:
                raise ValueError('不支持的随机类型。')
            sampler = samplers[sampling_type]

            strata_column = self.df_col[0]
            parts = []
            # dict.fromkeys de-duplicates while keeping first-seen order, so the
            # strata are always processed in the same order.
            for stratum in dict.fromkeys(strata_column.values):
                stratum_labels = self.df_col[strata_column == stratum].index
                chosen_labels = sampler(stratum_labels, scale)
                if chosen_labels is None:
                    raise ValueError('sampling failed; check that scale is valid')
                # The samplers return index labels, so select by label (.loc)
                # rather than by position (.iloc).
                parts.append(self.df.loc[chosen_labels])

            # With no strata at all, fall back to an empty frame with the same
            # columns instead of failing on a missing result.
            df_choice = pd.concat(parts) if parts else self.df.iloc[0:0]

            # Boolean masks are inverted with ~; NumPy rejects unary minus on
            # boolean arrays.
            chosen_mask = self.df.index.isin(df_choice.index)
            df_not_choice = self.df.loc[~chosen_mask]
            return (df_choice, df_not_choice)
    

    相关文章

      网友评论

          本文标题:python 实现样本抽样

          本文链接:https://www.haomeiwen.com/subject/qqqfottx.html