1.连续变量离散化后重新编码
def replace_woe(series, cut, woe):
    """Replace each value of a binned continuous variable with its bin's WOE.

    Bins are left-open / right-closed: a value is assigned to the highest
    index ``j`` in ``0 .. len(cut) - 2`` for which ``value > cut[j]``, and
    ``woe[j]`` is emitted for it.

    Args:
        series: Iterable of values to encode (e.g. a list or pandas Series).
            Values are read in iteration order, so the index labels of a
            pandas Series do not matter.
        cut: Bin edges; the last edge is never compared against.
        woe: WOE value of each bin, indexable by bin position.

    Returns:
        A list with one WOE value per input value, in input order.

    Note:
        A value that is not greater than any compared edge (for example
        NaN, or a value at or below ``cut[0]``) falls back to ``woe[-1]``.
    """
    last_edge = len(cut) - 2
    encoded = []
    for value in series:
        bin_index = last_edge
        # Strict ">" is deliberate: variants of this routine that use ">="
        # were observed to mis-encode some variables. "not (a > b)" is used
        # instead of "a <= b" so that NaN keeps walking down to the fallback.
        while bin_index >= 0 and not (value > cut[bin_index]):
            bin_index -= 1
        encoded.append(woe[bin_index])
    return encoded
df=df.reset_index(drop=True,inplace=False) #不能遗忘,否则会出错
data['变量名']=Series(replace_woe(data['变量名'], cutx1(变量对应的分箱), woex1(变量对应的WOE值)))
-----------------------------------------------------------------------------------------------
2.分类变量woe转换
def replace_woe2(series, cut, woe):
    """Replace each value of a categorical variable with its category's WOE.

    Args:
        series: Iterable of category values to encode (e.g. a list or
            pandas Series). Values are read in iteration order, so the
            index labels of a pandas Series do not matter.
        cut: The category values, positionally aligned with ``woe``.
        woe: WOE value of each category, indexable by position.

    Returns:
        A list with one WOE value per input value, in input order.

    Note:
        ``cut`` is scanned from the end, so if a category appears more than
        once its last position wins. A value that matches no category falls
        back to ``woe[-1]``.
    """
    encoded = []
    for value in series:
        position = len(cut) - 1
        # Walk backwards until a match; running off the front leaves
        # position == -1, which selects the last WOE entry as the fallback.
        while position >= 0 and not (value == cut[position]):
            position -= 1
        encoded.append(woe[position])
    return encoded
网友评论