以下是李航《统计学习方法》第 50 页例题(朴素贝叶斯分类)的 Python 实现,分别采用极大似然估计和贝叶斯估计两种方法。
import pandas as pd
import numpy as np
from collections import Counter
#分贝叶斯估计与极大似然估计两种计算结果
class MLE():
    """Naive Bayes classifier whose probabilities are maximum likelihood estimates."""

    def Bayesian_estimation(self, np_arr):
        """Estimate class priors and per-class conditional probabilities.

        Despite its name, this method computes plain relative frequencies
        (maximum likelihood estimates) with no smoothing.

        Args:
            np_arr: 2-D array with one row per feature and the class labels
                in the last row (``forecast`` passes the transposed CSV values).

        Returns:
            A tuple ``(y_dict, yx_CNT)``:
            ``y_dict`` maps each class label to its prior probability;
            ``yx_CNT`` is a list of ``[label, class_count, feature_index,
            {feature_value: P(value | label)}]`` rows, one per
            (label, feature) pair. Values never observed together with a
            label are absent from that row's dict.
        """
        # The class label is stored in the last row.
        list_y_Ck = list(np_arr[-1])
        y_Count = len(list_y_Ck)
        class_counts = Counter(list_y_Ck)

        # Prior: relative frequency of every distinct label.
        y_dict = {}
        for label in set(list_y_Ck):
            y_dict[label] = class_counts[label] / y_Count

        yx_CNT = []
        for label in y_dict:
            y_Ck_CNT = class_counts[label]
            for j in range(np_arr.shape[0] - 1):
                # Count the values of feature j among samples of this class.
                value_counts = Counter(
                    value
                    for value, sample_label in zip(np_arr[j], list_y_Ck)
                    if sample_label == label
                )
                probabilities = {
                    value: count / y_Ck_CNT
                    for value, count in value_counts.items()
                }
                yx_CNT.append([label, y_Ck_CNT, j, probabilities])
        return y_dict, yx_CNT

    def forecast(self, features, path):
        """Predict the class of one sample from the training data at ``path``.

        Args:
            features: feature values of the sample, in column order.
            path: CSV location handed to ``pandas.read_csv``; the last
                column holds the class label.

        Returns:
            A string containing the label with the highest score
            (prior times the product of the conditional probabilities).

        Raises:
            IndexError: if ``features`` has more entries than the training
                data has feature columns.
        """
        data = pd.read_csv(path)
        print(data)
        np_arr = np.array(data.values)
        y_dict, yx_CNT = self.Bayesian_estimation(np_arr.T)
        print('1. 极大似然估计法')
        print('用于预测的特征值为: {}'.format(features))

        # Index the conditional tables by (label, feature index).
        conditional = {(row[0], row[2]): row[3] for row in yx_CNT}

        res1 = {}
        for key, prior in y_dict.items():
            score = 1 * prior
            for i, value in enumerate(features):
                probabilities = conditional.get((key, i))
                if probabilities is None:
                    raise IndexError(
                        'feature index {} is out of range for the training data'.format(i)
                    )
                # A value never observed with this class has a maximum
                # likelihood estimate of 0 rather than being an error.
                score = score * probabilities.get(value, 0)
            res1[key] = score
        print(res1)
        return '预测结果为 {}'.format(max(res1, key=res1.get))
class BE():
    """Naive Bayes classifier using Bayesian estimation (additive smoothing)."""

    def Bayesian_estimation(self, np_arr, lamda=1):
        """Estimate smoothed class priors and conditional probabilities.

        Priors are ``(count(c) + lamda) / (N + K * lamda)`` with ``K`` the
        number of classes. Conditionals are
        ``(count(value, c) + lamda) / (count(c) + S_j * lamda)`` with ``S_j``
        the number of distinct values feature ``j`` takes in the data, so
        each conditional distribution sums to 1.

        Args:
            np_arr: 2-D array with one row per feature and the class labels
                in the last row (``forecast`` passes the transposed CSV values).
            lamda: smoothing constant; 1 gives Laplace smoothing.

        Returns:
            A tuple ``(y_dict, yx_CNT)``:
            ``y_dict`` maps each class label to its smoothed prior;
            ``yx_CNT`` is a list of ``[label, class_count + lamda,
            feature_index, {feature_value: P(value | label)}]`` rows, one per
            (label, feature) pair. Every value of the feature that occurs
            anywhere in the data has an entry in the dict.
        """
        # The class label is stored in the last row.
        list_y_Ck = list(np_arr[-1])
        y_Count = len(list_y_Ck)
        class_counts = Counter(list_y_Ck)
        k_ = len(class_counts)  # number of classes, K

        y_dict = {}
        for label in set(list_y_Ck):
            y_dict[label] = (class_counts[label] + lamda) / (y_Count + k_ * lamda)

        yx_CNT = []
        for label in y_dict:
            y_Ck_CNT = class_counts[label]
            for j in range(np_arr.shape[0] - 1):
                column = list(np_arr[j])
                value_counts = Counter(
                    value
                    for value, sample_label in zip(column, list_y_Ck)
                    if sample_label == label
                )
                # Smooth over every value the feature can take (S_j of
                # them), including values never seen with this class.
                distinct_values = list(dict.fromkeys(column))
                denominator = y_Ck_CNT + len(distinct_values) * lamda
                dit_x_j = {
                    value: (value_counts[value] + lamda) / denominator
                    for value in distinct_values
                }
                # The second element is retained for backward compatibility
                # of the row layout; it is not the smoothing denominator.
                yx_CNT.append([label, y_Ck_CNT + lamda, j, dit_x_j])
        return y_dict, yx_CNT

    def forecast(self, features, path, lamda=1):
        """Predict the class of one sample from the training data at ``path``.

        Args:
            features: feature values of the sample, in column order.
            path: CSV location handed to ``pandas.read_csv``; the last
                column holds the class label.
            lamda: smoothing constant forwarded to ``Bayesian_estimation``.

        Returns:
            A string containing the label with the highest score
            (prior times the product of the conditional probabilities).

        Raises:
            IndexError: if ``features`` has more entries than the training
                data has feature columns.
            KeyError: if a feature value does not occur anywhere in the
                corresponding training column.
        """
        data = pd.read_csv(path)
        np_arr = np.array(data.values)
        y_dict, yx_CNT = self.Bayesian_estimation(np_arr.T, lamda)
        print('2. 贝叶斯估计法')
        print('用于预测的特征值为: {}'.format(features))

        # Index the conditional tables by (label, feature index).
        conditional = {(row[0], row[2]): row[3] for row in yx_CNT}

        res1 = {}
        for key, prior in y_dict.items():
            score = 1 * prior
            for i, value in enumerate(features):
                probabilities = conditional.get((key, i))
                if probabilities is None:
                    raise IndexError(
                        'feature index {} is out of range for the training data'.format(i)
                    )
                if value not in probabilities:
                    raise KeyError(
                        'value {!r} of feature {} does not occur in the training data'.format(value, i)
                    )
                score = score * probabilities[value]
            res1[key] = score
        print(res1)
        return '预测结果为 {}'.format(max(res1, key=res1.get))
if __name__ == '__main__':
    # Classify the same sample with both estimators, using the training
    # data stored in the CSV file below.
    path = 'E:\\leetcode\\朴素贝叶斯分类器\\bayes.csv'
    features = [2, 'S']

    mle_result = MLE().forecast(features, path)
    print(mle_result)

    # Blank separator between the two reports.
    print('\n')

    be_result = BE().forecast(features, path)
    print(be_result)
运算结果为:
x1 x2 Y
0 1 S -1
1 1 M -1
2 1 M 1
3 1 S 1
4 1 S -1
5 2 S -1
6 2 M -1
7 2 M 1
8 2 L 1
9 2 L 1
10 3 L 1
11 3 M 1
12 3 M 1
13 3 L 1
14 3 L -1
1. 极大似然估计法
用于预测的特征值为: [2, 'S']
{1: 0.02222222222222222, -1: 0.06666666666666667}
预测结果为 -1
2. 贝叶斯估计法
用于预测的特征值为: [2, 'S']
{1: 0.0326797385620915, -1: 0.06100217864923746}
预测结果为 -1
蟹蟹
网友评论