机器学习技法作业二,Experiment with Bagging Ridge Regression.
13~14题为linear LSSVM算法,代码如下,与RBF kernel的LSSVM不同的只是kernel的计算部分:
import numpy as np
from math import *
import matplotlib.pyplot as plt
def loaddata(file):
    """Load a whitespace-separated data file.

    Each line holds d-1 feature values followed by a ±1 label. Returns
    (features, labels) where features is (N, d) with a bias column
    x0 = 1 prepended, and labels has shape (N, 1).
    """
    # 'with' guarantees the handle is closed, replacing manual try/finally.
    with open(file) as f:
        lines = f.readlines()
    example_num = len(lines)
    # d-1 raw features + 1 label per line -> d columns once the bias is added
    dimension = len(lines[0].strip().split())
    features = np.zeros((example_num, dimension))
    labels = np.zeros((example_num, 1))
    features[:, 0] = 1  # bias term x0 = 1
    for index, line in enumerate(lines):
        item = line.strip().split()  # use the loop variable instead of lines[index]
        features[index, 1:] = [float(feature) for feature in item[:-1]]
        labels[index] = float(item[-1])
    return features, labels
def LSSVM(X, Y, gama, lamb):
    """Train a linear-kernel least-squares SVM.

    Solves (lamb*I + K) beta = Y for the dual coefficients beta, where
    K is the linear Gram matrix K[i,j] = <x_i, x_j>.

    X: (N, d) training features. Y: (N, 1) ±1 labels.
    gama: RBF width parameter — unused by the linear kernel, kept so the
        signature matches the RBF variant (see the commented kernel line).
    lamb: ridge regularization strength.
    Returns beta of shape (N, 1).
    """
    N = len(Y)
    # Vectorized Gram matrix; replaces the O(N^2) Python double loop.
    # K[i,j] = exp(-gama * ||x_i - x_j||^2) would be the RBF alternative.
    K = np.dot(X, X.T)
    # solve() is faster and numerically safer than forming inv() explicitly.
    beta = np.linalg.solve(np.eye(N) * lamb + K, Y)
    return beta
def predict(Xmodel, Xtest, Ytest, beta, gama):
    """Score test points with a trained linear-kernel LSSVM.

    Xmodel: (N, d) training features the model was fit on.
    Xtest: (M, d) test features. Ytest: (M, 1) ±1 labels.
    beta: (N, 1) dual coefficients from LSSVM().
    gama: unused for the linear kernel; kept for RBF-variant compatibility.
    Returns (p, accuracy): raw scores of shape (M, 1) and the fraction of
    correctly classified test points (scalar).
    """
    # Kernel matrix between all test and all training points in one shot:
    # Ktest[i, j] = <Xtest[i], Xmodel[j]>. Replaces the M*N Python loop.
    Ktest = np.dot(Xtest, Xmodel.T)
    p = np.dot(Ktest, beta)  # raw scores; previously recomputed twice per point
    py = np.where(p >= 0, 1.0, -1.0)  # sign with ties mapped to +1, as before
    accuracy = np.mean(py == Ytest)
    return p, accuracy
# Q13-14 driver: fit the linear LSSVM on the first 400 examples for
# several lambda values, then evaluate beta3 (lambda = 1) out of sample.
X, Y = loaddata('hw2_lssvm_all.dat.txt')
gama = 0.125
lamb = 0.01
beta1 = LSSVM(X[:400,:], Y[:400], gama, lamb)
lamb = 0.1
beta2 = LSSVM(X[:400,:], Y[:400], gama, lamb)
lamb = 1
beta3 = LSSVM(X[:400,:], Y[:400], gama, lamb)
lamb = 10
beta4 = LSSVM(X[:400,:], Y[:400], gama, lamb)
lamb = 100
beta5 = LSSVM(X[:400,:], Y[:400], gama, lamb)
# BUG FIX: training used rows 0-399, so the test split must start at row
# 400; the original X[401:]/Y[401:] silently dropped example 400.
p, acc = predict(X[:400,:], X[400:,:], Y[400:], beta3, gama)
## previously reported: acc = array([0.63636364])
15~16题是LSSVM与Bootstrapping结合,代码如下:
import numpy as np
from math import *
import matplotlib.pyplot as plt
def loaddata(file):
    """Read a whitespace-delimited dataset into (features, labels).

    Every row is d-1 feature values followed by a ±1 label. The returned
    feature matrix is (N, d) with a constant bias column x0 = 1 in front;
    labels has shape (N, 1).
    """
    # Context manager replaces the manual try/finally close.
    with open(file) as f:
        lines = f.readlines()
    example_num = len(lines)
    dimension = len(lines[0].strip().split())  # column count after adding x0
    features = np.zeros((example_num, dimension))
    labels = np.zeros((example_num, 1))
    features[:, 0] = 1  # initialize the bias coordinate x0 = 1
    for index, line in enumerate(lines):
        item = line.strip().split()  # loop variable used; original re-read lines[index]
        features[index, 1:] = [float(feature) for feature in item[:-1]]
        labels[index] = float(item[-1])
    return features, labels
class Bagging(object):
    """Bagged linear-kernel LSSVM.

    Draws `interation` bootstrap resamples of the training set, fits one
    LSSVM per resample, and predicts by majority vote over the fitted
    hypotheses.
    """

    def __init__(self, interation, X, Y):  # X, Y only size the storage arrays
        self.__inter = interation
        self.__beta = np.zeros((interation, len(Y), 1))           # per-round dual coefficients
        self.__Xbar = np.zeros((interation, len(Y), X.shape[1]))  # per-round bootstrap features
        self.__Ybar = np.zeros((interation, len(Y), 1))           # per-round bootstrap labels

    def Bootstrap(self, X, Y):
        """Draw self.__inter bootstrap resamples (N draws with replacement)."""
        N = len(Y)
        for inter in range(self.__inter):
            # Vectorized sampling replaces the inner per-row Python loop.
            rows = np.random.randint(N, size=N)
            self.__Xbar[inter] = X[rows, :]
            self.__Ybar[inter] = Y[rows, :]

    def train(self, lamb):
        """Fit one LSSVM per bootstrap round: solve (lamb*I + K) beta = Ybar."""
        N = self.__Ybar.shape[1]
        Lamb = np.eye(N) * lamb  # ridge regularizer lamb * I
        for inter in range(self.__inter):
            Xb = self.__Xbar[inter]
            # Vectorized linear Gram matrix; was an O(N^2) Python double loop.
            # The RBF alternative would be exp(-gama * ||x_i - x_j||^2).
            K = np.dot(Xb, Xb.T)
            # solve() avoids forming the explicit inverse.
            self.__beta[inter] = np.linalg.solve(Lamb + K, self.__Ybar[inter])

    def predict(self, Xtest, Ytest):
        """Majority-vote prediction over all bootstrap hypotheses.

        Works for either Ein (pass the training set) or Eout (pass the
        held-out set). Returns (py, accuracy): ±1 votes of shape (M, 1)
        and the fraction of correct predictions.
        """
        votes = np.zeros((len(Ytest), 1))
        for inter in range(self.__inter):
            # Kernel between every test point and this round's resample.
            Ktest = np.dot(Xtest, self.__Xbar[inter].T)
            scores = np.dot(Ktest, self.__beta[inter])
            votes += np.where(scores >= 0, 1.0, -1.0)  # each hypothesis votes ±1
        # Aggregate: tie (vote sum == 0) goes to +1, matching the per-g rule.
        py = np.where(votes >= 0, 1.0, -1.0)
        accuracy = np.mean(py == Ytest)
        return py, accuracy

    def get_Xbar(self):
        return self.__Xbar

    def get_Ybar(self):
        return self.__Ybar

    def get_beta(self):
        return self.__beta
# Q15-16 driver: bagged LSSVM with 250 bootstrap rounds, trained on the
# first 400 examples and evaluated both in and out of sample.
X, Y = loaddata('hw2_lssvm_all.dat.txt')
Bag = Bagging(250, X[:400,:], Y[:400]) ## construct the bagging object (sizes internal storage)
Bag.Bootstrap(X[:400,:], Y[:400]) ## draw the 250 bootstrap resamples
Bag.train(0.01) ## fit one LSSVM per resample (lambda = 0.01)
py_in, acc_in = Bag.predict(X[:400,:], Y[:400]) ## in-sample accuracy (Ein)
py_out,acc_out = Bag.predict(X[400:,:], Y[400:]) ## out-of-sample accuracy (Eout)
##acc_in = array([0.68])
##acc_out = array([0.63636364])
## Observation: lambda has little effect on the results.
网友评论