In the spirit of rigorous research habits, here is the referenced article first:
Reference: https://blog.csdn.net/u013337691/article/details/52433492
Algorithm principle: in essence, the bee (artificial bee colony, ABC) algorithm is used to search for a good kernel scale and box constraint, the two most important tuning parameters of a support vector machine (SVM). The bee algorithm is quite similar to other swarm/evolutionary algorithms such as ant colony optimization, the bat algorithm, and particle swarm optimization. The cost function is the 10-fold cross-validation loss of a candidate parameter pair on the training set.
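One detail worth spelling out: because the objective is a loss to be minimized, the code below first converts each objective value f into the standard ABC fitness fit = 1/(1+f), so that a lower loss yields a higher fitness; both the greedy selection and the onlooker-bee roulette then operate on this fitness. A minimal sketch with made-up loss values:

f = [0.10 0.25 0.40]; % three example CV losses (made-up values)
fit = 1 ./ (1 + f); % standard ABC fitness: lower loss -> higher fitness
prob = 0.9 .* fit ./ max(fit) + 0.1; % onlooker-bee selection probabilities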
The algorithm is fairly slow and the parameters have not been tuned; readers who need better results can try tuning them.
The program structure is: a main script followed by the three functions it calls.
The code is as follows:
%% main function
%% data
load DataSet
Y = DataSet(:,7); % result 0/1
X = DataSet(:,[1 2 3 4 5 6]); %features
% train : test = 70% : 30% (random split)
n_train = round(0.7*length(Y)); % round so the index is an integer
rand_num = randperm(length(Y));
X_train = X(rand_num(1:n_train),:);
y_train = Y(rand_num(1:n_train),:);
X_test = X(rand_num(n_train+1:end),:); % n_train+1 so train and test do not overlap
y_test = Y(rand_num(n_train+1:end),:);
%% whatever your data is, just assign it to trainData, trainLabel, testData, testLabel
trainData = X_train;
trainLabel = y_train;
testData = X_test;
testLabel = y_test;
%% parameters for bee algorithm
NP=20; % colony size (employed + onlooker bees)
FoodNumber=NP/2; % number of food sources (candidate solutions)
limit=100; % a source unimproved after 'limit' trials is abandoned
maxCycle=10; % maximum number of search cycles
D=2; % c and g are the parameters to be optimized
ub=ones(1,D)*1; % upper bound of the raw search space (must match old_range in adjust_cost_gamma)
lb=ones(1,D)*(0.01); % lower bound of the raw search space
runtime=2; % number of independent runs
BestGlobalMins=zeros(1,runtime); % best objective value (CV loss) of each run
BestGlobalParams=zeros(runtime,D); % the best parameters of each run
%% Bee algorithm based SVM
for r=1:runtime
% initialize the food sources uniformly at random within the bounds
Range = repmat((ub-lb),[FoodNumber 1]);
Lower = repmat(lb, [FoodNumber 1]);
Foods = rand(FoodNumber,D) .* Range + Lower;
ObjVal=zeros(1,FoodNumber);
for k = 1:FoodNumber
ObjVal(k) = cost(Foods(k,:),trainLabel,trainData); % the objective is to minimize the loss
end
Fitness=1./(1+ObjVal); % standard ABC fitness: lower loss -> higher fitness
trial=zeros(1,FoodNumber); % trial counters used by the abandonment (scout) step
% remember the index of minimal loss
BestInd=find(ObjVal==min(ObjVal));
BestInd=BestInd(end);
GlobalMin=ObjVal(BestInd); % update the optimal objective value
GlobalParams=Foods(BestInd,:); % update the best paras
iter=1;
while (iter <= maxCycle)
% employed bee phase: one local search around each food source
for i=1:(FoodNumber)
Param2Change=fix(rand*D)+1;
neighbour=fix(rand*(FoodNumber))+1;
while(neighbour==i)
neighbour=fix(rand*(FoodNumber))+1;
end
sol=Foods(i,:);
% v_{ij}=x_{ij}+\phi_{ij}*(x_{kj}-x_{ij})
sol(Param2Change)=Foods(i,Param2Change)+(Foods(i,Param2Change)-Foods(neighbour,Param2Change))*(rand-0.5)*2;
% clip the new solution to the search bounds
ind=find(sol<lb);
sol(ind)=lb(ind);
ind=find(sol>ub);
sol(ind)=ub(ind);
% evaluate the mutant solution
ObjValSol=cost(sol,trainLabel,trainData);
FitnessSol=1/(1+ObjValSol); % ABC fitness transform
% greedy selection: keep whichever solution has the higher fitness (lower loss)
if (FitnessSol>Fitness(i))
Foods(i,:)=sol;
Fitness(i)=FitnessSol;
ObjVal(i)=ObjValSol;
trial(i)=0;
else
trial(i)=trial(i)+1;
end
end
% onlooker-bee selection probability: proportional to fitness
prob=(0.9.*Fitness./max(Fitness))+0.1;
% onlooker bee phase: food sources are revisited with probability prob(i)
i=1;
t=0;
while(t<FoodNumber)
if(rand<prob(i))
t=t+1;
Param2Change=fix(rand*D)+1;
neighbour=fix(rand*(FoodNumber))+1;
while(neighbour==i)
neighbour=fix(rand*(FoodNumber))+1;
end
sol=Foods(i,:);
% v_{ij}=x_{ij}+\phi_{ij}*(x_{kj}-x_{ij})
sol(Param2Change)=Foods(i,Param2Change)+(Foods(i,Param2Change)-Foods(neighbour,Param2Change))*(rand-0.5)*2;
% clip the new solution to the search bounds
ind=find(sol<lb);
sol(ind)=lb(ind);
ind=find(sol>ub);
sol(ind)=ub(ind);
ObjValSol=cost(sol,trainLabel,trainData);
FitnessSol=1/(1+ObjValSol); % ABC fitness transform
if (FitnessSol>Fitness(i))
Foods(i,:)=sol;
Fitness(i)=FitnessSol;
ObjVal(i)=ObjValSol;
trial(i)=0;
else
trial(i)=trial(i)+1;
end
end
i=i+1;
if (i==(FoodNumber)+1)
i=1;
end
end
% memorize the best food source found so far
ind=find(ObjVal==min(ObjVal));
ind=ind(end);
if (ObjVal(ind)<GlobalMin)
GlobalMin=ObjVal(ind);
GlobalParams=Foods(ind,:);
end
% scout bee phase: abandon a food source that exceeded the trial limit
ind=find(trial==max(trial));
ind=ind(end);
if (trial(ind)>limit)
trial(ind)=0; % reset the trial counter of the abandoned source
sol=(ub-lb).*rand(1,D)+lb; % replace it with a fresh random solution
ObjValSol=cost(sol,trainLabel,trainData);
FitnessSol=1/(1+ObjValSol); % ABC fitness transform
Foods(ind,:)=sol;
Fitness(ind)=FitnessSol;
ObjVal(ind)=ObjValSol;
end
iter=iter+1;
end
BestGlobalMins(r)=GlobalMin;
BestGlobalParams(r,:)=GlobalParams;
end
%% The final result
% pick the best of the independent runs, then map the raw values onto the
% actual C/gamma ranges (the same mapping cost() applies during the search)
[~,best_run]=min(BestGlobalMins);
[bestc,bestg]=adjust_cost_gamma(BestGlobalParams(best_run,1),BestGlobalParams(best_run,2));
str=sprintf('Best c = %g, Best g = %g',bestc,bestg);
disp(str)
%% train with the chosen params
model_cs_svr=fitcsvm(trainData,trainLabel,'Standardize',true,...
'KernelFunction','rbf','BoxConstraint',bestc,'KernelScale',bestg);
% test on the held-out 30%
test_accuracy_opt = sum(predict(model_cs_svr,testData) == testLabel)/length(testLabel)*100;
disp('test accuracy is:');
disp(test_accuracy_opt);
%% svm_train
% x: candidate parameter vector [c g] in the raw search space
% train_label: labels of the training set
% train_data: features of the training set
% output: cross-validation loss (the quantity being minimized)
%%
function [train_loss] = svm_train(x, train_label, train_data)
%% Map the raw search values to the actual cost and gamma ranges
% (named cost_val/gamma_val so the variable does not shadow the cost() function)
[cost_val, gamma_val] = adjust_cost_gamma(x(1),x(2));
%% Train an RBF-kernel SVM and score it by cross-validation
SVM_Model = fitcsvm(train_data,train_label,'Standardize',true,...
'KernelFunction','rbf','BoxConstraint',cost_val,'KernelScale',gamma_val);
cro_Model = crossval(SVM_Model);
train_loss = kfoldLoss(cro_Model);
end
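Note that crossval uses a 10-fold partition by default, which is the "10-fold cross-validation loss" mentioned at the top; the fold count can also be set explicitly if needed:

cro_Model = crossval(SVM_Model,'KFold',10); % equivalent to the call above, fold count spelled out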
% objective handle for the bee loop: the CV loss of parameter vector x
function o = cost(x, trainLabel, trainData)
o = svm_train(x, trainLabel, trainData);
end
function [adjusted_cost,adjusted_gamma] = adjust_cost_gamma(cost, gamma)
%% EQUATION: New value(adjusted) = (((Old Value - Old Min) * New Range) / Old Range) + New Min
%% Old range for both Cost and Gamma = (Old Max - Old Min); must match lb/ub of the search
old_range = (1 - 0.01);
old_min = 0.01;
%% Value of new Cost -- Adjust new minimum & new maximum cost here
new_min_cost = 0.01;
new_max_cost = 35000.0;
new_range_cost = (new_max_cost - new_min_cost);
adjusted_cost = ...
(((cost - old_min) * new_range_cost) / old_range) + new_min_cost;
%% Value of new Gamma -- Adjust new minimum & new maximum gamma here
new_min_gamma = 0.0001;
new_max_gamma = 32.0;
new_range_gamma = (new_max_gamma - new_min_gamma);
adjusted_gamma = ...
(((gamma - old_min) * new_range_gamma) / old_range) + new_min_gamma;
end
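A quick sanity check of the mapping at the corners of the raw [0.01, 1] search box (the numbers follow directly from the equation above):

[c_lo,g_lo] = adjust_cost_gamma(0.01,0.01) % -> c = 0.01, g = 0.0001
[c_hi,g_hi] = adjust_cost_gamma(1,1) % -> c = 35000, g = 32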