美文网首页
2018-12-04-机器学习作业-降维

2018-12-04-机器学习作业-降维

作者: HollyMeng | 来源:发表于2018-12-04 15:41 被阅读0次

    数据集:https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html
    题目:

    image.png
    注:第三题(ISOMAP)若 k 近邻图不连通,则增大近邻数 k 直至连通;对数据集1,取 6-NN。

    代码:

    %% AML-2018 homework -- dimensionality reduction
    clear all;clc;
    % Active dataset: sonar (train / test splits stored as plain-text matrices).
    data_train=importdata('./two_datasets/sonar-train.txt');
    data_test=importdata('./two_datasets/sonar-test.txt');
    % Alternative dataset: splice. Uncomment these two lines (and comment out
    % the two above) to switch.
    % data_train=importdata('./two_datasets/splice-train.txt');
    % data_test=importdata('./two_datasets/splice-test.txt');
    % Row / column counts of each split; later sections use m_train and m_test.
    m_train=size(data_train,1);
    n_train=size(data_train,2);
    m_test=size(data_test,1);
    n_test=size(data_test,2);
    % The last column holds the class label, every other column is a feature.
    label_train=data_train(:,end);
    label_test=data_test(:,end);
    x_train=data_train(:,1:end-1);
    x_test=data_test(:,1:end-1);
    
    %% PCA
    % For each embedding dimension k, learn a projection W from the training
    % features only, project both splits with it, and score with a KNN
    % classifier. Results are stored at index k/10 (10 -> 1, 20 -> 2, 30 -> 3).
    KNN=1;
    for k=10:10:30 % embed dimension
        % No trailing semicolon: echoes the current k to the command window.
        k
        W=PCA_cm(x_train,k);
    %     W=pca(x_train);  % the built-in pca function performed worse than the hand-written one!
    %     NOTE(review): the commented-out call does not truncate to the first k
    %     columns, so the comparison may not have been like-for-like -- confirm.
        Z_train=x_train*W;
        Z_test=x_test*W;
        accuracy_PCA(k/10)=Evaluate_acc(Z_train,Z_test,label_train,label_test,KNN);
    end
    
    %% SVD
    % Reduce both splits to k dimensions with the right singular vectors of the
    % TRAINING matrix, then score with a KNN classifier.
    % The test split must be projected onto the same basis as the training
    % split: decomposing x_test on its own would put the two sets of features
    % in unrelated coordinate systems and make nearest-neighbour distances
    % between them meaningless.
    KNN=1;
    % The decomposition does not depend on k, so compute it once.
    [~,~,V_train]=svd(x_train,'econ');
    for k=10:10:30 % embed dimension
        basis_k=V_train(:,1:k);      % leading k right singular vectors
        Dk_train=x_train*basis_k;    % equals U_k*S_k for the training split
        Dk_test=x_test*basis_k;      % same basis applied to unseen data
        accuracy_SVD(k/10)=Evaluate_acc(Dk_train,Dk_test,label_train,label_test,KNN);
    end
    
    %% ISOMAP
    % Isomap has no out-of-sample projection here, so train and test rows are
    % embedded together and split apart again afterwards. The neighbourhood
    % graph uses KNN=6 neighbours; classification in the embedding uses 1-NN.
    KNN=6;
    X=[x_train;x_test];
    % The embedding columns come out ordered by decreasing spectrum, so the
    % k-dimensional embedding is the first k columns of the 30-dimensional one.
    % Computing it once avoids repeating the expensive geodesic-distance step
    % for every k.
    Iso_full=ISOMAP(X,KNN,30);
    for k=10:10:30 % embed dimension
        Iso=Iso_full(:,1:k);
        I_train=Iso(1:m_train,:);
        I_test=Iso(m_train+1:m_train+m_test,:);
        accuracy_ISOMAP(k/10)=Evaluate_acc(I_train,I_test,label_train,label_test,1);
    end
    
    function U=PCA_cm(X,k)
    % PCA_CM  Leading principal directions of the rows of X.
    %   U = PCA_CM(X,k) returns an n-by-k matrix whose columns are the k
    %   principal directions (largest variance first) of the m-by-n data
    %   matrix X, where each row is one sample. Project data with X*U.
    %
    %   Errors with id 'PCA_cm:badDimension' when k is not in 1..n.
    [m,n]=size(X);
    if k<1 || k>n
        error('PCA_cm:badDimension','k must satisfy 1 <= k <= size(X,2).');
    end
    % Centre explicitly. mean(X,1) always averages over rows (plain mean(X)
    % would collapse a single-row X to a scalar), and subtracting the mean
    % before forming the covariance avoids the cancellation that
    % (X'*X)/m - mu'*mu suffers when the data has a large offset.
    Xc=bsxfun(@minus,X,mean(X,1));
    C=(Xc'*Xc)/m;
    % C is symmetric positive semi-definite, so its left singular vectors are
    % its eigenvectors, already sorted by decreasing variance.
    [u,~,~]=svd(C);
    U=u(:,1:k);
    
    function [Dk,V]=svd_cm(X,k)
    % SVD_CM  Rank-k SVD features of the rows of X.
    %   Dk = SVD_CM(X,k) returns the m-by-k coordinates of the rows of X in the
    %   basis of its k leading right singular vectors, i.e. X*V(:,1:k), which
    %   equals U(:,1:k)*S(1:k,1:k).
    %
    %   [Dk,V] = SVD_CM(X,k) also returns that n-by-k basis V so that other
    %   data (for example a test split) can be mapped into the same space
    %   with Xother*V.
    %
    %   Errors with id 'svd_cm:badDimension' when k is not in 1..min(size(X)).
    if k<1 || k>min(size(X))
        error('svd_cm:badDimension','k must satisfy 1 <= k <= min(size(X)).');
    end
    % Economy-size SVD: the full m-by-m left factor is never needed.
    [~,~,p]=svd(X,'econ');
    V=p(:,1:k);
    % Reconstructing the rank-k approximation and re-projecting it onto V
    % reduces algebraically to X*V, so project directly.
    Dk=X*V;
    
    function Z=ISOMAP(X,KNN,k)
    % ISOMAP  Isomap embedding of the rows of X.
    %   Z = ISOMAP(X,KNN,k) embeds the m rows of X (one sample per row) into k
    %   dimensions and returns the m-by-k coordinate matrix Z:
    %     1. build an undirected graph linking every point to its KNN nearest
    %        neighbours (Euclidean distance as edge weight);
    %     2. take all-pairs shortest-path lengths in that graph as geodesic
    %        distances;
    %     3. apply classical MDS to the geodesic distances.
    %   Columns of Z are ordered by decreasing eigenvalue.
    %
    %   Errors:
    %     'ISOMAP:badNeighbourCount'  KNN is not in 1..m-1
    %     'ISOMAP:badDimension'       k is not in 1..m
    %     'ISOMAP:disconnectedGraph'  the neighbourhood graph is not connected
    %                                 (increase KNN)
    m=size(X,1);
    if KNN<1 || KNN>m-1
        error('ISOMAP:badNeighbourCount','KNN must satisfy 1 <= KNN <= size(X,1)-1.');
    end
    if k<1 || k>m
        error('ISOMAP:badDimension','k must satisfy 1 <= k <= size(X,1).');
    end
    %% Pairwise Euclidean distances
    % 'tomatrix' keeps squareform unambiguous when there is a single pair.
    D=squareform(pdist(X,'minkowski',2),'tomatrix');
    % A point is never its own neighbour. Inf (rather than a large finite
    % sentinel) stays correct whatever the scale of the data.
    D(1:m+1:end)=Inf;
    %% K-nearest-neighbour graph
    % sort is stable, so tied distances are resolved by lowest index instead
    % of yielding several indices at once.
    [sortedD,order]=sort(D,2);
    nbr=order(:,1:KNN);
    nbrD=sortedD(:,1:KNN);
    src=repmat((1:m)',1,KNN);
    % Store each undirected edge once as (low,high); mutual neighbours would
    % otherwise appear twice. An edge list (not an adjacency matrix) is used
    % so that zero-length edges between duplicate points are kept.
    pairs=[min(src(:),nbr(:)),max(src(:),nbr(:))];
    [pairs,keep]=unique(pairs,'rows');
    w=nbrD(:);
    G=graph(pairs(:,1),pairs(:,2),w(keep),m);
    %% Geodesic distances: all-pairs shortest paths in one call
    geo=distances(G);
    if any(isinf(geo(:)))
        error('ISOMAP:disconnectedGraph', ...
            'The %d-nearest-neighbour graph is not connected; increase KNN.',KNN);
    end
    %% Classical MDS
    % Double-centre the squared distances: B = -1/2 * J * D.^2 * J.
    D2=geo.^2;
    rowMean=mean(D2,2);
    B=-0.5*(bsxfun(@minus,bsxfun(@minus,D2,rowMean),rowMean')+mean(D2(:)));
    B=(B+B')/2; % remove round-off asymmetry so eig returns real values
    % Geodesic distances need not be Euclidean, so B can have negative
    % eigenvalues. Rank by signed eigenvalue (svd would rank by magnitude and
    % could pick a large negative one) and clamp anything below zero.
    [V,L]=eig(B);
    [lambda,ord]=sort(diag(L),'descend');
    lambda=max(lambda(1:k),0);
    Z=bsxfun(@times,V(:,ord(1:k)),sqrt(lambda)');
    
    % 1NN
    % K-NN
    function accuracy=Evaluate_acc(Z_train,Z_test,label_train,label_test,KNN)
    % EVALUATE_ACC  Accuracy of a K-nearest-neighbour classifier.
    %   accuracy = EVALUATE_ACC(Z_train,Z_test,label_train,label_test,KNN)
    %   classifies every row of Z_test by majority vote among the labels of
    %   its KNN closest rows of Z_train (Euclidean distance) and returns the
    %   fraction of predictions equal to label_test.
    %
    %   Z_train, Z_test   one sample per row, same number of columns
    %   label_train       numeric label of each training row
    %   label_test        numeric label of each test row
    %   KNN               number of neighbours that vote (1 gives plain 1-NN)
    %
    %   Distance ties are broken in favour of the training row with the lower
    %   index; vote ties in favour of the smaller label value.
    %   Errors with id 'Evaluate_acc:badNeighbourCount' when KNN is not in
    %   1..size(Z_train,1).
    m_train=size(Z_train,1);
    m_test=size(Z_test,1);
    if KNN<1 || KNN>m_train
        error('Evaluate_acc:badNeighbourCount', ...
            'KNN must satisfy 1 <= KNN <= size(Z_train,1).');
    end
    predict=zeros(size(label_test));
    for i=1:m_test
        % Distance from test row i to every training row in one shot.
        gap=bsxfun(@minus,Z_train,Z_test(i,:));
        d=sqrt(sum(gap.^2,2));
        [~,order]=sort(d); % ascending; stable, so ties keep index order
        % mode returns the most frequent label, smallest value on a tie.
        predict(i)=mode(label_train(order(1:KNN)));
    end
    accuracy=sum(label_test==predict)/m_test;
    
    1. PCA
      实验结果:accuracy
      数据集1:[0.5825 0.5631 0.5631]
      数据集2:[ 0.7582 0.7628 0.7356]

    2. SVD
      实验结果:accuracy
      数据集1:[0.6505 0.6019 0.6408]
      数据集2:[ 0.5425 0.5113 0.5195]

    3. ISOMAP
      实验结果:accuracy
      数据集1:[0.4563 0.4272 0.4175]
      数据集2:未做,有bug

    相关文章

      网友评论

          本文标题:2018-12-04-机器学习作业-降维

          本文链接:https://www.haomeiwen.com/subject/guzmcqtx.html