% gusucode.com > 用mushrooms数据对模式识别课程讲述的各种模式分类方法matlab源码程序 > pattern-recognition-simulation/pca_bayes.m

    clc;    % wipe the command window
clear;      % start from an empty workspace

% ------------------------------------------------------------------
% Load the data file and keep 17 of its columns:
% column 1 is the class label, the remaining 16 are features.
% ------------------------------------------------------------------
raw_table = textread('data2000.txt');
kept_columns = [1:6, 9:15, 19:22];
samples = raw_table(:, kept_columns);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Min-max normalise every feature column to the range [0, 1].
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
[ms, ns] = size(samples);
% Reduce explicitly along dimension 1 so a single-row matrix is still
% treated column by column.
TMax = max(samples, [], 1);
TMin = min(samples, [], 1);
% Column 1 holds the class label, so scaling starts at column 2.
for i = 2:ns
    span = TMax(i) - TMin(i);
    if span == 0
        % A constant column would evaluate 0/0 and fill the feature with
        % NaN, which then propagates into the covariance matrices.
        % Map such a column to 0 instead.
        samples(:,i) = 0;
    else
        samples(:,i) = (samples(:,i) - TMin(i)) / span;
    end
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Dimensionality reduction: project the features onto the leading
% principal components of their covariance matrix.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

k_reduction = 16;   % number of principal components kept
cov_data = cov(samples(:,2:end));
[pc, latent, explained] = pcacov(cov_data);

% The features are multiplied by the component matrix as they are; no
% mean is subtracted before the projection.
labels_row = samples(:,1)';
projected = pc(:,1:k_reduction)' * samples(:,2:end)';

% One sample per row again, with the class label kept in column 1.
dimenReduct_samples = [labels_row; projected]';

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Split the samples into k consecutive folds. ten_chong(i,1) is the row
% index at which fold i starts.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
k = 10;
% floor() keeps every start row an integer when ms is not a multiple of
% k (a fractional value is not a valid subscript). When ms is a multiple
% of k the start rows are the same as stepping by ms/k.
fold_len = floor(ms / k);
ten_chong = 1 + (0:k-1)' * fold_len;   % k-by-1 column of start rows

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% k-fold cross-validation. In round i (1<=i<=k) fold i is the test set
% and all remaining samples form the training set. Each round fits one
% Gaussian per class and classifies the test fold with the quadratic
% discriminant
%     g_c(x) = x'*W_c*x + w_c'*x + w_c0
% where W_c = -1/2*inv(S_c), w_c = inv(S_c)*m_c and
% w_c0 = -1/2*m_c'*inv(S_c)*m_c - 1/2*log(det(S_c)) + log(P_c).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Class priors (the original author's note says they were obtained by
% counting). They do not change between folds, so they are set once.
p_1 = 0.482;
p_2 = 0.518;
p = [p_1, p_2];

n_total = size(dimenReduct_samples, 1);

% Per-fold outputs of analyse_result.
% NOTE(review): the variable name "error" hides MATLAB's built-in error()
% for the rest of the script; it is kept because later lines read it.
correct = zeros(k,1);
error = zeros(k,1);
ROC = zeros(k,4);

for i = 1:k
    % Rows of the current test fold; the last fold runs to the final row.
    first_row = ten_chong(i,1);
    if i == k
        last_row = n_total;
    else
        last_row = ten_chong(i+1,1) - 1;
    end
    test_rows = first_row:last_row;
    test_example = dimenReduct_samples(test_rows,:);      % test samples

    % Every row that is not in the test fold is a training sample.
    is_train = true(n_total,1);
    is_train(test_rows) = false;
    traing_example = dimenReduct_samples(is_train,:);     % training samples

    % Row indices of each class inside the training set (label in column 1)
    index1 = find(traing_example(:,1)==1);
    index2 = find(traing_example(:,1)==2);

    % Class means as column vectors. The explicit dimension keeps the
    % result a per-feature mean even when a class has a single sample.
    mean1 = mean(traing_example(index1,2:end), 1)';
    mean2 = mean(traing_example(index2,2:end), 1)';

    % Class covariance matrices
    cov1 = cov(traing_example(index1,2:end));
    cov2 = cov(traing_example(index2,2:end));

    % Discriminant coefficients; each inverse is computed once and reused.
    inv_cov1 = inv(cov1);
    W_1 = -1/2*inv_cov1;
    w_1 = inv_cov1*mean1;
    w_10 = -1/2*mean1'*inv_cov1*mean1 - 1/2*log(det(cov1)) + log(p(1,1));

    inv_cov2 = inv(cov2);
    W_2 = -1/2*inv_cov2;
    w_2 = inv_cov2*mean2;
    w_20 = -1/2*mean2'*inv_cov2*mean2 - 1/2*log(det(cov2)) + log(p(1,2));

    % Classify every test sample. result_class is sized from the actual
    % fold so a last fold of a different length is handled as well.
    n_test = size(test_example,1);
    result_class = zeros(n_test,1);
    for j = 1:n_test
        x = test_example(j,2:end)';    % feature column vector of sample j
        G_1 = x'*W_1*x + w_1'*x + w_10;
        G_2 = x'*W_2*x + w_2'*x + w_20;
        % The second output of max is always one index, so equal scores
        % pick class 1 instead of producing a two-element result that
        % cannot be stored in result_class(j,1).
        [~, index] = max([G_1, G_2]);
        result_class(j,1) = index;
    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Evaluate this fold
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    [correct(i,1),error(i,1),ROC(i,1:4)] = analyse_result(test_example,result_class);
end

% Gather the per-fold figures into one matrix for easy inspection:
% one row per fold, columns = [correct, error, the four ROC values].
result = [correct, error, ROC];

% Leave the individual series transposed (one column per fold).
correct = correct';
error = error';
ROC = ROC';