测试代码文件

test.m

 

function test
% TEST  Compare the hand-written k-means with MATLAB's built-in kmeans.
%   Reads the numeric data from 'discretization_data.xls', clusters it into
%   four groups with test_kmeans and then with kmeans, and for each result
%   plots the sample value against the assigned cluster index. The second
%   plot is drawn in a new figure so both stay visible.

numClusters = 4;
[samples, ~] = xlsread('discretization_data.xls');
numSamples = size(samples, 1);

% Hand-written k-means: column 1 holds the samples, column 2 the labels.
labels = test_kmeans(samples, numClusters);
pairs = zeros(numSamples, 2);
pairs(:, 1) = samples;
pairs(:, 2) = labels;
plot(pairs(:, 1), pairs(:, 2), 'o')

% Built-in kmeans, plotted in a separate figure for comparison.
figure
labels = kmeans(samples, numClusters);
pairs = zeros(numSamples, 2);
pairs(:, 1) = samples;
pairs(:, 2) = labels;
plot(pairs(:, 1), pairs(:, 2), 'o')

 

 

k-means实现

test_kmeans.m

 

function idx = test_kmeans(data, k)
% TEST_KMEANS  Cluster the rows of DATA into K groups with k-means.
%   idx = test_kmeans(data, k) returns, for every row of data, the index
%   (1..k) of the seed point that row was assigned to.
%
%   data - numeric matrix, one sample per row, one feature per column
%   k    - number of clusters
%   idx  - cluster index for each row of data, as produced by adjustIdx

    % Draw the initial seed points and record the number of rows (len)
    % and columns (recInd) of the data. The trailing semicolon keeps the
    % three outputs from being echoed to the command window.
    [dots, len, recInd] = initDots(data, k);

    % Iterate until adjustDots reports that no seed point moved.
    keepIterating = true;
    while keepIterating
        % Assign every sample to its nearest seed point.
        idx = adjustIdx(data, dots, len, recInd, k);

        % Re-draw any seed point that ended up with no samples.
        dots = checkDots(dots, idx, k, len);

        % Move each seed point to the centre of its samples; the second
        % output becomes 0 once every seed point is stable.
        [dots, keepIterating] = adjustDots(data, dots, len, k, recInd, idx);
    end

 

%初始化种子点,计算数据长度, 维度

function [dots, len, recInd] = initDots(data, k)
% INITDOTS  Draw K random seed points inside the bounding box of DATA.
%   [dots, len, recInd] = initDots(data, k)
%
%   data   - numeric matrix, one sample per row, one feature per column
%   k      - number of seed points to draw
%   dots   - k-by-recInd matrix; column j of every seed point is uniform
%            between the minimum and the maximum of data(:, j)
%   len    - number of rows of data
%   recInd - number of columns of data

    len = size(data, 1);
    recInd = size(data, 2);

    % Per-column bounds. Scaling rand by the maximum alone would place the
    % seeds between 0 and the maximum, which lies outside the data whenever
    % a column does not start near zero (or is entirely negative).
    colMin = min(data, [], 1);
    colMax = max(data, [], 1);

    dots = zeros(k, recInd);
    for i = 1 : k
        dots(i, :) = colMin + rand(1, recInd) .* (colMax - colMin);
    end

   

 %调整点群所属

function idx = adjustIdx(data, dots, len, recInd, k)
% ADJUSTIDX  Assign every sample to its nearest seed point.
%   idx = adjustIdx(data, dots, len, recInd, k)
%
%   data   - numeric matrix, one sample per row
%   dots   - seed points, one per row
%   len    - number of rows of data to classify
%   recInd - number of columns (features) used for the distance
%   k      - number of seed points to consider
%   idx    - 1-by-len row vector; idx(i) is the row of dots with the
%            smallest squared Euclidean distance to data(i, :). On a tie
%            the lowest seed index wins.

    % Preallocate so the result is defined even when len is 0 and is not
    % regrown on every iteration.
    idx = zeros(1, len);
    cols = 1 : recInd;

    for i = 1 : len
        % Squared distance from sample i to each of the k seed points.
        offsets = bsxfun(@minus, dots(1:k, cols), data(i, cols));
        sqDist = sum(offsets .^ 2, 2);

        % min returns the first index of the smallest value.
        [~, idx(i)] = min(sqDist);
    end

 

 % 检查是否有种子点落空(即没有划分到点群里),如果落空则重新初始化种子点

function dots = checkDots(dots, idx, k, len, varMax)
% CHECKDOTS  Re-draw every seed point that has no samples assigned to it.
%   dots = checkDots(dots, idx, k, len)
%   dots = checkDots(dots, idx, k, len, varMax)
%
%   dots   - seed points, one per row
%   idx    - cluster index of each sample
%   k      - number of seed points
%   len    - number of samples (entries of idx to inspect)
%   varMax - optional row vector with one upper bound per column of dots.
%            When omitted or empty, the column-wise maximum of the
%            incoming seed points is used, because this function has no
%            access to the data.
%
%   A seed point i is left untouched if any of idx(1:len) equals i.
%   Otherwise each of its coordinates j is replaced by rand * varMax(j).

    % The column count comes from dots itself; it is not passed in.
    recInd = size(dots, 2);
    if nargin < 5 || isempty(varMax)
        varMax = max(dots, [], 1);
    end

    for i = 1 : k
        if any(idx(1:len) == i)
            continue;
        end

        % Seed point i received no samples: draw a fresh position.
        for j = 1 : recInd
            dots(i, j) = rand * varMax(j);
        end
    end

 

 % 调整种子点到所属点群的中心点

function [dots, flag] = adjustDots(data, dots, len, k, recInd, idx)
% ADJUSTDOTS  Move each seed point to the centre of its assigned samples.
%   [dots, flag] = adjustDots(data, dots, len, k, recInd, idx)
%
%   data   - numeric matrix, one sample per row
%   dots   - current seed points, one per row
%   len    - number of samples
%   k      - number of seed points
%   recInd - number of columns (features)
%   idx    - cluster index of each sample
%
%   dots   - updated seed points. A seed point is overwritten with the mean
%            of its samples only when the summed absolute change is at
%            least 0.001; seed points without samples are left as they are.
%   flag   - 0 when all k seed points changed by less than 0.001 (the
%            iteration may stop), 1 otherwise. A seed point without
%            samples never counts as stable, so flag stays 1.

    cols = 1 : recInd;
    stableCount = 0;

    for i = 1 : k
        members = (idx(1:len) == i);
        memberCount = sum(members);

        % Nothing assigned to this seed point: skip it (checkDots is the
        % place where such points get re-drawn).
        if memberCount == 0
            continue;
        end

        % Whole-row centroid. Indexing with (i, :) rather than (i) is what
        % makes this work for data with more than one column.
        centroid = sum(data(members, cols), 1) / memberCount;

        if sum(abs(dots(i, cols) - centroid)) < 0.001
            stableCount = stableCount + 1;
        else
            dots(i, cols) = centroid;
        end
    end

    % Keep iterating unless every seed point was stable.
    if stableCount == k
        flag = 0;
    else
        flag = 1;
    end

 

 

三次图像对比

 

通过三次测试可以看出,自己实现的 k-means 算法的聚类结果与 MATLAB 自带的 kmeans 函数基本一致,误差非常小,几乎可以忽略不计。