测试代码文件
test.m
function test
% TEST  Cluster the spreadsheet data twice and plot both label assignments.
%
%   Reads the numeric content of 'discretization_data.xls', clusters it into
%   k = 4 groups with test_kmeans and then with MATLAB's kmeans, and draws
%   one scatter plot (data value vs. cluster label) per method so the two
%   results can be compared by eye.
workbook = 'discretization_data.xls';
[data, ~] = xlsread(workbook);
k = 4;
rows = size(data, 1);

% Hand-written k-means; the plot goes into the current figure.
showAssignment(data, test_kmeans(data, k), rows);

% MATLAB's kmeans, drawn in a fresh figure for comparison.
figure
showAssignment(data, kmeans(data, k), rows);

function showAssignment(values, labels, rows)
% Pair every data value with its cluster label and plot the pairs as circles.
% The column assignments require values and labels to hold exactly `rows`
% elements each.
pairs = zeros(rows, 2);
pairs(:, 1) = values;
pairs(:, 2) = labels;
plot(pairs(:, 1), pairs(:, 2), 'o')
k-means实现
test_kmeans.m
function idx = test_kmeans(data, k)
% TEST_KMEANS  Cluster the rows of data into k groups with a basic k-means loop.
%
%   idx = test_kmeans(data, k)
%
%   Inputs:
%     data - numeric matrix, one observation per row
%     k    - number of clusters
%   Output:
%     idx  - cluster label (1..k) for every row of data, as produced by
%            adjustIdx on the last iteration
%
%   The loop stops when adjustDots reports that no seed moved, or after
%   maxIter passes. The cap prevents an endless loop when some cluster can
%   never be populated (for example k larger than the number of distinct
%   points), in which case a warning is issued and the last labels are
%   returned.

% Draw the initial seeds and fetch the row/column counts of the data.
% (Semicolon added: the results used to be echoed to the console.)
[dots, len, recInd] = initDots(data, k);

maxIter = 1000;   % safety cap on the number of refinement passes
iter = 0;
flag = 1;
while flag && iter < maxIter
    iter = iter + 1;
    % Assign every observation to its nearest seed.
    idx = adjustIdx(data, dots, len, recInd, k);
    % Re-draw any seed that ended up with no observations.
    dots = checkDots(dots, idx, k, len);
    % Move the seeds to their cluster centres; flag becomes 0 once all are stable.
    [dots, flag] = adjustDots(data, dots, len, k, recInd, idx);
end

if flag
    warning('test_kmeans:noConvergence', ...
        'test_kmeans stopped after %d iterations without converging.', maxIter);
end
function [dots, len, recInd] = initDots(data, k)
% INITDOTS  Draw k random seed points and report the dimensions of the data.
%
%   Inputs:
%     data - numeric matrix, one observation per row
%     k    - number of seed points to draw
%   Outputs:
%     dots   - k-by-recInd matrix; each column is drawn uniformly between the
%              minimum and maximum of the matching data column
%     len    - number of rows of data
%     recInd - number of columns of data
%
%   Seeds used to be drawn from [0, max], which places them outside the data
%   range whenever a column is negative or lies far from zero.
if isempty(data)
    error('initDots:emptyData', 'data must contain at least one observation.');
end

len = size(data, 1);
recInd = size(data, 2);

% Per-column bounding box of the observations.
colMin = min(data, [], 1);
colMax = max(data, [], 1);

% One uniform draw per seed coordinate, scaled into [colMin, colMax].
dots = repmat(colMin, k, 1) + rand(k, recInd) .* repmat(colMax - colMin, k, 1);
function idx = adjustIdx(data, dots, len, recInd, k)
% ADJUSTIDX  Label every observation with the index of its nearest seed.
%
%   Inputs:
%     data   - observations, one per row; the first len rows and first
%              recInd columns are used
%     dots   - seed points, one per row; the first k rows are used
%     len    - number of observations to label
%     recInd - number of coordinates per observation
%     k      - number of seeds
%   Output:
%     idx    - 1-by-len row vector; idx(i) is the seed with the smallest
%              squared Euclidean distance to observation i. On a tie the
%              lowest seed index wins.
%
%   idx is preallocated so that it is defined (and empty) when len is 0.
idx = zeros(1, len);
seeds = dots(1:k, 1:recInd);
for i = 1 : len
    % Squared distance from observation i to every seed at once.
    offsets = seeds - repmat(data(i, 1:recInd), k, 1);
    sqDist = sum(offsets .^ 2, 2);
    % min returns the first minimum, matching a strict "closer than" scan.
    [~, idx(i)] = min(sqDist);
end
function dots = checkDots(dots, idx, k, len)
% CHECKDOTS  Re-draw every seed that has no observations assigned to it.
%
%   Inputs:
%     dots - k-by-d matrix of seed points, one per row
%     idx  - cluster label of each observation; the first len entries are read
%     k    - number of seeds
%     len  - number of observations
%   Output:
%     dots - the seeds, with each unused seed replaced by a fresh random point
%
%   The observations are not available through this signature, so a
%   replacement is drawn uniformly inside the per-column bounding box of the
%   seeds as they were on entry. The dimensionality is taken from dots
%   itself; the earlier code read recInd and varMax, which are not defined
%   in this function, and failed as soon as a cluster was empty.
recInd = size(dots, 2);
lo = min(dots, [], 1);
hi = max(dots, [], 1);
for i = 1 : k
    if ~any(idx(1:len) == i)
        dots(i, :) = lo + rand(1, recInd) .* (hi - lo);
    end
end
function [dots, flag] = adjustDots(data, dots, len, k, recInd, idx)
% ADJUSTDOTS  Move each seed to the mean of its cluster and report convergence.
%
%   Inputs:
%     data   - observations, one per row
%     dots   - seed points, one per row
%     len    - number of observations
%     k      - number of seeds
%     recInd - number of coordinates per observation
%     idx    - cluster label of each observation (first len entries are read)
%   Outputs:
%     dots   - updated seeds; a seed is left untouched when its cluster is
%              empty or when it is already within tolerance of the cluster mean
%     flag   - 0 when all k seeds are within tolerance of their cluster mean,
%              1 otherwise (an empty cluster therefore keeps flag at 1)
%
%   Whole rows are read and written with (i, 1:recInd). The earlier code used
%   the linear index (i), which addresses a single element and fails on
%   assignment for data with more than one column.
flag = 1;
settled = 0;   % number of seeds that did not need to move
for i = 1 : k
    members = (idx(1:len) == i);
    if ~any(members)
        continue;   % empty cluster: nothing to average
    end
    centre = mean(data(members, 1:recInd), 1);
    % L1 distance between the seed and its cluster mean as the movement measure.
    if sum(abs(dots(i, 1:recInd) - centre)) < 0.001
        settled = settled + 1;
    else
        dots(i, 1:recInd) = centre;
    end
end
if settled == k
    flag = 0;
end
三次图像对比
通过三次测试可以看出,自己实现的 k-means 算法的聚类结果与 MATLAB 自带的 kmeans 函数基本相同,误差非常小,几乎可以忽略不计。
Comments