-
Notifications
You must be signed in to change notification settings - Fork 4
/
clustering_error.m
100 lines (77 loc) · 2.28 KB
/
clustering_error.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
% This function computes the clustering error, defined as:
%
% err(map) = 1 - sum_i delta(origlabel(i), map(label(i)))/N
%
% here map() is a permutation map for the cluster indices. The clustering
% error is defined as the minimum of err(map) over all possible permutation maps.
% The optimal matching is found via the Hungarian algorithm.
%
% the entries in origlabel and label must be in 1:L
%
% Reinhard Heckel, 2013
function err = clustering_error(label, origlabel)
if size(origlabel,2) == 1
origlabel = origlabel';
end
if size(label,2) == 1
label = label';
end
a = unique(origlabel);
b = unique(label);
% if length(unique(label))~=length(unique(origlabel))
% err = 1;
% return
% end
% construct the contingency table between the two labels, which contains
% the number of cluster label co-occurences
N = length(label);
[labelssorted, ind] = sort(origlabel);
origlabel = origlabel(ind); % now the original labels are sorted
label = label(ind);
%% beg, endd will contain the indices of the begin (end) indices of the respective cluster
beg(1) = 1;
k = 1;
beg(k) = 1;
for i =1:N % for each point
if origlabel(i) == k
else
endd(k) = i-1;
k = k+1;
beg(k) = i;
end
end
endd(k) = N;
%%
L = length(beg);
Q = length(unique(label));
A = zeros(max(L,Q),max(L,Q));
unique(origlabel);
unique(label);
for l = 1:L
T = beg(l):endd(l); % the indices of coeff within the same cluster
% construct a row of K,
S = label(T);
unv = unique(S);
his = histc(S,unv);
% labelssorted(T(1)) is the current row
A(labelssorted(T(1)), unv) = his;
end
A = -A;
[Matching,Cost] = Hungarian(A); % find the maximal weighted matching of the bipartite Graph defined by A;
% the rows correspond to the nodes on one
% side, the columns to the nodes on the
% other side
% ``Matching'' contains the optimal maping
% construct the vector containing the matching from ``Matching''
for l=1:max(L,Q)
if(find(Matching(:,l) == 1))
matchv(l) = find(Matching(:,l)==1);
else
matchv(l) = l;
end
end
for i = 1:N
labelreordered(i) = matchv(label(i));
end
err = sum(origlabel ~= labelreordered)/N;
end