0
|
1 ## Included from MLPY build 2.2.0
|
|
2 ## Attempts were made to contact Davide Albanese on 08-10-2012 and 09-19-2012 at albanese@fbk.eu
|
|
3
|
|
4 ## This code is written by Davide Albanese, <albanese@fbk.eu>
|
|
5 ## (C) 2009 Fondazione Bruno Kessler - Via Santa Croce 77, 38100 Trento, ITALY.
|
|
6
|
|
7 ## This program is free software: you can redistribute it and/or modify
|
|
8 ## it under the terms of the GNU General Public License as published by
|
|
9 ## the Free Software Foundation, either version 3 of the License, or
|
|
10 ## (at your option) any later version.
|
|
11
|
|
12 ## This program is distributed in the hope that it will be useful,
|
|
13 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
15 ## GNU General Public License for more details.
|
|
16
|
|
17 ## You should have received a copy of the GNU General Public License
|
|
18 ## along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
|
|
20
|
|
21 __all__= ['Kmedoids', 'Minkowski']
|
|
22
|
|
23
|
|
24 import numpy as np
|
|
25 import matplotlib
|
|
26 matplotlib.use( "Agg" )
|
|
27 import mlpy
|
|
28
|
|
29
|
|
30 def kmedoids_core(x, med, oth, clust, cost, dist):
|
|
31 """
|
|
32 * for each mediod m
|
|
33 * for each non-mediod data point n
|
|
34 Swap m and n and compute the total cost of the configuration
|
|
35 Select the configuration with the lowest cost
|
|
36 """
|
|
37
|
|
38 d = np.empty((oth.shape[0], med.shape[0]), dtype=float)
|
|
39 med_n = np.empty_like(med)
|
|
40 oth_n = np.empty_like(oth)
|
|
41 idx = np.arange(oth.shape[0])
|
|
42
|
|
43 med_cur = med.copy()
|
|
44 oth_cur = oth.copy()
|
|
45 clust_cur = clust.copy()
|
|
46 cost_cur = cost
|
|
47
|
|
48 for i, m in enumerate(med):
|
|
49 for j, n in enumerate(oth[clust == i]):
|
|
50
|
|
51 med_n, oth_n = med.copy(), oth.copy()
|
|
52
|
|
53 med_n[i] = n
|
|
54 tmp = oth_n[clust == i]
|
|
55 tmp[j] = m
|
|
56 oth_n[clust == i] = tmp
|
|
57
|
|
58 for ii, nn in enumerate(oth_n):
|
|
59 for jj, mm in enumerate(med_n):
|
|
60 d[ii, jj] = dist.compute(x[mm], x[nn])
|
|
61
|
|
62 clust_n = np.argmin(d, axis=1) # clusters
|
|
63 cost_n = np.sum(d[idx, clust_n]) # total cost of configuration
|
|
64
|
|
65 if cost_n <= cost_cur:
|
|
66 med_cur = med_n.copy()
|
|
67 oth_cur = oth_n.copy()
|
|
68 clust_cur = clust_n.copy()
|
|
69 cost_cur = cost_n
|
|
70
|
|
71 return med_cur, oth_cur, clust_cur, cost_cur
|
|
72
|
|
73
|
|
74 class Kmedoids:
|
|
75 """k-medoids algorithm.
|
|
76 """
|
|
77
|
|
78 def __init__(self, k, dist, maxloops=100, rs=0):
|
|
79 """Initialize Kmedoids.
|
|
80
|
|
81 :Parameters:
|
|
82
|
|
83 k : int
|
|
84 Number of clusters/medoids
|
|
85 dist : class
|
|
86 class with a .compute(x, y) method which
|
|
87 returns a distance
|
|
88 maxloops : int
|
|
89 maximum number of loops
|
|
90 rs : int
|
|
91 random seed
|
|
92
|
|
93 Example:
|
|
94
|
|
95 >>> import numpy as np
|
|
96 >>> import mlpy
|
|
97 >>> x = np.array([[ 1. , 1.5],
|
|
98 ... [ 1.1, 1.8],
|
|
99 ... [ 2. , 2.8],
|
|
100 ... [ 3.2, 3.1],
|
|
101 ... [ 3.4, 3.2]])
|
|
102 >>> dtw = mlpy.Dtw(onlydist=True)
|
|
103 >>> km = mlpy.Kmedoids(k=3, dist=dtw)
|
|
104 >>> km.compute(x)
|
|
105 (array([4, 0, 2]), array([3, 1]), array([0, 1]), 0.072499999999999981)
|
|
106
|
|
107 Samples 4, 0, 2 are medoids and represent cluster 0, 1, 2 respectively.
|
|
108
|
|
109 * cluster 0: samples 4 (medoid) and 3
|
|
110 * cluster 1: samples 0 (medoid) and 1
|
|
111 * cluster 2: sample 2 (medoid)
|
|
112 """
|
|
113
|
|
114 self.__k = k
|
|
115 self.__maxloops = maxloops
|
|
116 self.__rs = rs
|
|
117 self.__dist = dist
|
|
118
|
|
119 np.random.seed(self.__rs)
|
|
120
|
|
121
|
|
122 def compute(self, x):
|
|
123 """Compute Kmedoids.
|
|
124
|
|
125 :Parameters:
|
|
126 x : ndarray
|
|
127 An 2-dimensional vector (sample x features).
|
|
128
|
|
129 :Returns:
|
|
130 m : ndarray (1-dimensional vector)
|
|
131 medoids indexes
|
|
132 n : ndarray (1-dimensional vector)
|
|
133 non-medoids indexes
|
|
134 cl : ndarray 1-dimensional vector)
|
|
135 cluster membership for non-medoids.
|
|
136 Groups are in 0, ..., k-1
|
|
137 co : double
|
|
138 total cost of configuration
|
|
139 """
|
|
140
|
|
141 # randomly select k of the n data points as the mediods
|
|
142 idx = np.arange(x.shape[0])
|
|
143 np.random.shuffle(idx)
|
|
144 med = idx[0:self.__k]
|
|
145 oth = idx[self.__k::]
|
|
146
|
|
147 # compute distances
|
|
148 d = np.empty((oth.shape[0], med.shape[0]), dtype=float)
|
|
149 for i, n in enumerate(oth):
|
|
150 for j, m in enumerate(med):
|
|
151 d[i, j] = self.__dist.compute(x[m], x[n])
|
|
152
|
|
153 # associate each data point to the closest medoid
|
|
154 clust = np.argmin(d, axis=1)
|
|
155
|
|
156 # total cost of configuration
|
|
157 cost = np.sum(d[np.arange(d.shape[0]), clust])
|
|
158
|
|
159 # repeat kmedoids_core until there is no change in the medoid
|
|
160 for l in range(self.__maxloops):
|
|
161
|
|
162 med_n, oth_n, clust_n, cost_n = kmedoids_core(x, med, oth, clust, cost, self.__dist)
|
|
163
|
|
164 if (cost_n < cost):
|
|
165 med, oth, clust, cost = med_n, oth_n, clust_n, cost_n
|
|
166 else:
|
|
167 break
|
|
168
|
|
169 return med, oth, clust, cost
|
|
170
|
|
171
|
|
172 class Minkowski:
|
|
173 """
|
|
174 Computes the Minkowski distance between two vectors ``x`` and ``y``.
|
|
175
|
|
176 .. math::
|
|
177
|
|
178 {||x-y||}_p = (\sum{|x_i - y_i|^p})^{1/p}.
|
|
179 """
|
|
180
|
|
181 def __init__(self, p):
|
|
182 """
|
|
183 Initialize Minkowski class.
|
|
184
|
|
185 :Parameters:
|
|
186 p : float
|
|
187 The norm of the difference :math:`{||x-y||}_p`
|
|
188 """
|
|
189
|
|
190 self.__p = p
|
|
191
|
|
192
|
|
193 def compute(self, x, y):
|
|
194 """
|
|
195 Compute Minkowski distance
|
|
196
|
|
197 :Parameters:
|
|
198 x : ndarray
|
|
199 An 1-dimensional vector.
|
|
200 y : ndarray
|
|
201 An 1-dimensional vector.
|
|
202
|
|
203 :Returns:
|
|
204 d : float
|
|
205 The Minkowski distance between vectors ``x`` and ``y``
|
|
206 """
|
|
207
|
|
208 return (abs(x - y)**self.__p).sum() ** (1.0 / self.__p)
|