comparison src/breadcrumbs/src/KMedoids.py @ 0:0de566f21448 draft default tip

v2
author sagun98
date Thu, 03 Jun 2021 18:13:32 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0de566f21448
1 ## Included from MLPY build 2.2.0
2 ## Attempts were made to contact Davide Albanese on 08-10-2012 and 09-19-2012 at albanese@fbk.eu
3
4 ## This code is written by Davide Albanese, <albanese@fbk.eu>
5 ## (C) 2009 Fondazione Bruno Kessler - Via Santa Croce 77, 38100 Trento, ITALY.
6
7 ## This program is free software: you can redistribute it and/or modify
8 ## it under the terms of the GNU General Public License as published by
9 ## the Free Software Foundation, either version 3 of the License, or
10 ## (at your option) any later version.
11
12 ## This program is distributed in the hope that it will be useful,
13 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ## GNU General Public License for more details.
16
17 ## You should have received a copy of the GNU General Public License
18 ## along with this program. If not, see <http://www.gnu.org/licenses/>.
19
20
21 __all__= ['Kmedoids', 'Minkowski']
22
23
24 import numpy as np
25 import matplotlib
26 matplotlib.use( "Agg" )
27 import mlpy
28
29
30 def kmedoids_core(x, med, oth, clust, cost, dist):
31 """
32 * for each mediod m
33 * for each non-mediod data point n
34 Swap m and n and compute the total cost of the configuration
35 Select the configuration with the lowest cost
36 """
37
38 d = np.empty((oth.shape[0], med.shape[0]), dtype=float)
39 med_n = np.empty_like(med)
40 oth_n = np.empty_like(oth)
41 idx = np.arange(oth.shape[0])
42
43 med_cur = med.copy()
44 oth_cur = oth.copy()
45 clust_cur = clust.copy()
46 cost_cur = cost
47
48 for i, m in enumerate(med):
49 for j, n in enumerate(oth[clust == i]):
50
51 med_n, oth_n = med.copy(), oth.copy()
52
53 med_n[i] = n
54 tmp = oth_n[clust == i]
55 tmp[j] = m
56 oth_n[clust == i] = tmp
57
58 for ii, nn in enumerate(oth_n):
59 for jj, mm in enumerate(med_n):
60 d[ii, jj] = dist.compute(x[mm], x[nn])
61
62 clust_n = np.argmin(d, axis=1) # clusters
63 cost_n = np.sum(d[idx, clust_n]) # total cost of configuration
64
65 if cost_n <= cost_cur:
66 med_cur = med_n.copy()
67 oth_cur = oth_n.copy()
68 clust_cur = clust_n.copy()
69 cost_cur = cost_n
70
71 return med_cur, oth_cur, clust_cur, cost_cur
72
73
74 class Kmedoids:
75 """k-medoids algorithm.
76 """
77
78 def __init__(self, k, dist, maxloops=100, rs=0):
79 """Initialize Kmedoids.
80
81 :Parameters:
82
83 k : int
84 Number of clusters/medoids
85 dist : class
86 class with a .compute(x, y) method which
87 returns a distance
88 maxloops : int
89 maximum number of loops
90 rs : int
91 random seed
92
93 Example:
94
95 >>> import numpy as np
96 >>> import mlpy
97 >>> x = np.array([[ 1. , 1.5],
98 ... [ 1.1, 1.8],
99 ... [ 2. , 2.8],
100 ... [ 3.2, 3.1],
101 ... [ 3.4, 3.2]])
102 >>> dtw = mlpy.Dtw(onlydist=True)
103 >>> km = mlpy.Kmedoids(k=3, dist=dtw)
104 >>> km.compute(x)
105 (array([4, 0, 2]), array([3, 1]), array([0, 1]), 0.072499999999999981)
106
107 Samples 4, 0, 2 are medoids and represent cluster 0, 1, 2 respectively.
108
109 * cluster 0: samples 4 (medoid) and 3
110 * cluster 1: samples 0 (medoid) and 1
111 * cluster 2: sample 2 (medoid)
112 """
113
114 self.__k = k
115 self.__maxloops = maxloops
116 self.__rs = rs
117 self.__dist = dist
118
119 np.random.seed(self.__rs)
120
121
122 def compute(self, x):
123 """Compute Kmedoids.
124
125 :Parameters:
126 x : ndarray
127 An 2-dimensional vector (sample x features).
128
129 :Returns:
130 m : ndarray (1-dimensional vector)
131 medoids indexes
132 n : ndarray (1-dimensional vector)
133 non-medoids indexes
134 cl : ndarray 1-dimensional vector)
135 cluster membership for non-medoids.
136 Groups are in 0, ..., k-1
137 co : double
138 total cost of configuration
139 """
140
141 # randomly select k of the n data points as the mediods
142 idx = np.arange(x.shape[0])
143 np.random.shuffle(idx)
144 med = idx[0:self.__k]
145 oth = idx[self.__k::]
146
147 # compute distances
148 d = np.empty((oth.shape[0], med.shape[0]), dtype=float)
149 for i, n in enumerate(oth):
150 for j, m in enumerate(med):
151 d[i, j] = self.__dist.compute(x[m], x[n])
152
153 # associate each data point to the closest medoid
154 clust = np.argmin(d, axis=1)
155
156 # total cost of configuration
157 cost = np.sum(d[np.arange(d.shape[0]), clust])
158
159 # repeat kmedoids_core until there is no change in the medoid
160 for l in range(self.__maxloops):
161
162 med_n, oth_n, clust_n, cost_n = kmedoids_core(x, med, oth, clust, cost, self.__dist)
163
164 if (cost_n < cost):
165 med, oth, clust, cost = med_n, oth_n, clust_n, cost_n
166 else:
167 break
168
169 return med, oth, clust, cost
170
171
172 class Minkowski:
173 """
174 Computes the Minkowski distance between two vectors ``x`` and ``y``.
175
176 .. math::
177
178 {||x-y||}_p = (\sum{|x_i - y_i|^p})^{1/p}.
179 """
180
181 def __init__(self, p):
182 """
183 Initialize Minkowski class.
184
185 :Parameters:
186 p : float
187 The norm of the difference :math:`{||x-y||}_p`
188 """
189
190 self.__p = p
191
192
193 def compute(self, x, y):
194 """
195 Compute Minkowski distance
196
197 :Parameters:
198 x : ndarray
199 An 1-dimensional vector.
200 y : ndarray
201 An 1-dimensional vector.
202
203 :Returns:
204 d : float
205 The Minkowski distance between vectors ``x`` and ``y``
206 """
207
208 return (abs(x - y)**self.__p).sum() ** (1.0 / self.__p)