Mercurial > repos > sagun98 > micropita_v2
comparison src/breadcrumbs/src/KMedoids.py @ 0:0de566f21448 draft default tip
v2
author | sagun98 |
---|---|
date | Thu, 03 Jun 2021 18:13:32 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0de566f21448 |
---|---|
1 ## Included from MLPY build 2.2.0 | |
2 ## Attempts were made to contact Davide Albanese on 08-10-2012 and 09-19-2012 at albanese@fbk.eu | |
3 | |
4 ## This code is written by Davide Albanese, <albanese@fbk.eu> | |
5 ## (C) 2009 Fondazione Bruno Kessler - Via Santa Croce 77, 38100 Trento, ITALY. | |
6 | |
7 ## This program is free software: you can redistribute it and/or modify | |
8 ## it under the terms of the GNU General Public License as published by | |
9 ## the Free Software Foundation, either version 3 of the License, or | |
10 ## (at your option) any later version. | |
11 | |
12 ## This program is distributed in the hope that it will be useful, | |
13 ## but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 ## GNU General Public License for more details. | |
16 | |
17 ## You should have received a copy of the GNU General Public License | |
18 ## along with this program. If not, see <http://www.gnu.org/licenses/>. | |
19 | |
20 | |
21 __all__= ['Kmedoids', 'Minkowski'] | |
22 | |
23 | |
24 import numpy as np | |
25 import matplotlib | |
26 matplotlib.use( "Agg" ) | |
27 import mlpy | |
28 | |
29 | |
30 def kmedoids_core(x, med, oth, clust, cost, dist): | |
31 """ | |
32 * for each mediod m | |
33 * for each non-mediod data point n | |
34 Swap m and n and compute the total cost of the configuration | |
35 Select the configuration with the lowest cost | |
36 """ | |
37 | |
38 d = np.empty((oth.shape[0], med.shape[0]), dtype=float) | |
39 med_n = np.empty_like(med) | |
40 oth_n = np.empty_like(oth) | |
41 idx = np.arange(oth.shape[0]) | |
42 | |
43 med_cur = med.copy() | |
44 oth_cur = oth.copy() | |
45 clust_cur = clust.copy() | |
46 cost_cur = cost | |
47 | |
48 for i, m in enumerate(med): | |
49 for j, n in enumerate(oth[clust == i]): | |
50 | |
51 med_n, oth_n = med.copy(), oth.copy() | |
52 | |
53 med_n[i] = n | |
54 tmp = oth_n[clust == i] | |
55 tmp[j] = m | |
56 oth_n[clust == i] = tmp | |
57 | |
58 for ii, nn in enumerate(oth_n): | |
59 for jj, mm in enumerate(med_n): | |
60 d[ii, jj] = dist.compute(x[mm], x[nn]) | |
61 | |
62 clust_n = np.argmin(d, axis=1) # clusters | |
63 cost_n = np.sum(d[idx, clust_n]) # total cost of configuration | |
64 | |
65 if cost_n <= cost_cur: | |
66 med_cur = med_n.copy() | |
67 oth_cur = oth_n.copy() | |
68 clust_cur = clust_n.copy() | |
69 cost_cur = cost_n | |
70 | |
71 return med_cur, oth_cur, clust_cur, cost_cur | |
72 | |
73 | |
74 class Kmedoids: | |
75 """k-medoids algorithm. | |
76 """ | |
77 | |
78 def __init__(self, k, dist, maxloops=100, rs=0): | |
79 """Initialize Kmedoids. | |
80 | |
81 :Parameters: | |
82 | |
83 k : int | |
84 Number of clusters/medoids | |
85 dist : class | |
86 class with a .compute(x, y) method which | |
87 returns a distance | |
88 maxloops : int | |
89 maximum number of loops | |
90 rs : int | |
91 random seed | |
92 | |
93 Example: | |
94 | |
95 >>> import numpy as np | |
96 >>> import mlpy | |
97 >>> x = np.array([[ 1. , 1.5], | |
98 ... [ 1.1, 1.8], | |
99 ... [ 2. , 2.8], | |
100 ... [ 3.2, 3.1], | |
101 ... [ 3.4, 3.2]]) | |
102 >>> dtw = mlpy.Dtw(onlydist=True) | |
103 >>> km = mlpy.Kmedoids(k=3, dist=dtw) | |
104 >>> km.compute(x) | |
105 (array([4, 0, 2]), array([3, 1]), array([0, 1]), 0.072499999999999981) | |
106 | |
107 Samples 4, 0, 2 are medoids and represent cluster 0, 1, 2 respectively. | |
108 | |
109 * cluster 0: samples 4 (medoid) and 3 | |
110 * cluster 1: samples 0 (medoid) and 1 | |
111 * cluster 2: sample 2 (medoid) | |
112 """ | |
113 | |
114 self.__k = k | |
115 self.__maxloops = maxloops | |
116 self.__rs = rs | |
117 self.__dist = dist | |
118 | |
119 np.random.seed(self.__rs) | |
120 | |
121 | |
122 def compute(self, x): | |
123 """Compute Kmedoids. | |
124 | |
125 :Parameters: | |
126 x : ndarray | |
127 An 2-dimensional vector (sample x features). | |
128 | |
129 :Returns: | |
130 m : ndarray (1-dimensional vector) | |
131 medoids indexes | |
132 n : ndarray (1-dimensional vector) | |
133 non-medoids indexes | |
134 cl : ndarray 1-dimensional vector) | |
135 cluster membership for non-medoids. | |
136 Groups are in 0, ..., k-1 | |
137 co : double | |
138 total cost of configuration | |
139 """ | |
140 | |
141 # randomly select k of the n data points as the mediods | |
142 idx = np.arange(x.shape[0]) | |
143 np.random.shuffle(idx) | |
144 med = idx[0:self.__k] | |
145 oth = idx[self.__k::] | |
146 | |
147 # compute distances | |
148 d = np.empty((oth.shape[0], med.shape[0]), dtype=float) | |
149 for i, n in enumerate(oth): | |
150 for j, m in enumerate(med): | |
151 d[i, j] = self.__dist.compute(x[m], x[n]) | |
152 | |
153 # associate each data point to the closest medoid | |
154 clust = np.argmin(d, axis=1) | |
155 | |
156 # total cost of configuration | |
157 cost = np.sum(d[np.arange(d.shape[0]), clust]) | |
158 | |
159 # repeat kmedoids_core until there is no change in the medoid | |
160 for l in range(self.__maxloops): | |
161 | |
162 med_n, oth_n, clust_n, cost_n = kmedoids_core(x, med, oth, clust, cost, self.__dist) | |
163 | |
164 if (cost_n < cost): | |
165 med, oth, clust, cost = med_n, oth_n, clust_n, cost_n | |
166 else: | |
167 break | |
168 | |
169 return med, oth, clust, cost | |
170 | |
171 | |
172 class Minkowski: | |
173 """ | |
174 Computes the Minkowski distance between two vectors ``x`` and ``y``. | |
175 | |
176 .. math:: | |
177 | |
178 {||x-y||}_p = (\sum{|x_i - y_i|^p})^{1/p}. | |
179 """ | |
180 | |
181 def __init__(self, p): | |
182 """ | |
183 Initialize Minkowski class. | |
184 | |
185 :Parameters: | |
186 p : float | |
187 The norm of the difference :math:`{||x-y||}_p` | |
188 """ | |
189 | |
190 self.__p = p | |
191 | |
192 | |
193 def compute(self, x, y): | |
194 """ | |
195 Compute Minkowski distance | |
196 | |
197 :Parameters: | |
198 x : ndarray | |
199 An 1-dimensional vector. | |
200 y : ndarray | |
201 An 1-dimensional vector. | |
202 | |
203 :Returns: | |
204 d : float | |
205 The Minkowski distance between vectors ``x`` and ``y`` | |
206 """ | |
207 | |
208 return (abs(x - y)**self.__p).sum() ** (1.0 / self.__p) |