Mercurial > repos > immport-devteam > cross_sample
comparison src/cent_adjust.c @ 1:7eab80f86779 draft
add FLOCK
author | immport-devteam |
---|---|
date | Mon, 27 Feb 2017 13:26:09 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:8d951baf795f | 1:7eab80f86779 |
---|---|
1 ///////////////////////////////////////////////////////// | |
2 // Cent_adjust version number and modification history | |
3 // ImmPort BISC project | |
4 // Author: Yu "Max" Qian | |
5 // v1.01: Oct 16, 2009 | |
6 // Line 899 of the main function: | |
7 // Changed kmean_term=1 to kmean_term=2 | |
8 ////////////////////////////////////////////////////////// | |
9 | |
10 | |
11 #include <time.h> | |
12 #include <stdio.h> | |
13 #include <stdlib.h> | |
14 #include <math.h> | |
15 #include <string.h> | |
16 | |
17 #define DEBUG 0 | |
18 #define LINE_LEN 1024 | |
19 #define FILE_NAME_LEN 128 | |
20 #define PARA_NAME_LEN 64 | |
21 #define MAX_VALUE 1000000000 | |
22 #define CUBE 0 | |
23 | |
24 | |
25 void getctrfileinfo(FILE *f_src_ctr, long *num_clust) | |
26 { | |
27 int ch='\n'; | |
28 int prev='\n'; | |
29 long num_rows=0; | |
30 | |
31 while ((ch = fgetc(f_src_ctr))!= EOF ) | |
32 { | |
33 if (ch == '\n') | |
34 { | |
35 ++num_rows; | |
36 } | |
37 prev = ch; | |
38 } | |
39 if (prev!='\n') | |
40 ++num_rows; | |
41 | |
42 *num_clust=num_rows; | |
43 //printf("center file has %ld rows\n", *num_clust); | |
44 } | |
45 | |
46 /************************************* Read basic info of the source file **************************************/ | |
47 void getfileinfo(FILE *f_src, long *file_Len, long *num_dm, char *name_string, int *time_ID) | |
48 { | |
49 char src[LINE_LEN]; | |
50 char current_name[64]; | |
51 char prv; | |
52 | |
53 long num_rows=0; | |
54 long num_columns=0; | |
55 int ch='\n'; | |
56 int prev='\n'; | |
57 long time_pos=0; | |
58 long i=0; | |
59 long j=0; | |
60 | |
61 | |
62 | |
63 src[0]='\0'; | |
64 fgets(src, LINE_LEN, f_src); | |
65 | |
66 name_string[0]='\0'; | |
67 current_name[0]='\0'; | |
68 prv='\n'; | |
69 | |
70 while ((src[i]==' ') || (src[i]=='\t')) //skip space and tab characters | |
71 i++; | |
72 | |
73 while ((src[i]!='\r') && (src[i]!='\n')) //repeat until the end of the line | |
74 { | |
75 current_name[j]=src[i]; | |
76 | |
77 if ((src[i]=='\t') && (prv!='\t')) //a complete word | |
78 { | |
79 current_name[j]='\0'; | |
80 | |
81 /* | |
82 * Commented out John Campbell, June 10 2010 | |
83 * We no longer want to automatically remove Time column. | |
84 * This column should have been removed by column selection | |
85 if (0!=strcmp(current_name,"Time")) | |
86 { | |
87 num_columns++; //num_columns does not inlcude the column of Time | |
88 time_pos++; | |
89 strcat(name_string,current_name); | |
90 strcat(name_string,"\t"); | |
91 } | |
92 else | |
93 { | |
94 *time_ID=time_pos; | |
95 } | |
96 */ | |
97 | |
98 num_columns++; | |
99 strcat(name_string,current_name); | |
100 strcat(name_string,"\t"); | |
101 | |
102 current_name[0]='\0'; | |
103 j=0; | |
104 } | |
105 | |
106 if ((src[i]=='\t') && (prv=='\t')) //a duplicate tab or space | |
107 { | |
108 current_name[0]='\0'; | |
109 j=0; | |
110 } | |
111 | |
112 if (src[i]!='\t') | |
113 j++; | |
114 | |
115 | |
116 prv=src[i]; | |
117 i++; | |
118 } | |
119 | |
120 if (prv!='\t') //the last one hasn't been retrieved | |
121 { | |
122 current_name[j]='\0'; | |
123 /* | |
124 * Commented out John Campbell, June 10 2010 | |
125 * We no longer want to automatically remove Time column. | |
126 * This column should have been removed by column selection | |
127 if (0!=strcmp(current_name,"Time")) | |
128 { | |
129 num_columns++; | |
130 strcat(name_string,current_name); | |
131 time_pos++; | |
132 } | |
133 else | |
134 { | |
135 *time_ID=time_pos; | |
136 } | |
137 */ | |
138 | |
139 num_columns++; | |
140 strcat(name_string,current_name); | |
141 } | |
142 | |
143 if (DEBUG==1) | |
144 { | |
145 printf("time_ID is %d\n",*time_ID); | |
146 printf("name_string is %s\n",name_string); | |
147 } | |
148 | |
149 // # of rows | |
150 | |
151 while ((ch = fgetc(f_src))!= EOF ) | |
152 { | |
153 if (ch == '\n') | |
154 { | |
155 ++num_rows; | |
156 } | |
157 prev = ch; | |
158 } | |
159 if (prev!='\n') | |
160 ++num_rows; | |
161 | |
162 *file_Len=num_rows; | |
163 *num_dm=num_columns; | |
164 | |
165 //printf("original file size is %ld; number of dimensions is %ld\n", *file_Len, *num_dm); | |
166 } | |
167 | |
168 | |
169 | |
170 /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | |
171 /************************************* Read the source file into uncomp_data **************************************/ | |
172 void readsource(FILE *f_src, long file_Len, long num_dm, double **uncomp_data, int time_ID) | |
173 { | |
174 long time_pass=0; //to mark whether the time_ID has been passed | |
175 long index=0; | |
176 | |
177 long i=0; | |
178 long j=0; | |
179 long t=0; | |
180 | |
181 char src[LINE_LEN]; | |
182 char xc[LINE_LEN/10]; | |
183 | |
184 src[0]='\0'; | |
185 fgets(src,LINE_LEN, f_src); //skip the first line about parameter names | |
186 | |
187 while (!feof(f_src) && (index<file_Len)) //index = 0, 1, ..., file_Len-1 | |
188 { | |
189 src[0]='\0'; | |
190 fgets(src,LINE_LEN,f_src); | |
191 i=0; | |
192 time_pass=0; | |
193 | |
194 if (time_ID==-1) | |
195 { | |
196 for (t=0;t<num_dm;t++) //there is no time_ID | |
197 { | |
198 xc[0]='\0'; | |
199 j=0; | |
200 while ((src[i]!='\r') && (src[i]!='\n') && (src[i]!=' ') && (src[i]!='\t')) | |
201 { | |
202 xc[j]=src[i]; | |
203 i++; | |
204 j++; | |
205 } | |
206 | |
207 xc[j]='\0'; | |
208 i++; | |
209 | |
210 uncomp_data[index][t]=atof(xc); | |
211 } | |
212 } | |
213 else | |
214 { | |
215 for (t=0;t<=num_dm;t++) //the time column needs to be skipped, so there are num_dm+1 columns | |
216 { | |
217 xc[0]='\0'; | |
218 j=0; | |
219 while ((src[i]!='\r') && (src[i]!='\n') && (src[i]!=' ') && (src[i]!='\t')) | |
220 { | |
221 xc[j]=src[i]; | |
222 i++; | |
223 j++; | |
224 } | |
225 | |
226 xc[j]='\0'; | |
227 i++; | |
228 | |
229 if (t==time_ID) | |
230 { | |
231 time_pass=1; | |
232 continue; | |
233 } | |
234 | |
235 if (time_pass) | |
236 uncomp_data[index][t-1]=atof(xc); | |
237 else | |
238 uncomp_data[index][t]=atof(xc); | |
239 } | |
240 } | |
241 index++; | |
242 //fprintf(fout_ID,"%s",src); | |
243 } //end of while | |
244 | |
245 if (DEBUG == 1) | |
246 { | |
247 printf("the last line of the source data is:\n"); | |
248 for (j=0;j<num_dm;j++) | |
249 printf("%f ",uncomp_data[index-1][j]); | |
250 printf("\n"); | |
251 } | |
252 } | |
253 | |
254 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | |
255 void readcenter(FILE *f_src_ctr, long num_clust, long num_dm, double **cluster_center, long *IDmapping) | |
256 { | |
257 char src[LINE_LEN]; | |
258 char xc[LINE_LEN/10]; | |
259 | |
260 long i=0; | |
261 long j=0; | |
262 int m=0; | |
263 int t=0; | |
264 | |
265 for (i=0;i<num_clust;i++) | |
266 { | |
267 src[0]='\0'; | |
268 fgets(src,LINE_LEN, f_src_ctr); | |
269 m=0; | |
270 for (j=0;j<num_dm+1;j++) | |
271 { | |
272 xc[0]='\0'; | |
273 t=0; | |
274 while ((src[m]!='\r') && (src[m]!='\n') && (src[m]!=' ') && (src[m]!='\t')) | |
275 { | |
276 xc[t]=src[m]; | |
277 m++; | |
278 t++; | |
279 } | |
280 xc[t]='\0'; | |
281 m++; | |
282 if (j==0) | |
283 IDmapping[i]=atoi(xc); | |
284 else | |
285 cluster_center[i][j-1]=atof(xc); | |
286 //printf("cluster_center[%d][%d]=%f\n",i,j,cluster_center[i][j]); | |
287 } | |
288 } | |
289 } | |
290 | |
291 | |
292 /**************************************** Normalization ******************************************/ | |
293 void tran(double **orig_data, long clean_Len, long num_dm, long norm_used, double **matrix_to_cluster) | |
294 { | |
295 long i=0; | |
296 long j=0; | |
297 | |
298 double biggest=0; | |
299 double smallest=MAX_VALUE; | |
300 | |
301 double *aver; //average of each column | |
302 double *std; //standard deviation of each column | |
303 | |
304 aver=(double*)malloc(sizeof(double)*clean_Len); | |
305 memset(aver,0,sizeof(double)*clean_Len); | |
306 | |
307 std=(double*)malloc(sizeof(double)*clean_Len); | |
308 memset(std,0,sizeof(double)*clean_Len); | |
309 | |
310 if (norm_used==2) //z-score normalization | |
311 { | |
312 for (j=0;j<num_dm;j++) | |
313 { | |
314 aver[j]=0; | |
315 for (i=0;i<clean_Len;i++) | |
316 aver[j]=aver[j]+orig_data[i][j]; | |
317 aver[j]=aver[j]/(double)clean_Len; | |
318 | |
319 std[j]=0; | |
320 for (i=0;i<clean_Len;i++) | |
321 std[j]=std[j]+(orig_data[i][j]-aver[j])*(orig_data[i][j]-aver[j]); | |
322 std[j]=sqrt(std[j]/(double)clean_Len); | |
323 | |
324 for (i=0;i<clean_Len;i++) | |
325 matrix_to_cluster[i][j]=(orig_data[i][j]-aver[j])/std[j]; //z-score normalization | |
326 } | |
327 } | |
328 | |
329 if (norm_used==1) //0-1 min-max normalization | |
330 { | |
331 for (j=0;j<num_dm;j++) | |
332 { | |
333 biggest=0; | |
334 smallest=MAX_VALUE; | |
335 for (i=0;i<clean_Len;i++) | |
336 { | |
337 if (orig_data[i][j]>biggest) | |
338 biggest=orig_data[i][j]; | |
339 if (orig_data[i][j]<smallest) | |
340 smallest=orig_data[i][j]; | |
341 } | |
342 | |
343 for (i=0;i<clean_Len;i++) | |
344 { | |
345 if (biggest==smallest) | |
346 matrix_to_cluster[i][j]=biggest; | |
347 else | |
348 matrix_to_cluster[i][j]=(orig_data[i][j]-smallest)/(biggest-smallest); | |
349 } | |
350 } | |
351 } | |
352 | |
353 if (norm_used==0) //no normalization | |
354 { | |
355 for (i=0;i<clean_Len;i++) | |
356 for (j=0;j<num_dm;j++) | |
357 matrix_to_cluster[i][j]=orig_data[i][j]; | |
358 } | |
359 | |
360 | |
361 } | |
362 | |
363 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | |
364 void assign_event(double **Matrix, long k, long dist_used, double kmean_term, long file_Len, long num_dm, long *shortest_id, double **center, int random_init) | |
365 { | |
366 | |
367 long i=0; | |
368 | |
369 long j=0; | |
370 long t=0; | |
371 long random=0; | |
372 long random1=0; | |
373 long random2=0; | |
374 long times=0; | |
375 long times_allowed=0; | |
376 | |
377 long *num; //num[i]=t means the ith cluster has t points | |
378 | |
379 double vvv=1.0; // the biggest variation; | |
380 double distance=0.0; | |
381 double xv=0.0; | |
382 double variation=0.0; | |
383 double EPS=0.0; | |
384 double diff=0.0; | |
385 double mean_dx=0; | |
386 double mean_dy=0; | |
387 double sum_var=0; | |
388 double dx=0; | |
389 double dy=0; | |
390 double sd_x=0; | |
391 double sd_y=0; | |
392 | |
393 double *temp_center; | |
394 double *shortest_distance; | |
395 | |
396 double **sum; | |
397 | |
398 temp_center = (double *)malloc(sizeof(double)*num_dm); | |
399 memset(temp_center,0,sizeof(double)*num_dm); | |
400 | |
401 /* Choosing Centers */ | |
402 if (random_init) | |
403 { | |
404 for (i=0;i<k;i++) | |
405 { | |
406 random1=rand()*rand(); | |
407 //srand( (unsigned)time( NULL ) ); | |
408 random2=abs((random1%5)+1); | |
409 for (t=0;t<random2;t++) | |
410 random2=random2*rand()+rand(); | |
411 | |
412 random=abs(random2%file_Len); | |
413 //printf("random=%d\n",random); | |
414 for (j=0;j<num_dm;j++) | |
415 center[i][j]=Matrix[random][j]; | |
416 | |
417 } | |
418 } | |
419 | |
420 //printf("finish random selection\n"); | |
421 /* To compute the nearest center for every point */ | |
422 | |
423 shortest_distance = (double *)malloc(sizeof(double)*file_Len); | |
424 memset(shortest_distance,0,sizeof(double)*file_Len); | |
425 | |
426 num = (long *)malloc(sizeof(long)*k); | |
427 memset(num,0,sizeof(long)*k); | |
428 | |
429 sum = (double **)malloc(sizeof(double*)*k); | |
430 memset(sum,0,sizeof(double*)*k); | |
431 for (i=0;i<k;i++) | |
432 { | |
433 sum[i] = (double *)malloc(sizeof(double)*num_dm); | |
434 memset(sum[i],0,sizeof(double)*num_dm); | |
435 } | |
436 | |
437 for (i=0;i<k;i++) | |
438 for (j=0;j<num_dm;j++) | |
439 sum[i][j]=0.0; //sum[i][j] = k means the sum of the jth dimension of all points in the ith group is k | |
440 | |
441 //printf("before recursion\n"); | |
442 if (kmean_term>=1) | |
443 times_allowed = (long)kmean_term; | |
444 else | |
445 EPS = kmean_term; | |
446 | |
447 times=0; | |
448 | |
449 while (((vvv>EPS) && (kmean_term<1)) || ((times<times_allowed) && (kmean_term>=1))) | |
450 { | |
451 for (i=0;i<k;i++) | |
452 { | |
453 num[i]=0; | |
454 for (j=0;j<num_dm;j++) | |
455 sum[i][j]=0.0; | |
456 } | |
457 | |
458 for (i=0;i<file_Len;i++) //for each data point i, we compute the distance between Matrix[i] and center[j] | |
459 { | |
460 shortest_distance[i]=MAX_VALUE; | |
461 for (j=0;j<k;j++) //for each center j | |
462 { | |
463 | |
464 distance=0.0; | |
465 | |
466 if (dist_used==0) //Euclidean distance | |
467 { | |
468 for (t=0;t<num_dm;t++) //for each dimension | |
469 { | |
470 diff=center[j][t]-Matrix[i][t]; | |
471 if (diff<0) | |
472 diff=-diff; | |
473 | |
474 if (CUBE) | |
475 distance = distance+(diff*diff*diff); | |
476 else | |
477 distance = distance+(diff*diff); | |
478 } | |
479 } | |
480 else //pearson correlation | |
481 { | |
482 mean_dx=0.0; | |
483 mean_dy=0.0; | |
484 sum_var=0.0; | |
485 dx=0.0; | |
486 dy=0.0; | |
487 sd_x=0.0; | |
488 sd_y=0.0; | |
489 for (t=0;t<num_dm;t++) | |
490 { | |
491 mean_dx+=center[j][t]; | |
492 mean_dy+=Matrix[i][t]; | |
493 } | |
494 mean_dx=mean_dx/(double)num_dm; | |
495 mean_dy=mean_dy/(double)num_dm; | |
496 //printf("mean_dx=%f\n",mean_dx); | |
497 | |
498 for (t=0;t<num_dm;t++) | |
499 { | |
500 dx=center[j][t]-mean_dx; | |
501 dy=Matrix[i][t]-mean_dy; | |
502 sum_var+=dx*dy; | |
503 sd_x+=dx*dx; | |
504 sd_y+=dy*dy; | |
505 } | |
506 if (sqrt(sd_x*sd_y)==0) | |
507 distance = 1.0; | |
508 else | |
509 distance = 1.0 - (sum_var/(sqrt(sd_x*sd_y))); // distance ranges from 0 to 2; | |
510 //printf("distance=%f\n",distance); | |
511 } //pearson correlation ends | |
512 | |
513 | |
514 if (distance<shortest_distance[i]) | |
515 { | |
516 shortest_distance[i]=distance; | |
517 shortest_id[i]=j; | |
518 } | |
519 }//end for j | |
520 num[shortest_id[i]]=num[shortest_id[i]]+1; | |
521 for (t=0;t<num_dm;t++) | |
522 sum[shortest_id[i]][t]=sum[shortest_id[i]][t]+Matrix[i][t]; | |
523 }//end for i | |
524 /* recompute the centers */ | |
525 //compute_mean(group); | |
526 vvv=0.0; | |
527 for (j=0;j<k;j++) | |
528 { | |
529 memcpy(temp_center,center[j],sizeof(double)*num_dm); | |
530 variation=0.0; | |
531 if (num[j]!=0) | |
532 { | |
533 for (t=0;t<num_dm;t++) | |
534 { | |
535 center[j][t]=sum[j][t]/(double)num[j]; | |
536 xv=(temp_center[t]-center[j][t]); | |
537 variation=variation+xv*xv; | |
538 } | |
539 } | |
540 | |
541 if (variation>vvv) | |
542 vvv=variation; //vvv is the biggest variation among the k clusters; | |
543 } | |
544 //compute_variation; | |
545 times++; | |
546 } //end for while | |
547 | |
548 | |
549 | |
550 free(num); | |
551 for (i=0;i<k;i++) | |
552 free(sum[i]); | |
553 free(sum); | |
554 free(temp_center); | |
555 free(shortest_distance); | |
556 | |
557 } | |
558 | |
559 ////////////////////////////////////////////////////// | |
560 /*************************** Show *****************************/ | |
561 void show(double **Matrix, long *cluster_id, long file_Len, long k, long num_dm, char *name_string, long *IDmapping) | |
562 { | |
563 int situ1=0; | |
564 int situ2=0; | |
565 | |
566 long i=0; | |
567 long id=0; | |
568 long j=0; | |
569 long info_id=0; | |
570 long nearest_id=0; | |
571 long insert=0; | |
572 long temp=0; | |
573 long m=0; | |
574 long n=0; | |
575 long t=0; | |
576 | |
577 long *size_c; | |
578 | |
579 | |
580 | |
581 long **size_mybound_1; | |
582 long **size_mybound_2; | |
583 long **size_mybound_3; | |
584 long **size_mybound_0; | |
585 | |
586 double interval=0.0; | |
587 | |
588 double *big; | |
589 double *small; | |
590 | |
591 | |
592 double **center; | |
593 double **mybound; | |
594 | |
595 long **prof; //prof[i][j]=1 means population i is + at parameter j | |
596 | |
597 FILE *fpcnt_id; //proportion id | |
598 //FILE *fcent_id; //center_id, i.e., centers of clusters within the original data | |
599 FILE *fprof_id; //profile_id | |
600 | |
601 big=(double *)malloc(sizeof(double)*num_dm); | |
602 memset(big,0,sizeof(double)*num_dm); | |
603 | |
604 small=(double *)malloc(sizeof(double)*num_dm); | |
605 memset(small,0,sizeof(double)*num_dm); | |
606 | |
607 for (i=0;i<num_dm;i++) | |
608 { | |
609 big[i]=0.0; | |
610 small[i]=(double)MAX_VALUE; | |
611 } | |
612 | |
613 | |
614 size_c=(long *)malloc(sizeof(long)*k); | |
615 memset(size_c,0,sizeof(long)*k); | |
616 | |
617 center=(double**)malloc(sizeof(double*)*k); | |
618 memset(center,0,sizeof(double*)*k); | |
619 for (i=0;i<k;i++) | |
620 { | |
621 center[i]=(double*)malloc(sizeof(double)*num_dm); | |
622 memset(center[i],0,sizeof(double)*num_dm); | |
623 } | |
624 | |
625 mybound=(double**)malloc(sizeof(double*)*num_dm); | |
626 memset(mybound,0,sizeof(double*)*num_dm); | |
627 for (i=0;i<num_dm;i++) //there are 3 mybounds for 4 categories | |
628 { | |
629 mybound[i]=(double*)malloc(sizeof(double)*3); | |
630 memset(mybound[i],0,sizeof(double)*3); | |
631 } | |
632 | |
633 prof=(long **)malloc(sizeof(long*)*k); | |
634 memset(prof,0,sizeof(long*)*k); | |
635 for (i=0;i<k;i++) | |
636 { | |
637 prof[i]=(long *)malloc(sizeof(long)*num_dm); | |
638 memset(prof[i],0,sizeof(long)*num_dm); | |
639 } | |
640 | |
641 | |
642 for (i=0;i<file_Len;i++) | |
643 { | |
644 id=cluster_id[i]; | |
645 for (j=0;j<num_dm;j++) | |
646 { | |
647 center[id][j]=center[id][j]+Matrix[i][j]; | |
648 if (big[j]<Matrix[i][j]) | |
649 big[j]=Matrix[i][j]; | |
650 if (small[j]>Matrix[i][j]) | |
651 small[j]=Matrix[i][j]; | |
652 } | |
653 | |
654 size_c[id]++; | |
655 } | |
656 | |
657 for (i=0;i<k;i++) | |
658 for (j=0;j<num_dm;j++) | |
659 { | |
660 if (size_c[i]!=0) | |
661 center[i][j]=(center[i][j]/(double)(size_c[i])); | |
662 else | |
663 center[i][j]=0; | |
664 } | |
665 | |
666 for (j=0;j<num_dm;j++) | |
667 { | |
668 interval=((big[j]-small[j])/4.0); | |
669 //printf("interval[%d] is %f\n",j,interval); | |
670 for (i=0;i<3;i++) | |
671 mybound[j][i]=small[j]+((double)(i+1)*interval); | |
672 } | |
673 | |
674 | |
675 size_mybound_0=(long **)malloc(sizeof(long*)*k); | |
676 memset(size_mybound_0,0,sizeof(long*)*k); | |
677 | |
678 for (i=0;i<k;i++) | |
679 { | |
680 size_mybound_0[i]=(long*)malloc(sizeof(long)*num_dm); | |
681 memset(size_mybound_0[i],0,sizeof(long)*num_dm); | |
682 } | |
683 | |
684 size_mybound_1=(long **)malloc(sizeof(long*)*k); | |
685 memset(size_mybound_1,0,sizeof(long*)*k); | |
686 | |
687 for (i=0;i<k;i++) | |
688 { | |
689 size_mybound_1[i]=(long*)malloc(sizeof(long)*num_dm); | |
690 memset(size_mybound_1[i],0,sizeof(long)*num_dm); | |
691 } | |
692 | |
693 size_mybound_2=(long **)malloc(sizeof(long*)*k); | |
694 memset(size_mybound_2,0,sizeof(long*)*k); | |
695 | |
696 for (i=0;i<k;i++) | |
697 { | |
698 size_mybound_2[i]=(long*)malloc(sizeof(long)*num_dm); | |
699 memset(size_mybound_2[i],0,sizeof(long)*num_dm); | |
700 } | |
701 | |
702 size_mybound_3=(long **)malloc(sizeof(long*)*k); | |
703 memset(size_mybound_3,0,sizeof(long*)*k); | |
704 | |
705 for (i=0;i<k;i++) | |
706 { | |
707 size_mybound_3[i]=(long*)malloc(sizeof(long)*num_dm); | |
708 memset(size_mybound_3[i],0,sizeof(long)*num_dm); | |
709 } | |
710 | |
711 for (i=0;i<file_Len;i++) | |
712 for (j=0;j<num_dm;j++) | |
713 { | |
714 if (Matrix[i][j]<mybound[j][0])// && ((Matrix[i][j]-small[j])>0)) //the smallest values excluded | |
715 size_mybound_0[cluster_id[i]][j]++; | |
716 else | |
717 { | |
718 if (Matrix[i][j]<mybound[j][1]) | |
719 size_mybound_1[cluster_id[i]][j]++; | |
720 else | |
721 { | |
722 if (Matrix[i][j]<mybound[j][2]) | |
723 size_mybound_2[cluster_id[i]][j]++; | |
724 else | |
725 //if (Matrix[i][j]!=big[j]) //the biggest values excluded | |
726 size_mybound_3[cluster_id[i]][j]++; | |
727 } | |
728 | |
729 } | |
730 } | |
731 | |
732 fprof_id=fopen("profile.txt","w"); | |
733 fprintf(fprof_id,"Population_ID\t"); | |
734 fprintf(fprof_id,"%s\n",name_string); | |
735 | |
736 for (i=0;i<k;i++) | |
737 { | |
738 fprintf(fprof_id,"%ld\t",IDmapping[i]); | |
739 for (j=0;j<num_dm;j++) | |
740 { | |
741 | |
742 if (size_mybound_0[i][j]>size_mybound_1[i][j]) | |
743 situ1=0; | |
744 else | |
745 situ1=1; | |
746 if (size_mybound_2[i][j]>size_mybound_3[i][j]) | |
747 situ2=2; | |
748 else | |
749 situ2=3; | |
750 | |
751 if ((situ1==0) && (situ2==2)) | |
752 { | |
753 if (size_mybound_0[i][j]>size_mybound_2[i][j]) | |
754 prof[i][j]=0; | |
755 else | |
756 prof[i][j]=2; | |
757 } | |
758 if ((situ1==0) && (situ2==3)) | |
759 { | |
760 if (size_mybound_0[i][j]>size_mybound_3[i][j]) | |
761 prof[i][j]=0; | |
762 else | |
763 prof[i][j]=3; | |
764 } | |
765 if ((situ1==1) && (situ2==2)) | |
766 { | |
767 if (size_mybound_1[i][j]>size_mybound_2[i][j]) | |
768 prof[i][j]=1; | |
769 else | |
770 prof[i][j]=2; | |
771 } | |
772 if ((situ1==1) && (situ2==3)) | |
773 { | |
774 if (size_mybound_1[i][j]>size_mybound_3[i][j]) | |
775 prof[i][j]=1; | |
776 else | |
777 prof[i][j]=3; | |
778 } | |
779 | |
780 //begin to output profile | |
781 if (j==num_dm-1) | |
782 { | |
783 if (prof[i][j]==0) | |
784 fprintf(fprof_id,"1\n"); | |
785 if (prof[i][j]==1) | |
786 fprintf(fprof_id,"2\n"); | |
787 if (prof[i][j]==2) | |
788 fprintf(fprof_id,"3\n"); | |
789 if (prof[i][j]==3) | |
790 fprintf(fprof_id,"4\n"); | |
791 } | |
792 else | |
793 { | |
794 if (prof[i][j]==0) | |
795 fprintf(fprof_id,"1\t"); | |
796 if (prof[i][j]==1) | |
797 fprintf(fprof_id,"2\t"); | |
798 if (prof[i][j]==2) | |
799 fprintf(fprof_id,"3\t"); | |
800 if (prof[i][j]==3) | |
801 fprintf(fprof_id,"4\t"); | |
802 } | |
803 } | |
804 } | |
805 fclose(fprof_id); | |
806 | |
807 /////////////////////////////////////////////////////////// | |
808 | |
809 | |
810 fpcnt_id=fopen("percentage.txt","w"); | |
811 fprintf(fpcnt_id,"Population_ID\tPercentage\n"); | |
812 | |
813 for (t=0;t<k;t++) | |
814 { | |
815 fprintf(fpcnt_id,"%ld\t%.2f\n",IDmapping[t],(double)size_c[t]*100.0/(double)file_Len); | |
816 } | |
817 fclose(fpcnt_id); | |
818 | |
819 free(big); | |
820 free(small); | |
821 free(size_c); | |
822 | |
823 for (i=0;i<k;i++) | |
824 { | |
825 free(center[i]); | |
826 free(prof[i]); | |
827 free(size_mybound_0[i]); | |
828 free(size_mybound_1[i]); | |
829 free(size_mybound_2[i]); | |
830 free(size_mybound_3[i]); | |
831 } | |
832 free(center); | |
833 free(prof); | |
834 free(size_mybound_0); | |
835 free(size_mybound_1); | |
836 free(size_mybound_2); | |
837 free(size_mybound_3); | |
838 | |
839 for (i=0;i<num_dm;i++) | |
840 free(mybound[i]); | |
841 free(mybound); | |
842 | |
843 } | |
844 /******************************************************** Main Function **************************************************/ | |
845 int main (int argc, char **argv) | |
846 { | |
847 //inputs | |
848 FILE *f_src; //source file pointer | |
849 FILE *f_src_ctr; //source center file | |
850 //outputs | |
851 FILE *f_cid; //cluster-id file pointer | |
852 FILE *f_mfi; //added April 16, 2009 | |
853 | |
854 | |
855 char name_string[LINE_LEN]; //for name use | |
856 | |
857 int time_id=-1; | |
858 | |
859 long file_Len=0; | |
860 long num_clust=0; | |
861 long num_dm=0; | |
862 long norm_used=0; | |
863 long dist_used=0; | |
864 long i=0; | |
865 long j=0; | |
866 | |
867 long *cluster_id; | |
868 long *IDmapping; //this is to keep the original populationID of the center.txt | |
869 | |
870 double kmean_term=0; | |
871 | |
872 double **cluster_center; | |
873 double **orig_data; | |
874 double **normalized_data; | |
875 | |
876 /* | |
877 _strtime( tmpbuf ); | |
878 printf( "Starting time:\t\t\t\t%s\n", tmpbuf ); | |
879 _strdate( tmpbuf ); | |
880 printf( "Starting date:\t\t\t\t%s\n", tmpbuf ); | |
881 */ | |
882 | |
883 if (argc!=3) | |
884 { | |
885 printf("usage: cent_adjust input_center input_data_file\n"); | |
886 exit(0); | |
887 } | |
888 | |
889 | |
890 f_src_ctr=fopen(argv[1],"r"); | |
891 | |
892 //read source data | |
893 f_src=fopen(argv[2],"r"); | |
894 | |
895 getfileinfo(f_src, &file_Len, &num_dm, name_string, &time_id); //get the filelength, number of dimensions, and num/name of parameters | |
896 | |
897 rewind(f_src); //reset data file pointer | |
898 | |
899 orig_data = (double **)malloc(sizeof(double*)*file_Len); | |
900 memset(orig_data,0,sizeof(double*)*file_Len); | |
901 for (i=0;i<file_Len;i++) | |
902 { | |
903 orig_data[i]=(double *)malloc(sizeof(double)*num_dm); | |
904 memset(orig_data[i],0,sizeof(double)*num_dm); | |
905 } | |
906 | |
907 readsource(f_src, file_Len, num_dm, orig_data, time_id); //read the data; | |
908 | |
909 fclose(f_src); | |
910 ///////////////////////////////////////////////////////////////////////////// | |
911 getctrfileinfo(f_src_ctr, &num_clust); //get how many populations | |
912 norm_used=0; | |
913 dist_used=0; | |
914 kmean_term=2; //modified on Oct 16, 2009: changed kmean_term=1 to kmean_term=2 | |
915 | |
916 rewind(f_src_ctr); //reset center file pointer | |
917 | |
918 //read population center | |
919 cluster_center=(double **)malloc(sizeof(double*)*num_clust); | |
920 memset(cluster_center,0,sizeof(double*)*num_clust); | |
921 for (i=0;i<num_clust;i++) | |
922 { | |
923 cluster_center[i]=(double*)malloc(sizeof(double)*num_dm); | |
924 memset(cluster_center[i],0,sizeof(double)*num_dm); | |
925 } | |
926 for (i=0;i<num_clust;i++) | |
927 for (j=0;j<num_dm;j++) | |
928 cluster_center[i][j]=0; | |
929 | |
930 IDmapping=(long *)malloc(sizeof(long)*num_clust); | |
931 memset(IDmapping,0,sizeof(long)*num_clust); | |
932 | |
933 readcenter(f_src_ctr,num_clust,num_dm,cluster_center,IDmapping); //read population center | |
934 fclose(f_src_ctr); | |
935 | |
936 ///////////////////////////////////////////////////////////////////////////// | |
937 normalized_data=(double **)malloc(sizeof(double*)*file_Len); | |
938 memset(normalized_data,0,sizeof(double*)*file_Len); | |
939 for (i=0;i<file_Len;i++) | |
940 { | |
941 normalized_data[i]=(double *)malloc(sizeof(double)*num_dm); | |
942 memset(normalized_data[i],0,sizeof(double)*num_dm); | |
943 } | |
944 | |
945 tran(orig_data, file_Len, num_dm, norm_used, normalized_data); | |
946 /************************************************* Compute number of clusters *************************************************/ | |
947 | |
948 cluster_id=(long*)malloc(sizeof(long)*file_Len); | |
949 memset(cluster_id,0,sizeof(long)*file_Len); | |
950 | |
951 assign_event(normalized_data,num_clust,dist_used,kmean_term,file_Len,num_dm,cluster_id,cluster_center,0); | |
952 | |
953 | |
954 //show(orig_data,cluster_id,file_Len,num_clust,num_dm,show_data,num_disp,name_string); | |
955 show(orig_data, cluster_id, file_Len, num_clust, num_dm, name_string, IDmapping); | |
956 | |
957 f_cid=fopen("population_id.txt","w"); | |
958 | |
959 for (i=0;i<file_Len;i++) | |
960 fprintf(f_cid,"%ld\n",IDmapping[cluster_id[i]]); | |
961 | |
962 | |
963 fclose(f_cid); | |
964 | |
965 //added April 16, 2009 | |
966 f_mfi=fopen("MFI.txt","w"); | |
967 | |
968 for (i=0;i<num_clust;i++) | |
969 { | |
970 fprintf(f_mfi,"%ld\t",IDmapping[i]); | |
971 | |
972 for (j=0;j<num_dm;j++) | |
973 { | |
974 if (j==num_dm-1) | |
975 fprintf(f_mfi,"%.0f\n",cluster_center[i][j]); | |
976 else | |
977 fprintf(f_mfi,"%.0f\t",cluster_center[i][j]); | |
978 } | |
979 } | |
980 fclose(f_mfi); | |
981 | |
982 //ended April 16, 2009 | |
983 | |
984 for (i=0;i<num_clust;i++) | |
985 free(cluster_center[i]); | |
986 free(cluster_center); | |
987 | |
988 | |
989 /********************************************** Release memory ******************************************/ | |
990 | |
991 for (i=0;i<file_Len;i++) | |
992 { | |
993 free(orig_data[i]); | |
994 free(normalized_data[i]); | |
995 } | |
996 | |
997 free(orig_data); | |
998 free(normalized_data); | |
999 free(cluster_id); | |
1000 free(IDmapping); | |
1001 | |
1002 /* | |
1003 _strtime( tmpbuf ); | |
1004 printf( "Ending time:\t\t\t\t%s\n", tmpbuf ); | |
1005 _strdate( tmpbuf ); | |
1006 printf( "Ending date:\t\t\t\t%s\n", tmpbuf ); | |
1007 */ | |
1008 | |
1009 } |