cross_sample: src/flock1.c comparison

comparison src/flock1.c @ 1:7eab80f86779 draft

add FLOCK

author	immport-devteam
date	Mon, 27 Feb 2017 13:26:09 -0500
parents
children

comparison

equal deleted inserted replaced

-:8d951baf795f
+:7eab80f86779
+/*****************************************************************************
+	FLOCK: FLOw cytometry Clustering without K (Named by: Jamie A. Lee and Richard H. Scheuermann)
+	Author: (Max) Yu Qian, Ph.D.
+	Copyright: Scheuermann Lab, Dept. of Pathology, UTSW
+	Development: November 2005 ~ Forever
+	Algorithm Status: May 2007: Release 1.0
+	Usage: flock data_file
+		    Note: the input file format must be channel values and the delimiter between two values must be a tab.
+Changes made July 23, 2010: made errors to STDERR
+	Changes made Nov 4, 2010: added one more error (select_num_bin<min_grid) || (select_num_bin>max_grid) to STDERR;
+	                          MAX_GRID changed to 50 as larger than 50 seems not useful for any file we have got
+******************************************************************************/
+#include <time.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <assert.h>
+#define DEBUG 0
+#define LINE_LEN 1024
+#define FILE_NAME_LEN 128
+#define PARA_NAME_LEN 64
+#define MAX_VALUE 1000000000
+#define MIN_GRID 6
+#define MAX_GRID 50
+#define NORM_METHOD 2 //2 if z-score; 0 if no normalization; 1 if min-max
+#define KMEANS_TERM 100
+//#define MAX_NUM_POP 30
+int find_connected(int **G, int num_dense_grids, int ndim, int *grid_clusterID);
+/************* Read basic info of the source file ****************************/
+void getfileinfo(FILE *f_src, int *file_Len, int *num_dm, char *name_string, int *time_ID)
+{
+char src[LINE_LEN];
+char current_name[64];
+char prv;
+int num_rows=0;
+int num_columns=0;
+int ch='\n';
+int prev='\n';
+int time_pos=0;
+int i=0;
+int j=0;
+int sw=0;
+src[0]='\0';
+fgets(src, LINE_LEN, f_src);
+if ((src[0]=='F') && (src[1]=='C') && (src[2]=='S'))
+	{
+		fprintf(stderr,"the correct input format is a tab-delimited txt file, instead of FCS file.\n");
+		abort();
+	}
+name_string[0]='\0';
+current_name[0]='\0';
+prv='\n';
+// skip space and tab characters
+while ((src[i]==' ') || (src[i]=='\t'))
+i++;
+// repeat until the end of line is reached
+while ((src[i]!='\0') && (src[i]!='\n') && (src[i]!='\r'))
+{
+current_name[j]=src[i];
+if ((src[i]=='\t') && (prv!='\t')) //a complete word
+{
+current_name[j]='\0';
+if (0!=strcmp(current_name,"Time"))
+{
+num_columns++; //num_columns does not inlcude the column of Time
+time_pos++;
+if (sw) {
+strcat(name_string,"\t");
+}
+strcat(name_string,current_name);
+sw = 1;
+}
+else
+{
+*time_ID=time_pos;
+}
+current_name[0]='\0';
+j=0;
+}
+if ((src[i]=='\t') && (prv=='\t')) //a duplicate tab or space
+{
+current_name[0]='\0';
+j=0;
+}
+if (src[i]!='\t')
+j++;
+prv=src[i];
+i++;
+}
+if (prv!='\t') //the last one hasn't been retrieved
+{
+current_name[j]='\0';
+if (0!=strcmp(current_name,"Time"))
+{
+num_columns++;
+strcat(name_string,"\t");
+strcat(name_string,current_name);
+time_pos++;
+}
+else
+{
+*time_ID=time_pos;
+}
+}
+if (DEBUG==1)
+{
+printf("time_ID is %d\n",*time_ID);
+printf("name_string is %s\n",name_string);
+}
+//start computing # of rows
+while ((ch = fgetc(f_src))!= EOF )
+{
+if (ch == '\n')
+{
+++num_rows;
+}
+prev = ch;
+}
+if (prev!='\n')
+++num_rows;
+//added on July 23, 2010
+if (num_rows<50)
+{
+fprintf(stderr,"Number of events in the input file is too few and should not be processed!\n"); //modified on July 23, 2010
+	abort();
+}
+*file_Len=num_rows;
+*num_dm=num_columns;
+printf("original file size is %d; number of dimensions is %d\n", *file_Len, *num_dm);
+}
+/************************************* Read the source file into uncomp_data **************************************/
+void readsource(FILE *f_src, int file_Len, int num_dm, double **uncomp_data, int time_ID)
+{
+int time_pass=0; //to mark whether the time_ID has been passed
+int index=0;
+int i=0;
+int j=0;
+int t=0;
+char src[LINE_LEN];
+char xc[LINE_LEN/10];
+src[0]='\0';
+fgets(src,LINE_LEN, f_src); //skip the first line about parameter names
+while (!feof(f_src) && (index<file_Len)) //index = 0, 1, ..., file_Len-1
+{
+src[0]='\0';
+fgets(src,LINE_LEN,f_src);
+i=0;
+time_pass=0;
+if (time_ID==-1)
+{
+for (t=0;t<num_dm;t++) //there is no time_ID
+{
+xc[0]='\0';
+j=0;
+while ((src[i]!='\0') && (src[i]!='\n') && (src[i]!=' ') && (src[i]!='\t'))
+{
+xc[j]=src[i];
+i++;
+j++;
+}
+xc[j]='\0';
+i++;
+uncomp_data[index][t]=atof(xc);
+}
+}
+else
+{
+for (t=0;t<=num_dm;t++) //the time column needs to be skipped, so there are num_dm+1 columns
+{
+xc[0]='\0';
+j=0;
+while ((src[i]!='\0') && (src[i]!='\n') && (src[i]!=' ') && (src[i]!='\t'))
+{
+xc[j]=src[i];
+i++;
+j++;
+}
+xc[j]='\0';
+i++;
+if (t==time_ID)
+{
+time_pass=1;
+continue;
+}
+if (time_pass)
+uncomp_data[index][t-1]=atof(xc);
+else
+uncomp_data[index][t]=atof(xc);
+}
+}
+index++;
+//fprintf(fout_ID,"%s",src);
+} //end of while
+if (DEBUG == 1)
+{
+printf("the last line of the source data is:\n");
+for (j=0;j<num_dm;j++)
+printf("%f ",uncomp_data[index-1][j]);
+printf("\n");
+}
+}
+/**************************************** Normalization ******************************************/
+void tran(double **orig_data, int file_Len, int num_dm, int norm_used, double **matrix_to_cluster)
+{
+int i=0;
+int j=0;
+double biggest=0;
+double smallest=MAX_VALUE;
+double *aver; //average of each column
+double *std; //standard deviation of each column
+aver=(double*)malloc(sizeof(double)*file_Len);
+memset(aver,0,sizeof(double)*file_Len);
+std=(double*)malloc(sizeof(double)*file_Len);
+memset(std,0,sizeof(double)*file_Len);
+if (norm_used==2) //z-score normalization
+{
+for (j=0;j<num_dm;j++)
+{
+aver[j]=0;
+for (i=0;i<file_Len;i++)
+aver[j]=aver[j]+orig_data[i][j];
+aver[j]=aver[j]/(double)file_Len;
+std[j]=0;
+for (i=0;i<file_Len;i++)
+std[j]=std[j]+(orig_data[i][j]-aver[j])*(orig_data[i][j]-aver[j]);
+std[j]=sqrt(std[j]/(double)file_Len);
+for (i=0;i<file_Len;i++)
+matrix_to_cluster[i][j]=(orig_data[i][j]-aver[j])/std[j];  //z-score normalization
+}
+}
+if (norm_used==1) //0-1 min-max normalization
+{
+for (j=0;j<num_dm;j++)
+{
+biggest=0;
+smallest=MAX_VALUE;
+for (i=0;i<file_Len;i++)
+{
+if (orig_data[i][j]>biggest)
+biggest=orig_data[i][j];
+if (orig_data[i][j]<smallest)
+smallest=orig_data[i][j];
+}
+for (i=0;i<file_Len;i++)
+{
+if (biggest==smallest)
+matrix_to_cluster[i][j]=biggest;
+else
+matrix_to_cluster[i][j]=(orig_data[i][j]-smallest)/(biggest-smallest);
+}
+}
+}
+if (norm_used==0) //no normalization
+{
+for (i=0;i<file_Len;i++)
+for (j=0;j<num_dm;j++)
+matrix_to_cluster[i][j]=orig_data[i][j];
+}
+}
+/********************************************** RadixSort *******************************************/
+/* Perform a radix sort using each dimension from the original data as a radix.
+* Outputs:
+* sorted_seq   -- a permutation vector mapping the ordered list onto the original data.
+*                  (sorted_seq[i] -> index in the original data of the ith element of the ordered list)
+* grid_ID      -- mapping between the original data and the "grids" (see below) found as a byproduct
+*                  of the sorting procedure.
+* num_nonempty -- the number of grids that occur in the data (= the number of distinct values assumed
+*                  by grid_ID)
+*/
+void radixsort_flock(int **position,int file_Len,int num_dm,int num_bin,int *sorted_seq,int *num_nonempty,int *grid_ID)
+{
+int i=0;
+int length=0;
+int start=0;
+int prev_ID=0;
+int curr_ID=0;
+int j=0;
+int t=0;
+int p=0;
+int loc=0;
+int temp=0;
+int equal=0;
+int *count; //count[i]=j means there are j numbers having value i at the processing digit
+int *index; //index[i]=j means the starting position of grid i is j
+int *cp; //current position
+int *mark; //mark[i]=0 means it is not an ending point of a part, 1 means it is (a "part" is a group of items with identical bins for all dimensions)
+int *seq; //temporary sequence
+count=(int*)malloc(sizeof(int)*num_bin);
+memset(count,0,sizeof(int)*num_bin);
+cp=(int*)malloc(sizeof(int)*num_bin);
+memset(cp,0,sizeof(int)*num_bin);
+index=(int*)malloc(sizeof(int)*num_bin); // initialized below
+seq=(int*)malloc(sizeof(int)*file_Len);
+memset(seq,0,sizeof(int)*file_Len);
+mark=(int*)malloc(sizeof(int)*file_Len);
+memset(mark,0,sizeof(int)*file_Len);
+for (i=0;i<file_Len;i++)
+{
+sorted_seq[i]=i;
+mark[i]=0;
+seq[i]=0;
+}
+for (i=0;i<num_bin;i++)
+{
+index[i]=0;
+cp[i]=0;
+count[i]=0;
+}
+for (j=0;j<num_dm;j++)
+{
+if (j==0) //compute the initial values of mark
+{
+for (i=0;i<file_Len;i++)
+count[position[i][j]]++; // initialize the count to the number of items in each bin of the 0th dimension
+index[0] = 0;
+for (i=0;i<num_bin-1;i++)
+{
+index[i+1]=index[i]+count[i];  //index[k]=x means k segment starts at x (in the ordered list)
+if ((index[i+1]>0) && (index[i+1]<=file_Len))
+{
+mark[index[i+1]-1]=1; // Mark the end of the segment in the ordered list
+}
+else
+{
+printf("out of myboundary for mark at index[i+1]-1.\n");
+}
+}
+mark[file_Len-1]=1;
+for (i=0;i<file_Len;i++)
+{
+/* Build a permutation vector for the partially ordered data.  Store the PV in sorted_seq */
+loc=position[i][j];
+temp=index[loc]+cp[loc]; //cp[i]=j means the offset from the starting position of grid i is j
+sorted_seq[temp]=i;  //sorted_seq[i]=temp is also another way to sort
+cp[loc]++;
+}
+}
+else
+{
+//reset count, index, loc, temp, cp, start, and length
+length=0;
+loc=0;
+temp=0;
+start=0;
+for (p=0;p<num_bin;p++)
+{
+cp[p]=0;
+count[p]=0;
+index[p]=0;
+}
+for (i=0;i<file_Len;i++)
+{
+int iperm = sorted_seq[i]; // iperm allows us to traverse the data in sorted order.
+if (mark[i]!=1)
+{
+/* Count the number of items in each bin of
+dimension j, BUT we are going to reset at the end
+of each "part".  Thus, the total effect is to do
+a sort by bin on the jth dimension for each group
+of data that has been identical for the
+dimensions processed up to this point.  This is
+the standard radix sort procedure, but doing it
+this way saves us having to allocate buckets to
+hold the data in each group of "identical-so-far"
+elements. */
+count[position[iperm][j]]++;  //count[position[i][j]]++;
+length++;                     // This is the total length of the part, irrespective of the value of the jth component
+// (actually, less one, since we don't increment for the final element below)
+}
+if (mark[i]==1)
+{
+//length++;
+count[position[iperm][j]]++;//count[position[i][j]]++;  //the current point must be counted in
+start=i-length; //this part starts from start to i: [start,i]
+/* Now we sort on the jth radix, just like we did for the 0th above, but we restrict it to just the current part.
+This would be a lot more clear if we broke this bit of code out into a separate function and processed recursively,
+plus we could multi-thread over the parts.  (Hmmm...)
+*/
+index[0] = start; // Let index give the offset within the whole ordered list.
+for (t=0;t<num_bin-1;t++)
+{
+index[t+1]=index[t]+count[t];
+if ((index[t+1]<=file_Len) && (index[t+1]>0))
+{
+mark[index[t+1]-1]=1; // update the part boundaries to include the differences in the current radix.
+}
+}
+mark[i]=1;
+/* Update the permutation vector for the current part (i.e., from start to i).  By the time we finish the loop over i
+the PV will be completely updated for the partial ordering up to the current radix. */
+for (t=start;t<=i;t++)
+{
+loc=position[sorted_seq[t]][j];//loc=position[t][j];
+temp=index[loc]+cp[loc];
+if ((temp<file_Len) && (temp>=0))
+{
+// seq is a temporary because we have to maintain the old PV until we have finished this step.
+seq[temp]=sorted_seq[t];  //sorted_seq[i]=temp is also another way to sort
+cp[loc]++;
+}
+else
+{
+printf("out of myboundary for seq at temp.\n");
+}
+}
+for (t=start;t<=i;t++)
+{
+// copy the temporary back into sorted_seq.  sorted_seq is now updated for radix j up through
+// entry i in the ordered list.
+if ((t>=0) && (t<file_Len))
+sorted_seq[t]=seq[t];
+else
+printf("out of myboundary for seq and sorted_seq at t.\n");
+}
+//reset count, index, seq, length, and cp
+length=0;
+loc=0;
+temp=0;
+for (p=0;p<num_bin;p++)
+{
+cp[p]=0;
+count[p]=0;
+index[p]=0;
+}
+}
+}//end for i
+}//end else
+}//end for j
+/* sorted_seq[] now contains the ordered list for all radices.  mark[] gives the boundaries between groups of elements that are
+identical over all radices (= dimensions in the original data) (although it appears we aren't going to make use of this fact) */
+for (i=0;i<file_Len;i++)
+grid_ID[i]=0; //in case the initial value hasn't been assigned
+*num_nonempty=1; //starting from 1!
+/* assign the "grid" identifiers for all of the data.  A grid will be what we were calling a "part" above.  We will number them
+serially and tag the *unordered* data with the grid IDs.  We will also count the number of populated grids (in general there will
+be many possible combinations of bin values that simply never occur) */
+for (i=1;i<file_Len;i++)
+{
+equal=1;
+prev_ID=sorted_seq[i-1];
+curr_ID=sorted_seq[i];
+for (j=0;j<num_dm;j++)
+{
+if (position[prev_ID][j]!=position[curr_ID][j])
+{
+equal=0;  //not equal
+break;
+}
+}
+if (equal)
+{
+grid_ID[curr_ID]=grid_ID[prev_ID];
+}
+else
+{
+*num_nonempty=*num_nonempty+1;
+grid_ID[curr_ID]=grid_ID[prev_ID]+1;
+}
+//all_grid_vol[grid_ID[curr_ID]]++;
+}
+//free memory
+free(count);
+free(index);
+free(cp);
+free(seq);
+free(mark);
+}
+/********************************************** Compute Position of Events ************************************************/
+void compute_position(double **data_in, int file_Len, int num_dm, int num_bin, int **position, double *interval)
+{
+/* What we are really doing here is binning the data, with the bins
+spanning the range of the data and number of bins = num_bin */
+int i=0;
+int j=0;
+double *small; //small[j] is the smallest value within dimension j
+double *big; //big[j] is the biggest value within dimension j
+small=(double*)malloc(sizeof(double)*num_dm);
+memset(small,0,sizeof(double)*num_dm);
+big=(double*)malloc(sizeof(double)*num_dm);
+memset(big,0,sizeof(double)*num_dm);
+for (j=0;j<num_dm;j++)
+{
+big[j]=MAX_VALUE*(-1);
+small[j]=MAX_VALUE;
+for (i=0;i<file_Len;i++)
+{
+if (data_in[i][j]>big[j])
+big[j]=data_in[i][j];
+if (data_in[i][j]<small[j])
+small[j]=data_in[i][j];
+}
+interval[j]=(big[j]-small[j])/(double)num_bin;	//interval is computed using the biggest value and smallest value instead of the channel limit
+/* XXX: I'm pretty sure the denominator of the fraction above should be num_bin-1. */
+	  /* I don't think so: num_bin is the number of bins */
+}
+for (j=0;j<num_dm;j++)
+{
+for (i=0;i<file_Len;i++)
+{
+if (data_in[i][j]>=big[j])
+position[i][j]=num_bin-1;
+else
+{
+position[i][j]=(int)((data_in[i][j]-small[j])/interval[j]); //position[i][j]=t means point i is at the t grid of dimensional j
+if ((position[i][j]>=num_bin) || (position[i][j]<0))
+{
+//printf("position mis-computed in density analysis!\n");
+//exit(0);
+			   fprintf(stderr,"Incorrect input file format or input parameters (number of bins overflows)!\n"); //modified on July 23, 2010
+				abort();
+}
+}
+}
+}
+free(small);
+free(big);
+}
+/********************************************** select_bin to select the number of bins **********************************/
+//num_bin=select_bin(normalized_data, file_Len, num_dm, MIN_GRID, MAX_GRID, position, sorted_seq, all_grid_ID, &num_nonempty);
+/* Determine the number of bins to use in each dimension.  Additionally sort the data elements according to the binned
+* values, and partition the data into "grids" with identical (binned) values.  We try progressively more bins until we
+* maximize a merit function, then return the results obtained using the optimal number of bins.
+*
+* Outputs:
+* position     -- binned data values
+* sorted_seq   -- permutation vector mapping the ordered list to the original data
+* all_grid_ID  -- grid to which each data element was assigned.
+* num_nonempty -- number of distinct values assumed by all_grid_ID
+* interval     -- bin width for each data dimension
+* return value -- the number of bins selected.
+*/
+int select_bin(double **normalized_data, int file_Len, int num_dm, int min_grid, int max_grid, int **position, int *sorted_seq,
+int *all_grid_ID, int *num_nonempty, double *interval, int user_num_bin)
+{
+int num_bin=0;
+int select_num_bin=0;
+int m=0;
+int n=0;
+int i=0;
+int bin_scope=0;
+int temp_num_nonempty=0;
+int *temp_grid_ID;
+int *temp_sorted_seq;
+int **temp_position;
+//sorted_seq[i]=j means the event j ranks i
+double temp_index=0;
+double *bin_index;
+double *temp_interval;
+temp_grid_ID=(int *)malloc(sizeof(int)*file_Len);
+memset(temp_grid_ID,0,sizeof(int)*file_Len);
+temp_sorted_seq=(int *)malloc(sizeof(int)*file_Len);
+memset(temp_sorted_seq,0,sizeof(int)*file_Len);
+temp_position=(int **)malloc(sizeof(int*)*file_Len);
+memset(temp_position,0,sizeof(int*)*file_Len);
+for (m=0;m<file_Len;m++)
+{
+temp_position[m]=(int*)malloc(sizeof(int)*num_dm);
+memset(temp_position[m],0,sizeof(int)*num_dm);
+}
+temp_interval=(double*)malloc(sizeof(double)*num_dm);
+memset(temp_interval,0,sizeof(double)*num_dm);
+bin_scope=max_grid-min_grid+1;
+bin_index=(double *)malloc(sizeof(double)*bin_scope);
+memset(bin_index,0,sizeof(double)*bin_scope);
+i=0;
+for (num_bin=min_grid;num_bin<=max_grid;num_bin++)
+{
+/* compute_position bins the data into num_bin bins.  Each
+dimension is binned independently.
+Outputs:
+temp_position[i][j] -- bin for the jth component of data element i.
+temp_interval[j]    -- bin-width for the jth component
+*/
+compute_position(normalized_data, file_Len, num_dm, num_bin, temp_position, temp_interval);
+radixsort_flock(temp_position,file_Len,num_dm,num_bin,temp_sorted_seq,&temp_num_nonempty,temp_grid_ID);
+/* our figure of merit is the number of non-empty grids divided by number of bins per dimension.
+We declare victory when we have found a local maximum */
+bin_index[i]=((double)temp_num_nonempty)/((double)num_bin);
+	  if ((double)(temp_num_nonempty)>=(double)(file_Len)*0.95)
+		  break;
+if ((bin_index[i]<temp_index) && (user_num_bin==0))
+break;
+	  if ((user_num_bin==num_bin-1) && (user_num_bin!=0))
+		 break;
+/* Since we have accepted this trial bin, copy all the temporary results into
+the output buffers */
+memcpy(all_grid_ID,temp_grid_ID,sizeof(int)*file_Len);
+memcpy(sorted_seq,temp_sorted_seq,sizeof(int)*file_Len);
+memcpy(interval,temp_interval,sizeof(double)*num_dm);
+for (m=0;m<file_Len;m++)
+for (n=0;n<num_dm;n++)
+position[m][n]=temp_position[m][n];
+temp_index=bin_index[i];
+select_num_bin=num_bin;
+num_nonempty[0]=temp_num_nonempty;
+i++;
+}
+if ((select_num_bin<min_grid) || (select_num_bin>max_grid))
+{
+fprintf(stderr,"Number of events collected is too few in terms of number of markers used. The file should not be processed!\n"); //modified on Nov 4, 2010
+	exit(0);
+}
+if (temp_index==0)
+{
+	 fprintf(stderr,"Too many dimensions with too few events in the input file, or a too large number of bins used.\n"); //modified on July 23, 2010
+	 abort();
+}
+free(temp_grid_ID);
+free(temp_sorted_seq);
+free(bin_index);
+free(temp_interval);
+for (m=0;m<file_Len;m++)
+free(temp_position[m]);
+free(temp_position);
+return select_num_bin;
+}
+/************************************* Select dense grids **********************************/
+// compute num_dense_grids, num_dense_events, dense_grid_reverse, and all_grid_vol
+// den_cutoff=select_dense(file_Len, all_grid_ID, num_nonempty, &num_dense_grids, &num_dense_events, dense_grid_reverse);
+/*
+* Prune away grids that are insufficiently "dense" (i.e., contain too few data items)
+*
+* Outputs:
+* num_dense_grids    -- number of dense grids
+* num_dense_events   -- total number of data items in all dense grids
+* dense_grid_reverse -- mapping from list of all grids to list of dense grids.
+* return value       -- density cutoff for separating dense from non-dense grids.
+*/
+int select_dense(int file_Len, int *all_grid_ID, int num_nonempty, int *num_dense_grids, int *num_dense_events, int *dense_grid_reverse, int den_t_event)
+{
+int i=0;
+int vol_ID=0;
+int biggest_size=0; //biggest grid_size, to define grid_size_index
+int biggest_index=0;
+//int actual_threshold=0; //the actual threshold on grid_size, e.g., 1 implies 1 event in the grid
+//int num_remain=0; //number of remaining grids with different density thresholds
+int temp_num_dense_grids=0;
+int temp_num_dense_events=0;
+int *grid_size_index;
+int *all_grid_vol;
+int *grid_density_index;
+//double den_average=0;
+// double avr_index=0;
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Compute all_grid_vol
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+all_grid_vol=(int *)malloc(sizeof(int)*num_nonempty);
+memset(all_grid_vol,0,sizeof(int)*num_nonempty);
+/* Grid "volume" is just the number of data contained in the grid. */
+for (i=0;i<file_Len;i++)
+{
+vol_ID=all_grid_ID[i]; //vol_ID=all_grid_ID[sorted_seq[i]];
+all_grid_vol[vol_ID]++;  //all_grid_vol[i]=j means grid i has j points
+}
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Compute grid_size_index (histogram of grid sizes)
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+for (i=0;i<num_nonempty;i++)
+{
+if (biggest_size<all_grid_vol[i])
+{
+biggest_size=all_grid_vol[i];
+}
+}
+//added on July 23, 2010
+if (biggest_size<3)
+{
+	 fprintf(stderr,"Too many dimensions with too few events in the input file, or a too large number of bins used.\n"); //modified on July 23, 2010
+	 abort();
+}
+grid_size_index=(int*)malloc(sizeof(int)*biggest_size);
+memset(grid_size_index,0,sizeof(int)*biggest_size);
+for (i=0;i<num_nonempty;i++)
+{
+grid_size_index[all_grid_vol[i]-1]++; //grid_size_index[0]=5 means there are 5 grids having size 1
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Compute den_cutoff
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+grid_density_index=(int *)malloc(sizeof(int)*(biggest_size-2));//from event 2 to biggest_size-1, i.e., from 0 to biggest_size-3
+memset(grid_density_index,0,sizeof(int)*(biggest_size-2));
+if (den_t_event==0)  //user doesn't define the density threshold
+{
+	  biggest_index=0;
+	  for (i=2;i<biggest_size-1;i++) //the grid with 1 event will be skipped, i.e., grid_density_index[0] won't be defined
+	  {
+		  grid_density_index[i-1]=(grid_size_index[i-1]+grid_size_index[i+1]-2*grid_size_index[i]);
+		  if (biggest_index<grid_density_index[i-1])
+		  {
+			biggest_index=grid_density_index[i-1];
+			den_t_event=i+1;
+		  }
+	  }
+}
+if (den_t_event==0) //if something is wrong
+	  den_t_event=3;
+for (i=0;i<num_nonempty;i++)
+	  if (all_grid_vol[i]>=den_t_event)
+		temp_num_dense_grids++;
+if (temp_num_dense_grids<=1)
+{
+	  //modified on July 23, 2010
+	  //printf("a too high density threshold is set! Please decrease your density threshold.\n");
+	  //exit(0);
+	  fprintf(stderr,"a too high density threshold is set! Please decrease your density threshold.\n"); //modified on July 23, 2010
+	  abort();
+}
+if (temp_num_dense_grids>=100000)
+{
+	  //modified on July 23, 2010
+	  //printf("a too low density threshold is set! Please increase your density threshold.\n");
+	  //exit(0);
+	  fprintf(stderr,"a too low density threshold is set! Please increase your density threshold.\n"); //modified on July 23, 2010
+	  abort();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Compute dense_grid_reverse
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+temp_num_dense_grids=0;
+temp_num_dense_events=0;
+for (i=0;i<num_nonempty;i++)
+{
+dense_grid_reverse[i]=-1;
+if (all_grid_vol[i]>=den_t_event)
+{
+dense_grid_reverse[i]=temp_num_dense_grids;  //dense_grid_reverse provides a mapping from all nonempty grids to dense grids.
+temp_num_dense_grids++;
+temp_num_dense_events+=all_grid_vol[i];
+}
+}
+num_dense_grids[0]=temp_num_dense_grids;
+num_dense_events[0]=temp_num_dense_events;
+free(grid_size_index);
+free(all_grid_vol);
+return den_t_event;
+}
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Compute densegridID_To_gridevent and eventID_To_denseventID
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//	grid_To_event(file_Len, dense_grid_reverse, all_grid_ID, eventID_To_denseventID, densegridID_To_gridevent);
+/*
+* Filter the data so that only the data belonging to dense grids is left
+*
+* Output:
+* eventID_To_denseeventID   -- mapping from original event ID to ID in list containing only events contained in dense grids.
+* densegridID_To_gridevent  -- mapping from dense grids to prototype members of the grids.
+*
+*/
+void grid_To_event(int file_Len, int *dense_grid_reverse, int *all_grid_ID, int *eventID_To_denseventID, int *densegridID_To_gridevent)
+{
+int i=0;
+int dense_grid_ID=0;
+int dense_event_ID=0;
+for (i=0;i<file_Len;i++)
+{
+dense_grid_ID=dense_grid_reverse[all_grid_ID[i]];
+eventID_To_denseventID[i]=-1;
+if (dense_grid_ID!=-1) //for point (i) belonging to dense grids
+{
+eventID_To_denseventID[i]=dense_event_ID;
+dense_event_ID++;
+if (densegridID_To_gridevent[dense_grid_ID]==-1) //for point i that hasn't been selected
+densegridID_To_gridevent[dense_grid_ID]=i; //densegridID_To_gridevent maps dense_grid_ID to its representative point
+}
+}
+}
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Compute dense_grid_seq
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//	generate_grid_seq(file_Len, num_dm, sorted_seq, num_dense_grids, densegridID_To_gridevent, position, dense_grid_rank, dense_grid_seq);
+/* Construct a table of binned data values for each dense grid.
+*
+* Output:
+*
+* dense_grid_seq  -- table of binned data values for each dense grid (recall that all members of a grid have identical binned data values)
+*/
+void generate_grid_seq(int num_dm, int num_dense_grids, int *densegridID_To_gridevent, int **position, int **dense_grid_seq)
+{
+int i=0;
+int j=0;
+int ReventID=0; //representative event ID of the dense grid
+for (i=0;i<num_dense_grids;i++)
+{
+ReventID = densegridID_To_gridevent[i];
+for (j=0;j<num_dm;j++)
+dense_grid_seq[i][j]=position[ReventID][j];
+}
+}
+//compare two vectors
+int compare_value(int num_dm, int *search_value, int *seq_value)
+{
+int i=0;
+for (i=0;i<num_dm;i++)
+{
+if (search_value[i]<seq_value[i])
+return 1;
+if (search_value[i]>seq_value[i])
+return -1;
+if (search_value[i]==seq_value[i])
+continue;
+}
+return 0;
+}
+//binary search the dense_grid_seq to return the dense grid ID if it exists
+int binary_search(int num_dense_grids, int num_dm, int *search_value, int **dense_grid_seq)
+{
+int low=0;
+int high=0;
+int mid=0;
+int comp_result=0;
+int match=0;
+//int found=0;
+low = 0;
+high = num_dense_grids-1;
+while (low <= high)
+{
+mid = (int)((low + high)/2);
+comp_result=compare_value(num_dm, search_value,dense_grid_seq[mid]);
+switch(comp_result)
+{
+case 1:
+high=mid-1;
+break;
+case -1:
+low=mid+1;
+break;
+case 0:
+match=1;
+break;
+}
+if (match==1)
+break;
+}
+if (match==1)
+{
+return mid;
+}
+else
+return -1;
+}
+/********************************************** Computing Centers Using IDs **********************************************/
+void ID2Center(double **data_in, int file_Len, int num_dm, int *eventID_To_denseventID, int num_clust, int *cluster_ID, double **population_center)
+{
+int i=0;
+int j=0;
+int ID=0;
+int eventID=0;
+int *size_c;
+size_c=(int *)malloc(sizeof(int)*num_clust);
+memset(size_c,0,sizeof(int)*num_clust);
+for (i=0;i<num_clust;i++)
+for (j=0;j<num_dm;j++)
+population_center[i][j]=0;
+for (i=0;i<file_Len;i++)
+{
+eventID=eventID_To_denseventID[i];
+if (eventID!=-1) //only events in dense grids count
+{
+ID=cluster_ID[eventID];
+if (ID==-1)
+{
+//modified on July 23, 2010
+			  //printf("ID==-1! in ID2Center\n");
+//exit(0);
+			  fprintf(stderr,"Incorrect file format or input parameters (no dense regions found!)\n"); //modified on July 23, 2010
+			  abort();
+}
+for (j=0;j<num_dm;j++)
+population_center[ID][j]=population_center[ID][j]+data_in[i][j];
+size_c[ID]++;
+}
+}
+for (i=0;i<num_clust;i++)
+{
+for (j=0;j<num_dm;j++)
+if (size_c[i]!=0)
+population_center[i][j]=(population_center[i][j]/(double)(size_c[i]));
+else
+population_center[i][j]=0;
+}
+free(size_c);
+}
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//  Compute Population Center with all events
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+void ID2Center_all(double **data_in, int file_Len, int num_dm, int num_clust, int *cluster_ID, double **population_center)
+{
+int i=0;
+int j=0;
+int ID=0;
+int *size_c;
+size_c=(int *)malloc(sizeof(int)*num_clust);
+memset(size_c,0,sizeof(int)*num_clust);
+for (i=0;i<num_clust;i++)
+for (j=0;j<num_dm;j++)
+population_center[i][j]=0;
+for (i=0;i<file_Len;i++)
+{
+ID=cluster_ID[i];
+if (ID==-1)
+{
+//commented on July 23, 2010
+			//printf("ID==-1! in ID2Center_all\n");
+//exit(0);
+			fprintf(stderr,"Incorrect file format or input parameters (resulting in incorrect population IDs)\n"); //modified on July 23, 2010
+			abort();
+}
+for (j=0;j<num_dm;j++)
+population_center[ID][j]=population_center[ID][j]+data_in[i][j];
+size_c[ID]++;
+}
+for (i=0;i<num_clust;i++)
+{
+for (j=0;j<num_dm;j++)
+if (size_c[i]!=0)
+population_center[i][j]=(population_center[i][j]/(double)(size_c[i]));
+else
+population_center[i][j]=0;
+}
+free(size_c);
+}
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Merge neighboring grids to clusters
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+int merge_grids(double **normalized_data, double *interval, int file_Len, int num_dm, int num_bin, int **position, int num_dense_grids,
+int *dense_grid_reverse, int **dense_grid_seq, int *eventID_To_denseventID, int *densegridID_To_gridevent, int *all_grid_ID,
+int *cluster_ID, int *grid_ID, int *grid_clusterID)
+{
+int i=0;
+int j=0;
+int t=0;
+int p=0;
+int num_clust=0;
+int ReventID=0;
+int denseID=0;
+int neighbor_ID=0;
+//int temp_grid=0;
+int *grid_value;
+int *search_value;
+int **graph_of_grid; //the graph constructed for dense grids: each dense grid is a graph node
+double real_dist=0;
+double **norm_grid_center;
+norm_grid_center=(double **)malloc(sizeof(double*)*num_dense_grids);
+memset(norm_grid_center,0,sizeof(double*)*num_dense_grids);
+for (i=0;i<num_dense_grids;i++)
+{
+norm_grid_center[i]=(double *)malloc(sizeof(double)*num_dm);
+memset(norm_grid_center[i],0,sizeof(double)*num_dm);
+}
+for (i=0;i<file_Len;i++)
+{
+denseID=eventID_To_denseventID[i];
+if (denseID!=-1) //only dense events can enter
+{
+grid_ID[denseID]=dense_grid_reverse[all_grid_ID[i]];
+if (grid_ID[denseID]==-1)
+{
+fprintf(stderr,"Incorrect input file format or input parameters (no dense region found)!\n"); //modified on July 23, 2010
+abort();
+}
+}
+}
+/* Find centroid (in the normalized data) for each dense grid */
+ID2Center(normalized_data,file_Len,num_dm,eventID_To_denseventID,num_dense_grids,grid_ID,norm_grid_center);
+//printf("pass the grid ID2 center\n"); //commmented on July 23, 2010
+graph_of_grid=(int **)malloc(sizeof(int*)*num_dense_grids);
+memset(graph_of_grid,0,sizeof(int*)*num_dense_grids);
+for (i=0;i<num_dense_grids;i++)
+{
+graph_of_grid[i]=(int *)malloc(sizeof(int)*num_dm);
+memset(graph_of_grid[i],0,sizeof(int)*num_dm);
+for (j=0;j<num_dm;j++)
+graph_of_grid[i][j]=-1;
+}
+grid_value=(int *)malloc(sizeof(int)*num_dm);
+memset(grid_value,0,sizeof(int)*num_dm);
+search_value=(int *)malloc(sizeof(int)*num_dm);
+memset(search_value,0,sizeof(int)*num_dm);
+for (i=0;i<num_dense_grids;i++)
+{
+ReventID=densegridID_To_gridevent[i];
+for (j=0;j<num_dm;j++)
+{
+grid_value[j]=position[ReventID][j];
+}
+/* For each dimension, find the neighbor, if any, that is equal in all other dimensions and 1 greater in
+the chosen dimension.  If the neighbor's centroid is not too far away, add it to this grid's neighbor
+list. */
+for (t=0;t<num_dm;t++)
+{
+for (p=0;p<num_dm;p++)
+search_value[p]=grid_value[p];
+if (grid_value[t]==num_bin-1)
+continue;
+search_value[t]=grid_value[t]+1; //we only consider the neighbor at the bigger side
+neighbor_ID=binary_search(num_dense_grids, num_dm, search_value, dense_grid_seq);
+if (neighbor_ID!=-1)
+{
+real_dist=norm_grid_center[i][t]-norm_grid_center[neighbor_ID][t];
+if (real_dist<0)
+real_dist=-real_dist;
+if (real_dist<2*interval[t])
+graph_of_grid[i][t]=neighbor_ID;
+}
+}
+grid_clusterID[i]=i; //initialize grid_clusterID
+}
+//graph constructed
+//DFS-based search begins
+/* Use a depth-first search to construct a list of connected subgraphs (= "clusters").
+Note our graph as we have constructed it is a DAG, so we can use that to our advantage
+in our search. */
+//  num_clust=dfs(graph_of_grid,num_dense_grids,num_dm,grid_clusterID);
+num_clust=find_connected(graph_of_grid, num_dense_grids, num_dm, grid_clusterID);
+//computes grid_ID and cluster_ID
+for (i=0;i<file_Len;i++)
+{
+denseID=eventID_To_denseventID[i];
+if (denseID!=-1) //only dense events can enter
+	  {
+cluster_ID[denseID]=grid_clusterID[grid_ID[denseID]];
+		//if (cluster_ID[denseID]==-1)
+		//	printf("catch you!\n");
+	  }
+}
+free(search_value);
+free(grid_value);
+for (i=0;i<num_dense_grids;i++)
+{
+free(graph_of_grid[i]);
+free(norm_grid_center[i]);
+}
+free(graph_of_grid);
+free(norm_grid_center);
+return num_clust;
+}
+/********************************************* Merge Clusters to Populations *******************************************/
+// This is the function future work can be on because it is about how to cluster the about 500 points accurately
+int merge_clusters(int num_clust, int num_dm, double *interval, double **cluster_center, int *cluster_populationID, int max_num_pop)
+{
+int num_population=0;
+int temp_num_population=0;
+int i=0;
+int j=0;
+int t=0;
+int merge=0;
+int smid=0;
+int bgid=0;
+double merge_dist=1.1; //initial value of merge_dist*interval should be slightly larger than the bin width
+int *map_ID;
+double diff=0;
+map_ID=(int*)malloc(sizeof(int)*num_clust);
+memset(map_ID,0,sizeof(int)*num_clust);
+while ((num_population>max_num_pop) || (num_population<=1))
+{
+	  if (num_population<=1)
+	  	  merge_dist=merge_dist-0.1;
+	  if (num_population>max_num_pop)
+merge_dist=merge_dist+0.1;
+	 for (i=0;i<num_clust;i++)
+		cluster_populationID[i]=i;
+for (i=0;i<num_clust-1;i++)
+{
+for (j=i+1;j<num_clust;j++)
+{
+merge=1;
+for (t=0;t<num_dm;t++)
+{
+diff=cluster_center[i][t]-cluster_center[j][t];
+if (diff<0)
+diff=-diff;
+if (diff>(merge_dist*interval[t]))
+merge=0;
+}
+if ((merge) && (cluster_populationID[i]!=cluster_populationID[j]))
+{
+if (cluster_populationID[i]<cluster_populationID[j])  //they could not be equal
+{
+smid = cluster_populationID[i];
+bgid = cluster_populationID[j];
+}
+else
+{
+smid = cluster_populationID[j];
+bgid = cluster_populationID[i];
+}
+for (t=0;t<num_clust;t++)
+{
+if (cluster_populationID[t]==bgid)
+cluster_populationID[t]=smid;
+}
+}
+}
+}
+for (i=0;i<num_clust;i++)
+map_ID[i]=-1;
+for (i=0;i<num_clust;i++)
+map_ID[cluster_populationID[i]]=1;
+num_population=0;
+for (i=0;i<num_clust;i++)
+{
+if (map_ID[i]==1)
+{
+map_ID[i]=num_population;
+num_population++;
+}
+}
+if ((temp_num_population>max_num_pop) && (num_population==1))
+	  break;
+else
+	  temp_num_population=num_population;
+if (num_clust<=1)
+	break;
+}
+for (i=0;i<num_clust;i++)
+cluster_populationID[i]=map_ID[cluster_populationID[i]];
+free(map_ID);
+return num_population;
+}
+///////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+double kmeans(double **Matrix, int k, double kmean_term, int file_Len, int num_dm, int *shortest_id, double **center)
+{
+	int i=0;
+	int j=0;
+	int t=0;
+	int random=0;
+	int random1=0;
+	int random2=0;
+	int times=0;
+	int dist_used=0; //0 is Euclidean and 1 is Pearson
+	int random_init=0; //0: not use random seeds;
+	int real_Len=0;
+	int skipped=0;
+	int *num;  //num[i]=t means the ith cluster has t points
+	double vvv=1.0; // the biggest variation;
+	double distance=0.0;
+	double xv=0.0;
+	double variation=0.0;
+	double mean_dx=0;
+	double mean_dy=0;
+	double sum_var=0;
+	double dx=0;
+	double dy=0;
+	double sd_x=0;
+	double sd_y=0;
+	double diff=0;
+	double distortion=0;
+	double shortest_distance;
+	double *temp_center;
+	double **sum;
+	temp_center = (double *)malloc(sizeof(double)*num_dm);
+	memset(temp_center,0,sizeof(double)*num_dm);
+	if (random_init)
+	{
+		for (i=0;i<k;i++)
+		{
+			random1=rand()*rand();
+			random2=abs((random1%5)+1);
+			for (t=0;t<random2;t++)
+				random2=random2*rand()+rand();
+			random=abs(random2%file_Len);
+			for (j=0;j<num_dm;j++)
+				center[i][j]=Matrix[random][j];
+		}
+	}
+	num = (int *)malloc(sizeof(int)*k);
+	memset(num,0,sizeof(int)*k);
+	sum = (double **)malloc(sizeof(double*)*k);
+	memset(sum,0,sizeof(double*)*k);
+	for (i=0;i<k;i++)
+	{
+		sum[i] = (double *)malloc(sizeof(double)*num_dm);
+		memset(sum[i],0,sizeof(double)*num_dm);
+	}
+	times=0;
+	real_Len=0;
+	while (((vvv>kmean_term) && (kmean_term<1)) || ((times<kmean_term) && (kmean_term>=1)))
+	{
+		for (i=0;i<k;i++)
+		{
+			num[i]=0;
+			for (j=0;j<num_dm;j++)
+				sum[i][j]=0.0;
+		}
+		for (i=0;i<file_Len;i++)  //for each data point i, we compute the distance between Matrix[i] and center[j]
+		{
+			skipped = 0;
+			shortest_distance=MAX_VALUE;
+			for (j=0;j<k;j++)  //for each center j
+			{
+				distance=0.0;
+				if (dist_used==0)  //Euclidean distance
+				{
+					for (t=0;t<num_dm;t++) //for each dimension here num_dm is always 1 as we consider individual dimensions
+					{
+						diff=center[j][t]-Matrix[i][t];
+						diff=diff*diff;
+						distance = distance+diff; //here we have a weight for each dimension
+					}
+				}
+				else  //pearson correlation
+				{
+					mean_dx=0.0;
+					mean_dy=0.0;
+					sum_var=0.0;
+					dx=0.0;
+					dy=0.0;
+					sd_x=0.0;
+					sd_y=0.0;
+					for (t=0;t<num_dm;t++)
+					{
+						mean_dx+=center[j][t];
+						mean_dy+=Matrix[i][t];
+					}
+					mean_dx=mean_dx/(double)num_dm;
+					mean_dy=mean_dy/(double)num_dm;
+					for (t=0;t<num_dm;t++)
+					{
+						dx=center[j][t]-mean_dx;
+						dy=Matrix[i][t]-mean_dy;
+						sum_var+=dx*dy;
+						sd_x+=dx*dx;
+						sd_y+=dy*dy;
+					}
+					if (sqrt(sd_x*sd_y)==0)
+						distance = 1.0;
+					else
+						distance = 1.0 - (sum_var/(sqrt(sd_x*sd_y))); // distance ranges from 0 to 2;
+					//printf("distance=%f\n",distance);
+				}	//pearson correlation ends
+				if ((distance<shortest_distance) && (skipped == 0))
+				{
+					shortest_distance=distance;
+					shortest_id[i]=j;
+				}
+			}//end for j
+				real_Len++;
+				num[shortest_id[i]]=num[shortest_id[i]]+1;
+				for (t=0;t<num_dm;t++)
+					sum[shortest_id[i]][t]=sum[shortest_id[i]][t]+Matrix[i][t];
+		}//end for i
+	/* recompute the centers */
+	//compute_mean(group);
+		vvv=0.0;
+		for (j=0;j<k;j++)
+		{
+			memcpy(temp_center,center[j],sizeof(double)*num_dm);
+			variation=0.0;
+			if (num[j]!=0)
+			{
+				for (t=0;t<num_dm;t++)
+				{
+					center[j][t]=sum[j][t]/(double)num[j];
+					xv=(temp_center[t]-center[j][t]);
+					variation=variation+xv*xv;
+				}
+			}
+			if (variation>vvv)
+				vvv=variation;  //vvv is the biggest variation among the k clusters;
+		}
+	//compute_variation;
+		times++;
+	} //end for while
+	free(num);
+	for (i=0;i<k;i++)
+		free(sum[i]);
+	free(sum);
+	free(temp_center);
+	return distortion;
+}
+//////////////////////////////////////////////////////
+/*************************** Show *****************************/
+void show(double **Matrix, int *cluster_id, int file_Len, int k, int num_dm, char *name_string)
+{
+	int situ1=0;
+	int situ2=0;
+	int i=0;
+	int id=0;
+	int j=0;
+	int t=0;
+	int *size_c;
+	int **size_mybound_1;
+	int **size_mybound_2;
+	int **size_mybound_3;
+	int **size_mybound_0;
+	double interval=0.0;
+	double *big;
+	double *small;
+	double **center;
+	double **mybound;
+	int **prof; //prof[i][j]=1 means population i is + at parameter j
+	FILE *fpcnt_id; //proportion id
+	//FILE *fcent_id; //center_id, i.e., centers of clusters within the original data
+	FILE *fprof_id; //profile_id
+	big=(double *)malloc(sizeof(double)*num_dm);
+	memset(big,0,sizeof(double)*num_dm);
+	small=(double *)malloc(sizeof(double)*num_dm);
+	memset(small,0,sizeof(double)*num_dm);
+	for (i=0;i<num_dm;i++)
+	{
+		big[i]=0.0;
+		small[i]=(double)MAX_VALUE;
+	}
+	size_c=(int *)malloc(sizeof(int)*k);
+	memset(size_c,0,sizeof(int)*k);
+	center=(double**)malloc(sizeof(double*)*k);
+	memset(center,0,sizeof(double*)*k);
+	for (i=0;i<k;i++)
+	{
+		center[i]=(double*)malloc(sizeof(double)*num_dm);
+		memset(center[i],0,sizeof(double)*num_dm);
+	}
+	mybound=(double**)malloc(sizeof(double*)*num_dm);
+	memset(mybound,0,sizeof(double*)*num_dm);
+	for (i=0;i<num_dm;i++) //there are 3 mybounds for 4 categories
+	{
+		mybound[i]=(double*)malloc(sizeof(double)*3);
+		memset(mybound[i],0,sizeof(double)*3);
+	}
+	prof=(int **)malloc(sizeof(int*)*k);
+	memset(prof,0,sizeof(int*)*k);
+	for (i=0;i<k;i++)
+	{
+		prof[i]=(int *)malloc(sizeof(int)*num_dm);
+		memset(prof[i],0,sizeof(int)*num_dm);
+	}
+	for (i=0;i<file_Len;i++)
+	{
+		id=cluster_id[i];
+		for (j=0;j<num_dm;j++)
+		{
+			center[id][j]=center[id][j]+Matrix[i][j];
+			if (big[j]<Matrix[i][j])
+				big[j]=Matrix[i][j];
+			if (small[j]>Matrix[i][j])
+				small[j]=Matrix[i][j];
+		}
+		size_c[id]++;
+	}
+	for (i=0;i<k;i++)
+		for (j=0;j<num_dm;j++)
+		{
+			if (size_c[i]!=0)
+				center[i][j]=(center[i][j]/(double)(size_c[i]));
+			else
+				center[i][j]=0;
+		}
+	for (j=0;j<num_dm;j++)
+	{
+		interval=((big[j]-small[j])/4.0);
+		//printf("interval[%d] is %f\n",j,interval);
+		for (i=0;i<3;i++)
+			mybound[j][i]=small[j]+((double)(i+1)*interval);
+	}
+	size_mybound_0=(int **)malloc(sizeof(int*)*k);
+	memset(size_mybound_0,0,sizeof(int*)*k);
+	for (i=0;i<k;i++)
+	{
+		size_mybound_0[i]=(int*)malloc(sizeof(int)*num_dm);
+		memset(size_mybound_0[i],0,sizeof(int)*num_dm);
+	}
+	size_mybound_1=(int **)malloc(sizeof(int*)*k);
+	memset(size_mybound_1,0,sizeof(int*)*k);
+	for (i=0;i<k;i++)
+	{
+		size_mybound_1[i]=(int*)malloc(sizeof(int)*num_dm);
+		memset(size_mybound_1[i],0,sizeof(int)*num_dm);
+	}
+	size_mybound_2=(int **)malloc(sizeof(int*)*k);
+	memset(size_mybound_2,0,sizeof(int*)*k);
+	for (i=0;i<k;i++)
+	{
+		size_mybound_2[i]=(int*)malloc(sizeof(int)*num_dm);
+		memset(size_mybound_2[i],0,sizeof(int)*num_dm);
+	}
+	size_mybound_3=(int **)malloc(sizeof(int*)*k);
+	memset(size_mybound_3,0,sizeof(int*)*k);
+	for (i=0;i<k;i++)
+	{
+		size_mybound_3[i]=(int*)malloc(sizeof(int)*num_dm);
+		memset(size_mybound_3[i],0,sizeof(int)*num_dm);
+	}
+	for (i=0;i<file_Len;i++)
+		for (j=0;j<num_dm;j++)
+			{
+				if (Matrix[i][j]<mybound[j][0])// && ((Matrix[i][j]-small[j])>0)) //the smallest values excluded
+					size_mybound_0[cluster_id[i]][j]++;
+				else
+				{
+					if (Matrix[i][j]<mybound[j][1])
+						size_mybound_1[cluster_id[i]][j]++;
+					else
+					{
+						if (Matrix[i][j]<mybound[j][2])
+							size_mybound_2[cluster_id[i]][j]++;
+						else
+							//if (Matrix[i][j]!=big[j]) //the biggest values excluded
+								size_mybound_3[cluster_id[i]][j]++;
+					}
+				}
+			}
+	fprof_id=fopen("profile.txt","w");
+	fprintf(fprof_id,"Population_ID\t");
+	fprintf(fprof_id,"%s\n",name_string);
+	for (i=0;i<k;i++)
+	{
+		fprintf(fprof_id,"%d\t",i+1); //i changed to i+1 to start from 1 instead of 0: April 16, 2009
+		for (j=0;j<num_dm;j++)
+		{
+			if (size_mybound_0[i][j]>size_mybound_1[i][j])
+				situ1=0;
+			else
+				situ1=1;
+			if (size_mybound_2[i][j]>size_mybound_3[i][j])
+				situ2=2;
+			else
+				situ2=3;
+			if ((situ1==0) && (situ2==2))
+			{
+				if (size_mybound_0[i][j]>size_mybound_2[i][j])
+					prof[i][j]=0;
+				else
+					prof[i][j]=2;
+			}
+			if ((situ1==0) && (situ2==3))
+			{
+				if (size_mybound_0[i][j]>size_mybound_3[i][j])
+					prof[i][j]=0;
+				else
+					prof[i][j]=3;
+			}
+			if ((situ1==1) && (situ2==2))
+			{
+				if (size_mybound_1[i][j]>size_mybound_2[i][j])
+					prof[i][j]=1;
+				else
+					prof[i][j]=2;
+			}
+			if ((situ1==1) && (situ2==3))
+			{
+				if (size_mybound_1[i][j]>size_mybound_3[i][j])
+					prof[i][j]=1;
+				else
+					prof[i][j]=3;
+			}
+			//begin to output profile
+			if (j==num_dm-1)
+			{
+				if (prof[i][j]==0)
+					fprintf(fprof_id,"1\n");
+				if (prof[i][j]==1)
+					fprintf(fprof_id,"2\n");
+				if (prof[i][j]==2)
+					fprintf(fprof_id,"3\n");
+				if (prof[i][j]==3)
+					fprintf(fprof_id,"4\n");
+			}
+			else
+			{
+				if (prof[i][j]==0)
+					fprintf(fprof_id,"1\t");
+				if (prof[i][j]==1)
+					fprintf(fprof_id,"2\t");
+				if (prof[i][j]==2)
+					fprintf(fprof_id,"3\t");
+				if (prof[i][j]==3)
+					fprintf(fprof_id,"4\t");
+			}
+		}
+	}
+	fclose(fprof_id);
+	///////////////////////////////////////////////////////////
+	fpcnt_id=fopen("percentage.txt","w");
+	fprintf(fpcnt_id,"Population_ID\tPercentage\n");
+	for (t=0;t<k;t++)
+	{
+		fprintf(fpcnt_id,"%d\t%.2f\n",t+1,(double)size_c[t]*100.0/(double)file_Len);	//t changed to t+1 to start from 1 instead of 0: April 16, 2009
+	}
+	fclose(fpcnt_id);
+	free(big);
+	free(small);
+	free(size_c);
+	for (i=0;i<k;i++)
+	{
+		free(center[i]);
+		free(prof[i]);
+		free(size_mybound_0[i]);
+		free(size_mybound_1[i]);
+		free(size_mybound_2[i]);
+		free(size_mybound_3[i]);
+	}
+	free(center);
+	free(prof);
+	free(size_mybound_0);
+	free(size_mybound_1);
+	free(size_mybound_2);
+	free(size_mybound_3);
+	for (i=0;i<num_dm;i++)
+		free(mybound[i]);
+	free(mybound);
+}
+/******************************************************** Main Function **************************************************/
+//for normalized data, there are five variables:
+//cluster_ID
+//population_center
+//grid_clusterID
+//grid_ID
+//grid_Center
+//the same five variables exist for the original data
+//however, the three IDs (cluster_ID, grid_ID, grid_clusterID) don't change in either normalized or original data
+//also, data + cluster_ID -> population_center
+//data + grid_ID -> grid_Center
+/* what is the final output */
+//the final things we want are grid_Center in the original data and grid_clusterID //or population_center in the original data
+//Sparse grids will not be considered when computing the two centroids (centroids of grids and centroids of clusters)
+/*  what information should select_bin output */
+//the size of all IDs are unknown to function main because we only consider the events in dense grids, and also the number of dense grids
+//is unknown, therefore I must use a prescreening to output
+//how many bins I should use
+//the number of dense grids
+//total number of events in the dense grids
+/* basic procedure of main function */
+//1. read raw file and normalize the raw file
+//2. select_bin
+//3. allocate memory for eventID_To_denseventID, grid_clusterID, grid_ID, cluster_ID.
+//4. call select_dense and merge_grids with grid_clusterID, grid_ID, cluster_ID.
+//5. release normalized data; allocate memory for grid_Center and population_center
+//6. output grid_Center and population_center using ID2Center together with grid_clusterID //from IDs to centers
+int main (int argc, char **argv)
+{
+//inputs
+FILE *f_src; //source file pointer
+FILE *f_out; //coordinates
+FILE *f_cid; //population-ID of events
+FILE *f_ctr; //centroids of populations
+FILE *f_results; //coordinates file event and population column
+FILE *f_mfi; //added April 16, 2009 for mean fluorescence intensity
+FILE *f_parameters; //number of bins and density calculated by
+//the algorithm. Used to update the database
+FILE *f_properties; //Properties file used by Image generation software
+char para_name_string[LINE_LEN];
+int time_ID=-1;
+int num_bin=0; //the bins I will use on each dimension
+int file_Len=0; //number of events
+int num_dm=0;
+int num_clust=0;
+int num_dense_events=0;
+int num_dense_grids=0;
+int num_nonempty=0;
+int num_population=0;
+//int temp=0;
+//below are read from configuration file
+int i=0;
+int j=0;
+int max_num_pop=0;
+int den_t_event=0;
+int *grid_clusterID; //(dense)gridID to clusterID
+int *grid_ID; //(dense)eventID to gridID
+int *cluster_ID; //(dense)eventID to clusterID
+int *eventID_To_denseventID; //eventID_To_denseventID[i]=k means event i is in a dense grid and its ID within dense events is k
+int *all_grid_ID; //gridID for all events
+int *densegridID_To_gridevent;
+int *sorted_seq;
+int *dense_grid_reverse;
+int *population_ID; //denseeventID to populationID
+int *cluster_populationID; //clusterID to populationID
+int *grid_populationID; //gridID to populationID
+int *all_population_ID; //populationID of event
+int **position;
+int **dense_grid_seq;
+double *interval;
+double **population_center; //population centroids in the raw/original data
+double **cluster_center; //cluster centroids in the raw/original data
+double **input_data;
+double **normalized_data;
+int min = 999999;
+int max = 0;
+printf( "Starting time:\t\t\t\t");
+fflush(stdout);
+system("/bin/date");
+/////////////////////////////////////////////////////////////
+if ((argc!=2) && (argc!=4) && (argc!=5))
+{
+//modified on Jul 23, 2010
+	  //printf("usage:\n");
+	  //printf("basic mode: flock data_file\n");
+	  //printf("advanced mode: flock data_file num_bin density_index\n");
+//exit(0);
+	  fprintf(stderr,"Incorrect number of input parameters!\n"); //modified on July 23, 2010
+	  //fprintf(stderr,"basic mode: flock data_file\n"); //modified on July 23, 2010
+	  //fprintf(stderr,"advanced mode1: flock data_file num_bin density_index\n"); //modified on July 23, 2010
+	  fprintf(stderr,"advanced mode: flock data_file num_bin density_index max_num_pop\n"); //added on Dec 16, 2010 for GenePattern
+	  abort();
+}
+f_src=fopen(argv[1],"r");
+if (argc==2)
+{
+	 max_num_pop=30; //default value, maximum 30 clusters
+}
+if (argc==4)
+{
+	 num_bin=atoi(argv[2]);
+	 printf("num_bin is %d\n",num_bin);
+	 den_t_event=atoi(argv[3]);
+	 printf("density_index is %d\n",den_t_event);
+	 max_num_pop=30;
+	 if (((num_bin<6) && (num_bin!=0)) || (num_bin>29))
+	 {
+		fprintf(stderr,"Incorrect input range of number of bins, which should be larger than 5 and smaller than 30\n");
+		abort();
+	  }
+	  if (((den_t_event<3) && (den_t_event!=0)) || (den_t_event>99))
+	  {
+		fprintf(stderr,"Incorrect input range of density threshold, which should be larger than 2 and smaller than 100\n");
+		abort();
+	  }
+}
+if (argc==5)
+{
+	  num_bin=atoi(argv[2]);
+	  printf("num_bin is %d\n",num_bin);
+	  den_t_event=atoi(argv[3]);
+	  printf("density_index is %d\n",den_t_event);
+	  max_num_pop=atoi(argv[4]);
+	  printf("max number of clusters is %d\n",max_num_pop);
+	  if (((num_bin<6) && (num_bin!=0)) || (num_bin>29))
+	  {
+		fprintf(stderr,"Incorrect input range of number of bins, which should be larger than 5 and smaller than 30\n");
+		abort();
+	  }
+	  if (((den_t_event<3) && (den_t_event!=0)) || (den_t_event>99))
+	  {
+		fprintf(stderr,"Incorrect input range of density threshold, which should be larger than 2 and smaller than 100\n");
+		abort();
+	  }
+	  if ((max_num_pop<5) || (max_num_pop>999))
+	  {
+		fprintf(stderr,"Incorrect input range of maximum number of populations, which should be larger than 4 and smaller than 1000\n");
+		abort();
+	  }
+}
+getfileinfo(f_src, &file_Len, &num_dm, para_name_string, &time_ID); //get the filelength, number of dimensions, and num/name of parameters
+/************************************************* Read the data *****************************************************/
+rewind(f_src); //reset the file pointer
+input_data = (double **)malloc(sizeof(double*)*file_Len);
+memset(input_data,0,sizeof(double*)*file_Len);
+for (i=0;i<file_Len;i++)
+{
+input_data[i]=(double *)malloc(sizeof(double)*num_dm);
+memset(input_data[i],0,sizeof(double)*num_dm);
+}
+readsource(f_src, file_Len, num_dm, input_data, time_ID); //read the data;
+fclose(f_src);
+normalized_data=(double **)malloc(sizeof(double*)*file_Len);
+memset(normalized_data,0,sizeof(double*)*file_Len);
+for (i=0;i<file_Len;i++)
+{
+normalized_data[i]=(double *)malloc(sizeof(double)*num_dm);
+memset(normalized_data[i],0,sizeof(double)*num_dm);
+}
+tran(input_data, file_Len, num_dm, NORM_METHOD, normalized_data);
+position=(int **)malloc(sizeof(int*)*file_Len);
+memset(position,0,sizeof(int*)*file_Len);
+for (i=0;i<file_Len;i++)
+{
+position[i]=(int*)malloc(sizeof(int)*num_dm);
+memset(position[i],0,sizeof(int)*num_dm);
+}
+all_grid_ID=(int *)malloc(sizeof(int)*file_Len);
+memset(all_grid_ID,0,sizeof(int)*file_Len);
+sorted_seq=(int*)malloc(sizeof(int)*file_Len);
+memset(sorted_seq,0,sizeof(int)*file_Len);
+interval=(double*)malloc(sizeof(double)*num_dm);
+memset(interval,0,sizeof(double)*num_dm);
+/************************************************* select_bin *************************************************/
+if (num_bin>=1)  //num_bin has been selected by user
+	select_bin(normalized_data, file_Len, num_dm, MIN_GRID, MAX_GRID, position, sorted_seq, all_grid_ID, &num_nonempty, interval,num_bin);
+else  //num_bin has not been selected by user
+{
+	num_bin=select_bin(normalized_data, file_Len, num_dm, MIN_GRID, MAX_GRID, position, sorted_seq, all_grid_ID, &num_nonempty, interval,num_bin);
+	printf("selected bin number is %d\n",num_bin);
+}
+printf("number of non-empty grids is %d\n",num_nonempty);
+/* Although we return sorted_seq from select_bin(), we don't use it for anything, except possibly diagnostics */
+free(sorted_seq);
+dense_grid_reverse=(int*)malloc(sizeof(int)*num_nonempty);
+memset(dense_grid_reverse,0,sizeof(int)*num_nonempty);
+/************************************************* select_dense **********************************************/
+if (den_t_event>=1) //den_t_event must be larger or equal to 2 if the user wants to set it
+	select_dense(file_Len, all_grid_ID, num_nonempty, &num_dense_grids, &num_dense_events, dense_grid_reverse, den_t_event);
+else
+{
+	den_t_event=select_dense(file_Len, all_grid_ID, num_nonempty, &num_dense_grids, &num_dense_events, dense_grid_reverse, den_t_event);
+	printf("automated selected density threshold is %d\n",den_t_event);
+}
+printf("Number of dense grids is %d\n",num_dense_grids);
+densegridID_To_gridevent = (int *)malloc(sizeof(int)*num_dense_grids);
+memset(densegridID_To_gridevent,0,sizeof(int)*num_dense_grids);
+for (i=0;i<num_dense_grids;i++)
+densegridID_To_gridevent[i]=-1; //initialize all densegridID_To_gridevent values to -1
+eventID_To_denseventID=(int *)malloc(sizeof(int)*file_Len);
+memset(eventID_To_denseventID,0,sizeof(int)*file_Len);     //eventID_To_denseventID[i]=k means event i is in a dense grid and its ID within dense events is k
+grid_To_event(file_Len, dense_grid_reverse, all_grid_ID, eventID_To_denseventID, densegridID_To_gridevent);
+dense_grid_seq=(int **)malloc(sizeof(int*)*num_dense_grids);
+memset(dense_grid_seq,0,sizeof(int*)*num_dense_grids);
+for (i=0;i<num_dense_grids;i++)
+{
+dense_grid_seq[i]=(int *)malloc(sizeof(int)*num_dm);
+memset(dense_grid_seq[i],0,sizeof(int)*num_dm);
+}
+/* Look up the binned data values for each dense grid */
+generate_grid_seq(num_dm, num_dense_grids, densegridID_To_gridevent, position, dense_grid_seq);
+/************************************************* allocate memory *********************************************/
+grid_clusterID=(int *)malloc(sizeof(int)*num_dense_grids);
+memset(grid_clusterID,0,sizeof(int)*num_dense_grids);
+grid_ID=(int *)malloc(sizeof(int)*num_dense_events);
+memset(grid_ID,0,sizeof(int)*num_dense_events);
+cluster_ID=(int *)malloc(sizeof(int)*num_dense_events);
+memset(cluster_ID,0,sizeof(int)*num_dense_events);
+/*********************************************** merge_grids ***********************************************/
+//int merge_grids(int file_Len, int num_dm, int num_bin, int **position, int num_dense_grids, int *dense_grid_rank, int *dense_grid_reverse,
+//			 int **dense_grid_seq, int *eventID_To_denseventID, int *densegridID_To_gridevent, int *all_grid_ID,
+//			 int *cluster_ID, int *grid_ID, int *grid_clusterID)
+num_clust = merge_grids(normalized_data, interval, file_Len, num_dm, num_bin, position, num_dense_grids, dense_grid_reverse, dense_grid_seq, eventID_To_denseventID, densegridID_To_gridevent, all_grid_ID, cluster_ID, grid_ID, grid_clusterID);
+printf("computed number of groups is %d\n",num_clust);
+/************************************** release unnecessary memory and allocate memory and compute centers **********************************/
+for (i=0;i<file_Len;i++)
+free(position[i]);
+free(position);
+for (i=0;i<num_dense_grids;i++)
+free(dense_grid_seq[i]);
+free(dense_grid_seq);
+free(dense_grid_reverse);
+free(densegridID_To_gridevent);
+free(all_grid_ID);
+// cluster_center ////////////////////////////////////////////////////////////////////////////////////////////////////////
+cluster_center=(double **)malloc(sizeof(double*)*num_clust);
+memset(cluster_center,0,sizeof(double*)*num_clust);
+for (i=0;i<num_clust;i++)
+{
+cluster_center[i]=(double*)malloc(sizeof(double)*num_dm);
+memset(cluster_center[i],0,sizeof(double)*num_dm);
+}
+ID2Center(normalized_data,file_Len,num_dm,eventID_To_denseventID,num_clust,cluster_ID,cluster_center); //produce the centers with normalized data
+//printf("pass the first ID2center\n");  //commented on July 23, 2010
+/*** population_ID and grid_populationID **/
+cluster_populationID=(int*)malloc(sizeof(int)*num_clust);
+memset(cluster_populationID,0,sizeof(int)*num_clust);
+grid_populationID=(int*)malloc(sizeof(int)*num_dense_grids);
+memset(grid_populationID,0,sizeof(int)*num_dense_grids);
+population_ID=(int*)malloc(sizeof(int)*num_dense_events);
+memset(population_ID,0,sizeof(int)*num_dense_events);
+num_population = merge_clusters(num_clust, num_dm, interval, cluster_center, cluster_populationID,max_num_pop);
+for (i=0;i<num_clust;i++)
+free(cluster_center[i]);
+free(cluster_center);
+free(interval);
+for (i=0;i<num_dense_grids;i++)
+{
+grid_populationID[i]=cluster_populationID[grid_clusterID[i]];
+}
+for (i=0;i<num_dense_events;i++)
+{
+population_ID[i]=cluster_populationID[cluster_ID[i]];
+}
+printf("computed number of populations is %d\n",num_population);
+// population_center /////////////////////////////////////////////////////////////////////////////////////////////////////
+population_center=(double **)malloc(sizeof(double*)*num_population);
+memset(population_center,0,sizeof(double*)*num_population);
+for (i=0;i<num_population;i++)
+{
+population_center[i]=(double*)malloc(sizeof(double)*num_dm);
+memset(population_center[i],0,sizeof(double)*num_dm);
+}
+ID2Center(normalized_data,file_Len,num_dm,eventID_To_denseventID,num_population,population_ID,population_center); //produce population centers with normalized data
+// show ////////////////////////////////////////////////////////////////////////////////
+all_population_ID=(int*)malloc(sizeof(int)*file_Len);
+memset(all_population_ID,0,sizeof(int)*file_Len);
+kmeans(normalized_data, num_population, KMEANS_TERM, file_Len, num_dm, all_population_ID, population_center);
+show(input_data, all_population_ID, file_Len, num_population, num_dm, para_name_string);
+ID2Center_all(input_data,file_Len,num_dm,num_population,all_population_ID,population_center);
+f_cid=fopen("population_id.txt","w");
+f_ctr=fopen("population_center.txt","w");
+f_out=fopen("coordinates.txt","w");
+f_results=fopen("flock_results.txt","w");
+/*
+f_parameters=fopen("parameters.txt","w");
+fprintf(f_parameters,"Number_of_Bins\t%d\n",num_bin);
+fprintf(f_parameters,"Density\t%f\n",aver_index);
+fclose(f_parameters);
+*/
+for (i=0;i<file_Len;i++)
+	fprintf(f_cid,"%d\n",all_population_ID[i]+1); //all_population_ID[i] changed to all_population_ID[i]+1 to start from 1 instead of 0: April 16, 2009
+/*
+* New to check for min/max to add to parameters.txt
+*
+*/
+fprintf(f_out,"%s\n",para_name_string);
+//fprintf(f_results,"%s\tEvent\tPopulation\n",para_name_string);
+fprintf(f_results,"%s\tPopulation\n",para_name_string);
+for (i=0;i<file_Len;i++)
+{
+	for (j=0;j<num_dm;j++)
+	{
+		if (input_data[i][j] < min) {
+			min = (int)input_data[i][j];
+		}
+		if (input_data[i][j] > max) {
+			max = (int)input_data[i][j];
+		}
+		if (j==num_dm-1)
+		{
+			fprintf(f_out,"%d\n",(int)input_data[i][j]);
+			fprintf(f_results,"%d\t",(int)input_data[i][j]);
+		}
+		else
+		{
+			fprintf(f_out,"%d\t",(int)input_data[i][j]);
+			fprintf(f_results,"%d\t",(int)input_data[i][j]);
+		}
+	}
+	//fprintf(f_results,"%d\t",i + 1);
+	fprintf(f_results,"%d\n",all_population_ID[i]+1); //all_population_ID[i] changed to all_population_ID[i]+1 to start from 1 instead of 0: April 16, 2009
+}
+/*
+f_parameters=fopen("parameters.txt","w");
+fprintf(f_parameters,"Number_of_Bins\t%d\n",num_bin);
+fprintf(f_parameters,"Density\t%d\n",den_t_event);
+fprintf(f_parameters,"Min\t%d\n",min);
+fprintf(f_parameters,"Max\t%d\n",max);
+fclose(f_parameters);
+*/
+f_properties=fopen("fcs.properties","w");
+fprintf(f_properties,"Bins=%d\n",num_bin);
+fprintf(f_properties,"Density=%d\n",den_t_event);
+fprintf(f_properties,"Min=%d\n",min);
+fprintf(f_properties,"Max=%d\n",max);
+fprintf(f_properties,"Populations=%d\n",num_population);
+fprintf(f_properties,"Events=%d\n",file_Len);
+fprintf(f_properties,"Markers=%d\n",num_dm);
+fclose(f_properties);
+for (i=0;i<num_population;i++) {
+	/* Add if we want to include population id in the output
+	*/
+	fprintf(f_ctr,"%d\t",i+1);  //i changed to i+1 to start from 1 instead of 0: April 16, 2009
+	for (j=0;j<num_dm;j++) {
+		if (j==num_dm-1)
+			fprintf(f_ctr,"%.0f\n",population_center[i][j]);
+		else
+			fprintf(f_ctr,"%.0f\t",population_center[i][j]);
+	}
+}
+	//added April 16, 2009
+	f_mfi=fopen("MFI.txt","w");
+	for (i=0;i<num_population;i++)
+	{
+		fprintf(f_mfi,"%d\t",i+1);
+		for (j=0;j<num_dm;j++)
+		{
+			if (j==num_dm-1)
+				fprintf(f_mfi,"%.0f\n",population_center[i][j]);
+			else
+				fprintf(f_mfi,"%.0f\t",population_center[i][j]);
+		}
+	}
+	fclose(f_mfi);
+	//ended April 16, 2009
+fclose(f_cid);
+fclose(f_ctr);
+fclose(f_out);
+fclose(f_results);
+for (i=0;i<num_population;i++)
+{
+	free(population_center[i]);
+}
+free(population_center);
+for (i=0;i<file_Len;i++)
+free(normalized_data[i]);
+free(normalized_data);
+free(grid_populationID);
+free(cluster_populationID);
+free(grid_clusterID);
+free(cluster_ID);
+for (i=0;i<file_Len;i++)
+free(input_data[i]);
+free(input_data);
+free(grid_ID);
+free(population_ID);
+free(all_population_ID);
+free(eventID_To_denseventID);
+///////////////////////////////////////////////////////////
+printf("Ending time:\t\t\t\t");
+fflush(stdout);
+system("/bin/date");
+return 0;
+}

Mercurial > repos > immport-devteam > cross_sample

comparison src/flock1.c @ 1:7eab80f86779 draft