view mrcanavar-0.34/gcnorm.c @ 0:86522a0b5f59 default tip

Uploaded source code for mrCaNaVaR
author calkan
date Tue, 21 Feb 2012 10:44:13 -0500
parents
children
line wrap: on
line source

#include "gcnorm.h"

int norm_until_converges (enum WINDOWTYPE wt, float *gclookup, float *gclookup_x){
  float max;
  float max_x;
  float min;
  float min_x;

  float p_max;
  float p_max_x;
  float p_min;
  float p_min_x;

  int i, j;
  float mean;
  float mean_x;
  float stdev;
  float stdev_x;
  int iter;

  float maxcut;
  float maxcut_x;
  float mincut;
  float mincut_x;

  iter = 1;
  calc_stat(wt, gclookup, gclookup_x, 0, &max, &max_x, &min, &min_x);

  switch(wt){
  case LW:
    mean    = LW_MEAN;
    mean_x  = LW_MEAN_X;
    break;
  case SW:
    mean    = SW_MEAN;
    mean_x  = SW_MEAN_X;
    break;
  case CW:
    mean    = CW_MEAN;
    mean_x  = CW_MEAN_X;
    break;
  }

  
  if (GENDER == AUTODETECT){
    if (VERBOSE)
      fprintf(stdout, "MEAN: %f\tMEAN_X: %f\n", mean, mean_x);
    

    if (mean_x / mean < 0.75){ // magical ratio
      GENDER = MALE;

      if (VERBOSE)
	fprintf(stdout, "Autodetect Gender: Male\n");

      i = findChrom("chrX", "", -1);

      
      switch(wt){

      case CW:
      for (j=0; j<chromosomes[i]->cw_cnt; j++)
	if (chromosomes[i]->cw[j].depth > (float) CW_MEAN_X * 2 || chromosomes[i]->cw[j].depth < (float) CW_MEAN_X / 10.0)
	  chromosomes[i]->cw[j].isControl = 0;	
      break;

      case LW:
      for (j=0; j<chromosomes[i]->lw_cnt; j++)
	if (chromosomes[i]->lw[j].depth > (float) LW_MEAN_X * 2 || chromosomes[i]->lw[j].depth < (float) LW_MEAN_X / 10.0)
	  chromosomes[i]->lw[j].isControl = 0;	
      break;

      case SW:
      for (j=0; j<chromosomes[i]->sw_cnt; j++)
	if (chromosomes[i]->sw[j].depth > (float) SW_MEAN_X * 2 || chromosomes[i]->sw[j].depth < (float) SW_MEAN_X / 10.0)
	  chromosomes[i]->sw[j].isControl = 0;	
      break;

      }
      
    }
    else{
      GENDER = FEMALE;
      if (VERBOSE)
	fprintf(stdout, "Autodetect Gender: Female\n");
    }
  }

  normalize(wt, gclookup, gclookup_x, &max, &max_x, &min, &min_x);


  do{

    if (VERBOSE){
      fprintf(stdout, "Control regions %s iteration %d ", (wt == LW ? "LW" : (wt == SW ? "SW" : "CW")), iter);
      fflush(stdout);
    }



    p_min   = min;
    p_max   = max;
    p_min_x = min_x;
    p_max_x = max_x;
    
    calc_stat(wt, gclookup, gclookup_x, 1, &max, &max_x, &min, &min_x);


    switch(wt){
    case LW:
      mean    = LW_MEAN;
      stdev   = LW_STD;
      mean_x  = LW_MEAN_X;
      stdev_x = LW_STD_X;
      break;
    case SW:
      mean    = SW_MEAN;
      stdev   = SW_STD;
      mean_x  = SW_MEAN_X;
      stdev_x = SW_STD_X;
      break;
    case CW:
      mean    = CW_MEAN;
      stdev   = CW_STD;
      mean_x  = CW_MEAN_X;
      stdev_x = CW_STD_X;
      break;
    }


    if (GENDER == FEMALE){
      max_x   = max;
      mean_x  = mean;
      stdev_x = stdev;
      min_x   = min;
    }

    
    iter++;

	      
    maxcut   = mean + 2.5 * stdev;
    mincut   = mean - 2.5 * stdev;
    maxcut_x = mean_x + 2.5 * stdev_x;
    mincut_x = mean_x - 2.5 * stdev_x;

    if (GENDER != FEMALE){
      maxcut_x = mean_x + 2 * stdev_x;
      mincut_x = mean_x - 2 * stdev_x;
    }

    if (mincut < 0.0) mincut = mean / 10.0;
    if (mincut_x < 0.0) mincut_x = mean_x / 10.0;

    if (maxcut - mean > mean - mincut)
      maxcut = mean + (mean - mincut);

    if (GENDER != FEMALE){
      if (maxcut_x - mean_x > mean_x - mincut_x)
	maxcut_x = mean_x + (mean_x - mincut_x);
    }


    if (VERBOSE){
      fprintf(stdout, "mean: %f\tstdev: %f\tmax: %f (cut: %f)\tmin: %f (cut: %f) \n", mean, stdev, max, maxcut, min, mincut);      
      if (GENDER != FEMALE)
	fprintf(stdout, "mean_x: %f\tstdev_x: %f\tmax_x: %f (cut: %f)\tmin_x: %f (cut: %f)\n", mean_x, stdev_x, max_x, maxcut_x, min_x, mincut_x);  
    }
    
    if (p_min == min && p_max == max && p_min_x == min_x && p_max_x == max_x)
      break;

  } while (max >= maxcut || max_x >= maxcut_x || min <= mincut || min_x <= mincut_x);
  

  fprintf(stdout, "%s Normalization completed.\n",  (wt == LW ? "LW" : (wt == SW ? "SW" : "CW")));

  return 1;
}


void normalize (enum WINDOWTYPE wt, float *gclookup, float *gclookup_x, float *max, float *max_x, float *min, float *min_x){
  int gc_index;
  float new_depth;
  int i, j;

  float total, total_x;
  int win_cnt, win_cnt_x;

  total   = 0.0;
  win_cnt    = 0;
  total_x = 0.0;
  win_cnt_x  = 0;
  

  switch(wt){
  case LW:
    for (i=0; i<num_chrom; i++){
      for (j=0; j<chromosomes[i]->lw_cnt; j++){
	gc_index  = chromosomes[i]->lw[j].gc * (float) GC_BIN;
	if ((!strstr(chromosomes[i]->name, "chrX") && !strstr(chromosomes[i]->name, "chrY")) || GENDER == FEMALE){
	  if (MULTGC)
	    new_depth = gclookup[i] * chromosomes[i]->lw[j].depth;
	  else
	    new_depth = chromosomes[i]->lw[j].depth - (gclookup[gc_index] - LW_MEAN);
	}
	else if ((strstr(chromosomes[i]->name, "chrX") || strstr(chromosomes[i]->name, "chrY")) && GENDER == MALE){
	  if (MULTGC)
	    new_depth = gclookup_x[i] * chromosomes[i]->lw[j].depth;
	  else
	    new_depth = chromosomes[i]->lw[j].depth - (gclookup_x[gc_index] - LW_MEAN_X);	  
	}
	
	if (new_depth < 0) new_depth = 0;

	chromosomes[i]->lw[j].depth = new_depth;
      }
    }
    break;
  
  case SW:
    for (i=0; i<num_chrom; i++){
      for (j=0; j<chromosomes[i]->sw_cnt; j++){
	gc_index  = chromosomes[i]->sw[j].gc * (float) GC_BIN;
	if ((!strstr(chromosomes[i]->name, "chrX") && !strstr(chromosomes[i]->name, "chrY")) || GENDER == FEMALE){
	  if (MULTGC)
	    new_depth = gclookup[i] * chromosomes[i]->sw[j].depth;
	  else
	    new_depth = chromosomes[i]->sw[j].depth - (gclookup[gc_index] - SW_MEAN);
	}
	else if ((strstr(chromosomes[i]->name, "chrX") || strstr(chromosomes[i]->name, "chrY")) && GENDER == MALE){
	  if (MULTGC)
	    new_depth = gclookup_x[i] * chromosomes[i]->sw[j].depth;
	  else
	    new_depth = chromosomes[i]->sw[j].depth - (gclookup_x[gc_index] - SW_MEAN_X);	  
	}
	
	if (new_depth < 0) new_depth = 0;
	chromosomes[i]->sw[j].depth = new_depth;
      }
    }
    break;
  
  case CW:
    for (i=0; i<num_chrom; i++){
      for (j=0; j<chromosomes[i]->cw_cnt; j++){
	gc_index  = chromosomes[i]->cw[j].gc * (float) GC_BIN;
	if ((!strstr(chromosomes[i]->name, "chrX") && !strstr(chromosomes[i]->name, "chrY")) || GENDER == FEMALE){
	  if (MULTGC)
	    new_depth = gclookup[i] * chromosomes[i]->cw[j].depth;
	  else
	    new_depth = chromosomes[i]->cw[j].depth - (gclookup[gc_index] - CW_MEAN);
	}
	else if ((strstr(chromosomes[i]->name, "chrX") || strstr(chromosomes[i]->name, "chrY")) && GENDER == MALE){
	  if (MULTGC)
	    new_depth = gclookup_x[i] * chromosomes[i]->cw[j].depth;
	  else
	    new_depth = chromosomes[i]->cw[j].depth - (gclookup_x[gc_index] - CW_MEAN_X);	  
	}
	
	if (new_depth < 0) new_depth = 0;
	
	chromosomes[i]->cw[j].depth = new_depth;
      }
    }
    break;
  }


  
  calc_stat(wt, gclookup, gclookup_x, 0, max, max_x, min, min_x);
  
}


void calc_stat(enum WINDOWTYPE wt, float *gclookup, float *gclookup_x, char doClean, float *_max, float *_max_x, float *_min, float *_min_x){
  int i, j, k;

  float lw_var;
  float sw_var;
  float cw_var;
  
  float lw_total;
  float sw_total;
  float cw_total;
  
  float lw_var_x;
  float sw_var_x;
  float cw_var_x;
  
  float lw_total_x;
  float sw_total_x;
  float cw_total_x;
  
  int win_cnt;
  float max;

  int win_cnt_x;
  float max_x;

  float min;
  float min_x;

  int gc_index;

  float gc_total[GC_BIN]; // total depth by GC 
  int gc_wincount[GC_BIN]; // count of windows with the given GC content
  
  float gc_total_x[GC_BIN]; // total depth by GC 
  int gc_wincount_x[GC_BIN]; // count of windows with the given GC content
  
  float MEAN;
  float MEAN_X;


  float maxcut, mincut;
  float maxcut_x, mincut_x;

  lw_var = 0.0;
  sw_var = 0.0;
  cw_var = 0.0;

  lw_total = 0.0;
  sw_total = 0.0;
  cw_total = 0.0;

  win_cnt = 0;

  lw_var_x = 0.0;
  sw_var_x = 0.0;
  cw_var_x = 0.0;

  lw_total_x = 0.0;
  sw_total_x = 0.0;
  cw_total_x = 0.0;

  win_cnt_x = 0;

  for (i=0; i<GC_BIN; i++){
    gc_total[i]    = 0.0;
    gclookup[i]    = 0.0;
    gc_wincount[i] = 0;

    gc_total_x[i]    = 0.0;
    gclookup_x[i]    = 0.0;
    gc_wincount_x[i] = 0;
  }

  max = 0.0;
  max_x = 0.0;

  min = FLT_MAX;
  min_x = FLT_MAX;

  /*

  NOTE: 

  As of November 11, 2010, only CW implementation is reliable. 


  */

  switch (wt){
  case LW:

    if (doClean){

      for (i=0; i<num_chrom; i++){
	for (j=0; j<chromosomes[i]->lw_cnt; j++){
	  
	  if (chromosomes[i]->lw[j].isControl == 1){
	    
	    /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
	    if (!strstr(chromosomes[i]->name, "chrX") || GENDER == FEMALE){
	      
	      maxcut = LW_MEAN + 2.5 * LW_STD;
	      mincut = LW_MEAN - 2.5 * LW_STD; 
	      if (mincut < LW_MEAN / 10.0) mincut = LW_MEAN / 10.0;
	      
	      if (maxcut - LW_MEAN > LW_MEAN - mincut)
		maxcut = LW_MEAN + (LW_MEAN - mincut);
	      
	      if (chromosomes[i]->lw[j].depth > maxcut || chromosomes[i]->lw[j].depth < mincut){
		
		/* Remove this window and its neighbors from controls */
		chromosomes[i]->lw[j].isControl = 0;
		
		k = j;
		
		while (k > 0 && chromosomes[i]->lw[k-1].end >= chromosomes[i]->lw[j].start){
		  chromosomes[i]->lw[--k].isControl = 0;
		}

		k = j;

		while (k < chromosomes[i]->lw_cnt-1 && chromosomes[i]->lw[k+1].start <= chromosomes[i]->lw[j].end){
		  chromosomes[i]->lw[++k].isControl = 0;
		}

	      }
	     
	      else{
		lw_total += chromosomes[i]->lw[j].depth;
		win_cnt++;
	      }
	    }  // if AUTOSOME


	    /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
	    else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){
	      
	      maxcut_x = LW_MEAN_X + 2 * LW_STD_X;
	      mincut_x = LW_MEAN_X - 2 * LW_STD_X; 
	      if (mincut_x < LW_MEAN_X / 10.0) mincut_x = LW_MEAN_X / 10.0;

	      if (chromosomes[i]->lw[j].depth > maxcut_x || chromosomes[i]->lw[j].depth < mincut_x){

		/* Remove this window and its neighbors from controls */
		chromosomes[i]->lw[j].isControl = 0;

		k = j;
		
		while (k > 0 && chromosomes[i]->lw[k-1].end >= chromosomes[i]->lw[j].start){
		  chromosomes[i]->lw[--k].isControl = 0;
		}

		k = j;

		while (k < chromosomes[i]->lw_cnt-1 && chromosomes[i]->lw[k+1].start <= chromosomes[i]->lw[j].end){
		  chromosomes[i]->lw[++k].isControl = 0;
		}
	      }
	      
	      else{
		lw_total_x += chromosomes[i]->lw[j].depth;
		win_cnt_x++;
	      }
	    }  // if chrX
    	    	    
	  }
	  
	}
      }

      LW_MEAN_X = lw_total_x / win_cnt_x;
      LW_MEAN = lw_total / win_cnt;

    }  // do clean
  
    lw_total   = 0.0;
    win_cnt    = 0;
    lw_total_x = 0.0;
    win_cnt_x  = 0;

    for (i=0; i<num_chrom; i++){
      for (j=0; j<chromosomes[i]->lw_cnt; j++){

	if (chromosomes[i]->lw[j].isControl == 1){

	  /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
	  if (!strstr(chromosomes[i]->name, "chrX") || GENDER==FEMALE){
	    
	    lw_var += (LW_MEAN - chromosomes[i]->lw[j].depth) * (LW_MEAN - chromosomes[i]->lw[j].depth);
	    win_cnt++;
	    
	    lw_total += chromosomes[i]->lw[j].depth;
	    gc_index = chromosomes[i]->lw[j].gc * GC_BIN;
	    gc_total[gc_index] += chromosomes[i]->lw[j].depth;
	    gc_wincount[gc_index]++;
	    
	    if (chromosomes[i]->lw[j].depth > max) 
	      max = chromosomes[i]->lw[j].depth;
	    if (chromosomes[i]->lw[j].depth < min) 
	      min = chromosomes[i]->lw[j].depth;

	  } // AUTOSOMES

	  /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
	  else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){
	    
	    lw_var_x += (LW_MEAN_X - chromosomes[i]->lw[j].depth) * (LW_MEAN_X - chromosomes[i]->lw[j].depth);
	    win_cnt_x++;
	    
	    lw_total_x += chromosomes[i]->lw[j].depth;
	    gc_index = chromosomes[i]->lw[j].gc * GC_BIN;
	    gc_total_x[gc_index] += chromosomes[i]->lw[j].depth;
	    gc_wincount_x[gc_index]++;
	    
	    if (chromosomes[i]->lw[j].depth > max_x) 
	      max_x = chromosomes[i]->lw[j].depth;
	    if (chromosomes[i]->lw[j].depth < min_x) 
	      min_x = chromosomes[i]->lw[j].depth;
	    
	  } // chrX
	  

	} // if control
      } // outer for
    }     //    if (!doClean)

    LW_MEAN_X = lw_total_x / win_cnt_x;
    LW_STD_X = sqrt(lw_var_x / win_cnt_x);

    LW_MEAN = lw_total / win_cnt;
    LW_STD = sqrt(lw_var / win_cnt);

    MEAN = LW_MEAN;
    MEAN_X = LW_MEAN_X;

    break;

  case SW:


    if (doClean){
      for (i=0; i<num_chrom; i++){
	for (j=0; j<chromosomes[i]->sw_cnt; j++){
	  
	  if (chromosomes[i]->sw[j].isControl == 1){
	    
	    /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
	    if (!strstr(chromosomes[i]->name, "chrX") || GENDER == FEMALE){
	      
	      maxcut = SW_MEAN + 2.5 * SW_STD;
	      mincut = SW_MEAN - 2.5 * SW_STD; 
	      if (mincut < SW_MEAN / 10.0) mincut = SW_MEAN / 10.0;
	      
	      if (maxcut - SW_MEAN > SW_MEAN - mincut)
		maxcut = SW_MEAN + (SW_MEAN - mincut);
	      
	      if (chromosomes[i]->sw[j].depth > maxcut || chromosomes[i]->sw[j].depth < mincut){
		
		/* Remove this window and its neighbors from controls */
		chromosomes[i]->sw[j].isControl = 0;
		
		k = j;
		
		while (k > 0 && chromosomes[i]->sw[k-1].end >= chromosomes[i]->sw[j].start){
		  chromosomes[i]->sw[--k].isControl = 0;
		}

		k = j;

		while (k < chromosomes[i]->sw_cnt-1 && chromosomes[i]->sw[k+1].start <= chromosomes[i]->sw[j].end){
		  chromosomes[i]->sw[++k].isControl = 0;
		}

	      }
	     
	      else{
		sw_total += chromosomes[i]->sw[j].depth;
		win_cnt++;
	      }
	    }  // if AUTOSOME


	    /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
	    else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){
	      
	      maxcut_x = SW_MEAN_X + 2 * SW_STD_X;
	      mincut_x = SW_MEAN_X - 2 * SW_STD_X; 
	      if (mincut_x < SW_MEAN_X / 10.0) mincut_x = SW_MEAN_X / 10.0;

	      if (chromosomes[i]->sw[j].depth > maxcut_x || chromosomes[i]->sw[j].depth < mincut_x){

		/* Remove this window and its neighbors from controls */
		chromosomes[i]->sw[j].isControl = 0;

		k = j;
		
		while (k > 0 && chromosomes[i]->sw[k-1].end >= chromosomes[i]->sw[j].start){
		  chromosomes[i]->sw[--k].isControl = 0;
		}

		k = j;

		while (k < chromosomes[i]->sw_cnt-1 && chromosomes[i]->sw[k+1].start <= chromosomes[i]->sw[j].end){
		  chromosomes[i]->sw[++k].isControl = 0;
		}
	      }
	      
	      else{
		sw_total_x += chromosomes[i]->sw[j].depth;
		win_cnt_x++;
	      }
	    }  // if chrX
    	    	    
	  }
	  
	}
      }

      SW_MEAN_X = sw_total_x / win_cnt_x;
      SW_MEAN = sw_total / win_cnt;

    }  // do clean
  
    sw_total   = 0.0;
    win_cnt    = 0;
    sw_total_x = 0.0;
    win_cnt_x  = 0;

    for (i=0; i<num_chrom; i++){
      for (j=0; j<chromosomes[i]->sw_cnt; j++){

	if (chromosomes[i]->sw[j].isControl == 1){

	  /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
	  if (!strstr(chromosomes[i]->name, "chrX") || GENDER==FEMALE){
	    
	    sw_var += (SW_MEAN - chromosomes[i]->sw[j].depth) * (SW_MEAN - chromosomes[i]->sw[j].depth);
	    win_cnt++;
	    
	    sw_total += chromosomes[i]->sw[j].depth;
	    gc_index = chromosomes[i]->sw[j].gc * GC_BIN;
	    gc_total[gc_index] += chromosomes[i]->sw[j].depth;
	    gc_wincount[gc_index]++;
	    
	    if (chromosomes[i]->sw[j].depth > max) 
	      max = chromosomes[i]->sw[j].depth;
	    if (chromosomes[i]->sw[j].depth < min) 
	      min = chromosomes[i]->sw[j].depth;

	  } // AUTOSOMES

	  /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
	  else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){
	    
	    sw_var_x += (SW_MEAN_X - chromosomes[i]->sw[j].depth) * (SW_MEAN_X - chromosomes[i]->sw[j].depth);
	    win_cnt_x++;
	    
	    sw_total_x += chromosomes[i]->sw[j].depth;
	    gc_index = chromosomes[i]->sw[j].gc * GC_BIN;
	    gc_total_x[gc_index] += chromosomes[i]->sw[j].depth;
	    gc_wincount_x[gc_index]++;
	    
	    if (chromosomes[i]->sw[j].depth > max_x) 
	      max_x = chromosomes[i]->sw[j].depth;
	    if (chromosomes[i]->sw[j].depth < min_x) 
	      min_x = chromosomes[i]->sw[j].depth;
	    
	  } // chrX
	  

	} // if control
      } // outer for
    }     //    if (!doClean)

    SW_MEAN_X = sw_total_x / win_cnt_x;
    SW_STD_X = sqrt(sw_var_x / win_cnt_x);

    SW_MEAN = sw_total / win_cnt;
    SW_STD = sqrt(sw_var / win_cnt);

    MEAN = SW_MEAN;
    MEAN_X = SW_MEAN_X;


    break;

  case CW:

    if (doClean){
      for (i=0; i<num_chrom; i++){
	for (j=0; j<chromosomes[i]->cw_cnt; j++){
	  
	      
	  if (chromosomes[i]->cw[j].isControl == 1){

	    /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
	    if (!strstr(chromosomes[i]->name, "chrX") || GENDER == FEMALE){
	      
	      maxcut = CW_MEAN + 2.5 * CW_STD;
	      mincut = CW_MEAN - 2.5 * CW_STD; 
	      if (mincut < CW_MEAN / 10.0) mincut = CW_MEAN / 10.0;
	      
	      if (maxcut - CW_MEAN > CW_MEAN - mincut)
		maxcut = CW_MEAN + (CW_MEAN - mincut);
	      
	      if (chromosomes[i]->cw[j].depth > maxcut || chromosomes[i]->cw[j].depth < mincut){
		
		/* Remove this window and its neighbors from controls */
		chromosomes[i]->cw[j].isControl = 0;
		
		if (j > 0)
		  chromosomes[i]->cw[j-1].isControl = 0;
		if (j < chromosomes[i]->cw_cnt-1)
		  chromosomes[i]->cw[j+1].isControl = 0;
	      }
	      else{
		cw_total += chromosomes[i]->cw[j].depth;
		win_cnt++;
	      }
	    }  // if AUTOSOME


	    /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
	    else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){
	      
	      maxcut_x = CW_MEAN_X + 2 * CW_STD_X;
	      mincut_x = CW_MEAN_X - 2 * CW_STD_X; 
	      if (mincut_x < CW_MEAN_X / 10.0) mincut_x = CW_MEAN_X / 10.0;

	      if (chromosomes[i]->cw[j].depth > maxcut_x || chromosomes[i]->cw[j].depth < mincut_x){

		/* Remove this window and its neighbors from controls */
		chromosomes[i]->cw[j].isControl = 0;

		if (j > 0)
		  chromosomes[i]->cw[j-1].isControl = 0;
		if (j < chromosomes[i]->cw_cnt-1)
		  chromosomes[i]->cw[j+1].isControl = 0;
	      }
	      else{
		cw_total_x += chromosomes[i]->cw[j].depth;
		win_cnt_x++;
	      }
	    }  // if chrX
    	    	    
	  }
	  
	}
      }


      CW_MEAN_X = cw_total_x / win_cnt_x;

      CW_MEAN = cw_total / win_cnt;

    }  // do clean
  
    cw_total   = 0.0;
    win_cnt    = 0;
    cw_total_x = 0.0;
    win_cnt_x  = 0;

    for (i=0; i<num_chrom; i++){
      for (j=0; j<chromosomes[i]->cw_cnt; j++){

	if (chromosomes[i]->cw[j].isControl == 1){

	  /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
	  if (!strstr(chromosomes[i]->name, "chrX") || GENDER==FEMALE){
	    
	    cw_var += (CW_MEAN - chromosomes[i]->cw[j].depth) * (CW_MEAN - chromosomes[i]->cw[j].depth);
	    win_cnt++;
	    
	    cw_total += chromosomes[i]->cw[j].depth;
	    gc_index = chromosomes[i]->cw[j].gc * GC_BIN;
	    gc_total[gc_index] += chromosomes[i]->cw[j].depth;
	    gc_wincount[gc_index]++;
	    
	    if (chromosomes[i]->cw[j].depth > max) 
	      max = chromosomes[i]->cw[j].depth;
	    if (chromosomes[i]->cw[j].depth < min) 
	      min = chromosomes[i]->cw[j].depth;

	  } // AUTOSOMES

	  /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
	  else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){
	    
	    cw_var_x += (CW_MEAN_X - chromosomes[i]->cw[j].depth) * (CW_MEAN_X - chromosomes[i]->cw[j].depth);
	    win_cnt_x++;
	    
	    cw_total_x += chromosomes[i]->cw[j].depth;
	    gc_index = chromosomes[i]->cw[j].gc * GC_BIN;
	    gc_total_x[gc_index] += chromosomes[i]->cw[j].depth;
	    gc_wincount_x[gc_index]++;
	    
	    if (chromosomes[i]->cw[j].depth > max_x) 
	      max_x = chromosomes[i]->cw[j].depth;
	    if (chromosomes[i]->cw[j].depth < min_x) 
	      min_x = chromosomes[i]->cw[j].depth;
	    
	  } // chrX
	  

	} // if control
      } // outer for
    }     //    if (!doClean)


    CW_MEAN_X = cw_total_x / win_cnt_x;
    CW_STD_X = sqrt(cw_var_x / win_cnt_x);

    CW_MEAN = cw_total / win_cnt;
    CW_STD = sqrt(cw_var / win_cnt);

    MEAN = CW_MEAN;
    MEAN_X = CW_MEAN_X;

    break;   
  }


  /* calculate the gclookup table */

  for (i=0; i<GC_BIN; i++){

    /* AUTOSOMES */

    if (gc_wincount[i] == 0 || gc_total[i] == 0){
      j=i-1;
      
      while (j >= 0 && gc_total[j] == 0) 
	j--;

      if (gc_total[j] == 0 || gc_wincount[j] == 0){
	j=i+1;
	while (j < GC_BIN && gc_total[j] == 0) j++;
      }

      gc_total[i]    = gc_total[j];
      gc_wincount[i] = gc_wincount[j];
    }

    if (gc_total[i] != 0.0){
      gclookup[i] = gc_total[i] / (float) gc_wincount[i];    
      
      if (MULTGC){
	
	/*
	if (VERBOSE && i > 200)
	  fprintf(stdout, "GC\%: %f\tlookup: %f\tnow: ", ((float)i / 10.0), gclookup[i]);
	*/

	gclookup[i] = MEAN / gclookup[i];    

	/*
	if (VERBOSE && i > 200)
	  fprintf(stdout, "%f\n", gclookup[i]);
	*/

	if (gclookup[i] > MAX_GC_CORR) 
	  gclookup[i] = MAX_GC_CORR;
	else if (gclookup[i] < MIN_GC_CORR) 
	  gclookup[i] = MIN_GC_CORR;
      }

      /*
      if (!MULTGC && VERBOSE && i > 200)
	fprintf(stdout, "GC\%: %f\tlookup: %f\n", ((float)i / 10.0), gclookup[i]);
      */

    }



    /* chrX */

    if (GENDER != FEMALE){
      
      if (gc_wincount_x[i] == 0 || gc_total_x[i] == 0){
	j=i-1;
	
	while (j >= 0 && gc_total_x[j] == 0) 
	  j--;
	
	if (gc_total_x[j] == 0 || gc_wincount_x[j] == 0){
	  j=i+1;
	  while (j < GC_BIN && gc_total_x[j] == 0) j++;
	}
	
	gc_total_x[i]    = gc_total_x[j];
	gc_wincount_x[i] = gc_wincount_x[j];
      }
      
      if (gc_total_x[i] != 0.0){
	gclookup_x[i] = gc_total_x[i] / (float) gc_wincount_x[i];    
	
	if (MULTGC){
	  gclookup_x[i] = MEAN_X / gclookup_x[i];    
	  if (gclookup_x[i] > MAX_GC_CORR) 
	    gclookup_x[i] = MAX_GC_CORR;
	  else if (gclookup_x[i] < MIN_GC_CORR) 
	    gclookup_x[i] = MIN_GC_CORR;
	}
	
      }

    }


  }


  *_max   = max;
  *_max_x = max_x;
  
  *_min   = min;
  *_min_x = min_x;

  
}