diff mrcanavar-0.34/callcnv.c @ 0:86522a0b5f59 default tip

Uploaded source code for mrCaNaVaR
author calkan
date Tue, 21 Feb 2012 10:44:13 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mrcanavar-0.34/callcnv.c	Tue Feb 21 10:44:13 2012 -0500
@@ -0,0 +1,286 @@
+#include "callcnv.h"
+
+void call_cnv(char *depthFile, char *out_prefix){
+
+  int i, j;
+
+  float *gclookup;
+  float *gclookup_x;
+
+  char *fname;
+  char logname[2 * MAX_STR];
+  FILE *log;
+
+  if (GENOME_CONF == NULL)
+    print_error("Select genome configuration file (input) through the -conf parameter.\n");
+  if (depthFile == NULL)
+    print_error("Select read depth output file through the -depth parameter.\n");
+  if (out_prefix == NULL)
+    print_error("Select output file prefix through the -o parameter.\n");
+  
+
+
+  loadRefConfig(GENOME_CONF);
+
+  readDepth(depthFile);
+
+
+  sprintf(logname, "%s.log", out_prefix);
+  log = my_fopen(logname, "w", 0);
+
+  /* trivial control removal */
+
+  
+  fprintf(stdout, "Control region cleanup...");
+  fflush(stdout);
+
+  /* add stdev calculation here */
+
+
+  for (i=0; i<num_chrom; i++){
+    for (j=0; j<chromosomes[i]->lw_cnt; j++){
+      if (chromosomes[i]->lw[j].depth > (float) LW_MEAN * 2.0 || chromosomes[i]->lw[j].depth < (float) LW_MEAN / 10.0)
+	chromosomes[i]->lw[j].isControl = 0;
+      if (strstr(chromosomes[i]->name, "random") || strstr(chromosomes[i]->name, "chrY") || strstr(chromosomes[i]->name, "hap") || strstr(chromosomes[i]->name, "chrUn"))
+	chromosomes[i]->lw[j].isControl = 0;
+    }
+    for (j=0; j<chromosomes[i]->sw_cnt; j++){
+      if (chromosomes[i]->sw[j].depth > (float) SW_MEAN * 2.0 || chromosomes[i]->sw[j].depth < (float) SW_MEAN / 10.0)
+	chromosomes[i]->sw[j].isControl = 0;
+      if (strstr(chromosomes[i]->name, "random") || strstr(chromosomes[i]->name, "chrY") || strstr(chromosomes[i]->name, "hap") || strstr(chromosomes[i]->name, "chrUn"))
+	chromosomes[i]->sw[j].isControl = 0;
+    }
+
+    for (j=0; j<chromosomes[i]->cw_cnt; j++){
+      if (chromosomes[i]->cw[j].depth > (float) CW_MEAN * 2.0 || chromosomes[i]->cw[j].depth < (float) CW_MEAN / 10.0)
+	chromosomes[i]->cw[j].isControl = 0;
+      if (strstr(chromosomes[i]->name, "random") || strstr(chromosomes[i]->name, "chrY") || strstr(chromosomes[i]->name, "hap") || strstr(chromosomes[i]->name, "chrUn"))
+	chromosomes[i]->cw[j].isControl = 0;
+    }
+  }
+
+
+
+  fprintf(stdout, "\n");
+
+  gclookup   = (float *) malloc(sizeof(float) * GC_BIN);
+  gclookup_x = (float *) malloc(sizeof(float) * GC_BIN);
+
+  fprintf (log, "\nmrCaNaVaR version %s\nLast update: %s\n\n", VERSION, LAST_UPDATE);
+  fprintf (log, "Calculating library %s\n", out_prefix);
+  fprintf (log, "GC correction mode: %s\n", MULTGC == 1 ? "MULTIPLICATIVE" : "ADDITIVE");
+
+  norm_until_converges(CW, gclookup, gclookup_x);
+
+  fprintf (log, "\nAfter GC Correction:\n--------------------\n");
+  fprintf (log, "Sample Gender: %s.\n", GENDER == MALE ? "Male" : "Female");
+  fprintf (log, "CW Average Read Depth: %f, Standard Deviation: %f\n", CW_MEAN, CW_STD);
+
+
+  if (GENDER == MALE)
+    fprintf (log, "CW Average chrX Read Depth: %f, Standard Deviation: %f\n", CW_MEAN_X, CW_STD_X);
+
+  norm_until_converges(LW, gclookup, gclookup_x);
+  fprintf (log, "LW Average Read Depth: %f, Standard Deviation: %f\n", LW_MEAN, LW_STD);
+  if (GENDER == MALE)
+    fprintf (log, "LW Average chrX Read Depth: %f, Standard Deviation: %f\n", LW_MEAN_X, LW_STD_X);
+
+  norm_until_converges(SW, gclookup, gclookup_x);
+  fprintf (log, "SW Average Read Depth: %f, Standard Deviation: %f\n", SW_MEAN, SW_STD);
+  if (GENDER == MALE)
+    fprintf (log, "SW Average chrX Read Depth: %f, Standard Deviation: %f\n", SW_MEAN_X, SW_STD_X);
+
+
+  fprintf (stdout, "Writing normalized CW depth to: %s.cw_norm.bed.\n", out_prefix);
+
+  fname = (char *) malloc(sizeof (char) * (strlen(out_prefix) + strlen(".cw_norm.bed") + 1));
+
+  sprintf (fname, "%s.cw_norm.bed", out_prefix);
+  dump_text_windows(fname, CW);
+
+  fprintf (stdout, "Writing normalized LW depth to: %s.lw_norm.bed.\n", out_prefix);
+
+  sprintf (fname, "%s.lw_norm.bed", out_prefix);
+  dump_text_windows(fname, LW);
+
+  fprintf (stdout, "Writing normalized SW depth to: %s.sw_norm.bed.\n", out_prefix);
+
+  sprintf (fname, "%s.sw_norm.bed", out_prefix);
+  dump_text_windows(fname, SW);
+
+  sprintf (fname, "%s.copynumber.bed", out_prefix);
+  fprintf (stdout, "Writing copy numbers to: %s.copynumber.bed. \n", out_prefix);
+  print_copy_numbers(fname);
+
+  free(fname);
+
+  free(gclookup);
+  free(gclookup_x);
+  fclose(log);
+
+}
+
+
+
+void readDepth(char *depthFile){
+
+  FILE *binDepth;
+  int i, j;
+  int retVal;
+  
+  float lw_total;
+  float sw_total;
+  float cw_total;
+
+  int lw_cnt;
+  int sw_cnt;
+  int cw_cnt;
+  
+  int isMagicNum;
+
+
+  binDepth = my_fopen(depthFile, "r", 0);
+
+  retVal = fread(&isMagicNum, sizeof(isMagicNum), 1, binDepth);
+  
+  if (isMagicNum != magicNum)
+    print_error("Read depth file seems to be invalid or corrupt.\n");
+    
+
+  lw_total = 0.0;
+  sw_total = 0.0;
+  cw_total = 0.0;
+
+  lw_cnt   = 0;
+  sw_cnt   = 0;
+  cw_cnt   = 0;
+
+  /* read LW */
+
+  for (i = 0; i < num_chrom; i++){
+    for (j = 0; j < chromosomes[i]->lw_cnt; j++){
+      retVal = fread(&(chromosomes[i]->lw[j].depth), sizeof(chromosomes[i]->lw[j].depth), 1, binDepth);
+      lw_total += chromosomes[i]->lw[j].depth;
+      chromosomes[i]->lw[j].isControl = 1;
+      lw_cnt++;
+    }
+  }
+
+  /* read SW */
+
+  for (i = 0; i < num_chrom; i++){
+    for (j = 0; j < chromosomes[i]->sw_cnt; j++){
+      retVal = fread(&(chromosomes[i]->sw[j].depth), sizeof(chromosomes[i]->sw[j].depth), 1, binDepth);
+      sw_total += chromosomes[i]->sw[j].depth;
+      chromosomes[i]->sw[j].isControl = 1;
+      sw_cnt++;
+    }
+  }
+
+  /* read CW */
+
+  for (i = 0; i < num_chrom; i++){
+    for (j = 0; j < chromosomes[i]->cw_cnt; j++){
+      retVal = fread(&(chromosomes[i]->cw[j].depth), sizeof(chromosomes[i]->cw[j].depth), 1, binDepth);
+      cw_total += chromosomes[i]->cw[j].depth;
+      chromosomes[i]->cw[j].isControl = 1;
+      cw_cnt++;
+    }
+  }
+
+  LW_MEAN = lw_total / lw_cnt;
+  SW_MEAN = sw_total / sw_cnt;
+  CW_MEAN = cw_total / cw_cnt;
+
+  fprintf(stdout, "[OK] depth file %s is loaded.\n", depthFile);
+  if (VERBOSE)
+    fprintf(stdout, "LW_MEAN: %f\tSW_MEAN: %f\tCW_MEAN:%f\n",  LW_MEAN, SW_MEAN, CW_MEAN);
+  
+  fclose(binDepth);
+}
+
+
+void dump_text_windows(char *fname, enum WINDOWTYPE wt){
+
+  FILE *txtDepth;
+  int i, j;
+
+  txtDepth = my_fopen(fname, "w", 0);
+
+  fprintf(txtDepth, "#%s\t%s\t%s\t%s\t%s\t%s\n\n", "CHROM", "START", "END", "GC\%", "READ_DEPTH", "IS_CONTROL");
+
+  switch (wt){
+  case LW:
+    for (i = 0; i < num_chrom; i++){
+      for (j = 0; j < chromosomes[i]->lw_cnt; j++)
+        if (chromosomes[i]->lw[j].isControl == 1)
+          fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tY\n", chromosomes[i]->name, chromosomes[i]->lw[j].start, chromosomes[i]->lw[j].end, chromosomes[i]->lw[j].gc, chromosomes[i]->lw[j].depth);
+	else
+          fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tN\n", chromosomes[i]->name, chromosomes[i]->lw[j].start, chromosomes[i]->lw[j].end, chromosomes[i]->lw[j].gc, chromosomes[i]->lw[j].depth);
+    }
+    break;
+
+  case SW:
+    for (i = 0; i < num_chrom; i++){
+      for (j = 0; j < chromosomes[i]->sw_cnt; j++)
+        if (chromosomes[i]->sw[j].isControl == 1)
+          fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tY\n", chromosomes[i]->name, chromosomes[i]->sw[j].start, chromosomes[i]->sw[j].end, chromosomes[i]->sw[j].gc, chromosomes[i]->sw[j].depth);
+	else
+          fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tN\n", chromosomes[i]->name, chromosomes[i]->sw[j].start, chromosomes[i]->sw[j].end, chromosomes[i]->sw[j].gc, chromosomes[i]->sw[j].depth);
+    }
+    break;
+
+  case CW:
+    for (i = 0; i < num_chrom; i++){
+      for (j = 0; j < chromosomes[i]->cw_cnt; j++)
+        if (chromosomes[i]->cw[j].isControl == 1)
+          fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tY\n", chromosomes[i]->name, chromosomes[i]->cw[j].start, chromosomes[i]->cw[j].end, chromosomes[i]->cw[j].gc, chromosomes[i]->cw[j].depth);
+	else
+          fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tN\n", chromosomes[i]->name, chromosomes[i]->cw[j].start, chromosomes[i]->cw[j].end, chromosomes[i]->cw[j].gc, chromosomes[i]->cw[j].depth);
+    }
+    break;
+  }
+
+  fclose(txtDepth);
+
+}
+
+
+
+void  print_copy_numbers(char *fname){
+  /* UNFINISHED */
+
+  FILE *txtDepth;
+  int i, j;
+  float copy_num;
+
+  txtDepth = my_fopen(fname, "w", 0);
+
+  fprintf(txtDepth, "#%s\t%s\t%s\t%s\t%s\n\n", "CHROM", "START", "END", "GC\%", "COPYNUMBER");
+  
+  for (i = 0; i < num_chrom; i++){
+    for (j = 0; j < chromosomes[i]->cw_cnt; j++){
+      if (GENDER == FEMALE)
+	copy_num = (chromosomes[i]->cw[j].depth / CW_MEAN) * 2;
+      else{
+	if (!strstr(chromosomes[i]->name, "chrX") && !strstr(chromosomes[i]->name, "chrY"))
+	  copy_num = (chromosomes[i]->cw[j].depth / CW_MEAN) * 2;
+	else
+	  copy_num = chromosomes[i]->cw[j].depth / CW_MEAN_X;
+      }
+
+      if (GENDER == FEMALE && (strstr(chromosomes[i]->name, "chrY")))
+	continue;
+
+      fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\n", chromosomes[i]->name, chromosomes[i]->cw[j].start, chromosomes[i]->cw[j].end, chromosomes[i]->cw[j].gc, copy_num);
+    }
+  }
+
+
+  fclose(txtDepth);
+
+
+}
+
+
+