Mercurial > repos > calkan > mrcanavar
diff mrcanavar-0.34/callcnv.c @ 0:86522a0b5f59 default tip
Uploaded source code for mrCaNaVaR
author | calkan |
---|---|
date | Tue, 21 Feb 2012 10:44:13 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mrcanavar-0.34/callcnv.c Tue Feb 21 10:44:13 2012 -0500 @@ -0,0 +1,286 @@ +#include "callcnv.h" + +void call_cnv(char *depthFile, char *out_prefix){ + + int i, j; + + float *gclookup; + float *gclookup_x; + + char *fname; + char logname[2 * MAX_STR]; + FILE *log; + + if (GENOME_CONF == NULL) + print_error("Select genome configuration file (input) through the -conf parameter.\n"); + if (depthFile == NULL) + print_error("Select read depth output file through the -depth parameter.\n"); + if (out_prefix == NULL) + print_error("Select output file prefix through the -o parameter.\n"); + + + + loadRefConfig(GENOME_CONF); + + readDepth(depthFile); + + + sprintf(logname, "%s.log", out_prefix); + log = my_fopen(logname, "w", 0); + + /* trivial control removal */ + + + fprintf(stdout, "Control region cleanup..."); + fflush(stdout); + + /* add stdev calculation here */ + + + for (i=0; i<num_chrom; i++){ + for (j=0; j<chromosomes[i]->lw_cnt; j++){ + if (chromosomes[i]->lw[j].depth > (float) LW_MEAN * 2.0 || chromosomes[i]->lw[j].depth < (float) LW_MEAN / 10.0) + chromosomes[i]->lw[j].isControl = 0; + if (strstr(chromosomes[i]->name, "random") || strstr(chromosomes[i]->name, "chrY") || strstr(chromosomes[i]->name, "hap") || strstr(chromosomes[i]->name, "chrUn")) + chromosomes[i]->lw[j].isControl = 0; + } + for (j=0; j<chromosomes[i]->sw_cnt; j++){ + if (chromosomes[i]->sw[j].depth > (float) SW_MEAN * 2.0 || chromosomes[i]->sw[j].depth < (float) SW_MEAN / 10.0) + chromosomes[i]->sw[j].isControl = 0; + if (strstr(chromosomes[i]->name, "random") || strstr(chromosomes[i]->name, "chrY") || strstr(chromosomes[i]->name, "hap") || strstr(chromosomes[i]->name, "chrUn")) + chromosomes[i]->sw[j].isControl = 0; + } + + for (j=0; j<chromosomes[i]->cw_cnt; j++){ + if (chromosomes[i]->cw[j].depth > (float) CW_MEAN * 2.0 || chromosomes[i]->cw[j].depth < (float) CW_MEAN / 10.0) + chromosomes[i]->cw[j].isControl = 0; + if (strstr(chromosomes[i]->name, "random") || strstr(chromosomes[i]->name, "chrY") || strstr(chromosomes[i]->name, "hap") || strstr(chromosomes[i]->name, "chrUn")) + chromosomes[i]->cw[j].isControl = 0; + } + } + + + + fprintf(stdout, "\n"); + + gclookup = (float *) malloc(sizeof(float) * GC_BIN); + gclookup_x = (float *) malloc(sizeof(float) * GC_BIN); + + fprintf (log, "\nmrCaNaVaR version %s\nLast update: %s\n\n", VERSION, LAST_UPDATE); + fprintf (log, "Calculating library %s\n", out_prefix); + fprintf (log, "GC correction mode: %s\n", MULTGC == 1 ? "MULTIPLICATIVE" : "ADDITIVE"); + + norm_until_converges(CW, gclookup, gclookup_x); + + fprintf (log, "\nAfter GC Correction:\n--------------------\n"); + fprintf (log, "Sample Gender: %s.\n", GENDER == MALE ? "Male" : "Female"); + fprintf (log, "CW Average Read Depth: %f, Standard Deviation: %f\n", CW_MEAN, CW_STD); + + + if (GENDER == MALE) + fprintf (log, "CW Average chrX Read Depth: %f, Standard Deviation: %f\n", CW_MEAN_X, CW_STD_X); + + norm_until_converges(LW, gclookup, gclookup_x); + fprintf (log, "LW Average Read Depth: %f, Standard Deviation: %f\n", LW_MEAN, LW_STD); + if (GENDER == MALE) + fprintf (log, "LW Average chrX Read Depth: %f, Standard Deviation: %f\n", LW_MEAN_X, LW_STD_X); + + norm_until_converges(SW, gclookup, gclookup_x); + fprintf (log, "SW Average Read Depth: %f, Standard Deviation: %f\n", SW_MEAN, SW_STD); + if (GENDER == MALE) + fprintf (log, "SW Average chrX Read Depth: %f, Standard Deviation: %f\n", SW_MEAN_X, SW_STD_X); + + + fprintf (stdout, "Writing normalized CW depth to: %s.cw_norm.bed.\n", out_prefix); + + fname = (char *) malloc(sizeof (char) * (strlen(out_prefix) + strlen(".cw_norm.bed") + 1)); + + sprintf (fname, "%s.cw_norm.bed", out_prefix); + dump_text_windows(fname, CW); + + fprintf (stdout, "Writing normalized LW depth to: %s.lw_norm.bed.\n", out_prefix); + + sprintf (fname, "%s.lw_norm.bed", out_prefix); + dump_text_windows(fname, LW); + + fprintf (stdout, "Writing normalized SW depth to: %s.sw_norm.bed.\n", out_prefix); + + sprintf (fname, "%s.sw_norm.bed", out_prefix); + dump_text_windows(fname, SW); + + sprintf (fname, "%s.copynumber.bed", out_prefix); + fprintf (stdout, "Writing copy numbers to: %s.copynumber.bed. \n", out_prefix); + print_copy_numbers(fname); + + free(fname); + + free(gclookup); + free(gclookup_x); + fclose(log); + +} + + + +void readDepth(char *depthFile){ + + FILE *binDepth; + int i, j; + int retVal; + + float lw_total; + float sw_total; + float cw_total; + + int lw_cnt; + int sw_cnt; + int cw_cnt; + + int isMagicNum; + + + binDepth = my_fopen(depthFile, "r", 0); + + retVal = fread(&isMagicNum, sizeof(isMagicNum), 1, binDepth); + + if (isMagicNum != magicNum) + print_error("Read depth file seems to be invalid or corrupt.\n"); + + + lw_total = 0.0; + sw_total = 0.0; + cw_total = 0.0; + + lw_cnt = 0; + sw_cnt = 0; + cw_cnt = 0; + + /* read LW */ + + for (i = 0; i < num_chrom; i++){ + for (j = 0; j < chromosomes[i]->lw_cnt; j++){ + retVal = fread(&(chromosomes[i]->lw[j].depth), sizeof(chromosomes[i]->lw[j].depth), 1, binDepth); + lw_total += chromosomes[i]->lw[j].depth; + chromosomes[i]->lw[j].isControl = 1; + lw_cnt++; + } + } + + /* read SW */ + + for (i = 0; i < num_chrom; i++){ + for (j = 0; j < chromosomes[i]->sw_cnt; j++){ + retVal = fread(&(chromosomes[i]->sw[j].depth), sizeof(chromosomes[i]->sw[j].depth), 1, binDepth); + sw_total += chromosomes[i]->sw[j].depth; + chromosomes[i]->sw[j].isControl = 1; + sw_cnt++; + } + } + + /* read CW */ + + for (i = 0; i < num_chrom; i++){ + for (j = 0; j < chromosomes[i]->cw_cnt; j++){ + retVal = fread(&(chromosomes[i]->cw[j].depth), sizeof(chromosomes[i]->cw[j].depth), 1, binDepth); + cw_total += chromosomes[i]->cw[j].depth; + chromosomes[i]->cw[j].isControl = 1; + cw_cnt++; + } + } + + LW_MEAN = lw_total / lw_cnt; + SW_MEAN = sw_total / sw_cnt; + CW_MEAN = cw_total / cw_cnt; + + fprintf(stdout, "[OK] depth file %s is loaded.\n", depthFile); + if (VERBOSE) + fprintf(stdout, "LW_MEAN: %f\tSW_MEAN: %f\tCW_MEAN:%f\n", LW_MEAN, SW_MEAN, CW_MEAN); + + fclose(binDepth); +} + + +void dump_text_windows(char *fname, enum WINDOWTYPE wt){ + + FILE *txtDepth; + int i, j; + + txtDepth = my_fopen(fname, "w", 0); + + fprintf(txtDepth, "#%s\t%s\t%s\t%s\t%s\t%s\n\n", "CHROM", "START", "END", "GC\%", "READ_DEPTH", "IS_CONTROL"); + + switch (wt){ + case LW: + for (i = 0; i < num_chrom; i++){ + for (j = 0; j < chromosomes[i]->lw_cnt; j++) + if (chromosomes[i]->lw[j].isControl == 1) + fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tY\n", chromosomes[i]->name, chromosomes[i]->lw[j].start, chromosomes[i]->lw[j].end, chromosomes[i]->lw[j].gc, chromosomes[i]->lw[j].depth); + else + fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tN\n", chromosomes[i]->name, chromosomes[i]->lw[j].start, chromosomes[i]->lw[j].end, chromosomes[i]->lw[j].gc, chromosomes[i]->lw[j].depth); + } + break; + + case SW: + for (i = 0; i < num_chrom; i++){ + for (j = 0; j < chromosomes[i]->sw_cnt; j++) + if (chromosomes[i]->sw[j].isControl == 1) + fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tY\n", chromosomes[i]->name, chromosomes[i]->sw[j].start, chromosomes[i]->sw[j].end, chromosomes[i]->sw[j].gc, chromosomes[i]->sw[j].depth); + else + fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tN\n", chromosomes[i]->name, chromosomes[i]->sw[j].start, chromosomes[i]->sw[j].end, chromosomes[i]->sw[j].gc, chromosomes[i]->sw[j].depth); + } + break; + + case CW: + for (i = 0; i < num_chrom; i++){ + for (j = 0; j < chromosomes[i]->cw_cnt; j++) + if (chromosomes[i]->cw[j].isControl == 1) + fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tY\n", chromosomes[i]->name, chromosomes[i]->cw[j].start, chromosomes[i]->cw[j].end, chromosomes[i]->cw[j].gc, chromosomes[i]->cw[j].depth); + else + fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tN\n", chromosomes[i]->name, chromosomes[i]->cw[j].start, chromosomes[i]->cw[j].end, chromosomes[i]->cw[j].gc, chromosomes[i]->cw[j].depth); + } + break; + } + + fclose(txtDepth); + +} + + + +void print_copy_numbers(char *fname){ + /* UNFINISHED */ + + FILE *txtDepth; + int i, j; + float copy_num; + + txtDepth = my_fopen(fname, "w", 0); + + fprintf(txtDepth, "#%s\t%s\t%s\t%s\t%s\n\n", "CHROM", "START", "END", "GC\%", "COPYNUMBER"); + + for (i = 0; i < num_chrom; i++){ + for (j = 0; j < chromosomes[i]->cw_cnt; j++){ + if (GENDER == FEMALE) + copy_num = (chromosomes[i]->cw[j].depth / CW_MEAN) * 2; + else{ + if (!strstr(chromosomes[i]->name, "chrX") && !strstr(chromosomes[i]->name, "chrY")) + copy_num = (chromosomes[i]->cw[j].depth / CW_MEAN) * 2; + else + copy_num = chromosomes[i]->cw[j].depth / CW_MEAN_X; + } + + if (GENDER == FEMALE && (strstr(chromosomes[i]->name, "chrY"))) + continue; + + fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\n", chromosomes[i]->name, chromosomes[i]->cw[j].start, chromosomes[i]->cw[j].end, chromosomes[i]->cw[j].gc, copy_num); + } + } + + + fclose(txtDepth); + + +} + + +