comparison mrcanavar-0.34/callcnv.c @ 0:86522a0b5f59 default tip

Uploaded source code for mrCaNaVaR
author calkan
date Tue, 21 Feb 2012 10:44:13 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:86522a0b5f59
1 #include "callcnv.h"
2
3 void call_cnv(char *depthFile, char *out_prefix){
4
5 int i, j;
6
7 float *gclookup;
8 float *gclookup_x;
9
10 char *fname;
11 char logname[2 * MAX_STR];
12 FILE *log;
13
14 if (GENOME_CONF == NULL)
15 print_error("Select genome configuration file (input) through the -conf parameter.\n");
16 if (depthFile == NULL)
17 print_error("Select read depth output file through the -depth parameter.\n");
18 if (out_prefix == NULL)
19 print_error("Select output file prefix through the -o parameter.\n");
20
21
22
23 loadRefConfig(GENOME_CONF);
24
25 readDepth(depthFile);
26
27
28 sprintf(logname, "%s.log", out_prefix);
29 log = my_fopen(logname, "w", 0);
30
31 /* trivial control removal */
32
33
34 fprintf(stdout, "Control region cleanup...");
35 fflush(stdout);
36
37 /* add stdev calculation here */
38
39
40 for (i=0; i<num_chrom; i++){
41 for (j=0; j<chromosomes[i]->lw_cnt; j++){
42 if (chromosomes[i]->lw[j].depth > (float) LW_MEAN * 2.0 || chromosomes[i]->lw[j].depth < (float) LW_MEAN / 10.0)
43 chromosomes[i]->lw[j].isControl = 0;
44 if (strstr(chromosomes[i]->name, "random") || strstr(chromosomes[i]->name, "chrY") || strstr(chromosomes[i]->name, "hap") || strstr(chromosomes[i]->name, "chrUn"))
45 chromosomes[i]->lw[j].isControl = 0;
46 }
47 for (j=0; j<chromosomes[i]->sw_cnt; j++){
48 if (chromosomes[i]->sw[j].depth > (float) SW_MEAN * 2.0 || chromosomes[i]->sw[j].depth < (float) SW_MEAN / 10.0)
49 chromosomes[i]->sw[j].isControl = 0;
50 if (strstr(chromosomes[i]->name, "random") || strstr(chromosomes[i]->name, "chrY") || strstr(chromosomes[i]->name, "hap") || strstr(chromosomes[i]->name, "chrUn"))
51 chromosomes[i]->sw[j].isControl = 0;
52 }
53
54 for (j=0; j<chromosomes[i]->cw_cnt; j++){
55 if (chromosomes[i]->cw[j].depth > (float) CW_MEAN * 2.0 || chromosomes[i]->cw[j].depth < (float) CW_MEAN / 10.0)
56 chromosomes[i]->cw[j].isControl = 0;
57 if (strstr(chromosomes[i]->name, "random") || strstr(chromosomes[i]->name, "chrY") || strstr(chromosomes[i]->name, "hap") || strstr(chromosomes[i]->name, "chrUn"))
58 chromosomes[i]->cw[j].isControl = 0;
59 }
60 }
61
62
63
64 fprintf(stdout, "\n");
65
66 gclookup = (float *) malloc(sizeof(float) * GC_BIN);
67 gclookup_x = (float *) malloc(sizeof(float) * GC_BIN);
68
69 fprintf (log, "\nmrCaNaVaR version %s\nLast update: %s\n\n", VERSION, LAST_UPDATE);
70 fprintf (log, "Calculating library %s\n", out_prefix);
71 fprintf (log, "GC correction mode: %s\n", MULTGC == 1 ? "MULTIPLICATIVE" : "ADDITIVE");
72
73 norm_until_converges(CW, gclookup, gclookup_x);
74
75 fprintf (log, "\nAfter GC Correction:\n--------------------\n");
76 fprintf (log, "Sample Gender: %s.\n", GENDER == MALE ? "Male" : "Female");
77 fprintf (log, "CW Average Read Depth: %f, Standard Deviation: %f\n", CW_MEAN, CW_STD);
78
79
80 if (GENDER == MALE)
81 fprintf (log, "CW Average chrX Read Depth: %f, Standard Deviation: %f\n", CW_MEAN_X, CW_STD_X);
82
83 norm_until_converges(LW, gclookup, gclookup_x);
84 fprintf (log, "LW Average Read Depth: %f, Standard Deviation: %f\n", LW_MEAN, LW_STD);
85 if (GENDER == MALE)
86 fprintf (log, "LW Average chrX Read Depth: %f, Standard Deviation: %f\n", LW_MEAN_X, LW_STD_X);
87
88 norm_until_converges(SW, gclookup, gclookup_x);
89 fprintf (log, "SW Average Read Depth: %f, Standard Deviation: %f\n", SW_MEAN, SW_STD);
90 if (GENDER == MALE)
91 fprintf (log, "SW Average chrX Read Depth: %f, Standard Deviation: %f\n", SW_MEAN_X, SW_STD_X);
92
93
94 fprintf (stdout, "Writing normalized CW depth to: %s.cw_norm.bed.\n", out_prefix);
95
96 fname = (char *) malloc(sizeof (char) * (strlen(out_prefix) + strlen(".cw_norm.bed") + 1));
97
98 sprintf (fname, "%s.cw_norm.bed", out_prefix);
99 dump_text_windows(fname, CW);
100
101 fprintf (stdout, "Writing normalized LW depth to: %s.lw_norm.bed.\n", out_prefix);
102
103 sprintf (fname, "%s.lw_norm.bed", out_prefix);
104 dump_text_windows(fname, LW);
105
106 fprintf (stdout, "Writing normalized SW depth to: %s.sw_norm.bed.\n", out_prefix);
107
108 sprintf (fname, "%s.sw_norm.bed", out_prefix);
109 dump_text_windows(fname, SW);
110
111 sprintf (fname, "%s.copynumber.bed", out_prefix);
112 fprintf (stdout, "Writing copy numbers to: %s.copynumber.bed. \n", out_prefix);
113 print_copy_numbers(fname);
114
115 free(fname);
116
117 free(gclookup);
118 free(gclookup_x);
119 fclose(log);
120
121 }
122
123
124
125 void readDepth(char *depthFile){
126
127 FILE *binDepth;
128 int i, j;
129 int retVal;
130
131 float lw_total;
132 float sw_total;
133 float cw_total;
134
135 int lw_cnt;
136 int sw_cnt;
137 int cw_cnt;
138
139 int isMagicNum;
140
141
142 binDepth = my_fopen(depthFile, "r", 0);
143
144 retVal = fread(&isMagicNum, sizeof(isMagicNum), 1, binDepth);
145
146 if (isMagicNum != magicNum)
147 print_error("Read depth file seems to be invalid or corrupt.\n");
148
149
150 lw_total = 0.0;
151 sw_total = 0.0;
152 cw_total = 0.0;
153
154 lw_cnt = 0;
155 sw_cnt = 0;
156 cw_cnt = 0;
157
158 /* read LW */
159
160 for (i = 0; i < num_chrom; i++){
161 for (j = 0; j < chromosomes[i]->lw_cnt; j++){
162 retVal = fread(&(chromosomes[i]->lw[j].depth), sizeof(chromosomes[i]->lw[j].depth), 1, binDepth);
163 lw_total += chromosomes[i]->lw[j].depth;
164 chromosomes[i]->lw[j].isControl = 1;
165 lw_cnt++;
166 }
167 }
168
169 /* read SW */
170
171 for (i = 0; i < num_chrom; i++){
172 for (j = 0; j < chromosomes[i]->sw_cnt; j++){
173 retVal = fread(&(chromosomes[i]->sw[j].depth), sizeof(chromosomes[i]->sw[j].depth), 1, binDepth);
174 sw_total += chromosomes[i]->sw[j].depth;
175 chromosomes[i]->sw[j].isControl = 1;
176 sw_cnt++;
177 }
178 }
179
180 /* read CW */
181
182 for (i = 0; i < num_chrom; i++){
183 for (j = 0; j < chromosomes[i]->cw_cnt; j++){
184 retVal = fread(&(chromosomes[i]->cw[j].depth), sizeof(chromosomes[i]->cw[j].depth), 1, binDepth);
185 cw_total += chromosomes[i]->cw[j].depth;
186 chromosomes[i]->cw[j].isControl = 1;
187 cw_cnt++;
188 }
189 }
190
191 LW_MEAN = lw_total / lw_cnt;
192 SW_MEAN = sw_total / sw_cnt;
193 CW_MEAN = cw_total / cw_cnt;
194
195 fprintf(stdout, "[OK] depth file %s is loaded.\n", depthFile);
196 if (VERBOSE)
197 fprintf(stdout, "LW_MEAN: %f\tSW_MEAN: %f\tCW_MEAN:%f\n", LW_MEAN, SW_MEAN, CW_MEAN);
198
199 fclose(binDepth);
200 }
201
202
203 void dump_text_windows(char *fname, enum WINDOWTYPE wt){
204
205 FILE *txtDepth;
206 int i, j;
207
208 txtDepth = my_fopen(fname, "w", 0);
209
210 fprintf(txtDepth, "#%s\t%s\t%s\t%s\t%s\t%s\n\n", "CHROM", "START", "END", "GC\%", "READ_DEPTH", "IS_CONTROL");
211
212 switch (wt){
213 case LW:
214 for (i = 0; i < num_chrom; i++){
215 for (j = 0; j < chromosomes[i]->lw_cnt; j++)
216 if (chromosomes[i]->lw[j].isControl == 1)
217 fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tY\n", chromosomes[i]->name, chromosomes[i]->lw[j].start, chromosomes[i]->lw[j].end, chromosomes[i]->lw[j].gc, chromosomes[i]->lw[j].depth);
218 else
219 fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tN\n", chromosomes[i]->name, chromosomes[i]->lw[j].start, chromosomes[i]->lw[j].end, chromosomes[i]->lw[j].gc, chromosomes[i]->lw[j].depth);
220 }
221 break;
222
223 case SW:
224 for (i = 0; i < num_chrom; i++){
225 for (j = 0; j < chromosomes[i]->sw_cnt; j++)
226 if (chromosomes[i]->sw[j].isControl == 1)
227 fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tY\n", chromosomes[i]->name, chromosomes[i]->sw[j].start, chromosomes[i]->sw[j].end, chromosomes[i]->sw[j].gc, chromosomes[i]->sw[j].depth);
228 else
229 fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tN\n", chromosomes[i]->name, chromosomes[i]->sw[j].start, chromosomes[i]->sw[j].end, chromosomes[i]->sw[j].gc, chromosomes[i]->sw[j].depth);
230 }
231 break;
232
233 case CW:
234 for (i = 0; i < num_chrom; i++){
235 for (j = 0; j < chromosomes[i]->cw_cnt; j++)
236 if (chromosomes[i]->cw[j].isControl == 1)
237 fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tY\n", chromosomes[i]->name, chromosomes[i]->cw[j].start, chromosomes[i]->cw[j].end, chromosomes[i]->cw[j].gc, chromosomes[i]->cw[j].depth);
238 else
239 fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tN\n", chromosomes[i]->name, chromosomes[i]->cw[j].start, chromosomes[i]->cw[j].end, chromosomes[i]->cw[j].gc, chromosomes[i]->cw[j].depth);
240 }
241 break;
242 }
243
244 fclose(txtDepth);
245
246 }
247
248
249
250 void print_copy_numbers(char *fname){
251 /* UNFINISHED */
252
253 FILE *txtDepth;
254 int i, j;
255 float copy_num;
256
257 txtDepth = my_fopen(fname, "w", 0);
258
259 fprintf(txtDepth, "#%s\t%s\t%s\t%s\t%s\n\n", "CHROM", "START", "END", "GC\%", "COPYNUMBER");
260
261 for (i = 0; i < num_chrom; i++){
262 for (j = 0; j < chromosomes[i]->cw_cnt; j++){
263 if (GENDER == FEMALE)
264 copy_num = (chromosomes[i]->cw[j].depth / CW_MEAN) * 2;
265 else{
266 if (!strstr(chromosomes[i]->name, "chrX") && !strstr(chromosomes[i]->name, "chrY"))
267 copy_num = (chromosomes[i]->cw[j].depth / CW_MEAN) * 2;
268 else
269 copy_num = chromosomes[i]->cw[j].depth / CW_MEAN_X;
270 }
271
272 if (GENDER == FEMALE && (strstr(chromosomes[i]->name, "chrY")))
273 continue;
274
275 fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\n", chromosomes[i]->name, chromosomes[i]->cw[j].start, chromosomes[i]->cw[j].end, chromosomes[i]->cw[j].gc, copy_num);
276 }
277 }
278
279
280 fclose(txtDepth);
281
282
283 }
284
285
286