Mercurial > repos > calkan > mrcanavar
comparison mrcanavar-0.34/gcnorm.c @ 0:86522a0b5f59 default tip
Uploaded source code for mrCaNaVaR
author | calkan |
---|---|
date | Tue, 21 Feb 2012 10:44:13 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:86522a0b5f59 |
---|---|
1 #include "gcnorm.h" | |
2 | |
3 int norm_until_converges (enum WINDOWTYPE wt, float *gclookup, float *gclookup_x){ | |
4 float max; | |
5 float max_x; | |
6 float min; | |
7 float min_x; | |
8 | |
9 float p_max; | |
10 float p_max_x; | |
11 float p_min; | |
12 float p_min_x; | |
13 | |
14 int i, j; | |
15 float mean; | |
16 float mean_x; | |
17 float stdev; | |
18 float stdev_x; | |
19 int iter; | |
20 | |
21 float maxcut; | |
22 float maxcut_x; | |
23 float mincut; | |
24 float mincut_x; | |
25 | |
26 iter = 1; | |
27 calc_stat(wt, gclookup, gclookup_x, 0, &max, &max_x, &min, &min_x); | |
28 | |
29 switch(wt){ | |
30 case LW: | |
31 mean = LW_MEAN; | |
32 mean_x = LW_MEAN_X; | |
33 break; | |
34 case SW: | |
35 mean = SW_MEAN; | |
36 mean_x = SW_MEAN_X; | |
37 break; | |
38 case CW: | |
39 mean = CW_MEAN; | |
40 mean_x = CW_MEAN_X; | |
41 break; | |
42 } | |
43 | |
44 | |
45 if (GENDER == AUTODETECT){ | |
46 if (VERBOSE) | |
47 fprintf(stdout, "MEAN: %f\tMEAN_X: %f\n", mean, mean_x); | |
48 | |
49 | |
50 if (mean_x / mean < 0.75){ // magical ratio | |
51 GENDER = MALE; | |
52 | |
53 if (VERBOSE) | |
54 fprintf(stdout, "Autodetect Gender: Male\n"); | |
55 | |
56 i = findChrom("chrX", "", -1); | |
57 | |
58 | |
59 switch(wt){ | |
60 | |
61 case CW: | |
62 for (j=0; j<chromosomes[i]->cw_cnt; j++) | |
63 if (chromosomes[i]->cw[j].depth > (float) CW_MEAN_X * 2 || chromosomes[i]->cw[j].depth < (float) CW_MEAN_X / 10.0) | |
64 chromosomes[i]->cw[j].isControl = 0; | |
65 break; | |
66 | |
67 case LW: | |
68 for (j=0; j<chromosomes[i]->lw_cnt; j++) | |
69 if (chromosomes[i]->lw[j].depth > (float) LW_MEAN_X * 2 || chromosomes[i]->lw[j].depth < (float) LW_MEAN_X / 10.0) | |
70 chromosomes[i]->lw[j].isControl = 0; | |
71 break; | |
72 | |
73 case SW: | |
74 for (j=0; j<chromosomes[i]->sw_cnt; j++) | |
75 if (chromosomes[i]->sw[j].depth > (float) SW_MEAN_X * 2 || chromosomes[i]->sw[j].depth < (float) SW_MEAN_X / 10.0) | |
76 chromosomes[i]->sw[j].isControl = 0; | |
77 break; | |
78 | |
79 } | |
80 | |
81 } | |
82 else{ | |
83 GENDER = FEMALE; | |
84 if (VERBOSE) | |
85 fprintf(stdout, "Autodetect Gender: Female\n"); | |
86 } | |
87 } | |
88 | |
89 normalize(wt, gclookup, gclookup_x, &max, &max_x, &min, &min_x); | |
90 | |
91 | |
92 do{ | |
93 | |
94 if (VERBOSE){ | |
95 fprintf(stdout, "Control regions %s iteration %d ", (wt == LW ? "LW" : (wt == SW ? "SW" : "CW")), iter); | |
96 fflush(stdout); | |
97 } | |
98 | |
99 | |
100 | |
101 p_min = min; | |
102 p_max = max; | |
103 p_min_x = min_x; | |
104 p_max_x = max_x; | |
105 | |
106 calc_stat(wt, gclookup, gclookup_x, 1, &max, &max_x, &min, &min_x); | |
107 | |
108 | |
109 switch(wt){ | |
110 case LW: | |
111 mean = LW_MEAN; | |
112 stdev = LW_STD; | |
113 mean_x = LW_MEAN_X; | |
114 stdev_x = LW_STD_X; | |
115 break; | |
116 case SW: | |
117 mean = SW_MEAN; | |
118 stdev = SW_STD; | |
119 mean_x = SW_MEAN_X; | |
120 stdev_x = SW_STD_X; | |
121 break; | |
122 case CW: | |
123 mean = CW_MEAN; | |
124 stdev = CW_STD; | |
125 mean_x = CW_MEAN_X; | |
126 stdev_x = CW_STD_X; | |
127 break; | |
128 } | |
129 | |
130 | |
131 if (GENDER == FEMALE){ | |
132 max_x = max; | |
133 mean_x = mean; | |
134 stdev_x = stdev; | |
135 min_x = min; | |
136 } | |
137 | |
138 | |
139 iter++; | |
140 | |
141 | |
142 maxcut = mean + 2.5 * stdev; | |
143 mincut = mean - 2.5 * stdev; | |
144 maxcut_x = mean_x + 2.5 * stdev_x; | |
145 mincut_x = mean_x - 2.5 * stdev_x; | |
146 | |
147 if (GENDER != FEMALE){ | |
148 maxcut_x = mean_x + 2 * stdev_x; | |
149 mincut_x = mean_x - 2 * stdev_x; | |
150 } | |
151 | |
152 if (mincut < 0.0) mincut = mean / 10.0; | |
153 if (mincut_x < 0.0) mincut_x = mean_x / 10.0; | |
154 | |
155 if (maxcut - mean > mean - mincut) | |
156 maxcut = mean + (mean - mincut); | |
157 | |
158 if (GENDER != FEMALE){ | |
159 if (maxcut_x - mean_x > mean_x - mincut_x) | |
160 maxcut_x = mean_x + (mean_x - mincut_x); | |
161 } | |
162 | |
163 | |
164 if (VERBOSE){ | |
165 fprintf(stdout, "mean: %f\tstdev: %f\tmax: %f (cut: %f)\tmin: %f (cut: %f) \n", mean, stdev, max, maxcut, min, mincut); | |
166 if (GENDER != FEMALE) | |
167 fprintf(stdout, "mean_x: %f\tstdev_x: %f\tmax_x: %f (cut: %f)\tmin_x: %f (cut: %f)\n", mean_x, stdev_x, max_x, maxcut_x, min_x, mincut_x); | |
168 } | |
169 | |
170 if (p_min == min && p_max == max && p_min_x == min_x && p_max_x == max_x) | |
171 break; | |
172 | |
173 } while (max >= maxcut || max_x >= maxcut_x || min <= mincut || min_x <= mincut_x); | |
174 | |
175 | |
176 fprintf(stdout, "%s Normalization completed.\n", (wt == LW ? "LW" : (wt == SW ? "SW" : "CW"))); | |
177 | |
178 return 1; | |
179 } | |
180 | |
181 | |
182 void normalize (enum WINDOWTYPE wt, float *gclookup, float *gclookup_x, float *max, float *max_x, float *min, float *min_x){ | |
183 int gc_index; | |
184 float new_depth; | |
185 int i, j; | |
186 | |
187 float total, total_x; | |
188 int win_cnt, win_cnt_x; | |
189 | |
190 total = 0.0; | |
191 win_cnt = 0; | |
192 total_x = 0.0; | |
193 win_cnt_x = 0; | |
194 | |
195 | |
196 switch(wt){ | |
197 case LW: | |
198 for (i=0; i<num_chrom; i++){ | |
199 for (j=0; j<chromosomes[i]->lw_cnt; j++){ | |
200 gc_index = chromosomes[i]->lw[j].gc * (float) GC_BIN; | |
201 if ((!strstr(chromosomes[i]->name, "chrX") && !strstr(chromosomes[i]->name, "chrY")) || GENDER == FEMALE){ | |
202 if (MULTGC) | |
203 new_depth = gclookup[i] * chromosomes[i]->lw[j].depth; | |
204 else | |
205 new_depth = chromosomes[i]->lw[j].depth - (gclookup[gc_index] - LW_MEAN); | |
206 } | |
207 else if ((strstr(chromosomes[i]->name, "chrX") || strstr(chromosomes[i]->name, "chrY")) && GENDER == MALE){ | |
208 if (MULTGC) | |
209 new_depth = gclookup_x[i] * chromosomes[i]->lw[j].depth; | |
210 else | |
211 new_depth = chromosomes[i]->lw[j].depth - (gclookup_x[gc_index] - LW_MEAN_X); | |
212 } | |
213 | |
214 if (new_depth < 0) new_depth = 0; | |
215 | |
216 chromosomes[i]->lw[j].depth = new_depth; | |
217 } | |
218 } | |
219 break; | |
220 | |
221 case SW: | |
222 for (i=0; i<num_chrom; i++){ | |
223 for (j=0; j<chromosomes[i]->sw_cnt; j++){ | |
224 gc_index = chromosomes[i]->sw[j].gc * (float) GC_BIN; | |
225 if ((!strstr(chromosomes[i]->name, "chrX") && !strstr(chromosomes[i]->name, "chrY")) || GENDER == FEMALE){ | |
226 if (MULTGC) | |
227 new_depth = gclookup[i] * chromosomes[i]->sw[j].depth; | |
228 else | |
229 new_depth = chromosomes[i]->sw[j].depth - (gclookup[gc_index] - SW_MEAN); | |
230 } | |
231 else if ((strstr(chromosomes[i]->name, "chrX") || strstr(chromosomes[i]->name, "chrY")) && GENDER == MALE){ | |
232 if (MULTGC) | |
233 new_depth = gclookup_x[i] * chromosomes[i]->sw[j].depth; | |
234 else | |
235 new_depth = chromosomes[i]->sw[j].depth - (gclookup_x[gc_index] - SW_MEAN_X); | |
236 } | |
237 | |
238 if (new_depth < 0) new_depth = 0; | |
239 chromosomes[i]->sw[j].depth = new_depth; | |
240 } | |
241 } | |
242 break; | |
243 | |
244 case CW: | |
245 for (i=0; i<num_chrom; i++){ | |
246 for (j=0; j<chromosomes[i]->cw_cnt; j++){ | |
247 gc_index = chromosomes[i]->cw[j].gc * (float) GC_BIN; | |
248 if ((!strstr(chromosomes[i]->name, "chrX") && !strstr(chromosomes[i]->name, "chrY")) || GENDER == FEMALE){ | |
249 if (MULTGC) | |
250 new_depth = gclookup[i] * chromosomes[i]->cw[j].depth; | |
251 else | |
252 new_depth = chromosomes[i]->cw[j].depth - (gclookup[gc_index] - CW_MEAN); | |
253 } | |
254 else if ((strstr(chromosomes[i]->name, "chrX") || strstr(chromosomes[i]->name, "chrY")) && GENDER == MALE){ | |
255 if (MULTGC) | |
256 new_depth = gclookup_x[i] * chromosomes[i]->cw[j].depth; | |
257 else | |
258 new_depth = chromosomes[i]->cw[j].depth - (gclookup_x[gc_index] - CW_MEAN_X); | |
259 } | |
260 | |
261 if (new_depth < 0) new_depth = 0; | |
262 | |
263 chromosomes[i]->cw[j].depth = new_depth; | |
264 } | |
265 } | |
266 break; | |
267 } | |
268 | |
269 | |
270 | |
271 calc_stat(wt, gclookup, gclookup_x, 0, max, max_x, min, min_x); | |
272 | |
273 } | |
274 | |
275 | |
276 void calc_stat(enum WINDOWTYPE wt, float *gclookup, float *gclookup_x, char doClean, float *_max, float *_max_x, float *_min, float *_min_x){ | |
277 int i, j, k; | |
278 | |
279 float lw_var; | |
280 float sw_var; | |
281 float cw_var; | |
282 | |
283 float lw_total; | |
284 float sw_total; | |
285 float cw_total; | |
286 | |
287 float lw_var_x; | |
288 float sw_var_x; | |
289 float cw_var_x; | |
290 | |
291 float lw_total_x; | |
292 float sw_total_x; | |
293 float cw_total_x; | |
294 | |
295 int win_cnt; | |
296 float max; | |
297 | |
298 int win_cnt_x; | |
299 float max_x; | |
300 | |
301 float min; | |
302 float min_x; | |
303 | |
304 int gc_index; | |
305 | |
306 float gc_total[GC_BIN]; // total depth by GC | |
307 int gc_wincount[GC_BIN]; // count of windows with the given GC content | |
308 | |
309 float gc_total_x[GC_BIN]; // total depth by GC | |
310 int gc_wincount_x[GC_BIN]; // count of windows with the given GC content | |
311 | |
312 float MEAN; | |
313 float MEAN_X; | |
314 | |
315 | |
316 float maxcut, mincut; | |
317 float maxcut_x, mincut_x; | |
318 | |
319 lw_var = 0.0; | |
320 sw_var = 0.0; | |
321 cw_var = 0.0; | |
322 | |
323 lw_total = 0.0; | |
324 sw_total = 0.0; | |
325 cw_total = 0.0; | |
326 | |
327 win_cnt = 0; | |
328 | |
329 lw_var_x = 0.0; | |
330 sw_var_x = 0.0; | |
331 cw_var_x = 0.0; | |
332 | |
333 lw_total_x = 0.0; | |
334 sw_total_x = 0.0; | |
335 cw_total_x = 0.0; | |
336 | |
337 win_cnt_x = 0; | |
338 | |
339 for (i=0; i<GC_BIN; i++){ | |
340 gc_total[i] = 0.0; | |
341 gclookup[i] = 0.0; | |
342 gc_wincount[i] = 0; | |
343 | |
344 gc_total_x[i] = 0.0; | |
345 gclookup_x[i] = 0.0; | |
346 gc_wincount_x[i] = 0; | |
347 } | |
348 | |
349 max = 0.0; | |
350 max_x = 0.0; | |
351 | |
352 min = FLT_MAX; | |
353 min_x = FLT_MAX; | |
354 | |
355 /* | |
356 | |
357 NOTE: | |
358 | |
359 As of November 11, 2010, only CW implementation is reliable. | |
360 | |
361 | |
362 */ | |
363 | |
364 switch (wt){ | |
365 case LW: | |
366 | |
367 if (doClean){ | |
368 | |
369 for (i=0; i<num_chrom; i++){ | |
370 for (j=0; j<chromosomes[i]->lw_cnt; j++){ | |
371 | |
372 if (chromosomes[i]->lw[j].isControl == 1){ | |
373 | |
374 /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */ | |
375 if (!strstr(chromosomes[i]->name, "chrX") || GENDER == FEMALE){ | |
376 | |
377 maxcut = LW_MEAN + 2.5 * LW_STD; | |
378 mincut = LW_MEAN - 2.5 * LW_STD; | |
379 if (mincut < LW_MEAN / 10.0) mincut = LW_MEAN / 10.0; | |
380 | |
381 if (maxcut - LW_MEAN > LW_MEAN - mincut) | |
382 maxcut = LW_MEAN + (LW_MEAN - mincut); | |
383 | |
384 if (chromosomes[i]->lw[j].depth > maxcut || chromosomes[i]->lw[j].depth < mincut){ | |
385 | |
386 /* Remove this window and its neighbors from controls */ | |
387 chromosomes[i]->lw[j].isControl = 0; | |
388 | |
389 k = j; | |
390 | |
391 while (k > 0 && chromosomes[i]->lw[k-1].end >= chromosomes[i]->lw[j].start){ | |
392 chromosomes[i]->lw[--k].isControl = 0; | |
393 } | |
394 | |
395 k = j; | |
396 | |
397 while (k < chromosomes[i]->lw_cnt-1 && chromosomes[i]->lw[k+1].start <= chromosomes[i]->lw[j].end){ | |
398 chromosomes[i]->lw[++k].isControl = 0; | |
399 } | |
400 | |
401 } | |
402 | |
403 else{ | |
404 lw_total += chromosomes[i]->lw[j].depth; | |
405 win_cnt++; | |
406 } | |
407 } // if AUTOSOME | |
408 | |
409 | |
410 /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */ | |
411 else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){ | |
412 | |
413 maxcut_x = LW_MEAN_X + 2 * LW_STD_X; | |
414 mincut_x = LW_MEAN_X - 2 * LW_STD_X; | |
415 if (mincut_x < LW_MEAN_X / 10.0) mincut_x = LW_MEAN_X / 10.0; | |
416 | |
417 if (chromosomes[i]->lw[j].depth > maxcut_x || chromosomes[i]->lw[j].depth < mincut_x){ | |
418 | |
419 /* Remove this window and its neighbors from controls */ | |
420 chromosomes[i]->lw[j].isControl = 0; | |
421 | |
422 k = j; | |
423 | |
424 while (k > 0 && chromosomes[i]->lw[k-1].end >= chromosomes[i]->lw[j].start){ | |
425 chromosomes[i]->lw[--k].isControl = 0; | |
426 } | |
427 | |
428 k = j; | |
429 | |
430 while (k < chromosomes[i]->lw_cnt-1 && chromosomes[i]->lw[k+1].start <= chromosomes[i]->lw[j].end){ | |
431 chromosomes[i]->lw[++k].isControl = 0; | |
432 } | |
433 } | |
434 | |
435 else{ | |
436 lw_total_x += chromosomes[i]->lw[j].depth; | |
437 win_cnt_x++; | |
438 } | |
439 } // if chrX | |
440 | |
441 } | |
442 | |
443 } | |
444 } | |
445 | |
446 LW_MEAN_X = lw_total_x / win_cnt_x; | |
447 LW_MEAN = lw_total / win_cnt; | |
448 | |
449 } // do clean | |
450 | |
451 lw_total = 0.0; | |
452 win_cnt = 0; | |
453 lw_total_x = 0.0; | |
454 win_cnt_x = 0; | |
455 | |
456 for (i=0; i<num_chrom; i++){ | |
457 for (j=0; j<chromosomes[i]->lw_cnt; j++){ | |
458 | |
459 if (chromosomes[i]->lw[j].isControl == 1){ | |
460 | |
461 /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */ | |
462 if (!strstr(chromosomes[i]->name, "chrX") || GENDER==FEMALE){ | |
463 | |
464 lw_var += (LW_MEAN - chromosomes[i]->lw[j].depth) * (LW_MEAN - chromosomes[i]->lw[j].depth); | |
465 win_cnt++; | |
466 | |
467 lw_total += chromosomes[i]->lw[j].depth; | |
468 gc_index = chromosomes[i]->lw[j].gc * GC_BIN; | |
469 gc_total[gc_index] += chromosomes[i]->lw[j].depth; | |
470 gc_wincount[gc_index]++; | |
471 | |
472 if (chromosomes[i]->lw[j].depth > max) | |
473 max = chromosomes[i]->lw[j].depth; | |
474 if (chromosomes[i]->lw[j].depth < min) | |
475 min = chromosomes[i]->lw[j].depth; | |
476 | |
477 } // AUTOSOMES | |
478 | |
479 /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */ | |
480 else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){ | |
481 | |
482 lw_var_x += (LW_MEAN_X - chromosomes[i]->lw[j].depth) * (LW_MEAN_X - chromosomes[i]->lw[j].depth); | |
483 win_cnt_x++; | |
484 | |
485 lw_total_x += chromosomes[i]->lw[j].depth; | |
486 gc_index = chromosomes[i]->lw[j].gc * GC_BIN; | |
487 gc_total_x[gc_index] += chromosomes[i]->lw[j].depth; | |
488 gc_wincount_x[gc_index]++; | |
489 | |
490 if (chromosomes[i]->lw[j].depth > max_x) | |
491 max_x = chromosomes[i]->lw[j].depth; | |
492 if (chromosomes[i]->lw[j].depth < min_x) | |
493 min_x = chromosomes[i]->lw[j].depth; | |
494 | |
495 } // chrX | |
496 | |
497 | |
498 } // if control | |
499 } // outer for | |
500 } // if (!doClean) | |
501 | |
502 LW_MEAN_X = lw_total_x / win_cnt_x; | |
503 LW_STD_X = sqrt(lw_var_x / win_cnt_x); | |
504 | |
505 LW_MEAN = lw_total / win_cnt; | |
506 LW_STD = sqrt(lw_var / win_cnt); | |
507 | |
508 MEAN = LW_MEAN; | |
509 MEAN_X = LW_MEAN_X; | |
510 | |
511 break; | |
512 | |
513 case SW: | |
514 | |
515 | |
516 if (doClean){ | |
517 for (i=0; i<num_chrom; i++){ | |
518 for (j=0; j<chromosomes[i]->sw_cnt; j++){ | |
519 | |
520 if (chromosomes[i]->sw[j].isControl == 1){ | |
521 | |
522 /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */ | |
523 if (!strstr(chromosomes[i]->name, "chrX") || GENDER == FEMALE){ | |
524 | |
525 maxcut = SW_MEAN + 2.5 * SW_STD; | |
526 mincut = SW_MEAN - 2.5 * SW_STD; | |
527 if (mincut < SW_MEAN / 10.0) mincut = SW_MEAN / 10.0; | |
528 | |
529 if (maxcut - SW_MEAN > SW_MEAN - mincut) | |
530 maxcut = SW_MEAN + (SW_MEAN - mincut); | |
531 | |
532 if (chromosomes[i]->sw[j].depth > maxcut || chromosomes[i]->sw[j].depth < mincut){ | |
533 | |
534 /* Remove this window and its neighbors from controls */ | |
535 chromosomes[i]->sw[j].isControl = 0; | |
536 | |
537 k = j; | |
538 | |
539 while (k > 0 && chromosomes[i]->sw[k-1].end >= chromosomes[i]->sw[j].start){ | |
540 chromosomes[i]->sw[--k].isControl = 0; | |
541 } | |
542 | |
543 k = j; | |
544 | |
545 while (k < chromosomes[i]->sw_cnt-1 && chromosomes[i]->sw[k+1].start <= chromosomes[i]->sw[j].end){ | |
546 chromosomes[i]->sw[++k].isControl = 0; | |
547 } | |
548 | |
549 } | |
550 | |
551 else{ | |
552 sw_total += chromosomes[i]->sw[j].depth; | |
553 win_cnt++; | |
554 } | |
555 } // if AUTOSOME | |
556 | |
557 | |
558 /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */ | |
559 else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){ | |
560 | |
561 maxcut_x = SW_MEAN_X + 2 * SW_STD_X; | |
562 mincut_x = SW_MEAN_X - 2 * SW_STD_X; | |
563 if (mincut_x < SW_MEAN_X / 10.0) mincut_x = SW_MEAN_X / 10.0; | |
564 | |
565 if (chromosomes[i]->sw[j].depth > maxcut_x || chromosomes[i]->sw[j].depth < mincut_x){ | |
566 | |
567 /* Remove this window and its neighbors from controls */ | |
568 chromosomes[i]->sw[j].isControl = 0; | |
569 | |
570 k = j; | |
571 | |
572 while (k > 0 && chromosomes[i]->sw[k-1].end >= chromosomes[i]->sw[j].start){ | |
573 chromosomes[i]->sw[--k].isControl = 0; | |
574 } | |
575 | |
576 k = j; | |
577 | |
578 while (k < chromosomes[i]->sw_cnt-1 && chromosomes[i]->sw[k+1].start <= chromosomes[i]->sw[j].end){ | |
579 chromosomes[i]->sw[++k].isControl = 0; | |
580 } | |
581 } | |
582 | |
583 else{ | |
584 sw_total_x += chromosomes[i]->sw[j].depth; | |
585 win_cnt_x++; | |
586 } | |
587 } // if chrX | |
588 | |
589 } | |
590 | |
591 } | |
592 } | |
593 | |
594 SW_MEAN_X = sw_total_x / win_cnt_x; | |
595 SW_MEAN = sw_total / win_cnt; | |
596 | |
597 } // do clean | |
598 | |
599 sw_total = 0.0; | |
600 win_cnt = 0; | |
601 sw_total_x = 0.0; | |
602 win_cnt_x = 0; | |
603 | |
604 for (i=0; i<num_chrom; i++){ | |
605 for (j=0; j<chromosomes[i]->sw_cnt; j++){ | |
606 | |
607 if (chromosomes[i]->sw[j].isControl == 1){ | |
608 | |
609 /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */ | |
610 if (!strstr(chromosomes[i]->name, "chrX") || GENDER==FEMALE){ | |
611 | |
612 sw_var += (SW_MEAN - chromosomes[i]->sw[j].depth) * (SW_MEAN - chromosomes[i]->sw[j].depth); | |
613 win_cnt++; | |
614 | |
615 sw_total += chromosomes[i]->sw[j].depth; | |
616 gc_index = chromosomes[i]->sw[j].gc * GC_BIN; | |
617 gc_total[gc_index] += chromosomes[i]->sw[j].depth; | |
618 gc_wincount[gc_index]++; | |
619 | |
620 if (chromosomes[i]->sw[j].depth > max) | |
621 max = chromosomes[i]->sw[j].depth; | |
622 if (chromosomes[i]->sw[j].depth < min) | |
623 min = chromosomes[i]->sw[j].depth; | |
624 | |
625 } // AUTOSOMES | |
626 | |
627 /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */ | |
628 else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){ | |
629 | |
630 sw_var_x += (SW_MEAN_X - chromosomes[i]->sw[j].depth) * (SW_MEAN_X - chromosomes[i]->sw[j].depth); | |
631 win_cnt_x++; | |
632 | |
633 sw_total_x += chromosomes[i]->sw[j].depth; | |
634 gc_index = chromosomes[i]->sw[j].gc * GC_BIN; | |
635 gc_total_x[gc_index] += chromosomes[i]->sw[j].depth; | |
636 gc_wincount_x[gc_index]++; | |
637 | |
638 if (chromosomes[i]->sw[j].depth > max_x) | |
639 max_x = chromosomes[i]->sw[j].depth; | |
640 if (chromosomes[i]->sw[j].depth < min_x) | |
641 min_x = chromosomes[i]->sw[j].depth; | |
642 | |
643 } // chrX | |
644 | |
645 | |
646 } // if control | |
647 } // outer for | |
648 } // if (!doClean) | |
649 | |
650 SW_MEAN_X = sw_total_x / win_cnt_x; | |
651 SW_STD_X = sqrt(sw_var_x / win_cnt_x); | |
652 | |
653 SW_MEAN = sw_total / win_cnt; | |
654 SW_STD = sqrt(sw_var / win_cnt); | |
655 | |
656 MEAN = SW_MEAN; | |
657 MEAN_X = SW_MEAN_X; | |
658 | |
659 | |
660 break; | |
661 | |
662 case CW: | |
663 | |
664 if (doClean){ | |
665 for (i=0; i<num_chrom; i++){ | |
666 for (j=0; j<chromosomes[i]->cw_cnt; j++){ | |
667 | |
668 | |
669 if (chromosomes[i]->cw[j].isControl == 1){ | |
670 | |
671 /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */ | |
672 if (!strstr(chromosomes[i]->name, "chrX") || GENDER == FEMALE){ | |
673 | |
674 maxcut = CW_MEAN + 2.5 * CW_STD; | |
675 mincut = CW_MEAN - 2.5 * CW_STD; | |
676 if (mincut < CW_MEAN / 10.0) mincut = CW_MEAN / 10.0; | |
677 | |
678 if (maxcut - CW_MEAN > CW_MEAN - mincut) | |
679 maxcut = CW_MEAN + (CW_MEAN - mincut); | |
680 | |
681 if (chromosomes[i]->cw[j].depth > maxcut || chromosomes[i]->cw[j].depth < mincut){ | |
682 | |
683 /* Remove this window and its neighbors from controls */ | |
684 chromosomes[i]->cw[j].isControl = 0; | |
685 | |
686 if (j > 0) | |
687 chromosomes[i]->cw[j-1].isControl = 0; | |
688 if (j < chromosomes[i]->cw_cnt-1) | |
689 chromosomes[i]->cw[j+1].isControl = 0; | |
690 } | |
691 else{ | |
692 cw_total += chromosomes[i]->cw[j].depth; | |
693 win_cnt++; | |
694 } | |
695 } // if AUTOSOME | |
696 | |
697 | |
698 /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */ | |
699 else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){ | |
700 | |
701 maxcut_x = CW_MEAN_X + 2 * CW_STD_X; | |
702 mincut_x = CW_MEAN_X - 2 * CW_STD_X; | |
703 if (mincut_x < CW_MEAN_X / 10.0) mincut_x = CW_MEAN_X / 10.0; | |
704 | |
705 if (chromosomes[i]->cw[j].depth > maxcut_x || chromosomes[i]->cw[j].depth < mincut_x){ | |
706 | |
707 /* Remove this window and its neighbors from controls */ | |
708 chromosomes[i]->cw[j].isControl = 0; | |
709 | |
710 if (j > 0) | |
711 chromosomes[i]->cw[j-1].isControl = 0; | |
712 if (j < chromosomes[i]->cw_cnt-1) | |
713 chromosomes[i]->cw[j+1].isControl = 0; | |
714 } | |
715 else{ | |
716 cw_total_x += chromosomes[i]->cw[j].depth; | |
717 win_cnt_x++; | |
718 } | |
719 } // if chrX | |
720 | |
721 } | |
722 | |
723 } | |
724 } | |
725 | |
726 | |
727 CW_MEAN_X = cw_total_x / win_cnt_x; | |
728 | |
729 CW_MEAN = cw_total / win_cnt; | |
730 | |
731 } // do clean | |
732 | |
733 cw_total = 0.0; | |
734 win_cnt = 0; | |
735 cw_total_x = 0.0; | |
736 win_cnt_x = 0; | |
737 | |
738 for (i=0; i<num_chrom; i++){ | |
739 for (j=0; j<chromosomes[i]->cw_cnt; j++){ | |
740 | |
741 if (chromosomes[i]->cw[j].isControl == 1){ | |
742 | |
743 /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */ | |
744 if (!strstr(chromosomes[i]->name, "chrX") || GENDER==FEMALE){ | |
745 | |
746 cw_var += (CW_MEAN - chromosomes[i]->cw[j].depth) * (CW_MEAN - chromosomes[i]->cw[j].depth); | |
747 win_cnt++; | |
748 | |
749 cw_total += chromosomes[i]->cw[j].depth; | |
750 gc_index = chromosomes[i]->cw[j].gc * GC_BIN; | |
751 gc_total[gc_index] += chromosomes[i]->cw[j].depth; | |
752 gc_wincount[gc_index]++; | |
753 | |
754 if (chromosomes[i]->cw[j].depth > max) | |
755 max = chromosomes[i]->cw[j].depth; | |
756 if (chromosomes[i]->cw[j].depth < min) | |
757 min = chromosomes[i]->cw[j].depth; | |
758 | |
759 } // AUTOSOMES | |
760 | |
761 /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */ | |
762 else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){ | |
763 | |
764 cw_var_x += (CW_MEAN_X - chromosomes[i]->cw[j].depth) * (CW_MEAN_X - chromosomes[i]->cw[j].depth); | |
765 win_cnt_x++; | |
766 | |
767 cw_total_x += chromosomes[i]->cw[j].depth; | |
768 gc_index = chromosomes[i]->cw[j].gc * GC_BIN; | |
769 gc_total_x[gc_index] += chromosomes[i]->cw[j].depth; | |
770 gc_wincount_x[gc_index]++; | |
771 | |
772 if (chromosomes[i]->cw[j].depth > max_x) | |
773 max_x = chromosomes[i]->cw[j].depth; | |
774 if (chromosomes[i]->cw[j].depth < min_x) | |
775 min_x = chromosomes[i]->cw[j].depth; | |
776 | |
777 } // chrX | |
778 | |
779 | |
780 } // if control | |
781 } // outer for | |
782 } // if (!doClean) | |
783 | |
784 | |
785 CW_MEAN_X = cw_total_x / win_cnt_x; | |
786 CW_STD_X = sqrt(cw_var_x / win_cnt_x); | |
787 | |
788 CW_MEAN = cw_total / win_cnt; | |
789 CW_STD = sqrt(cw_var / win_cnt); | |
790 | |
791 MEAN = CW_MEAN; | |
792 MEAN_X = CW_MEAN_X; | |
793 | |
794 break; | |
795 } | |
796 | |
797 | |
798 /* calculate the gclookup table */ | |
799 | |
800 for (i=0; i<GC_BIN; i++){ | |
801 | |
802 /* AUTOSOMES */ | |
803 | |
804 if (gc_wincount[i] == 0 || gc_total[i] == 0){ | |
805 j=i-1; | |
806 | |
807 while (j >= 0 && gc_total[j] == 0) | |
808 j--; | |
809 | |
810 if (gc_total[j] == 0 || gc_wincount[j] == 0){ | |
811 j=i+1; | |
812 while (j < GC_BIN && gc_total[j] == 0) j++; | |
813 } | |
814 | |
815 gc_total[i] = gc_total[j]; | |
816 gc_wincount[i] = gc_wincount[j]; | |
817 } | |
818 | |
819 if (gc_total[i] != 0.0){ | |
820 gclookup[i] = gc_total[i] / (float) gc_wincount[i]; | |
821 | |
822 if (MULTGC){ | |
823 | |
824 /* | |
825 if (VERBOSE && i > 200) | |
826 fprintf(stdout, "GC\%: %f\tlookup: %f\tnow: ", ((float)i / 10.0), gclookup[i]); | |
827 */ | |
828 | |
829 gclookup[i] = MEAN / gclookup[i]; | |
830 | |
831 /* | |
832 if (VERBOSE && i > 200) | |
833 fprintf(stdout, "%f\n", gclookup[i]); | |
834 */ | |
835 | |
836 if (gclookup[i] > MAX_GC_CORR) | |
837 gclookup[i] = MAX_GC_CORR; | |
838 else if (gclookup[i] < MIN_GC_CORR) | |
839 gclookup[i] = MIN_GC_CORR; | |
840 } | |
841 | |
842 /* | |
843 if (!MULTGC && VERBOSE && i > 200) | |
844 fprintf(stdout, "GC\%: %f\tlookup: %f\n", ((float)i / 10.0), gclookup[i]); | |
845 */ | |
846 | |
847 } | |
848 | |
849 | |
850 | |
851 /* chrX */ | |
852 | |
853 if (GENDER != FEMALE){ | |
854 | |
855 if (gc_wincount_x[i] == 0 || gc_total_x[i] == 0){ | |
856 j=i-1; | |
857 | |
858 while (j >= 0 && gc_total_x[j] == 0) | |
859 j--; | |
860 | |
861 if (gc_total_x[j] == 0 || gc_wincount_x[j] == 0){ | |
862 j=i+1; | |
863 while (j < GC_BIN && gc_total_x[j] == 0) j++; | |
864 } | |
865 | |
866 gc_total_x[i] = gc_total_x[j]; | |
867 gc_wincount_x[i] = gc_wincount_x[j]; | |
868 } | |
869 | |
870 if (gc_total_x[i] != 0.0){ | |
871 gclookup_x[i] = gc_total_x[i] / (float) gc_wincount_x[i]; | |
872 | |
873 if (MULTGC){ | |
874 gclookup_x[i] = MEAN_X / gclookup_x[i]; | |
875 if (gclookup_x[i] > MAX_GC_CORR) | |
876 gclookup_x[i] = MAX_GC_CORR; | |
877 else if (gclookup_x[i] < MIN_GC_CORR) | |
878 gclookup_x[i] = MIN_GC_CORR; | |
879 } | |
880 | |
881 } | |
882 | |
883 } | |
884 | |
885 | |
886 } | |
887 | |
888 | |
889 *_max = max; | |
890 *_max_x = max_x; | |
891 | |
892 *_min = min; | |
893 *_min_x = min_x; | |
894 | |
895 | |
896 } | |
897 | |
898 | |
899 | |
900 | |
901 |