comparison mrcanavar-0.34/gcnorm.c @ 0:86522a0b5f59 default tip

Uploaded source code for mrCaNaVaR
author calkan
date Tue, 21 Feb 2012 10:44:13 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:86522a0b5f59
1 #include "gcnorm.h"
2
3 int norm_until_converges (enum WINDOWTYPE wt, float *gclookup, float *gclookup_x){
4 float max;
5 float max_x;
6 float min;
7 float min_x;
8
9 float p_max;
10 float p_max_x;
11 float p_min;
12 float p_min_x;
13
14 int i, j;
15 float mean;
16 float mean_x;
17 float stdev;
18 float stdev_x;
19 int iter;
20
21 float maxcut;
22 float maxcut_x;
23 float mincut;
24 float mincut_x;
25
26 iter = 1;
27 calc_stat(wt, gclookup, gclookup_x, 0, &max, &max_x, &min, &min_x);
28
29 switch(wt){
30 case LW:
31 mean = LW_MEAN;
32 mean_x = LW_MEAN_X;
33 break;
34 case SW:
35 mean = SW_MEAN;
36 mean_x = SW_MEAN_X;
37 break;
38 case CW:
39 mean = CW_MEAN;
40 mean_x = CW_MEAN_X;
41 break;
42 }
43
44
45 if (GENDER == AUTODETECT){
46 if (VERBOSE)
47 fprintf(stdout, "MEAN: %f\tMEAN_X: %f\n", mean, mean_x);
48
49
50 if (mean_x / mean < 0.75){ // magical ratio
51 GENDER = MALE;
52
53 if (VERBOSE)
54 fprintf(stdout, "Autodetect Gender: Male\n");
55
56 i = findChrom("chrX", "", -1);
57
58
59 switch(wt){
60
61 case CW:
62 for (j=0; j<chromosomes[i]->cw_cnt; j++)
63 if (chromosomes[i]->cw[j].depth > (float) CW_MEAN_X * 2 || chromosomes[i]->cw[j].depth < (float) CW_MEAN_X / 10.0)
64 chromosomes[i]->cw[j].isControl = 0;
65 break;
66
67 case LW:
68 for (j=0; j<chromosomes[i]->lw_cnt; j++)
69 if (chromosomes[i]->lw[j].depth > (float) LW_MEAN_X * 2 || chromosomes[i]->lw[j].depth < (float) LW_MEAN_X / 10.0)
70 chromosomes[i]->lw[j].isControl = 0;
71 break;
72
73 case SW:
74 for (j=0; j<chromosomes[i]->sw_cnt; j++)
75 if (chromosomes[i]->sw[j].depth > (float) SW_MEAN_X * 2 || chromosomes[i]->sw[j].depth < (float) SW_MEAN_X / 10.0)
76 chromosomes[i]->sw[j].isControl = 0;
77 break;
78
79 }
80
81 }
82 else{
83 GENDER = FEMALE;
84 if (VERBOSE)
85 fprintf(stdout, "Autodetect Gender: Female\n");
86 }
87 }
88
89 normalize(wt, gclookup, gclookup_x, &max, &max_x, &min, &min_x);
90
91
92 do{
93
94 if (VERBOSE){
95 fprintf(stdout, "Control regions %s iteration %d ", (wt == LW ? "LW" : (wt == SW ? "SW" : "CW")), iter);
96 fflush(stdout);
97 }
98
99
100
101 p_min = min;
102 p_max = max;
103 p_min_x = min_x;
104 p_max_x = max_x;
105
106 calc_stat(wt, gclookup, gclookup_x, 1, &max, &max_x, &min, &min_x);
107
108
109 switch(wt){
110 case LW:
111 mean = LW_MEAN;
112 stdev = LW_STD;
113 mean_x = LW_MEAN_X;
114 stdev_x = LW_STD_X;
115 break;
116 case SW:
117 mean = SW_MEAN;
118 stdev = SW_STD;
119 mean_x = SW_MEAN_X;
120 stdev_x = SW_STD_X;
121 break;
122 case CW:
123 mean = CW_MEAN;
124 stdev = CW_STD;
125 mean_x = CW_MEAN_X;
126 stdev_x = CW_STD_X;
127 break;
128 }
129
130
131 if (GENDER == FEMALE){
132 max_x = max;
133 mean_x = mean;
134 stdev_x = stdev;
135 min_x = min;
136 }
137
138
139 iter++;
140
141
142 maxcut = mean + 2.5 * stdev;
143 mincut = mean - 2.5 * stdev;
144 maxcut_x = mean_x + 2.5 * stdev_x;
145 mincut_x = mean_x - 2.5 * stdev_x;
146
147 if (GENDER != FEMALE){
148 maxcut_x = mean_x + 2 * stdev_x;
149 mincut_x = mean_x - 2 * stdev_x;
150 }
151
152 if (mincut < 0.0) mincut = mean / 10.0;
153 if (mincut_x < 0.0) mincut_x = mean_x / 10.0;
154
155 if (maxcut - mean > mean - mincut)
156 maxcut = mean + (mean - mincut);
157
158 if (GENDER != FEMALE){
159 if (maxcut_x - mean_x > mean_x - mincut_x)
160 maxcut_x = mean_x + (mean_x - mincut_x);
161 }
162
163
164 if (VERBOSE){
165 fprintf(stdout, "mean: %f\tstdev: %f\tmax: %f (cut: %f)\tmin: %f (cut: %f) \n", mean, stdev, max, maxcut, min, mincut);
166 if (GENDER != FEMALE)
167 fprintf(stdout, "mean_x: %f\tstdev_x: %f\tmax_x: %f (cut: %f)\tmin_x: %f (cut: %f)\n", mean_x, stdev_x, max_x, maxcut_x, min_x, mincut_x);
168 }
169
170 if (p_min == min && p_max == max && p_min_x == min_x && p_max_x == max_x)
171 break;
172
173 } while (max >= maxcut || max_x >= maxcut_x || min <= mincut || min_x <= mincut_x);
174
175
176 fprintf(stdout, "%s Normalization completed.\n", (wt == LW ? "LW" : (wt == SW ? "SW" : "CW")));
177
178 return 1;
179 }
180
181
182 void normalize (enum WINDOWTYPE wt, float *gclookup, float *gclookup_x, float *max, float *max_x, float *min, float *min_x){
183 int gc_index;
184 float new_depth;
185 int i, j;
186
187 float total, total_x;
188 int win_cnt, win_cnt_x;
189
190 total = 0.0;
191 win_cnt = 0;
192 total_x = 0.0;
193 win_cnt_x = 0;
194
195
196 switch(wt){
197 case LW:
198 for (i=0; i<num_chrom; i++){
199 for (j=0; j<chromosomes[i]->lw_cnt; j++){
200 gc_index = chromosomes[i]->lw[j].gc * (float) GC_BIN;
201 if ((!strstr(chromosomes[i]->name, "chrX") && !strstr(chromosomes[i]->name, "chrY")) || GENDER == FEMALE){
202 if (MULTGC)
203 new_depth = gclookup[i] * chromosomes[i]->lw[j].depth;
204 else
205 new_depth = chromosomes[i]->lw[j].depth - (gclookup[gc_index] - LW_MEAN);
206 }
207 else if ((strstr(chromosomes[i]->name, "chrX") || strstr(chromosomes[i]->name, "chrY")) && GENDER == MALE){
208 if (MULTGC)
209 new_depth = gclookup_x[i] * chromosomes[i]->lw[j].depth;
210 else
211 new_depth = chromosomes[i]->lw[j].depth - (gclookup_x[gc_index] - LW_MEAN_X);
212 }
213
214 if (new_depth < 0) new_depth = 0;
215
216 chromosomes[i]->lw[j].depth = new_depth;
217 }
218 }
219 break;
220
221 case SW:
222 for (i=0; i<num_chrom; i++){
223 for (j=0; j<chromosomes[i]->sw_cnt; j++){
224 gc_index = chromosomes[i]->sw[j].gc * (float) GC_BIN;
225 if ((!strstr(chromosomes[i]->name, "chrX") && !strstr(chromosomes[i]->name, "chrY")) || GENDER == FEMALE){
226 if (MULTGC)
227 new_depth = gclookup[i] * chromosomes[i]->sw[j].depth;
228 else
229 new_depth = chromosomes[i]->sw[j].depth - (gclookup[gc_index] - SW_MEAN);
230 }
231 else if ((strstr(chromosomes[i]->name, "chrX") || strstr(chromosomes[i]->name, "chrY")) && GENDER == MALE){
232 if (MULTGC)
233 new_depth = gclookup_x[i] * chromosomes[i]->sw[j].depth;
234 else
235 new_depth = chromosomes[i]->sw[j].depth - (gclookup_x[gc_index] - SW_MEAN_X);
236 }
237
238 if (new_depth < 0) new_depth = 0;
239 chromosomes[i]->sw[j].depth = new_depth;
240 }
241 }
242 break;
243
244 case CW:
245 for (i=0; i<num_chrom; i++){
246 for (j=0; j<chromosomes[i]->cw_cnt; j++){
247 gc_index = chromosomes[i]->cw[j].gc * (float) GC_BIN;
248 if ((!strstr(chromosomes[i]->name, "chrX") && !strstr(chromosomes[i]->name, "chrY")) || GENDER == FEMALE){
249 if (MULTGC)
250 new_depth = gclookup[i] * chromosomes[i]->cw[j].depth;
251 else
252 new_depth = chromosomes[i]->cw[j].depth - (gclookup[gc_index] - CW_MEAN);
253 }
254 else if ((strstr(chromosomes[i]->name, "chrX") || strstr(chromosomes[i]->name, "chrY")) && GENDER == MALE){
255 if (MULTGC)
256 new_depth = gclookup_x[i] * chromosomes[i]->cw[j].depth;
257 else
258 new_depth = chromosomes[i]->cw[j].depth - (gclookup_x[gc_index] - CW_MEAN_X);
259 }
260
261 if (new_depth < 0) new_depth = 0;
262
263 chromosomes[i]->cw[j].depth = new_depth;
264 }
265 }
266 break;
267 }
268
269
270
271 calc_stat(wt, gclookup, gclookup_x, 0, max, max_x, min, min_x);
272
273 }
274
275
276 void calc_stat(enum WINDOWTYPE wt, float *gclookup, float *gclookup_x, char doClean, float *_max, float *_max_x, float *_min, float *_min_x){
277 int i, j, k;
278
279 float lw_var;
280 float sw_var;
281 float cw_var;
282
283 float lw_total;
284 float sw_total;
285 float cw_total;
286
287 float lw_var_x;
288 float sw_var_x;
289 float cw_var_x;
290
291 float lw_total_x;
292 float sw_total_x;
293 float cw_total_x;
294
295 int win_cnt;
296 float max;
297
298 int win_cnt_x;
299 float max_x;
300
301 float min;
302 float min_x;
303
304 int gc_index;
305
306 float gc_total[GC_BIN]; // total depth by GC
307 int gc_wincount[GC_BIN]; // count of windows with the given GC content
308
309 float gc_total_x[GC_BIN]; // total depth by GC
310 int gc_wincount_x[GC_BIN]; // count of windows with the given GC content
311
312 float MEAN;
313 float MEAN_X;
314
315
316 float maxcut, mincut;
317 float maxcut_x, mincut_x;
318
319 lw_var = 0.0;
320 sw_var = 0.0;
321 cw_var = 0.0;
322
323 lw_total = 0.0;
324 sw_total = 0.0;
325 cw_total = 0.0;
326
327 win_cnt = 0;
328
329 lw_var_x = 0.0;
330 sw_var_x = 0.0;
331 cw_var_x = 0.0;
332
333 lw_total_x = 0.0;
334 sw_total_x = 0.0;
335 cw_total_x = 0.0;
336
337 win_cnt_x = 0;
338
339 for (i=0; i<GC_BIN; i++){
340 gc_total[i] = 0.0;
341 gclookup[i] = 0.0;
342 gc_wincount[i] = 0;
343
344 gc_total_x[i] = 0.0;
345 gclookup_x[i] = 0.0;
346 gc_wincount_x[i] = 0;
347 }
348
349 max = 0.0;
350 max_x = 0.0;
351
352 min = FLT_MAX;
353 min_x = FLT_MAX;
354
355 /*
356
357 NOTE:
358
359 As of November 11, 2010, only CW implementation is reliable.
360
361
362 */
363
364 switch (wt){
365 case LW:
366
367 if (doClean){
368
369 for (i=0; i<num_chrom; i++){
370 for (j=0; j<chromosomes[i]->lw_cnt; j++){
371
372 if (chromosomes[i]->lw[j].isControl == 1){
373
374 /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
375 if (!strstr(chromosomes[i]->name, "chrX") || GENDER == FEMALE){
376
377 maxcut = LW_MEAN + 2.5 * LW_STD;
378 mincut = LW_MEAN - 2.5 * LW_STD;
379 if (mincut < LW_MEAN / 10.0) mincut = LW_MEAN / 10.0;
380
381 if (maxcut - LW_MEAN > LW_MEAN - mincut)
382 maxcut = LW_MEAN + (LW_MEAN - mincut);
383
384 if (chromosomes[i]->lw[j].depth > maxcut || chromosomes[i]->lw[j].depth < mincut){
385
386 /* Remove this window and its neighbors from controls */
387 chromosomes[i]->lw[j].isControl = 0;
388
389 k = j;
390
391 while (k > 0 && chromosomes[i]->lw[k-1].end >= chromosomes[i]->lw[j].start){
392 chromosomes[i]->lw[--k].isControl = 0;
393 }
394
395 k = j;
396
397 while (k < chromosomes[i]->lw_cnt-1 && chromosomes[i]->lw[k+1].start <= chromosomes[i]->lw[j].end){
398 chromosomes[i]->lw[++k].isControl = 0;
399 }
400
401 }
402
403 else{
404 lw_total += chromosomes[i]->lw[j].depth;
405 win_cnt++;
406 }
407 } // if AUTOSOME
408
409
410 /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
411 else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){
412
413 maxcut_x = LW_MEAN_X + 2 * LW_STD_X;
414 mincut_x = LW_MEAN_X - 2 * LW_STD_X;
415 if (mincut_x < LW_MEAN_X / 10.0) mincut_x = LW_MEAN_X / 10.0;
416
417 if (chromosomes[i]->lw[j].depth > maxcut_x || chromosomes[i]->lw[j].depth < mincut_x){
418
419 /* Remove this window and its neighbors from controls */
420 chromosomes[i]->lw[j].isControl = 0;
421
422 k = j;
423
424 while (k > 0 && chromosomes[i]->lw[k-1].end >= chromosomes[i]->lw[j].start){
425 chromosomes[i]->lw[--k].isControl = 0;
426 }
427
428 k = j;
429
430 while (k < chromosomes[i]->lw_cnt-1 && chromosomes[i]->lw[k+1].start <= chromosomes[i]->lw[j].end){
431 chromosomes[i]->lw[++k].isControl = 0;
432 }
433 }
434
435 else{
436 lw_total_x += chromosomes[i]->lw[j].depth;
437 win_cnt_x++;
438 }
439 } // if chrX
440
441 }
442
443 }
444 }
445
446 LW_MEAN_X = lw_total_x / win_cnt_x;
447 LW_MEAN = lw_total / win_cnt;
448
449 } // do clean
450
451 lw_total = 0.0;
452 win_cnt = 0;
453 lw_total_x = 0.0;
454 win_cnt_x = 0;
455
456 for (i=0; i<num_chrom; i++){
457 for (j=0; j<chromosomes[i]->lw_cnt; j++){
458
459 if (chromosomes[i]->lw[j].isControl == 1){
460
461 /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
462 if (!strstr(chromosomes[i]->name, "chrX") || GENDER==FEMALE){
463
464 lw_var += (LW_MEAN - chromosomes[i]->lw[j].depth) * (LW_MEAN - chromosomes[i]->lw[j].depth);
465 win_cnt++;
466
467 lw_total += chromosomes[i]->lw[j].depth;
468 gc_index = chromosomes[i]->lw[j].gc * GC_BIN;
469 gc_total[gc_index] += chromosomes[i]->lw[j].depth;
470 gc_wincount[gc_index]++;
471
472 if (chromosomes[i]->lw[j].depth > max)
473 max = chromosomes[i]->lw[j].depth;
474 if (chromosomes[i]->lw[j].depth < min)
475 min = chromosomes[i]->lw[j].depth;
476
477 } // AUTOSOMES
478
479 /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
480 else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){
481
482 lw_var_x += (LW_MEAN_X - chromosomes[i]->lw[j].depth) * (LW_MEAN_X - chromosomes[i]->lw[j].depth);
483 win_cnt_x++;
484
485 lw_total_x += chromosomes[i]->lw[j].depth;
486 gc_index = chromosomes[i]->lw[j].gc * GC_BIN;
487 gc_total_x[gc_index] += chromosomes[i]->lw[j].depth;
488 gc_wincount_x[gc_index]++;
489
490 if (chromosomes[i]->lw[j].depth > max_x)
491 max_x = chromosomes[i]->lw[j].depth;
492 if (chromosomes[i]->lw[j].depth < min_x)
493 min_x = chromosomes[i]->lw[j].depth;
494
495 } // chrX
496
497
498 } // if control
499 } // outer for
500 } // if (!doClean)
501
502 LW_MEAN_X = lw_total_x / win_cnt_x;
503 LW_STD_X = sqrt(lw_var_x / win_cnt_x);
504
505 LW_MEAN = lw_total / win_cnt;
506 LW_STD = sqrt(lw_var / win_cnt);
507
508 MEAN = LW_MEAN;
509 MEAN_X = LW_MEAN_X;
510
511 break;
512
513 case SW:
514
515
516 if (doClean){
517 for (i=0; i<num_chrom; i++){
518 for (j=0; j<chromosomes[i]->sw_cnt; j++){
519
520 if (chromosomes[i]->sw[j].isControl == 1){
521
522 /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
523 if (!strstr(chromosomes[i]->name, "chrX") || GENDER == FEMALE){
524
525 maxcut = SW_MEAN + 2.5 * SW_STD;
526 mincut = SW_MEAN - 2.5 * SW_STD;
527 if (mincut < SW_MEAN / 10.0) mincut = SW_MEAN / 10.0;
528
529 if (maxcut - SW_MEAN > SW_MEAN - mincut)
530 maxcut = SW_MEAN + (SW_MEAN - mincut);
531
532 if (chromosomes[i]->sw[j].depth > maxcut || chromosomes[i]->sw[j].depth < mincut){
533
534 /* Remove this window and its neighbors from controls */
535 chromosomes[i]->sw[j].isControl = 0;
536
537 k = j;
538
539 while (k > 0 && chromosomes[i]->sw[k-1].end >= chromosomes[i]->sw[j].start){
540 chromosomes[i]->sw[--k].isControl = 0;
541 }
542
543 k = j;
544
545 while (k < chromosomes[i]->sw_cnt-1 && chromosomes[i]->sw[k+1].start <= chromosomes[i]->sw[j].end){
546 chromosomes[i]->sw[++k].isControl = 0;
547 }
548
549 }
550
551 else{
552 sw_total += chromosomes[i]->sw[j].depth;
553 win_cnt++;
554 }
555 } // if AUTOSOME
556
557
558 /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
559 else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){
560
561 maxcut_x = SW_MEAN_X + 2 * SW_STD_X;
562 mincut_x = SW_MEAN_X - 2 * SW_STD_X;
563 if (mincut_x < SW_MEAN_X / 10.0) mincut_x = SW_MEAN_X / 10.0;
564
565 if (chromosomes[i]->sw[j].depth > maxcut_x || chromosomes[i]->sw[j].depth < mincut_x){
566
567 /* Remove this window and its neighbors from controls */
568 chromosomes[i]->sw[j].isControl = 0;
569
570 k = j;
571
572 while (k > 0 && chromosomes[i]->sw[k-1].end >= chromosomes[i]->sw[j].start){
573 chromosomes[i]->sw[--k].isControl = 0;
574 }
575
576 k = j;
577
578 while (k < chromosomes[i]->sw_cnt-1 && chromosomes[i]->sw[k+1].start <= chromosomes[i]->sw[j].end){
579 chromosomes[i]->sw[++k].isControl = 0;
580 }
581 }
582
583 else{
584 sw_total_x += chromosomes[i]->sw[j].depth;
585 win_cnt_x++;
586 }
587 } // if chrX
588
589 }
590
591 }
592 }
593
594 SW_MEAN_X = sw_total_x / win_cnt_x;
595 SW_MEAN = sw_total / win_cnt;
596
597 } // do clean
598
599 sw_total = 0.0;
600 win_cnt = 0;
601 sw_total_x = 0.0;
602 win_cnt_x = 0;
603
604 for (i=0; i<num_chrom; i++){
605 for (j=0; j<chromosomes[i]->sw_cnt; j++){
606
607 if (chromosomes[i]->sw[j].isControl == 1){
608
609 /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
610 if (!strstr(chromosomes[i]->name, "chrX") || GENDER==FEMALE){
611
612 sw_var += (SW_MEAN - chromosomes[i]->sw[j].depth) * (SW_MEAN - chromosomes[i]->sw[j].depth);
613 win_cnt++;
614
615 sw_total += chromosomes[i]->sw[j].depth;
616 gc_index = chromosomes[i]->sw[j].gc * GC_BIN;
617 gc_total[gc_index] += chromosomes[i]->sw[j].depth;
618 gc_wincount[gc_index]++;
619
620 if (chromosomes[i]->sw[j].depth > max)
621 max = chromosomes[i]->sw[j].depth;
622 if (chromosomes[i]->sw[j].depth < min)
623 min = chromosomes[i]->sw[j].depth;
624
625 } // AUTOSOMES
626
627 /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
628 else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){
629
630 sw_var_x += (SW_MEAN_X - chromosomes[i]->sw[j].depth) * (SW_MEAN_X - chromosomes[i]->sw[j].depth);
631 win_cnt_x++;
632
633 sw_total_x += chromosomes[i]->sw[j].depth;
634 gc_index = chromosomes[i]->sw[j].gc * GC_BIN;
635 gc_total_x[gc_index] += chromosomes[i]->sw[j].depth;
636 gc_wincount_x[gc_index]++;
637
638 if (chromosomes[i]->sw[j].depth > max_x)
639 max_x = chromosomes[i]->sw[j].depth;
640 if (chromosomes[i]->sw[j].depth < min_x)
641 min_x = chromosomes[i]->sw[j].depth;
642
643 } // chrX
644
645
646 } // if control
647 } // outer for
648 } // if (!doClean)
649
650 SW_MEAN_X = sw_total_x / win_cnt_x;
651 SW_STD_X = sqrt(sw_var_x / win_cnt_x);
652
653 SW_MEAN = sw_total / win_cnt;
654 SW_STD = sqrt(sw_var / win_cnt);
655
656 MEAN = SW_MEAN;
657 MEAN_X = SW_MEAN_X;
658
659
660 break;
661
662 case CW:
663
664 if (doClean){
665 for (i=0; i<num_chrom; i++){
666 for (j=0; j<chromosomes[i]->cw_cnt; j++){
667
668
669 if (chromosomes[i]->cw[j].isControl == 1){
670
671 /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
672 if (!strstr(chromosomes[i]->name, "chrX") || GENDER == FEMALE){
673
674 maxcut = CW_MEAN + 2.5 * CW_STD;
675 mincut = CW_MEAN - 2.5 * CW_STD;
676 if (mincut < CW_MEAN / 10.0) mincut = CW_MEAN / 10.0;
677
678 if (maxcut - CW_MEAN > CW_MEAN - mincut)
679 maxcut = CW_MEAN + (CW_MEAN - mincut);
680
681 if (chromosomes[i]->cw[j].depth > maxcut || chromosomes[i]->cw[j].depth < mincut){
682
683 /* Remove this window and its neighbors from controls */
684 chromosomes[i]->cw[j].isControl = 0;
685
686 if (j > 0)
687 chromosomes[i]->cw[j-1].isControl = 0;
688 if (j < chromosomes[i]->cw_cnt-1)
689 chromosomes[i]->cw[j+1].isControl = 0;
690 }
691 else{
692 cw_total += chromosomes[i]->cw[j].depth;
693 win_cnt++;
694 }
695 } // if AUTOSOME
696
697
698 /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
699 else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){
700
701 maxcut_x = CW_MEAN_X + 2 * CW_STD_X;
702 mincut_x = CW_MEAN_X - 2 * CW_STD_X;
703 if (mincut_x < CW_MEAN_X / 10.0) mincut_x = CW_MEAN_X / 10.0;
704
705 if (chromosomes[i]->cw[j].depth > maxcut_x || chromosomes[i]->cw[j].depth < mincut_x){
706
707 /* Remove this window and its neighbors from controls */
708 chromosomes[i]->cw[j].isControl = 0;
709
710 if (j > 0)
711 chromosomes[i]->cw[j-1].isControl = 0;
712 if (j < chromosomes[i]->cw_cnt-1)
713 chromosomes[i]->cw[j+1].isControl = 0;
714 }
715 else{
716 cw_total_x += chromosomes[i]->cw[j].depth;
717 win_cnt_x++;
718 }
719 } // if chrX
720
721 }
722
723 }
724 }
725
726
727 CW_MEAN_X = cw_total_x / win_cnt_x;
728
729 CW_MEAN = cw_total / win_cnt;
730
731 } // do clean
732
733 cw_total = 0.0;
734 win_cnt = 0;
735 cw_total_x = 0.0;
736 win_cnt_x = 0;
737
738 for (i=0; i<num_chrom; i++){
739 for (j=0; j<chromosomes[i]->cw_cnt; j++){
740
741 if (chromosomes[i]->cw[j].isControl == 1){
742
743 /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
744 if (!strstr(chromosomes[i]->name, "chrX") || GENDER==FEMALE){
745
746 cw_var += (CW_MEAN - chromosomes[i]->cw[j].depth) * (CW_MEAN - chromosomes[i]->cw[j].depth);
747 win_cnt++;
748
749 cw_total += chromosomes[i]->cw[j].depth;
750 gc_index = chromosomes[i]->cw[j].gc * GC_BIN;
751 gc_total[gc_index] += chromosomes[i]->cw[j].depth;
752 gc_wincount[gc_index]++;
753
754 if (chromosomes[i]->cw[j].depth > max)
755 max = chromosomes[i]->cw[j].depth;
756 if (chromosomes[i]->cw[j].depth < min)
757 min = chromosomes[i]->cw[j].depth;
758
759 } // AUTOSOMES
760
761 /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
762 else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){
763
764 cw_var_x += (CW_MEAN_X - chromosomes[i]->cw[j].depth) * (CW_MEAN_X - chromosomes[i]->cw[j].depth);
765 win_cnt_x++;
766
767 cw_total_x += chromosomes[i]->cw[j].depth;
768 gc_index = chromosomes[i]->cw[j].gc * GC_BIN;
769 gc_total_x[gc_index] += chromosomes[i]->cw[j].depth;
770 gc_wincount_x[gc_index]++;
771
772 if (chromosomes[i]->cw[j].depth > max_x)
773 max_x = chromosomes[i]->cw[j].depth;
774 if (chromosomes[i]->cw[j].depth < min_x)
775 min_x = chromosomes[i]->cw[j].depth;
776
777 } // chrX
778
779
780 } // if control
781 } // outer for
782 } // if (!doClean)
783
784
785 CW_MEAN_X = cw_total_x / win_cnt_x;
786 CW_STD_X = sqrt(cw_var_x / win_cnt_x);
787
788 CW_MEAN = cw_total / win_cnt;
789 CW_STD = sqrt(cw_var / win_cnt);
790
791 MEAN = CW_MEAN;
792 MEAN_X = CW_MEAN_X;
793
794 break;
795 }
796
797
798 /* calculate the gclookup table */
799
800 for (i=0; i<GC_BIN; i++){
801
802 /* AUTOSOMES */
803
804 if (gc_wincount[i] == 0 || gc_total[i] == 0){
805 j=i-1;
806
807 while (j >= 0 && gc_total[j] == 0)
808 j--;
809
810 if (gc_total[j] == 0 || gc_wincount[j] == 0){
811 j=i+1;
812 while (j < GC_BIN && gc_total[j] == 0) j++;
813 }
814
815 gc_total[i] = gc_total[j];
816 gc_wincount[i] = gc_wincount[j];
817 }
818
819 if (gc_total[i] != 0.0){
820 gclookup[i] = gc_total[i] / (float) gc_wincount[i];
821
822 if (MULTGC){
823
824 /*
825 if (VERBOSE && i > 200)
826 fprintf(stdout, "GC\%: %f\tlookup: %f\tnow: ", ((float)i / 10.0), gclookup[i]);
827 */
828
829 gclookup[i] = MEAN / gclookup[i];
830
831 /*
832 if (VERBOSE && i > 200)
833 fprintf(stdout, "%f\n", gclookup[i]);
834 */
835
836 if (gclookup[i] > MAX_GC_CORR)
837 gclookup[i] = MAX_GC_CORR;
838 else if (gclookup[i] < MIN_GC_CORR)
839 gclookup[i] = MIN_GC_CORR;
840 }
841
842 /*
843 if (!MULTGC && VERBOSE && i > 200)
844 fprintf(stdout, "GC\%: %f\tlookup: %f\n", ((float)i / 10.0), gclookup[i]);
845 */
846
847 }
848
849
850
851 /* chrX */
852
853 if (GENDER != FEMALE){
854
855 if (gc_wincount_x[i] == 0 || gc_total_x[i] == 0){
856 j=i-1;
857
858 while (j >= 0 && gc_total_x[j] == 0)
859 j--;
860
861 if (gc_total_x[j] == 0 || gc_wincount_x[j] == 0){
862 j=i+1;
863 while (j < GC_BIN && gc_total_x[j] == 0) j++;
864 }
865
866 gc_total_x[i] = gc_total_x[j];
867 gc_wincount_x[i] = gc_wincount_x[j];
868 }
869
870 if (gc_total_x[i] != 0.0){
871 gclookup_x[i] = gc_total_x[i] / (float) gc_wincount_x[i];
872
873 if (MULTGC){
874 gclookup_x[i] = MEAN_X / gclookup_x[i];
875 if (gclookup_x[i] > MAX_GC_CORR)
876 gclookup_x[i] = MAX_GC_CORR;
877 else if (gclookup_x[i] < MIN_GC_CORR)
878 gclookup_x[i] = MIN_GC_CORR;
879 }
880
881 }
882
883 }
884
885
886 }
887
888
889 *_max = max;
890 *_max_x = max_x;
891
892 *_min = min;
893 *_min_x = min_x;
894
895
896 }
897
898
899
900
901