0
|
1 #include "gcnorm.h"
|
|
2
|
|
3 int norm_until_converges (enum WINDOWTYPE wt, float *gclookup, float *gclookup_x){
|
|
4 float max;
|
|
5 float max_x;
|
|
6 float min;
|
|
7 float min_x;
|
|
8
|
|
9 float p_max;
|
|
10 float p_max_x;
|
|
11 float p_min;
|
|
12 float p_min_x;
|
|
13
|
|
14 int i, j;
|
|
15 float mean;
|
|
16 float mean_x;
|
|
17 float stdev;
|
|
18 float stdev_x;
|
|
19 int iter;
|
|
20
|
|
21 float maxcut;
|
|
22 float maxcut_x;
|
|
23 float mincut;
|
|
24 float mincut_x;
|
|
25
|
|
26 iter = 1;
|
|
27 calc_stat(wt, gclookup, gclookup_x, 0, &max, &max_x, &min, &min_x);
|
|
28
|
|
29 switch(wt){
|
|
30 case LW:
|
|
31 mean = LW_MEAN;
|
|
32 mean_x = LW_MEAN_X;
|
|
33 break;
|
|
34 case SW:
|
|
35 mean = SW_MEAN;
|
|
36 mean_x = SW_MEAN_X;
|
|
37 break;
|
|
38 case CW:
|
|
39 mean = CW_MEAN;
|
|
40 mean_x = CW_MEAN_X;
|
|
41 break;
|
|
42 }
|
|
43
|
|
44
|
|
45 if (GENDER == AUTODETECT){
|
|
46 if (VERBOSE)
|
|
47 fprintf(stdout, "MEAN: %f\tMEAN_X: %f\n", mean, mean_x);
|
|
48
|
|
49
|
|
50 if (mean_x / mean < 0.75){ // magical ratio
|
|
51 GENDER = MALE;
|
|
52
|
|
53 if (VERBOSE)
|
|
54 fprintf(stdout, "Autodetect Gender: Male\n");
|
|
55
|
|
56 i = findChrom("chrX", "", -1);
|
|
57
|
|
58
|
|
59 switch(wt){
|
|
60
|
|
61 case CW:
|
|
62 for (j=0; j<chromosomes[i]->cw_cnt; j++)
|
|
63 if (chromosomes[i]->cw[j].depth > (float) CW_MEAN_X * 2 || chromosomes[i]->cw[j].depth < (float) CW_MEAN_X / 10.0)
|
|
64 chromosomes[i]->cw[j].isControl = 0;
|
|
65 break;
|
|
66
|
|
67 case LW:
|
|
68 for (j=0; j<chromosomes[i]->lw_cnt; j++)
|
|
69 if (chromosomes[i]->lw[j].depth > (float) LW_MEAN_X * 2 || chromosomes[i]->lw[j].depth < (float) LW_MEAN_X / 10.0)
|
|
70 chromosomes[i]->lw[j].isControl = 0;
|
|
71 break;
|
|
72
|
|
73 case SW:
|
|
74 for (j=0; j<chromosomes[i]->sw_cnt; j++)
|
|
75 if (chromosomes[i]->sw[j].depth > (float) SW_MEAN_X * 2 || chromosomes[i]->sw[j].depth < (float) SW_MEAN_X / 10.0)
|
|
76 chromosomes[i]->sw[j].isControl = 0;
|
|
77 break;
|
|
78
|
|
79 }
|
|
80
|
|
81 }
|
|
82 else{
|
|
83 GENDER = FEMALE;
|
|
84 if (VERBOSE)
|
|
85 fprintf(stdout, "Autodetect Gender: Female\n");
|
|
86 }
|
|
87 }
|
|
88
|
|
89 normalize(wt, gclookup, gclookup_x, &max, &max_x, &min, &min_x);
|
|
90
|
|
91
|
|
92 do{
|
|
93
|
|
94 if (VERBOSE){
|
|
95 fprintf(stdout, "Control regions %s iteration %d ", (wt == LW ? "LW" : (wt == SW ? "SW" : "CW")), iter);
|
|
96 fflush(stdout);
|
|
97 }
|
|
98
|
|
99
|
|
100
|
|
101 p_min = min;
|
|
102 p_max = max;
|
|
103 p_min_x = min_x;
|
|
104 p_max_x = max_x;
|
|
105
|
|
106 calc_stat(wt, gclookup, gclookup_x, 1, &max, &max_x, &min, &min_x);
|
|
107
|
|
108
|
|
109 switch(wt){
|
|
110 case LW:
|
|
111 mean = LW_MEAN;
|
|
112 stdev = LW_STD;
|
|
113 mean_x = LW_MEAN_X;
|
|
114 stdev_x = LW_STD_X;
|
|
115 break;
|
|
116 case SW:
|
|
117 mean = SW_MEAN;
|
|
118 stdev = SW_STD;
|
|
119 mean_x = SW_MEAN_X;
|
|
120 stdev_x = SW_STD_X;
|
|
121 break;
|
|
122 case CW:
|
|
123 mean = CW_MEAN;
|
|
124 stdev = CW_STD;
|
|
125 mean_x = CW_MEAN_X;
|
|
126 stdev_x = CW_STD_X;
|
|
127 break;
|
|
128 }
|
|
129
|
|
130
|
|
131 if (GENDER == FEMALE){
|
|
132 max_x = max;
|
|
133 mean_x = mean;
|
|
134 stdev_x = stdev;
|
|
135 min_x = min;
|
|
136 }
|
|
137
|
|
138
|
|
139 iter++;
|
|
140
|
|
141
|
|
142 maxcut = mean + 2.5 * stdev;
|
|
143 mincut = mean - 2.5 * stdev;
|
|
144 maxcut_x = mean_x + 2.5 * stdev_x;
|
|
145 mincut_x = mean_x - 2.5 * stdev_x;
|
|
146
|
|
147 if (GENDER != FEMALE){
|
|
148 maxcut_x = mean_x + 2 * stdev_x;
|
|
149 mincut_x = mean_x - 2 * stdev_x;
|
|
150 }
|
|
151
|
|
152 if (mincut < 0.0) mincut = mean / 10.0;
|
|
153 if (mincut_x < 0.0) mincut_x = mean_x / 10.0;
|
|
154
|
|
155 if (maxcut - mean > mean - mincut)
|
|
156 maxcut = mean + (mean - mincut);
|
|
157
|
|
158 if (GENDER != FEMALE){
|
|
159 if (maxcut_x - mean_x > mean_x - mincut_x)
|
|
160 maxcut_x = mean_x + (mean_x - mincut_x);
|
|
161 }
|
|
162
|
|
163
|
|
164 if (VERBOSE){
|
|
165 fprintf(stdout, "mean: %f\tstdev: %f\tmax: %f (cut: %f)\tmin: %f (cut: %f) \n", mean, stdev, max, maxcut, min, mincut);
|
|
166 if (GENDER != FEMALE)
|
|
167 fprintf(stdout, "mean_x: %f\tstdev_x: %f\tmax_x: %f (cut: %f)\tmin_x: %f (cut: %f)\n", mean_x, stdev_x, max_x, maxcut_x, min_x, mincut_x);
|
|
168 }
|
|
169
|
|
170 if (p_min == min && p_max == max && p_min_x == min_x && p_max_x == max_x)
|
|
171 break;
|
|
172
|
|
173 } while (max >= maxcut || max_x >= maxcut_x || min <= mincut || min_x <= mincut_x);
|
|
174
|
|
175
|
|
176 fprintf(stdout, "%s Normalization completed.\n", (wt == LW ? "LW" : (wt == SW ? "SW" : "CW")));
|
|
177
|
|
178 return 1;
|
|
179 }
|
|
180
|
|
181
|
|
182 void normalize (enum WINDOWTYPE wt, float *gclookup, float *gclookup_x, float *max, float *max_x, float *min, float *min_x){
|
|
183 int gc_index;
|
|
184 float new_depth;
|
|
185 int i, j;
|
|
186
|
|
187 float total, total_x;
|
|
188 int win_cnt, win_cnt_x;
|
|
189
|
|
190 total = 0.0;
|
|
191 win_cnt = 0;
|
|
192 total_x = 0.0;
|
|
193 win_cnt_x = 0;
|
|
194
|
|
195
|
|
196 switch(wt){
|
|
197 case LW:
|
|
198 for (i=0; i<num_chrom; i++){
|
|
199 for (j=0; j<chromosomes[i]->lw_cnt; j++){
|
|
200 gc_index = chromosomes[i]->lw[j].gc * (float) GC_BIN;
|
|
201 if ((!strstr(chromosomes[i]->name, "chrX") && !strstr(chromosomes[i]->name, "chrY")) || GENDER == FEMALE){
|
|
202 if (MULTGC)
|
|
203 new_depth = gclookup[i] * chromosomes[i]->lw[j].depth;
|
|
204 else
|
|
205 new_depth = chromosomes[i]->lw[j].depth - (gclookup[gc_index] - LW_MEAN);
|
|
206 }
|
|
207 else if ((strstr(chromosomes[i]->name, "chrX") || strstr(chromosomes[i]->name, "chrY")) && GENDER == MALE){
|
|
208 if (MULTGC)
|
|
209 new_depth = gclookup_x[i] * chromosomes[i]->lw[j].depth;
|
|
210 else
|
|
211 new_depth = chromosomes[i]->lw[j].depth - (gclookup_x[gc_index] - LW_MEAN_X);
|
|
212 }
|
|
213
|
|
214 if (new_depth < 0) new_depth = 0;
|
|
215
|
|
216 chromosomes[i]->lw[j].depth = new_depth;
|
|
217 }
|
|
218 }
|
|
219 break;
|
|
220
|
|
221 case SW:
|
|
222 for (i=0; i<num_chrom; i++){
|
|
223 for (j=0; j<chromosomes[i]->sw_cnt; j++){
|
|
224 gc_index = chromosomes[i]->sw[j].gc * (float) GC_BIN;
|
|
225 if ((!strstr(chromosomes[i]->name, "chrX") && !strstr(chromosomes[i]->name, "chrY")) || GENDER == FEMALE){
|
|
226 if (MULTGC)
|
|
227 new_depth = gclookup[i] * chromosomes[i]->sw[j].depth;
|
|
228 else
|
|
229 new_depth = chromosomes[i]->sw[j].depth - (gclookup[gc_index] - SW_MEAN);
|
|
230 }
|
|
231 else if ((strstr(chromosomes[i]->name, "chrX") || strstr(chromosomes[i]->name, "chrY")) && GENDER == MALE){
|
|
232 if (MULTGC)
|
|
233 new_depth = gclookup_x[i] * chromosomes[i]->sw[j].depth;
|
|
234 else
|
|
235 new_depth = chromosomes[i]->sw[j].depth - (gclookup_x[gc_index] - SW_MEAN_X);
|
|
236 }
|
|
237
|
|
238 if (new_depth < 0) new_depth = 0;
|
|
239 chromosomes[i]->sw[j].depth = new_depth;
|
|
240 }
|
|
241 }
|
|
242 break;
|
|
243
|
|
244 case CW:
|
|
245 for (i=0; i<num_chrom; i++){
|
|
246 for (j=0; j<chromosomes[i]->cw_cnt; j++){
|
|
247 gc_index = chromosomes[i]->cw[j].gc * (float) GC_BIN;
|
|
248 if ((!strstr(chromosomes[i]->name, "chrX") && !strstr(chromosomes[i]->name, "chrY")) || GENDER == FEMALE){
|
|
249 if (MULTGC)
|
|
250 new_depth = gclookup[i] * chromosomes[i]->cw[j].depth;
|
|
251 else
|
|
252 new_depth = chromosomes[i]->cw[j].depth - (gclookup[gc_index] - CW_MEAN);
|
|
253 }
|
|
254 else if ((strstr(chromosomes[i]->name, "chrX") || strstr(chromosomes[i]->name, "chrY")) && GENDER == MALE){
|
|
255 if (MULTGC)
|
|
256 new_depth = gclookup_x[i] * chromosomes[i]->cw[j].depth;
|
|
257 else
|
|
258 new_depth = chromosomes[i]->cw[j].depth - (gclookup_x[gc_index] - CW_MEAN_X);
|
|
259 }
|
|
260
|
|
261 if (new_depth < 0) new_depth = 0;
|
|
262
|
|
263 chromosomes[i]->cw[j].depth = new_depth;
|
|
264 }
|
|
265 }
|
|
266 break;
|
|
267 }
|
|
268
|
|
269
|
|
270
|
|
271 calc_stat(wt, gclookup, gclookup_x, 0, max, max_x, min, min_x);
|
|
272
|
|
273 }
|
|
274
|
|
275
|
|
276 void calc_stat(enum WINDOWTYPE wt, float *gclookup, float *gclookup_x, char doClean, float *_max, float *_max_x, float *_min, float *_min_x){
|
|
277 int i, j, k;
|
|
278
|
|
279 float lw_var;
|
|
280 float sw_var;
|
|
281 float cw_var;
|
|
282
|
|
283 float lw_total;
|
|
284 float sw_total;
|
|
285 float cw_total;
|
|
286
|
|
287 float lw_var_x;
|
|
288 float sw_var_x;
|
|
289 float cw_var_x;
|
|
290
|
|
291 float lw_total_x;
|
|
292 float sw_total_x;
|
|
293 float cw_total_x;
|
|
294
|
|
295 int win_cnt;
|
|
296 float max;
|
|
297
|
|
298 int win_cnt_x;
|
|
299 float max_x;
|
|
300
|
|
301 float min;
|
|
302 float min_x;
|
|
303
|
|
304 int gc_index;
|
|
305
|
|
306 float gc_total[GC_BIN]; // total depth by GC
|
|
307 int gc_wincount[GC_BIN]; // count of windows with the given GC content
|
|
308
|
|
309 float gc_total_x[GC_BIN]; // total depth by GC
|
|
310 int gc_wincount_x[GC_BIN]; // count of windows with the given GC content
|
|
311
|
|
312 float MEAN;
|
|
313 float MEAN_X;
|
|
314
|
|
315
|
|
316 float maxcut, mincut;
|
|
317 float maxcut_x, mincut_x;
|
|
318
|
|
319 lw_var = 0.0;
|
|
320 sw_var = 0.0;
|
|
321 cw_var = 0.0;
|
|
322
|
|
323 lw_total = 0.0;
|
|
324 sw_total = 0.0;
|
|
325 cw_total = 0.0;
|
|
326
|
|
327 win_cnt = 0;
|
|
328
|
|
329 lw_var_x = 0.0;
|
|
330 sw_var_x = 0.0;
|
|
331 cw_var_x = 0.0;
|
|
332
|
|
333 lw_total_x = 0.0;
|
|
334 sw_total_x = 0.0;
|
|
335 cw_total_x = 0.0;
|
|
336
|
|
337 win_cnt_x = 0;
|
|
338
|
|
339 for (i=0; i<GC_BIN; i++){
|
|
340 gc_total[i] = 0.0;
|
|
341 gclookup[i] = 0.0;
|
|
342 gc_wincount[i] = 0;
|
|
343
|
|
344 gc_total_x[i] = 0.0;
|
|
345 gclookup_x[i] = 0.0;
|
|
346 gc_wincount_x[i] = 0;
|
|
347 }
|
|
348
|
|
349 max = 0.0;
|
|
350 max_x = 0.0;
|
|
351
|
|
352 min = FLT_MAX;
|
|
353 min_x = FLT_MAX;
|
|
354
|
|
355 /*
|
|
356
|
|
357 NOTE:
|
|
358
|
|
359 As of November 11, 2010, only CW implementation is reliable.
|
|
360
|
|
361
|
|
362 */
|
|
363
|
|
364 switch (wt){
|
|
365 case LW:
|
|
366
|
|
367 if (doClean){
|
|
368
|
|
369 for (i=0; i<num_chrom; i++){
|
|
370 for (j=0; j<chromosomes[i]->lw_cnt; j++){
|
|
371
|
|
372 if (chromosomes[i]->lw[j].isControl == 1){
|
|
373
|
|
374 /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
|
|
375 if (!strstr(chromosomes[i]->name, "chrX") || GENDER == FEMALE){
|
|
376
|
|
377 maxcut = LW_MEAN + 2.5 * LW_STD;
|
|
378 mincut = LW_MEAN - 2.5 * LW_STD;
|
|
379 if (mincut < LW_MEAN / 10.0) mincut = LW_MEAN / 10.0;
|
|
380
|
|
381 if (maxcut - LW_MEAN > LW_MEAN - mincut)
|
|
382 maxcut = LW_MEAN + (LW_MEAN - mincut);
|
|
383
|
|
384 if (chromosomes[i]->lw[j].depth > maxcut || chromosomes[i]->lw[j].depth < mincut){
|
|
385
|
|
386 /* Remove this window and its neighbors from controls */
|
|
387 chromosomes[i]->lw[j].isControl = 0;
|
|
388
|
|
389 k = j;
|
|
390
|
|
391 while (k > 0 && chromosomes[i]->lw[k-1].end >= chromosomes[i]->lw[j].start){
|
|
392 chromosomes[i]->lw[--k].isControl = 0;
|
|
393 }
|
|
394
|
|
395 k = j;
|
|
396
|
|
397 while (k < chromosomes[i]->lw_cnt-1 && chromosomes[i]->lw[k+1].start <= chromosomes[i]->lw[j].end){
|
|
398 chromosomes[i]->lw[++k].isControl = 0;
|
|
399 }
|
|
400
|
|
401 }
|
|
402
|
|
403 else{
|
|
404 lw_total += chromosomes[i]->lw[j].depth;
|
|
405 win_cnt++;
|
|
406 }
|
|
407 } // if AUTOSOME
|
|
408
|
|
409
|
|
410 /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
|
|
411 else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){
|
|
412
|
|
413 maxcut_x = LW_MEAN_X + 2 * LW_STD_X;
|
|
414 mincut_x = LW_MEAN_X - 2 * LW_STD_X;
|
|
415 if (mincut_x < LW_MEAN_X / 10.0) mincut_x = LW_MEAN_X / 10.0;
|
|
416
|
|
417 if (chromosomes[i]->lw[j].depth > maxcut_x || chromosomes[i]->lw[j].depth < mincut_x){
|
|
418
|
|
419 /* Remove this window and its neighbors from controls */
|
|
420 chromosomes[i]->lw[j].isControl = 0;
|
|
421
|
|
422 k = j;
|
|
423
|
|
424 while (k > 0 && chromosomes[i]->lw[k-1].end >= chromosomes[i]->lw[j].start){
|
|
425 chromosomes[i]->lw[--k].isControl = 0;
|
|
426 }
|
|
427
|
|
428 k = j;
|
|
429
|
|
430 while (k < chromosomes[i]->lw_cnt-1 && chromosomes[i]->lw[k+1].start <= chromosomes[i]->lw[j].end){
|
|
431 chromosomes[i]->lw[++k].isControl = 0;
|
|
432 }
|
|
433 }
|
|
434
|
|
435 else{
|
|
436 lw_total_x += chromosomes[i]->lw[j].depth;
|
|
437 win_cnt_x++;
|
|
438 }
|
|
439 } // if chrX
|
|
440
|
|
441 }
|
|
442
|
|
443 }
|
|
444 }
|
|
445
|
|
446 LW_MEAN_X = lw_total_x / win_cnt_x;
|
|
447 LW_MEAN = lw_total / win_cnt;
|
|
448
|
|
449 } // do clean
|
|
450
|
|
451 lw_total = 0.0;
|
|
452 win_cnt = 0;
|
|
453 lw_total_x = 0.0;
|
|
454 win_cnt_x = 0;
|
|
455
|
|
456 for (i=0; i<num_chrom; i++){
|
|
457 for (j=0; j<chromosomes[i]->lw_cnt; j++){
|
|
458
|
|
459 if (chromosomes[i]->lw[j].isControl == 1){
|
|
460
|
|
461 /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
|
|
462 if (!strstr(chromosomes[i]->name, "chrX") || GENDER==FEMALE){
|
|
463
|
|
464 lw_var += (LW_MEAN - chromosomes[i]->lw[j].depth) * (LW_MEAN - chromosomes[i]->lw[j].depth);
|
|
465 win_cnt++;
|
|
466
|
|
467 lw_total += chromosomes[i]->lw[j].depth;
|
|
468 gc_index = chromosomes[i]->lw[j].gc * GC_BIN;
|
|
469 gc_total[gc_index] += chromosomes[i]->lw[j].depth;
|
|
470 gc_wincount[gc_index]++;
|
|
471
|
|
472 if (chromosomes[i]->lw[j].depth > max)
|
|
473 max = chromosomes[i]->lw[j].depth;
|
|
474 if (chromosomes[i]->lw[j].depth < min)
|
|
475 min = chromosomes[i]->lw[j].depth;
|
|
476
|
|
477 } // AUTOSOMES
|
|
478
|
|
479 /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
|
|
480 else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){
|
|
481
|
|
482 lw_var_x += (LW_MEAN_X - chromosomes[i]->lw[j].depth) * (LW_MEAN_X - chromosomes[i]->lw[j].depth);
|
|
483 win_cnt_x++;
|
|
484
|
|
485 lw_total_x += chromosomes[i]->lw[j].depth;
|
|
486 gc_index = chromosomes[i]->lw[j].gc * GC_BIN;
|
|
487 gc_total_x[gc_index] += chromosomes[i]->lw[j].depth;
|
|
488 gc_wincount_x[gc_index]++;
|
|
489
|
|
490 if (chromosomes[i]->lw[j].depth > max_x)
|
|
491 max_x = chromosomes[i]->lw[j].depth;
|
|
492 if (chromosomes[i]->lw[j].depth < min_x)
|
|
493 min_x = chromosomes[i]->lw[j].depth;
|
|
494
|
|
495 } // chrX
|
|
496
|
|
497
|
|
498 } // if control
|
|
499 } // outer for
|
|
500 } // if (!doClean)
|
|
501
|
|
502 LW_MEAN_X = lw_total_x / win_cnt_x;
|
|
503 LW_STD_X = sqrt(lw_var_x / win_cnt_x);
|
|
504
|
|
505 LW_MEAN = lw_total / win_cnt;
|
|
506 LW_STD = sqrt(lw_var / win_cnt);
|
|
507
|
|
508 MEAN = LW_MEAN;
|
|
509 MEAN_X = LW_MEAN_X;
|
|
510
|
|
511 break;
|
|
512
|
|
513 case SW:
|
|
514
|
|
515
|
|
516 if (doClean){
|
|
517 for (i=0; i<num_chrom; i++){
|
|
518 for (j=0; j<chromosomes[i]->sw_cnt; j++){
|
|
519
|
|
520 if (chromosomes[i]->sw[j].isControl == 1){
|
|
521
|
|
522 /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
|
|
523 if (!strstr(chromosomes[i]->name, "chrX") || GENDER == FEMALE){
|
|
524
|
|
525 maxcut = SW_MEAN + 2.5 * SW_STD;
|
|
526 mincut = SW_MEAN - 2.5 * SW_STD;
|
|
527 if (mincut < SW_MEAN / 10.0) mincut = SW_MEAN / 10.0;
|
|
528
|
|
529 if (maxcut - SW_MEAN > SW_MEAN - mincut)
|
|
530 maxcut = SW_MEAN + (SW_MEAN - mincut);
|
|
531
|
|
532 if (chromosomes[i]->sw[j].depth > maxcut || chromosomes[i]->sw[j].depth < mincut){
|
|
533
|
|
534 /* Remove this window and its neighbors from controls */
|
|
535 chromosomes[i]->sw[j].isControl = 0;
|
|
536
|
|
537 k = j;
|
|
538
|
|
539 while (k > 0 && chromosomes[i]->sw[k-1].end >= chromosomes[i]->sw[j].start){
|
|
540 chromosomes[i]->sw[--k].isControl = 0;
|
|
541 }
|
|
542
|
|
543 k = j;
|
|
544
|
|
545 while (k < chromosomes[i]->sw_cnt-1 && chromosomes[i]->sw[k+1].start <= chromosomes[i]->sw[j].end){
|
|
546 chromosomes[i]->sw[++k].isControl = 0;
|
|
547 }
|
|
548
|
|
549 }
|
|
550
|
|
551 else{
|
|
552 sw_total += chromosomes[i]->sw[j].depth;
|
|
553 win_cnt++;
|
|
554 }
|
|
555 } // if AUTOSOME
|
|
556
|
|
557
|
|
558 /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
|
|
559 else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){
|
|
560
|
|
561 maxcut_x = SW_MEAN_X + 2 * SW_STD_X;
|
|
562 mincut_x = SW_MEAN_X - 2 * SW_STD_X;
|
|
563 if (mincut_x < SW_MEAN_X / 10.0) mincut_x = SW_MEAN_X / 10.0;
|
|
564
|
|
565 if (chromosomes[i]->sw[j].depth > maxcut_x || chromosomes[i]->sw[j].depth < mincut_x){
|
|
566
|
|
567 /* Remove this window and its neighbors from controls */
|
|
568 chromosomes[i]->sw[j].isControl = 0;
|
|
569
|
|
570 k = j;
|
|
571
|
|
572 while (k > 0 && chromosomes[i]->sw[k-1].end >= chromosomes[i]->sw[j].start){
|
|
573 chromosomes[i]->sw[--k].isControl = 0;
|
|
574 }
|
|
575
|
|
576 k = j;
|
|
577
|
|
578 while (k < chromosomes[i]->sw_cnt-1 && chromosomes[i]->sw[k+1].start <= chromosomes[i]->sw[j].end){
|
|
579 chromosomes[i]->sw[++k].isControl = 0;
|
|
580 }
|
|
581 }
|
|
582
|
|
583 else{
|
|
584 sw_total_x += chromosomes[i]->sw[j].depth;
|
|
585 win_cnt_x++;
|
|
586 }
|
|
587 } // if chrX
|
|
588
|
|
589 }
|
|
590
|
|
591 }
|
|
592 }
|
|
593
|
|
594 SW_MEAN_X = sw_total_x / win_cnt_x;
|
|
595 SW_MEAN = sw_total / win_cnt;
|
|
596
|
|
597 } // do clean
|
|
598
|
|
599 sw_total = 0.0;
|
|
600 win_cnt = 0;
|
|
601 sw_total_x = 0.0;
|
|
602 win_cnt_x = 0;
|
|
603
|
|
604 for (i=0; i<num_chrom; i++){
|
|
605 for (j=0; j<chromosomes[i]->sw_cnt; j++){
|
|
606
|
|
607 if (chromosomes[i]->sw[j].isControl == 1){
|
|
608
|
|
609 /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
|
|
610 if (!strstr(chromosomes[i]->name, "chrX") || GENDER==FEMALE){
|
|
611
|
|
612 sw_var += (SW_MEAN - chromosomes[i]->sw[j].depth) * (SW_MEAN - chromosomes[i]->sw[j].depth);
|
|
613 win_cnt++;
|
|
614
|
|
615 sw_total += chromosomes[i]->sw[j].depth;
|
|
616 gc_index = chromosomes[i]->sw[j].gc * GC_BIN;
|
|
617 gc_total[gc_index] += chromosomes[i]->sw[j].depth;
|
|
618 gc_wincount[gc_index]++;
|
|
619
|
|
620 if (chromosomes[i]->sw[j].depth > max)
|
|
621 max = chromosomes[i]->sw[j].depth;
|
|
622 if (chromosomes[i]->sw[j].depth < min)
|
|
623 min = chromosomes[i]->sw[j].depth;
|
|
624
|
|
625 } // AUTOSOMES
|
|
626
|
|
627 /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
|
|
628 else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){
|
|
629
|
|
630 sw_var_x += (SW_MEAN_X - chromosomes[i]->sw[j].depth) * (SW_MEAN_X - chromosomes[i]->sw[j].depth);
|
|
631 win_cnt_x++;
|
|
632
|
|
633 sw_total_x += chromosomes[i]->sw[j].depth;
|
|
634 gc_index = chromosomes[i]->sw[j].gc * GC_BIN;
|
|
635 gc_total_x[gc_index] += chromosomes[i]->sw[j].depth;
|
|
636 gc_wincount_x[gc_index]++;
|
|
637
|
|
638 if (chromosomes[i]->sw[j].depth > max_x)
|
|
639 max_x = chromosomes[i]->sw[j].depth;
|
|
640 if (chromosomes[i]->sw[j].depth < min_x)
|
|
641 min_x = chromosomes[i]->sw[j].depth;
|
|
642
|
|
643 } // chrX
|
|
644
|
|
645
|
|
646 } // if control
|
|
647 } // outer for
|
|
648 } // if (!doClean)
|
|
649
|
|
650 SW_MEAN_X = sw_total_x / win_cnt_x;
|
|
651 SW_STD_X = sqrt(sw_var_x / win_cnt_x);
|
|
652
|
|
653 SW_MEAN = sw_total / win_cnt;
|
|
654 SW_STD = sqrt(sw_var / win_cnt);
|
|
655
|
|
656 MEAN = SW_MEAN;
|
|
657 MEAN_X = SW_MEAN_X;
|
|
658
|
|
659
|
|
660 break;
|
|
661
|
|
662 case CW:
|
|
663
|
|
664 if (doClean){
|
|
665 for (i=0; i<num_chrom; i++){
|
|
666 for (j=0; j<chromosomes[i]->cw_cnt; j++){
|
|
667
|
|
668
|
|
669 if (chromosomes[i]->cw[j].isControl == 1){
|
|
670
|
|
671 /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
|
|
672 if (!strstr(chromosomes[i]->name, "chrX") || GENDER == FEMALE){
|
|
673
|
|
674 maxcut = CW_MEAN + 2.5 * CW_STD;
|
|
675 mincut = CW_MEAN - 2.5 * CW_STD;
|
|
676 if (mincut < CW_MEAN / 10.0) mincut = CW_MEAN / 10.0;
|
|
677
|
|
678 if (maxcut - CW_MEAN > CW_MEAN - mincut)
|
|
679 maxcut = CW_MEAN + (CW_MEAN - mincut);
|
|
680
|
|
681 if (chromosomes[i]->cw[j].depth > maxcut || chromosomes[i]->cw[j].depth < mincut){
|
|
682
|
|
683 /* Remove this window and its neighbors from controls */
|
|
684 chromosomes[i]->cw[j].isControl = 0;
|
|
685
|
|
686 if (j > 0)
|
|
687 chromosomes[i]->cw[j-1].isControl = 0;
|
|
688 if (j < chromosomes[i]->cw_cnt-1)
|
|
689 chromosomes[i]->cw[j+1].isControl = 0;
|
|
690 }
|
|
691 else{
|
|
692 cw_total += chromosomes[i]->cw[j].depth;
|
|
693 win_cnt++;
|
|
694 }
|
|
695 } // if AUTOSOME
|
|
696
|
|
697
|
|
698 /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
|
|
699 else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){
|
|
700
|
|
701 maxcut_x = CW_MEAN_X + 2 * CW_STD_X;
|
|
702 mincut_x = CW_MEAN_X - 2 * CW_STD_X;
|
|
703 if (mincut_x < CW_MEAN_X / 10.0) mincut_x = CW_MEAN_X / 10.0;
|
|
704
|
|
705 if (chromosomes[i]->cw[j].depth > maxcut_x || chromosomes[i]->cw[j].depth < mincut_x){
|
|
706
|
|
707 /* Remove this window and its neighbors from controls */
|
|
708 chromosomes[i]->cw[j].isControl = 0;
|
|
709
|
|
710 if (j > 0)
|
|
711 chromosomes[i]->cw[j-1].isControl = 0;
|
|
712 if (j < chromosomes[i]->cw_cnt-1)
|
|
713 chromosomes[i]->cw[j+1].isControl = 0;
|
|
714 }
|
|
715 else{
|
|
716 cw_total_x += chromosomes[i]->cw[j].depth;
|
|
717 win_cnt_x++;
|
|
718 }
|
|
719 } // if chrX
|
|
720
|
|
721 }
|
|
722
|
|
723 }
|
|
724 }
|
|
725
|
|
726
|
|
727 CW_MEAN_X = cw_total_x / win_cnt_x;
|
|
728
|
|
729 CW_MEAN = cw_total / win_cnt;
|
|
730
|
|
731 } // do clean
|
|
732
|
|
733 cw_total = 0.0;
|
|
734 win_cnt = 0;
|
|
735 cw_total_x = 0.0;
|
|
736 win_cnt_x = 0;
|
|
737
|
|
738 for (i=0; i<num_chrom; i++){
|
|
739 for (j=0; j<chromosomes[i]->cw_cnt; j++){
|
|
740
|
|
741 if (chromosomes[i]->cw[j].isControl == 1){
|
|
742
|
|
743 /* AUTOSOMES -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
|
|
744 if (!strstr(chromosomes[i]->name, "chrX") || GENDER==FEMALE){
|
|
745
|
|
746 cw_var += (CW_MEAN - chromosomes[i]->cw[j].depth) * (CW_MEAN - chromosomes[i]->cw[j].depth);
|
|
747 win_cnt++;
|
|
748
|
|
749 cw_total += chromosomes[i]->cw[j].depth;
|
|
750 gc_index = chromosomes[i]->cw[j].gc * GC_BIN;
|
|
751 gc_total[gc_index] += chromosomes[i]->cw[j].depth;
|
|
752 gc_wincount[gc_index]++;
|
|
753
|
|
754 if (chromosomes[i]->cw[j].depth > max)
|
|
755 max = chromosomes[i]->cw[j].depth;
|
|
756 if (chromosomes[i]->cw[j].depth < min)
|
|
757 min = chromosomes[i]->cw[j].depth;
|
|
758
|
|
759 } // AUTOSOMES
|
|
760
|
|
761 /* chrX -- NOTE: chrY is NOT a control; it is prefiltered, no need to check for it here */
|
|
762 else if (strstr(chromosomes[i]->name, "chrX") && GENDER != FEMALE){
|
|
763
|
|
764 cw_var_x += (CW_MEAN_X - chromosomes[i]->cw[j].depth) * (CW_MEAN_X - chromosomes[i]->cw[j].depth);
|
|
765 win_cnt_x++;
|
|
766
|
|
767 cw_total_x += chromosomes[i]->cw[j].depth;
|
|
768 gc_index = chromosomes[i]->cw[j].gc * GC_BIN;
|
|
769 gc_total_x[gc_index] += chromosomes[i]->cw[j].depth;
|
|
770 gc_wincount_x[gc_index]++;
|
|
771
|
|
772 if (chromosomes[i]->cw[j].depth > max_x)
|
|
773 max_x = chromosomes[i]->cw[j].depth;
|
|
774 if (chromosomes[i]->cw[j].depth < min_x)
|
|
775 min_x = chromosomes[i]->cw[j].depth;
|
|
776
|
|
777 } // chrX
|
|
778
|
|
779
|
|
780 } // if control
|
|
781 } // outer for
|
|
782 } // if (!doClean)
|
|
783
|
|
784
|
|
785 CW_MEAN_X = cw_total_x / win_cnt_x;
|
|
786 CW_STD_X = sqrt(cw_var_x / win_cnt_x);
|
|
787
|
|
788 CW_MEAN = cw_total / win_cnt;
|
|
789 CW_STD = sqrt(cw_var / win_cnt);
|
|
790
|
|
791 MEAN = CW_MEAN;
|
|
792 MEAN_X = CW_MEAN_X;
|
|
793
|
|
794 break;
|
|
795 }
|
|
796
|
|
797
|
|
798 /* calculate the gclookup table */
|
|
799
|
|
800 for (i=0; i<GC_BIN; i++){
|
|
801
|
|
802 /* AUTOSOMES */
|
|
803
|
|
804 if (gc_wincount[i] == 0 || gc_total[i] == 0){
|
|
805 j=i-1;
|
|
806
|
|
807 while (j >= 0 && gc_total[j] == 0)
|
|
808 j--;
|
|
809
|
|
810 if (gc_total[j] == 0 || gc_wincount[j] == 0){
|
|
811 j=i+1;
|
|
812 while (j < GC_BIN && gc_total[j] == 0) j++;
|
|
813 }
|
|
814
|
|
815 gc_total[i] = gc_total[j];
|
|
816 gc_wincount[i] = gc_wincount[j];
|
|
817 }
|
|
818
|
|
819 if (gc_total[i] != 0.0){
|
|
820 gclookup[i] = gc_total[i] / (float) gc_wincount[i];
|
|
821
|
|
822 if (MULTGC){
|
|
823
|
|
824 /*
|
|
825 if (VERBOSE && i > 200)
|
|
826 fprintf(stdout, "GC\%: %f\tlookup: %f\tnow: ", ((float)i / 10.0), gclookup[i]);
|
|
827 */
|
|
828
|
|
829 gclookup[i] = MEAN / gclookup[i];
|
|
830
|
|
831 /*
|
|
832 if (VERBOSE && i > 200)
|
|
833 fprintf(stdout, "%f\n", gclookup[i]);
|
|
834 */
|
|
835
|
|
836 if (gclookup[i] > MAX_GC_CORR)
|
|
837 gclookup[i] = MAX_GC_CORR;
|
|
838 else if (gclookup[i] < MIN_GC_CORR)
|
|
839 gclookup[i] = MIN_GC_CORR;
|
|
840 }
|
|
841
|
|
842 /*
|
|
843 if (!MULTGC && VERBOSE && i > 200)
|
|
844 fprintf(stdout, "GC\%: %f\tlookup: %f\n", ((float)i / 10.0), gclookup[i]);
|
|
845 */
|
|
846
|
|
847 }
|
|
848
|
|
849
|
|
850
|
|
851 /* chrX */
|
|
852
|
|
853 if (GENDER != FEMALE){
|
|
854
|
|
855 if (gc_wincount_x[i] == 0 || gc_total_x[i] == 0){
|
|
856 j=i-1;
|
|
857
|
|
858 while (j >= 0 && gc_total_x[j] == 0)
|
|
859 j--;
|
|
860
|
|
861 if (gc_total_x[j] == 0 || gc_wincount_x[j] == 0){
|
|
862 j=i+1;
|
|
863 while (j < GC_BIN && gc_total_x[j] == 0) j++;
|
|
864 }
|
|
865
|
|
866 gc_total_x[i] = gc_total_x[j];
|
|
867 gc_wincount_x[i] = gc_wincount_x[j];
|
|
868 }
|
|
869
|
|
870 if (gc_total_x[i] != 0.0){
|
|
871 gclookup_x[i] = gc_total_x[i] / (float) gc_wincount_x[i];
|
|
872
|
|
873 if (MULTGC){
|
|
874 gclookup_x[i] = MEAN_X / gclookup_x[i];
|
|
875 if (gclookup_x[i] > MAX_GC_CORR)
|
|
876 gclookup_x[i] = MAX_GC_CORR;
|
|
877 else if (gclookup_x[i] < MIN_GC_CORR)
|
|
878 gclookup_x[i] = MIN_GC_CORR;
|
|
879 }
|
|
880
|
|
881 }
|
|
882
|
|
883 }
|
|
884
|
|
885
|
|
886 }
|
|
887
|
|
888
|
|
889 *_max = max;
|
|
890 *_max_x = max_x;
|
|
891
|
|
892 *_min = min;
|
|
893 *_min_x = min_x;
|
|
894
|
|
895
|
|
896 }
|
|
897
|
|
898
|
|
899
|
|
900
|
|
901
|