Mercurial > repos > calkan > mrcanavar
comparison mrcanavar-0.34/callcnv.c @ 0:86522a0b5f59 default tip
Uploaded source code for mrCaNaVaR
author | calkan |
---|---|
date | Tue, 21 Feb 2012 10:44:13 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:86522a0b5f59 |
---|---|
1 #include "callcnv.h" | |
2 | |
3 void call_cnv(char *depthFile, char *out_prefix){ | |
4 | |
5 int i, j; | |
6 | |
7 float *gclookup; | |
8 float *gclookup_x; | |
9 | |
10 char *fname; | |
11 char logname[2 * MAX_STR]; | |
12 FILE *log; | |
13 | |
14 if (GENOME_CONF == NULL) | |
15 print_error("Select genome configuration file (input) through the -conf parameter.\n"); | |
16 if (depthFile == NULL) | |
17 print_error("Select read depth output file through the -depth parameter.\n"); | |
18 if (out_prefix == NULL) | |
19 print_error("Select output file prefix through the -o parameter.\n"); | |
20 | |
21 | |
22 | |
23 loadRefConfig(GENOME_CONF); | |
24 | |
25 readDepth(depthFile); | |
26 | |
27 | |
28 sprintf(logname, "%s.log", out_prefix); | |
29 log = my_fopen(logname, "w", 0); | |
30 | |
31 /* trivial control removal */ | |
32 | |
33 | |
34 fprintf(stdout, "Control region cleanup..."); | |
35 fflush(stdout); | |
36 | |
37 /* add stdev calculation here */ | |
38 | |
39 | |
40 for (i=0; i<num_chrom; i++){ | |
41 for (j=0; j<chromosomes[i]->lw_cnt; j++){ | |
42 if (chromosomes[i]->lw[j].depth > (float) LW_MEAN * 2.0 || chromosomes[i]->lw[j].depth < (float) LW_MEAN / 10.0) | |
43 chromosomes[i]->lw[j].isControl = 0; | |
44 if (strstr(chromosomes[i]->name, "random") || strstr(chromosomes[i]->name, "chrY") || strstr(chromosomes[i]->name, "hap") || strstr(chromosomes[i]->name, "chrUn")) | |
45 chromosomes[i]->lw[j].isControl = 0; | |
46 } | |
47 for (j=0; j<chromosomes[i]->sw_cnt; j++){ | |
48 if (chromosomes[i]->sw[j].depth > (float) SW_MEAN * 2.0 || chromosomes[i]->sw[j].depth < (float) SW_MEAN / 10.0) | |
49 chromosomes[i]->sw[j].isControl = 0; | |
50 if (strstr(chromosomes[i]->name, "random") || strstr(chromosomes[i]->name, "chrY") || strstr(chromosomes[i]->name, "hap") || strstr(chromosomes[i]->name, "chrUn")) | |
51 chromosomes[i]->sw[j].isControl = 0; | |
52 } | |
53 | |
54 for (j=0; j<chromosomes[i]->cw_cnt; j++){ | |
55 if (chromosomes[i]->cw[j].depth > (float) CW_MEAN * 2.0 || chromosomes[i]->cw[j].depth < (float) CW_MEAN / 10.0) | |
56 chromosomes[i]->cw[j].isControl = 0; | |
57 if (strstr(chromosomes[i]->name, "random") || strstr(chromosomes[i]->name, "chrY") || strstr(chromosomes[i]->name, "hap") || strstr(chromosomes[i]->name, "chrUn")) | |
58 chromosomes[i]->cw[j].isControl = 0; | |
59 } | |
60 } | |
61 | |
62 | |
63 | |
64 fprintf(stdout, "\n"); | |
65 | |
66 gclookup = (float *) malloc(sizeof(float) * GC_BIN); | |
67 gclookup_x = (float *) malloc(sizeof(float) * GC_BIN); | |
68 | |
69 fprintf (log, "\nmrCaNaVaR version %s\nLast update: %s\n\n", VERSION, LAST_UPDATE); | |
70 fprintf (log, "Calculating library %s\n", out_prefix); | |
71 fprintf (log, "GC correction mode: %s\n", MULTGC == 1 ? "MULTIPLICATIVE" : "ADDITIVE"); | |
72 | |
73 norm_until_converges(CW, gclookup, gclookup_x); | |
74 | |
75 fprintf (log, "\nAfter GC Correction:\n--------------------\n"); | |
76 fprintf (log, "Sample Gender: %s.\n", GENDER == MALE ? "Male" : "Female"); | |
77 fprintf (log, "CW Average Read Depth: %f, Standard Deviation: %f\n", CW_MEAN, CW_STD); | |
78 | |
79 | |
80 if (GENDER == MALE) | |
81 fprintf (log, "CW Average chrX Read Depth: %f, Standard Deviation: %f\n", CW_MEAN_X, CW_STD_X); | |
82 | |
83 norm_until_converges(LW, gclookup, gclookup_x); | |
84 fprintf (log, "LW Average Read Depth: %f, Standard Deviation: %f\n", LW_MEAN, LW_STD); | |
85 if (GENDER == MALE) | |
86 fprintf (log, "LW Average chrX Read Depth: %f, Standard Deviation: %f\n", LW_MEAN_X, LW_STD_X); | |
87 | |
88 norm_until_converges(SW, gclookup, gclookup_x); | |
89 fprintf (log, "SW Average Read Depth: %f, Standard Deviation: %f\n", SW_MEAN, SW_STD); | |
90 if (GENDER == MALE) | |
91 fprintf (log, "SW Average chrX Read Depth: %f, Standard Deviation: %f\n", SW_MEAN_X, SW_STD_X); | |
92 | |
93 | |
94 fprintf (stdout, "Writing normalized CW depth to: %s.cw_norm.bed.\n", out_prefix); | |
95 | |
96 fname = (char *) malloc(sizeof (char) * (strlen(out_prefix) + strlen(".cw_norm.bed") + 1)); | |
97 | |
98 sprintf (fname, "%s.cw_norm.bed", out_prefix); | |
99 dump_text_windows(fname, CW); | |
100 | |
101 fprintf (stdout, "Writing normalized LW depth to: %s.lw_norm.bed.\n", out_prefix); | |
102 | |
103 sprintf (fname, "%s.lw_norm.bed", out_prefix); | |
104 dump_text_windows(fname, LW); | |
105 | |
106 fprintf (stdout, "Writing normalized SW depth to: %s.sw_norm.bed.\n", out_prefix); | |
107 | |
108 sprintf (fname, "%s.sw_norm.bed", out_prefix); | |
109 dump_text_windows(fname, SW); | |
110 | |
111 sprintf (fname, "%s.copynumber.bed", out_prefix); | |
112 fprintf (stdout, "Writing copy numbers to: %s.copynumber.bed. \n", out_prefix); | |
113 print_copy_numbers(fname); | |
114 | |
115 free(fname); | |
116 | |
117 free(gclookup); | |
118 free(gclookup_x); | |
119 fclose(log); | |
120 | |
121 } | |
122 | |
123 | |
124 | |
125 void readDepth(char *depthFile){ | |
126 | |
127 FILE *binDepth; | |
128 int i, j; | |
129 int retVal; | |
130 | |
131 float lw_total; | |
132 float sw_total; | |
133 float cw_total; | |
134 | |
135 int lw_cnt; | |
136 int sw_cnt; | |
137 int cw_cnt; | |
138 | |
139 int isMagicNum; | |
140 | |
141 | |
142 binDepth = my_fopen(depthFile, "r", 0); | |
143 | |
144 retVal = fread(&isMagicNum, sizeof(isMagicNum), 1, binDepth); | |
145 | |
146 if (isMagicNum != magicNum) | |
147 print_error("Read depth file seems to be invalid or corrupt.\n"); | |
148 | |
149 | |
150 lw_total = 0.0; | |
151 sw_total = 0.0; | |
152 cw_total = 0.0; | |
153 | |
154 lw_cnt = 0; | |
155 sw_cnt = 0; | |
156 cw_cnt = 0; | |
157 | |
158 /* read LW */ | |
159 | |
160 for (i = 0; i < num_chrom; i++){ | |
161 for (j = 0; j < chromosomes[i]->lw_cnt; j++){ | |
162 retVal = fread(&(chromosomes[i]->lw[j].depth), sizeof(chromosomes[i]->lw[j].depth), 1, binDepth); | |
163 lw_total += chromosomes[i]->lw[j].depth; | |
164 chromosomes[i]->lw[j].isControl = 1; | |
165 lw_cnt++; | |
166 } | |
167 } | |
168 | |
169 /* read SW */ | |
170 | |
171 for (i = 0; i < num_chrom; i++){ | |
172 for (j = 0; j < chromosomes[i]->sw_cnt; j++){ | |
173 retVal = fread(&(chromosomes[i]->sw[j].depth), sizeof(chromosomes[i]->sw[j].depth), 1, binDepth); | |
174 sw_total += chromosomes[i]->sw[j].depth; | |
175 chromosomes[i]->sw[j].isControl = 1; | |
176 sw_cnt++; | |
177 } | |
178 } | |
179 | |
180 /* read CW */ | |
181 | |
182 for (i = 0; i < num_chrom; i++){ | |
183 for (j = 0; j < chromosomes[i]->cw_cnt; j++){ | |
184 retVal = fread(&(chromosomes[i]->cw[j].depth), sizeof(chromosomes[i]->cw[j].depth), 1, binDepth); | |
185 cw_total += chromosomes[i]->cw[j].depth; | |
186 chromosomes[i]->cw[j].isControl = 1; | |
187 cw_cnt++; | |
188 } | |
189 } | |
190 | |
191 LW_MEAN = lw_total / lw_cnt; | |
192 SW_MEAN = sw_total / sw_cnt; | |
193 CW_MEAN = cw_total / cw_cnt; | |
194 | |
195 fprintf(stdout, "[OK] depth file %s is loaded.\n", depthFile); | |
196 if (VERBOSE) | |
197 fprintf(stdout, "LW_MEAN: %f\tSW_MEAN: %f\tCW_MEAN:%f\n", LW_MEAN, SW_MEAN, CW_MEAN); | |
198 | |
199 fclose(binDepth); | |
200 } | |
201 | |
202 | |
203 void dump_text_windows(char *fname, enum WINDOWTYPE wt){ | |
204 | |
205 FILE *txtDepth; | |
206 int i, j; | |
207 | |
208 txtDepth = my_fopen(fname, "w", 0); | |
209 | |
210 fprintf(txtDepth, "#%s\t%s\t%s\t%s\t%s\t%s\n\n", "CHROM", "START", "END", "GC\%", "READ_DEPTH", "IS_CONTROL"); | |
211 | |
212 switch (wt){ | |
213 case LW: | |
214 for (i = 0; i < num_chrom; i++){ | |
215 for (j = 0; j < chromosomes[i]->lw_cnt; j++) | |
216 if (chromosomes[i]->lw[j].isControl == 1) | |
217 fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tY\n", chromosomes[i]->name, chromosomes[i]->lw[j].start, chromosomes[i]->lw[j].end, chromosomes[i]->lw[j].gc, chromosomes[i]->lw[j].depth); | |
218 else | |
219 fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tN\n", chromosomes[i]->name, chromosomes[i]->lw[j].start, chromosomes[i]->lw[j].end, chromosomes[i]->lw[j].gc, chromosomes[i]->lw[j].depth); | |
220 } | |
221 break; | |
222 | |
223 case SW: | |
224 for (i = 0; i < num_chrom; i++){ | |
225 for (j = 0; j < chromosomes[i]->sw_cnt; j++) | |
226 if (chromosomes[i]->sw[j].isControl == 1) | |
227 fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tY\n", chromosomes[i]->name, chromosomes[i]->sw[j].start, chromosomes[i]->sw[j].end, chromosomes[i]->sw[j].gc, chromosomes[i]->sw[j].depth); | |
228 else | |
229 fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tN\n", chromosomes[i]->name, chromosomes[i]->sw[j].start, chromosomes[i]->sw[j].end, chromosomes[i]->sw[j].gc, chromosomes[i]->sw[j].depth); | |
230 } | |
231 break; | |
232 | |
233 case CW: | |
234 for (i = 0; i < num_chrom; i++){ | |
235 for (j = 0; j < chromosomes[i]->cw_cnt; j++) | |
236 if (chromosomes[i]->cw[j].isControl == 1) | |
237 fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tY\n", chromosomes[i]->name, chromosomes[i]->cw[j].start, chromosomes[i]->cw[j].end, chromosomes[i]->cw[j].gc, chromosomes[i]->cw[j].depth); | |
238 else | |
239 fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tN\n", chromosomes[i]->name, chromosomes[i]->cw[j].start, chromosomes[i]->cw[j].end, chromosomes[i]->cw[j].gc, chromosomes[i]->cw[j].depth); | |
240 } | |
241 break; | |
242 } | |
243 | |
244 fclose(txtDepth); | |
245 | |
246 } | |
247 | |
248 | |
249 | |
250 void print_copy_numbers(char *fname){ | |
251 /* UNFINISHED */ | |
252 | |
253 FILE *txtDepth; | |
254 int i, j; | |
255 float copy_num; | |
256 | |
257 txtDepth = my_fopen(fname, "w", 0); | |
258 | |
259 fprintf(txtDepth, "#%s\t%s\t%s\t%s\t%s\n\n", "CHROM", "START", "END", "GC\%", "COPYNUMBER"); | |
260 | |
261 for (i = 0; i < num_chrom; i++){ | |
262 for (j = 0; j < chromosomes[i]->cw_cnt; j++){ | |
263 if (GENDER == FEMALE) | |
264 copy_num = (chromosomes[i]->cw[j].depth / CW_MEAN) * 2; | |
265 else{ | |
266 if (!strstr(chromosomes[i]->name, "chrX") && !strstr(chromosomes[i]->name, "chrY")) | |
267 copy_num = (chromosomes[i]->cw[j].depth / CW_MEAN) * 2; | |
268 else | |
269 copy_num = chromosomes[i]->cw[j].depth / CW_MEAN_X; | |
270 } | |
271 | |
272 if (GENDER == FEMALE && (strstr(chromosomes[i]->name, "chrY"))) | |
273 continue; | |
274 | |
275 fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\n", chromosomes[i]->name, chromosomes[i]->cw[j].start, chromosomes[i]->cw[j].end, chromosomes[i]->cw[j].gc, copy_num); | |
276 } | |
277 } | |
278 | |
279 | |
280 fclose(txtDepth); | |
281 | |
282 | |
283 } | |
284 | |
285 | |
286 |