0
|
1 #include "callcnv.h"
|
|
2
|
|
3 void call_cnv(char *depthFile, char *out_prefix){
|
|
4
|
|
5 int i, j;
|
|
6
|
|
7 float *gclookup;
|
|
8 float *gclookup_x;
|
|
9
|
|
10 char *fname;
|
|
11 char logname[2 * MAX_STR];
|
|
12 FILE *log;
|
|
13
|
|
14 if (GENOME_CONF == NULL)
|
|
15 print_error("Select genome configuration file (input) through the -conf parameter.\n");
|
|
16 if (depthFile == NULL)
|
|
17 print_error("Select read depth output file through the -depth parameter.\n");
|
|
18 if (out_prefix == NULL)
|
|
19 print_error("Select output file prefix through the -o parameter.\n");
|
|
20
|
|
21
|
|
22
|
|
23 loadRefConfig(GENOME_CONF);
|
|
24
|
|
25 readDepth(depthFile);
|
|
26
|
|
27
|
|
28 sprintf(logname, "%s.log", out_prefix);
|
|
29 log = my_fopen(logname, "w", 0);
|
|
30
|
|
31 /* trivial control removal */
|
|
32
|
|
33
|
|
34 fprintf(stdout, "Control region cleanup...");
|
|
35 fflush(stdout);
|
|
36
|
|
37 /* add stdev calculation here */
|
|
38
|
|
39
|
|
40 for (i=0; i<num_chrom; i++){
|
|
41 for (j=0; j<chromosomes[i]->lw_cnt; j++){
|
|
42 if (chromosomes[i]->lw[j].depth > (float) LW_MEAN * 2.0 || chromosomes[i]->lw[j].depth < (float) LW_MEAN / 10.0)
|
|
43 chromosomes[i]->lw[j].isControl = 0;
|
|
44 if (strstr(chromosomes[i]->name, "random") || strstr(chromosomes[i]->name, "chrY") || strstr(chromosomes[i]->name, "hap") || strstr(chromosomes[i]->name, "chrUn"))
|
|
45 chromosomes[i]->lw[j].isControl = 0;
|
|
46 }
|
|
47 for (j=0; j<chromosomes[i]->sw_cnt; j++){
|
|
48 if (chromosomes[i]->sw[j].depth > (float) SW_MEAN * 2.0 || chromosomes[i]->sw[j].depth < (float) SW_MEAN / 10.0)
|
|
49 chromosomes[i]->sw[j].isControl = 0;
|
|
50 if (strstr(chromosomes[i]->name, "random") || strstr(chromosomes[i]->name, "chrY") || strstr(chromosomes[i]->name, "hap") || strstr(chromosomes[i]->name, "chrUn"))
|
|
51 chromosomes[i]->sw[j].isControl = 0;
|
|
52 }
|
|
53
|
|
54 for (j=0; j<chromosomes[i]->cw_cnt; j++){
|
|
55 if (chromosomes[i]->cw[j].depth > (float) CW_MEAN * 2.0 || chromosomes[i]->cw[j].depth < (float) CW_MEAN / 10.0)
|
|
56 chromosomes[i]->cw[j].isControl = 0;
|
|
57 if (strstr(chromosomes[i]->name, "random") || strstr(chromosomes[i]->name, "chrY") || strstr(chromosomes[i]->name, "hap") || strstr(chromosomes[i]->name, "chrUn"))
|
|
58 chromosomes[i]->cw[j].isControl = 0;
|
|
59 }
|
|
60 }
|
|
61
|
|
62
|
|
63
|
|
64 fprintf(stdout, "\n");
|
|
65
|
|
66 gclookup = (float *) malloc(sizeof(float) * GC_BIN);
|
|
67 gclookup_x = (float *) malloc(sizeof(float) * GC_BIN);
|
|
68
|
|
69 fprintf (log, "\nmrCaNaVaR version %s\nLast update: %s\n\n", VERSION, LAST_UPDATE);
|
|
70 fprintf (log, "Calculating library %s\n", out_prefix);
|
|
71 fprintf (log, "GC correction mode: %s\n", MULTGC == 1 ? "MULTIPLICATIVE" : "ADDITIVE");
|
|
72
|
|
73 norm_until_converges(CW, gclookup, gclookup_x);
|
|
74
|
|
75 fprintf (log, "\nAfter GC Correction:\n--------------------\n");
|
|
76 fprintf (log, "Sample Gender: %s.\n", GENDER == MALE ? "Male" : "Female");
|
|
77 fprintf (log, "CW Average Read Depth: %f, Standard Deviation: %f\n", CW_MEAN, CW_STD);
|
|
78
|
|
79
|
|
80 if (GENDER == MALE)
|
|
81 fprintf (log, "CW Average chrX Read Depth: %f, Standard Deviation: %f\n", CW_MEAN_X, CW_STD_X);
|
|
82
|
|
83 norm_until_converges(LW, gclookup, gclookup_x);
|
|
84 fprintf (log, "LW Average Read Depth: %f, Standard Deviation: %f\n", LW_MEAN, LW_STD);
|
|
85 if (GENDER == MALE)
|
|
86 fprintf (log, "LW Average chrX Read Depth: %f, Standard Deviation: %f\n", LW_MEAN_X, LW_STD_X);
|
|
87
|
|
88 norm_until_converges(SW, gclookup, gclookup_x);
|
|
89 fprintf (log, "SW Average Read Depth: %f, Standard Deviation: %f\n", SW_MEAN, SW_STD);
|
|
90 if (GENDER == MALE)
|
|
91 fprintf (log, "SW Average chrX Read Depth: %f, Standard Deviation: %f\n", SW_MEAN_X, SW_STD_X);
|
|
92
|
|
93
|
|
94 fprintf (stdout, "Writing normalized CW depth to: %s.cw_norm.bed.\n", out_prefix);
|
|
95
|
|
96 fname = (char *) malloc(sizeof (char) * (strlen(out_prefix) + strlen(".cw_norm.bed") + 1));
|
|
97
|
|
98 sprintf (fname, "%s.cw_norm.bed", out_prefix);
|
|
99 dump_text_windows(fname, CW);
|
|
100
|
|
101 fprintf (stdout, "Writing normalized LW depth to: %s.lw_norm.bed.\n", out_prefix);
|
|
102
|
|
103 sprintf (fname, "%s.lw_norm.bed", out_prefix);
|
|
104 dump_text_windows(fname, LW);
|
|
105
|
|
106 fprintf (stdout, "Writing normalized SW depth to: %s.sw_norm.bed.\n", out_prefix);
|
|
107
|
|
108 sprintf (fname, "%s.sw_norm.bed", out_prefix);
|
|
109 dump_text_windows(fname, SW);
|
|
110
|
|
111 sprintf (fname, "%s.copynumber.bed", out_prefix);
|
|
112 fprintf (stdout, "Writing copy numbers to: %s.copynumber.bed. \n", out_prefix);
|
|
113 print_copy_numbers(fname);
|
|
114
|
|
115 free(fname);
|
|
116
|
|
117 free(gclookup);
|
|
118 free(gclookup_x);
|
|
119 fclose(log);
|
|
120
|
|
121 }
|
|
122
|
|
123
|
|
124
|
|
125 void readDepth(char *depthFile){
|
|
126
|
|
127 FILE *binDepth;
|
|
128 int i, j;
|
|
129 int retVal;
|
|
130
|
|
131 float lw_total;
|
|
132 float sw_total;
|
|
133 float cw_total;
|
|
134
|
|
135 int lw_cnt;
|
|
136 int sw_cnt;
|
|
137 int cw_cnt;
|
|
138
|
|
139 int isMagicNum;
|
|
140
|
|
141
|
|
142 binDepth = my_fopen(depthFile, "r", 0);
|
|
143
|
|
144 retVal = fread(&isMagicNum, sizeof(isMagicNum), 1, binDepth);
|
|
145
|
|
146 if (isMagicNum != magicNum)
|
|
147 print_error("Read depth file seems to be invalid or corrupt.\n");
|
|
148
|
|
149
|
|
150 lw_total = 0.0;
|
|
151 sw_total = 0.0;
|
|
152 cw_total = 0.0;
|
|
153
|
|
154 lw_cnt = 0;
|
|
155 sw_cnt = 0;
|
|
156 cw_cnt = 0;
|
|
157
|
|
158 /* read LW */
|
|
159
|
|
160 for (i = 0; i < num_chrom; i++){
|
|
161 for (j = 0; j < chromosomes[i]->lw_cnt; j++){
|
|
162 retVal = fread(&(chromosomes[i]->lw[j].depth), sizeof(chromosomes[i]->lw[j].depth), 1, binDepth);
|
|
163 lw_total += chromosomes[i]->lw[j].depth;
|
|
164 chromosomes[i]->lw[j].isControl = 1;
|
|
165 lw_cnt++;
|
|
166 }
|
|
167 }
|
|
168
|
|
169 /* read SW */
|
|
170
|
|
171 for (i = 0; i < num_chrom; i++){
|
|
172 for (j = 0; j < chromosomes[i]->sw_cnt; j++){
|
|
173 retVal = fread(&(chromosomes[i]->sw[j].depth), sizeof(chromosomes[i]->sw[j].depth), 1, binDepth);
|
|
174 sw_total += chromosomes[i]->sw[j].depth;
|
|
175 chromosomes[i]->sw[j].isControl = 1;
|
|
176 sw_cnt++;
|
|
177 }
|
|
178 }
|
|
179
|
|
180 /* read CW */
|
|
181
|
|
182 for (i = 0; i < num_chrom; i++){
|
|
183 for (j = 0; j < chromosomes[i]->cw_cnt; j++){
|
|
184 retVal = fread(&(chromosomes[i]->cw[j].depth), sizeof(chromosomes[i]->cw[j].depth), 1, binDepth);
|
|
185 cw_total += chromosomes[i]->cw[j].depth;
|
|
186 chromosomes[i]->cw[j].isControl = 1;
|
|
187 cw_cnt++;
|
|
188 }
|
|
189 }
|
|
190
|
|
191 LW_MEAN = lw_total / lw_cnt;
|
|
192 SW_MEAN = sw_total / sw_cnt;
|
|
193 CW_MEAN = cw_total / cw_cnt;
|
|
194
|
|
195 fprintf(stdout, "[OK] depth file %s is loaded.\n", depthFile);
|
|
196 if (VERBOSE)
|
|
197 fprintf(stdout, "LW_MEAN: %f\tSW_MEAN: %f\tCW_MEAN:%f\n", LW_MEAN, SW_MEAN, CW_MEAN);
|
|
198
|
|
199 fclose(binDepth);
|
|
200 }
|
|
201
|
|
202
|
|
203 void dump_text_windows(char *fname, enum WINDOWTYPE wt){
|
|
204
|
|
205 FILE *txtDepth;
|
|
206 int i, j;
|
|
207
|
|
208 txtDepth = my_fopen(fname, "w", 0);
|
|
209
|
|
210 fprintf(txtDepth, "#%s\t%s\t%s\t%s\t%s\t%s\n\n", "CHROM", "START", "END", "GC\%", "READ_DEPTH", "IS_CONTROL");
|
|
211
|
|
212 switch (wt){
|
|
213 case LW:
|
|
214 for (i = 0; i < num_chrom; i++){
|
|
215 for (j = 0; j < chromosomes[i]->lw_cnt; j++)
|
|
216 if (chromosomes[i]->lw[j].isControl == 1)
|
|
217 fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tY\n", chromosomes[i]->name, chromosomes[i]->lw[j].start, chromosomes[i]->lw[j].end, chromosomes[i]->lw[j].gc, chromosomes[i]->lw[j].depth);
|
|
218 else
|
|
219 fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tN\n", chromosomes[i]->name, chromosomes[i]->lw[j].start, chromosomes[i]->lw[j].end, chromosomes[i]->lw[j].gc, chromosomes[i]->lw[j].depth);
|
|
220 }
|
|
221 break;
|
|
222
|
|
223 case SW:
|
|
224 for (i = 0; i < num_chrom; i++){
|
|
225 for (j = 0; j < chromosomes[i]->sw_cnt; j++)
|
|
226 if (chromosomes[i]->sw[j].isControl == 1)
|
|
227 fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tY\n", chromosomes[i]->name, chromosomes[i]->sw[j].start, chromosomes[i]->sw[j].end, chromosomes[i]->sw[j].gc, chromosomes[i]->sw[j].depth);
|
|
228 else
|
|
229 fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tN\n", chromosomes[i]->name, chromosomes[i]->sw[j].start, chromosomes[i]->sw[j].end, chromosomes[i]->sw[j].gc, chromosomes[i]->sw[j].depth);
|
|
230 }
|
|
231 break;
|
|
232
|
|
233 case CW:
|
|
234 for (i = 0; i < num_chrom; i++){
|
|
235 for (j = 0; j < chromosomes[i]->cw_cnt; j++)
|
|
236 if (chromosomes[i]->cw[j].isControl == 1)
|
|
237 fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tY\n", chromosomes[i]->name, chromosomes[i]->cw[j].start, chromosomes[i]->cw[j].end, chromosomes[i]->cw[j].gc, chromosomes[i]->cw[j].depth);
|
|
238 else
|
|
239 fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\tN\n", chromosomes[i]->name, chromosomes[i]->cw[j].start, chromosomes[i]->cw[j].end, chromosomes[i]->cw[j].gc, chromosomes[i]->cw[j].depth);
|
|
240 }
|
|
241 break;
|
|
242 }
|
|
243
|
|
244 fclose(txtDepth);
|
|
245
|
|
246 }
|
|
247
|
|
248
|
|
249
|
|
250 void print_copy_numbers(char *fname){
|
|
251 /* UNFINISHED */
|
|
252
|
|
253 FILE *txtDepth;
|
|
254 int i, j;
|
|
255 float copy_num;
|
|
256
|
|
257 txtDepth = my_fopen(fname, "w", 0);
|
|
258
|
|
259 fprintf(txtDepth, "#%s\t%s\t%s\t%s\t%s\n\n", "CHROM", "START", "END", "GC\%", "COPYNUMBER");
|
|
260
|
|
261 for (i = 0; i < num_chrom; i++){
|
|
262 for (j = 0; j < chromosomes[i]->cw_cnt; j++){
|
|
263 if (GENDER == FEMALE)
|
|
264 copy_num = (chromosomes[i]->cw[j].depth / CW_MEAN) * 2;
|
|
265 else{
|
|
266 if (!strstr(chromosomes[i]->name, "chrX") && !strstr(chromosomes[i]->name, "chrY"))
|
|
267 copy_num = (chromosomes[i]->cw[j].depth / CW_MEAN) * 2;
|
|
268 else
|
|
269 copy_num = chromosomes[i]->cw[j].depth / CW_MEAN_X;
|
|
270 }
|
|
271
|
|
272 if (GENDER == FEMALE && (strstr(chromosomes[i]->name, "chrY")))
|
|
273 continue;
|
|
274
|
|
275 fprintf(txtDepth, "%s\t%d\t%d\t%f\t%f\n", chromosomes[i]->name, chromosomes[i]->cw[j].start, chromosomes[i]->cw[j].end, chromosomes[i]->cw[j].gc, copy_num);
|
|
276 }
|
|
277 }
|
|
278
|
|
279
|
|
280 fclose(txtDepth);
|
|
281
|
|
282
|
|
283 }
|
|
284
|
|
285
|
|
286
|