Mercurial > repos > calkan > mrcanavar
comparison mrcanavar-0.34/utils.c @ 0:86522a0b5f59 default tip
Uploaded source code for mrCaNaVaR
author | calkan |
---|---|
date | Tue, 21 Feb 2012 10:44:13 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:86522a0b5f59 |
---|---|
1 | |
2 #include "utils.h" | |
3 | |
4 | |
5 | |
6 void print_error(char *msg){ | |
7 fprintf(stderr, "\nmrCaNaVaR version %s.\nLast update: %s.\n", VERSION, LAST_UPDATE); | |
8 fprintf(stderr, "\n%s\n", msg); | |
9 fprintf(stderr, "Invoke parameter -h for help.\n"); | |
10 exit (0); | |
11 } | |
12 | |
13 void set_runmode(enum MODETYPE NEWMODE){ | |
14 if (RUNMODE != NONE && RUNMODE != NEWMODE){ | |
15 print_error("Select one run mode only.\n"); | |
16 } | |
17 RUNMODE = NEWMODE; | |
18 } | |
19 | |
20 void set_gender(enum GENDERTYPE SET){ | |
21 if (GENDER != AUTODETECT && RUNMODE != SET){ | |
22 print_error("Select only one of --xx or --xy.\n"); | |
23 } | |
24 GENDER= SET; | |
25 } | |
26 | |
/*
 * Replace the heap string held in *target with a fresh copy of source.
 *
 * target: address of a pointer that is either NULL or was obtained from
 *         malloc(); the old buffer is released.
 * source: NUL-terminated string to copy. A NULL source leaves *target NULL.
 *
 * The copy is made before the old buffer is freed, so the call is safe even
 * when source points into the current *target. Terminates the program if
 * memory cannot be allocated.
 */
void set_str(char **target, char *source){
	char *copy = NULL;

	if (source != NULL){
		size_t bytes = strlen(source) + 1;

		copy = malloc(bytes);
		if (copy == NULL){
			fprintf(stderr, "Insufficient memory.\n");
			exit (1);
		}
		memcpy(copy, source, bytes);
	}

	/* free(NULL) is a no-op, so no guard is needed. */
	free(*target);
	*target = copy;
}
36 | |
37 | |
38 void init_globals(void){ | |
39 GENOME_FASTA = NULL; | |
40 GENOME_CONF = NULL; | |
41 GENOME_GAPS = NULL; | |
42 RUNMODE = NONE; | |
43 GENDER = AUTODETECT; | |
44 MULTGC = 0; | |
45 MAX_GC_CORR = 20.0; | |
46 MIN_GC_CORR = 0.05; | |
47 VERBOSE = 0; | |
48 | |
49 num_chrom = 0; | |
50 | |
51 CHECKSAM = 1; | |
52 | |
53 /* set window size defaults */ | |
54 | |
55 LW_SIZE = 5000; | |
56 SW_SIZE = 1000; | |
57 CW_SIZE = 1000; | |
58 LW_SLIDE = 1000; | |
59 SW_SLIDE = 1000; | |
60 | |
61 LW_MEAN = 0.0; | |
62 SW_MEAN = 0.0; | |
63 CW_MEAN = 0.0; | |
64 | |
65 LW_STD = 0.0; | |
66 SW_STD = 0.0; | |
67 CW_STD = 0.0; | |
68 | |
69 CONT_WINDOW = 7; | |
70 CUT_WINDOW = 6; | |
71 | |
72 } | |
73 | |
74 | |
/*
 * Open a file, terminating the program if it cannot be opened.
 *
 * fname: path of the file.
 * mode:  mode string handed to fopen() or gzopen().
 * GZ:    zero to open with fopen(); non-zero to open with gzopen().
 *
 * Returns a non-NULL handle. NOTE: when GZ is non-zero the returned value is
 * really the gzFile produced by gzopen(), merely carried in a FILE * so both
 * cases share one signature; the caller has to convert it back to gzFile
 * before using it and must not pass it to stdio functions.
 */
FILE *my_fopen(char *fname, char *mode, char GZ){
	FILE *fp;

	if (GZ)
		/* gzFile and FILE * are unrelated pointer types; convert explicitly. */
		fp = (FILE *) gzopen(fname, mode);
	else
		fp = fopen(fname, mode);

	if (fp == NULL){
		fprintf(stderr, "Unable to open file %s in %s mode.\n", fname, mode[0]=='w' ? "write" : "read");
		/* Failing to open a file is an error: report it through the exit status. */
		exit (1);
	}

	return fp;
}
91 | |
92 | |
93 void saveRefConfig(char *configFile){ | |
94 | |
95 int i; | |
96 int wcnt; | |
97 char chrom_name_len; | |
98 int retVal; | |
99 FILE *config; | |
100 | |
101 config = my_fopen(configFile, "w", 0); | |
102 | |
103 /* sort the chromosomes pointer array */ | |
104 | |
105 qsort(chromosomes, num_chrom, sizeof (struct chrom *), compChr); | |
106 | |
107 | |
108 /* start with the magicNum, I will use this as a format check when loading */ | |
109 retVal = fwrite(&magicNum, sizeof(magicNum), 1, config); | |
110 | |
111 /* window sizes / slides */ | |
112 | |
113 retVal = fwrite(&LW_SIZE, sizeof(LW_SIZE), 1, config); | |
114 retVal = fwrite(&SW_SIZE, sizeof(SW_SIZE), 1, config); | |
115 retVal = fwrite(&CW_SIZE, sizeof(CW_SIZE), 1, config); | |
116 retVal = fwrite(&LW_SLIDE, sizeof(LW_SLIDE), 1, config); | |
117 retVal = fwrite(&SW_SLIDE, sizeof(SW_SLIDE), 1, config); | |
118 | |
119 | |
120 /* reference genome numbers */ | |
121 | |
122 /* number of chromosomes */ | |
123 retVal = fwrite(&num_chrom, sizeof(num_chrom), 1, config); | |
124 | |
125 /* iterate through chromosomes, write their names, and window counts */ | |
126 | |
127 for (i=0; i<num_chrom; i++){ | |
128 chrom_name_len = (char) strlen(chromosomes[i]->name); | |
129 retVal = fwrite(&chrom_name_len, sizeof(chrom_name_len), 1, config); | |
130 retVal = fwrite(chromosomes[i]->name, chrom_name_len * sizeof(char), 1, config); | |
131 | |
132 retVal = fwrite(&(chromosomes[i]->length), sizeof(chromosomes[i]->length), 1, config); | |
133 retVal = fwrite(&(chromosomes[i]->lw_cnt), sizeof(chromosomes[i]->lw_cnt), 1, config); | |
134 retVal = fwrite(&(chromosomes[i]->sw_cnt), sizeof(chromosomes[i]->sw_cnt), 1, config); | |
135 retVal = fwrite(&(chromosomes[i]->cw_cnt), sizeof(chromosomes[i]->cw_cnt), 1, config); | |
136 | |
137 /* long windows */ | |
138 for (wcnt=0; wcnt<chromosomes[i]->lw_cnt; wcnt++){ | |
139 retVal = fwrite(&(chromosomes[i]->lw[wcnt].start), sizeof(chromosomes[i]->lw[wcnt].start), 1, config); | |
140 retVal = fwrite(&(chromosomes[i]->lw[wcnt].end), sizeof(chromosomes[i]->lw[wcnt].end), 1, config); | |
141 retVal = fwrite(&(chromosomes[i]->lw[wcnt].gc), sizeof(chromosomes[i]->lw[wcnt].gc), 1, config); | |
142 } | |
143 | |
144 /* short windows */ | |
145 for (wcnt=0; wcnt<chromosomes[i]->sw_cnt; wcnt++){ | |
146 retVal = fwrite(&(chromosomes[i]->sw[wcnt].start), sizeof(chromosomes[i]->sw[wcnt].start), 1, config); | |
147 retVal = fwrite(&(chromosomes[i]->sw[wcnt].end), sizeof(chromosomes[i]->sw[wcnt].end), 1, config); | |
148 retVal = fwrite(&(chromosomes[i]->sw[wcnt].gc), sizeof(chromosomes[i]->sw[wcnt].gc), 1, config); | |
149 } | |
150 | |
151 /* copy windows */ | |
152 for (wcnt=0; wcnt<chromosomes[i]->cw_cnt; wcnt++){ | |
153 retVal = fwrite(&(chromosomes[i]->cw[wcnt].start), sizeof(chromosomes[i]->cw[wcnt].start), 1, config); | |
154 retVal = fwrite(&(chromosomes[i]->cw[wcnt].end), sizeof(chromosomes[i]->cw[wcnt].end), 1, config); | |
155 retVal = fwrite(&(chromosomes[i]->cw[wcnt].gc), sizeof(chromosomes[i]->cw[wcnt].gc), 1, config); | |
156 } | |
157 | |
158 } | |
159 | |
160 fclose(config); | |
161 | |
162 } | |
163 | |
164 | |
165 | |
166 | |
167 void loadRefConfig(char *configFile){ | |
168 | |
169 int i; | |
170 int wcnt; | |
171 char chrom_name_len; | |
172 char readString[MAX_STR]; | |
173 int retVal; | |
174 | |
175 int isMagicNum; | |
176 | |
177 FILE *config; | |
178 | |
179 config = my_fopen(configFile, "r", 0); | |
180 | |
181 /* start with the magicNum, use this as a format check when loading */ | |
182 retVal = fread(&isMagicNum, sizeof(isMagicNum), 1, config); | |
183 | |
184 if (isMagicNum != magicNum) | |
185 print_error("Reference configuration file seems to be invalid or corrupt.\n"); | |
186 | |
187 | |
188 fprintf(stdout, "Loading reference configuration, hold on ... "); | |
189 fflush(stdout); | |
190 | |
191 /* window sizes / slides */ | |
192 | |
193 retVal = fread(&LW_SIZE, sizeof(LW_SIZE), 1, config); | |
194 retVal = fread(&SW_SIZE, sizeof(SW_SIZE), 1, config); | |
195 retVal = fread(&CW_SIZE, sizeof(CW_SIZE), 1, config); | |
196 retVal = fread(&LW_SLIDE, sizeof(LW_SLIDE), 1, config); | |
197 retVal = fread(&SW_SLIDE, sizeof(SW_SLIDE), 1, config); | |
198 | |
199 | |
200 /* reference genome numbers */ | |
201 | |
202 /* number of chromosomes */ | |
203 retVal = fread(&num_chrom, sizeof(num_chrom), 1, config); | |
204 | |
205 /* create chromosomes data structure */ | |
206 | |
207 chromosomes = (struct chrom **) malloc(sizeof(struct chrom *) * num_chrom); | |
208 for (i=0; i<num_chrom; i++) | |
209 chromosomes[i] = (struct chrom *) malloc(sizeof(struct chrom) * num_chrom); | |
210 | |
211 /* iterate through chromosomes, write their names, and window counts */ | |
212 | |
213 for (i=0; i<num_chrom; i++){ | |
214 | |
215 retVal = fread(&chrom_name_len, sizeof(chrom_name_len), 1, config); | |
216 | |
217 retVal = fread(readString, chrom_name_len * sizeof(char), 1, config); | |
218 readString[chrom_name_len] = 0; | |
219 trimspace(readString); | |
220 | |
221 set_str(&(chromosomes[i]->name), readString); | |
222 | |
223 retVal = fread(&(chromosomes[i]->length), sizeof(chromosomes[i]->length), 1, config); | |
224 retVal = fread(&(chromosomes[i]->lw_cnt), sizeof(chromosomes[i]->lw_cnt), 1, config); | |
225 retVal = fread(&(chromosomes[i]->sw_cnt), sizeof(chromosomes[i]->sw_cnt), 1, config); | |
226 retVal = fread(&(chromosomes[i]->cw_cnt), sizeof(chromosomes[i]->cw_cnt), 1, config); | |
227 | |
228 /* create windows structures */ | |
229 | |
230 | |
231 chromosomes[i]->lw = (struct window *) malloc (sizeof(struct window) * chromosomes[i]->lw_cnt); | |
232 chromosomes[i]->sw = (struct window *) malloc (sizeof(struct window) * chromosomes[i]->sw_cnt); | |
233 chromosomes[i]->cw = (struct window *) malloc (sizeof(struct window) * chromosomes[i]->cw_cnt); | |
234 | |
235 | |
236 | |
237 /* long windows */ | |
238 for (wcnt=0; wcnt<chromosomes[i]->lw_cnt; wcnt++){ | |
239 retVal = fread(&(chromosomes[i]->lw[wcnt].start), sizeof(chromosomes[i]->lw[wcnt].start), 1, config); | |
240 retVal = fread(&(chromosomes[i]->lw[wcnt].end), sizeof(chromosomes[i]->lw[wcnt].end), 1, config); | |
241 retVal = fread(&(chromosomes[i]->lw[wcnt].gc), sizeof(chromosomes[i]->lw[wcnt].gc), 1, config); | |
242 // this is the default, auto-control-picker will fix this | |
243 chromosomes[i]->lw[wcnt].isControl = 1; | |
244 chromosomes[i]->lw[wcnt].depth = 0.0; | |
245 } | |
246 | |
247 /* short windows */ | |
248 for (wcnt=0; wcnt<chromosomes[i]->sw_cnt; wcnt++){ | |
249 retVal = fread(&(chromosomes[i]->sw[wcnt].start), sizeof(chromosomes[i]->sw[wcnt].start), 1, config); | |
250 retVal = fread(&(chromosomes[i]->sw[wcnt].end), sizeof(chromosomes[i]->sw[wcnt].end), 1, config); | |
251 retVal = fread(&(chromosomes[i]->sw[wcnt].gc), sizeof(chromosomes[i]->sw[wcnt].gc), 1, config); | |
252 chromosomes[i]->sw[wcnt].isControl = 1; | |
253 chromosomes[i]->sw[wcnt].depth = 0.0; | |
254 } | |
255 | |
256 /* copy windows */ | |
257 for (wcnt=0; wcnt<chromosomes[i]->cw_cnt; wcnt++){ | |
258 retVal = fread(&(chromosomes[i]->cw[wcnt].start), sizeof(chromosomes[i]->cw[wcnt].start), 1, config); | |
259 retVal = fread(&(chromosomes[i]->cw[wcnt].end), sizeof(chromosomes[i]->cw[wcnt].end), 1, config); | |
260 retVal = fread(&(chromosomes[i]->cw[wcnt].gc), sizeof(chromosomes[i]->cw[wcnt].gc), 1, config); | |
261 chromosomes[i]->cw[wcnt].isControl = 1; | |
262 chromosomes[i]->cw[wcnt].depth = 0.0; | |
263 } | |
264 | |
265 /* fill in the rest of the chromosomes structure */ | |
266 | |
267 } | |
268 | |
269 fprintf(stdout, "[OK]. %d chromosomes loaded.\n", num_chrom); | |
270 | |
271 fclose(config); | |
272 } | |
273 | |
274 | |
275 | |
276 static int compChr(const void *p1, const void *p2){ | |
277 /* compare function to sort the chromosome pointer array */ | |
278 struct chrom *a, *b; | |
279 | |
280 a = *((struct chrom **)p1); | |
281 b = *((struct chrom **)p2); | |
282 | |
283 | |
284 return (strcmp ( a->name, b->name) ); | |
285 | |
286 } | |
287 | |
288 | |
/*
 * Test whether the string src finishes with the string end.
 *
 * Returns 1 when the last strlen(end) characters of src equal end
 * (an empty end always matches), 0 otherwise.
 */
int endswith(char *src, char *end){
	int suffix_len = strlen(end);
	int total_len = strlen(src);

	/* a suffix longer than the string itself can never match */
	if (suffix_len > total_len)
		return 0;

	return memcmp(src + (total_len - suffix_len), end, suffix_len) == 0;
}
309 | |
/*
 * Remove trailing whitespace from str in place.
 *
 * Every trailing character for which isspace() is true is overwritten with
 * a terminator. Empty and all-whitespace strings are handled: the scan never
 * moves in front of the first character.
 */
void trimspace(char *str){
	size_t len = strlen(str);

	/* isspace() needs an unsigned char value; a negative char is undefined */
	while (len > 0 && isspace((unsigned char) str[len - 1]))
		str[--len] = 0;
}