Mercurial > repos > calkan > mrcanavar
comparison mrcanavar-0.34/utils.c @ 0:86522a0b5f59 default tip
Uploaded source code for mrCaNaVaR
| author | calkan |
|---|---|
| date | Tue, 21 Feb 2012 10:44:13 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:86522a0b5f59 |
|---|---|
| 1 | |
| 2 #include "utils.h" | |
| 3 | |
| 4 | |
| 5 | |
| 6 void print_error(char *msg){ | |
| 7 fprintf(stderr, "\nmrCaNaVaR version %s.\nLast update: %s.\n", VERSION, LAST_UPDATE); | |
| 8 fprintf(stderr, "\n%s\n", msg); | |
| 9 fprintf(stderr, "Invoke parameter -h for help.\n"); | |
| 10 exit (0); | |
| 11 } | |
| 12 | |
| 13 void set_runmode(enum MODETYPE NEWMODE){ | |
| 14 if (RUNMODE != NONE && RUNMODE != NEWMODE){ | |
| 15 print_error("Select one run mode only.\n"); | |
| 16 } | |
| 17 RUNMODE = NEWMODE; | |
| 18 } | |
| 19 | |
| 20 void set_gender(enum GENDERTYPE SET){ | |
| 21 if (GENDER != AUTODETECT && RUNMODE != SET){ | |
| 22 print_error("Select only one of --xx or --xy.\n"); | |
| 23 } | |
| 24 GENDER= SET; | |
| 25 } | |
| 26 | |
/*
 * Replace the heap string held in *target with a private copy of source.
 *
 * target: address of an owning pointer; it must be NULL or point to memory
 *         obtained from malloc().  The old string is freed.
 * source: NUL-terminated string to copy.  It may point into the string
 *         currently held by *target.  If source is NULL, *target is freed
 *         and set to NULL.
 *
 * If memory cannot be allocated, a message is printed to stderr and the
 * process exits with EXIT_FAILURE.
 */
void set_str(char **target, char *source){
    char *copy = NULL;

    if (source != NULL){
        size_t nbytes = strlen(source) + 1;   /* include the terminator */

        copy = (char *) malloc(nbytes);
        if (copy == NULL){
            fprintf(stderr, "Unable to allocate %lu bytes for a string copy.\n",
                    (unsigned long) nbytes);
            exit(EXIT_FAILURE);
        }
        memcpy(copy, source, nbytes);
    }

    /* Release the old value only after the copy is made, so that a source
       that aliases the old string is still readable while copying. */
    free(*target);
    *target = copy;
}
| 36 | |
| 37 | |
| 38 void init_globals(void){ | |
| 39 GENOME_FASTA = NULL; | |
| 40 GENOME_CONF = NULL; | |
| 41 GENOME_GAPS = NULL; | |
| 42 RUNMODE = NONE; | |
| 43 GENDER = AUTODETECT; | |
| 44 MULTGC = 0; | |
| 45 MAX_GC_CORR = 20.0; | |
| 46 MIN_GC_CORR = 0.05; | |
| 47 VERBOSE = 0; | |
| 48 | |
| 49 num_chrom = 0; | |
| 50 | |
| 51 CHECKSAM = 1; | |
| 52 | |
| 53 /* set window size defaults */ | |
| 54 | |
| 55 LW_SIZE = 5000; | |
| 56 SW_SIZE = 1000; | |
| 57 CW_SIZE = 1000; | |
| 58 LW_SLIDE = 1000; | |
| 59 SW_SLIDE = 1000; | |
| 60 | |
| 61 LW_MEAN = 0.0; | |
| 62 SW_MEAN = 0.0; | |
| 63 CW_MEAN = 0.0; | |
| 64 | |
| 65 LW_STD = 0.0; | |
| 66 SW_STD = 0.0; | |
| 67 CW_STD = 0.0; | |
| 68 | |
| 69 CONT_WINDOW = 7; | |
| 70 CUT_WINDOW = 6; | |
| 71 | |
| 72 } | |
| 73 | |
| 74 | |
/*
 * Open a file and never return NULL: on failure a message is printed to
 * stderr and the process exits with EXIT_FAILURE.
 *
 * fname: path of the file to open.
 * mode:  mode string passed through to fopen()/gzopen(); a leading 'w' makes
 *        the error message say "write", anything else says "read".
 * GZ:    zero opens the file with fopen(); non-zero opens it with gzopen().
 *
 * Returns the open handle.
 *
 * NOTE(review): with GZ set, the value returned is the gzopen() handle merely
 * cast to FILE *.  It is not a stdio stream; presumably callers convert it
 * back and use the gz* functions on it -- confirm against the callers.
 */
FILE *my_fopen(char *fname, char *mode, char GZ){
    FILE *fp;

    if (!GZ)
        fp = fopen(fname, mode);
    else
        fp = (FILE *) gzopen(fname, mode);   /* explicit: the pointer types are unrelated */

    if (fp == NULL){
        fprintf(stderr, "Unable to open file %s in %s mode.\n", fname, mode[0]=='w' ? "write" : "read");
        exit(EXIT_FAILURE);
    }

    return fp;
}
| 91 | |
| 92 | |
| 93 void saveRefConfig(char *configFile){ | |
| 94 | |
| 95 int i; | |
| 96 int wcnt; | |
| 97 char chrom_name_len; | |
| 98 int retVal; | |
| 99 FILE *config; | |
| 100 | |
| 101 config = my_fopen(configFile, "w", 0); | |
| 102 | |
| 103 /* sort the chromosomes pointer array */ | |
| 104 | |
| 105 qsort(chromosomes, num_chrom, sizeof (struct chrom *), compChr); | |
| 106 | |
| 107 | |
| 108 /* start with the magicNum, I will use this as a format check when loading */ | |
| 109 retVal = fwrite(&magicNum, sizeof(magicNum), 1, config); | |
| 110 | |
| 111 /* window sizes / slides */ | |
| 112 | |
| 113 retVal = fwrite(&LW_SIZE, sizeof(LW_SIZE), 1, config); | |
| 114 retVal = fwrite(&SW_SIZE, sizeof(SW_SIZE), 1, config); | |
| 115 retVal = fwrite(&CW_SIZE, sizeof(CW_SIZE), 1, config); | |
| 116 retVal = fwrite(&LW_SLIDE, sizeof(LW_SLIDE), 1, config); | |
| 117 retVal = fwrite(&SW_SLIDE, sizeof(SW_SLIDE), 1, config); | |
| 118 | |
| 119 | |
| 120 /* reference genome numbers */ | |
| 121 | |
| 122 /* number of chromosomes */ | |
| 123 retVal = fwrite(&num_chrom, sizeof(num_chrom), 1, config); | |
| 124 | |
| 125 /* iterate through chromosomes, write their names, and window counts */ | |
| 126 | |
| 127 for (i=0; i<num_chrom; i++){ | |
| 128 chrom_name_len = (char) strlen(chromosomes[i]->name); | |
| 129 retVal = fwrite(&chrom_name_len, sizeof(chrom_name_len), 1, config); | |
| 130 retVal = fwrite(chromosomes[i]->name, chrom_name_len * sizeof(char), 1, config); | |
| 131 | |
| 132 retVal = fwrite(&(chromosomes[i]->length), sizeof(chromosomes[i]->length), 1, config); | |
| 133 retVal = fwrite(&(chromosomes[i]->lw_cnt), sizeof(chromosomes[i]->lw_cnt), 1, config); | |
| 134 retVal = fwrite(&(chromosomes[i]->sw_cnt), sizeof(chromosomes[i]->sw_cnt), 1, config); | |
| 135 retVal = fwrite(&(chromosomes[i]->cw_cnt), sizeof(chromosomes[i]->cw_cnt), 1, config); | |
| 136 | |
| 137 /* long windows */ | |
| 138 for (wcnt=0; wcnt<chromosomes[i]->lw_cnt; wcnt++){ | |
| 139 retVal = fwrite(&(chromosomes[i]->lw[wcnt].start), sizeof(chromosomes[i]->lw[wcnt].start), 1, config); | |
| 140 retVal = fwrite(&(chromosomes[i]->lw[wcnt].end), sizeof(chromosomes[i]->lw[wcnt].end), 1, config); | |
| 141 retVal = fwrite(&(chromosomes[i]->lw[wcnt].gc), sizeof(chromosomes[i]->lw[wcnt].gc), 1, config); | |
| 142 } | |
| 143 | |
| 144 /* short windows */ | |
| 145 for (wcnt=0; wcnt<chromosomes[i]->sw_cnt; wcnt++){ | |
| 146 retVal = fwrite(&(chromosomes[i]->sw[wcnt].start), sizeof(chromosomes[i]->sw[wcnt].start), 1, config); | |
| 147 retVal = fwrite(&(chromosomes[i]->sw[wcnt].end), sizeof(chromosomes[i]->sw[wcnt].end), 1, config); | |
| 148 retVal = fwrite(&(chromosomes[i]->sw[wcnt].gc), sizeof(chromosomes[i]->sw[wcnt].gc), 1, config); | |
| 149 } | |
| 150 | |
| 151 /* copy windows */ | |
| 152 for (wcnt=0; wcnt<chromosomes[i]->cw_cnt; wcnt++){ | |
| 153 retVal = fwrite(&(chromosomes[i]->cw[wcnt].start), sizeof(chromosomes[i]->cw[wcnt].start), 1, config); | |
| 154 retVal = fwrite(&(chromosomes[i]->cw[wcnt].end), sizeof(chromosomes[i]->cw[wcnt].end), 1, config); | |
| 155 retVal = fwrite(&(chromosomes[i]->cw[wcnt].gc), sizeof(chromosomes[i]->cw[wcnt].gc), 1, config); | |
| 156 } | |
| 157 | |
| 158 } | |
| 159 | |
| 160 fclose(config); | |
| 161 | |
| 162 } | |
| 163 | |
| 164 | |
| 165 | |
| 166 | |
| 167 void loadRefConfig(char *configFile){ | |
| 168 | |
| 169 int i; | |
| 170 int wcnt; | |
| 171 char chrom_name_len; | |
| 172 char readString[MAX_STR]; | |
| 173 int retVal; | |
| 174 | |
| 175 int isMagicNum; | |
| 176 | |
| 177 FILE *config; | |
| 178 | |
| 179 config = my_fopen(configFile, "r", 0); | |
| 180 | |
| 181 /* start with the magicNum, use this as a format check when loading */ | |
| 182 retVal = fread(&isMagicNum, sizeof(isMagicNum), 1, config); | |
| 183 | |
| 184 if (isMagicNum != magicNum) | |
| 185 print_error("Reference configuration file seems to be invalid or corrupt.\n"); | |
| 186 | |
| 187 | |
| 188 fprintf(stdout, "Loading reference configuration, hold on ... "); | |
| 189 fflush(stdout); | |
| 190 | |
| 191 /* window sizes / slides */ | |
| 192 | |
| 193 retVal = fread(&LW_SIZE, sizeof(LW_SIZE), 1, config); | |
| 194 retVal = fread(&SW_SIZE, sizeof(SW_SIZE), 1, config); | |
| 195 retVal = fread(&CW_SIZE, sizeof(CW_SIZE), 1, config); | |
| 196 retVal = fread(&LW_SLIDE, sizeof(LW_SLIDE), 1, config); | |
| 197 retVal = fread(&SW_SLIDE, sizeof(SW_SLIDE), 1, config); | |
| 198 | |
| 199 | |
| 200 /* reference genome numbers */ | |
| 201 | |
| 202 /* number of chromosomes */ | |
| 203 retVal = fread(&num_chrom, sizeof(num_chrom), 1, config); | |
| 204 | |
| 205 /* create chromosomes data structure */ | |
| 206 | |
| 207 chromosomes = (struct chrom **) malloc(sizeof(struct chrom *) * num_chrom); | |
| 208 for (i=0; i<num_chrom; i++) | |
| 209 chromosomes[i] = (struct chrom *) malloc(sizeof(struct chrom) * num_chrom); | |
| 210 | |
| 211 /* iterate through chromosomes, write their names, and window counts */ | |
| 212 | |
| 213 for (i=0; i<num_chrom; i++){ | |
| 214 | |
| 215 retVal = fread(&chrom_name_len, sizeof(chrom_name_len), 1, config); | |
| 216 | |
| 217 retVal = fread(readString, chrom_name_len * sizeof(char), 1, config); | |
| 218 readString[chrom_name_len] = 0; | |
| 219 trimspace(readString); | |
| 220 | |
| 221 set_str(&(chromosomes[i]->name), readString); | |
| 222 | |
| 223 retVal = fread(&(chromosomes[i]->length), sizeof(chromosomes[i]->length), 1, config); | |
| 224 retVal = fread(&(chromosomes[i]->lw_cnt), sizeof(chromosomes[i]->lw_cnt), 1, config); | |
| 225 retVal = fread(&(chromosomes[i]->sw_cnt), sizeof(chromosomes[i]->sw_cnt), 1, config); | |
| 226 retVal = fread(&(chromosomes[i]->cw_cnt), sizeof(chromosomes[i]->cw_cnt), 1, config); | |
| 227 | |
| 228 /* create windows structures */ | |
| 229 | |
| 230 | |
| 231 chromosomes[i]->lw = (struct window *) malloc (sizeof(struct window) * chromosomes[i]->lw_cnt); | |
| 232 chromosomes[i]->sw = (struct window *) malloc (sizeof(struct window) * chromosomes[i]->sw_cnt); | |
| 233 chromosomes[i]->cw = (struct window *) malloc (sizeof(struct window) * chromosomes[i]->cw_cnt); | |
| 234 | |
| 235 | |
| 236 | |
| 237 /* long windows */ | |
| 238 for (wcnt=0; wcnt<chromosomes[i]->lw_cnt; wcnt++){ | |
| 239 retVal = fread(&(chromosomes[i]->lw[wcnt].start), sizeof(chromosomes[i]->lw[wcnt].start), 1, config); | |
| 240 retVal = fread(&(chromosomes[i]->lw[wcnt].end), sizeof(chromosomes[i]->lw[wcnt].end), 1, config); | |
| 241 retVal = fread(&(chromosomes[i]->lw[wcnt].gc), sizeof(chromosomes[i]->lw[wcnt].gc), 1, config); | |
| 242 // this is the default, auto-control-picker will fix this | |
| 243 chromosomes[i]->lw[wcnt].isControl = 1; | |
| 244 chromosomes[i]->lw[wcnt].depth = 0.0; | |
| 245 } | |
| 246 | |
| 247 /* short windows */ | |
| 248 for (wcnt=0; wcnt<chromosomes[i]->sw_cnt; wcnt++){ | |
| 249 retVal = fread(&(chromosomes[i]->sw[wcnt].start), sizeof(chromosomes[i]->sw[wcnt].start), 1, config); | |
| 250 retVal = fread(&(chromosomes[i]->sw[wcnt].end), sizeof(chromosomes[i]->sw[wcnt].end), 1, config); | |
| 251 retVal = fread(&(chromosomes[i]->sw[wcnt].gc), sizeof(chromosomes[i]->sw[wcnt].gc), 1, config); | |
| 252 chromosomes[i]->sw[wcnt].isControl = 1; | |
| 253 chromosomes[i]->sw[wcnt].depth = 0.0; | |
| 254 } | |
| 255 | |
| 256 /* copy windows */ | |
| 257 for (wcnt=0; wcnt<chromosomes[i]->cw_cnt; wcnt++){ | |
| 258 retVal = fread(&(chromosomes[i]->cw[wcnt].start), sizeof(chromosomes[i]->cw[wcnt].start), 1, config); | |
| 259 retVal = fread(&(chromosomes[i]->cw[wcnt].end), sizeof(chromosomes[i]->cw[wcnt].end), 1, config); | |
| 260 retVal = fread(&(chromosomes[i]->cw[wcnt].gc), sizeof(chromosomes[i]->cw[wcnt].gc), 1, config); | |
| 261 chromosomes[i]->cw[wcnt].isControl = 1; | |
| 262 chromosomes[i]->cw[wcnt].depth = 0.0; | |
| 263 } | |
| 264 | |
| 265 /* fill in the rest of the chromosomes structure */ | |
| 266 | |
| 267 } | |
| 268 | |
| 269 fprintf(stdout, "[OK]. %d chromosomes loaded.\n", num_chrom); | |
| 270 | |
| 271 fclose(config); | |
| 272 } | |
| 273 | |
| 274 | |
| 275 | |
| 276 static int compChr(const void *p1, const void *p2){ | |
| 277 /* compare function to sort the chromosome pointer array */ | |
| 278 struct chrom *a, *b; | |
| 279 | |
| 280 a = *((struct chrom **)p1); | |
| 281 b = *((struct chrom **)p2); | |
| 282 | |
| 283 | |
| 284 return (strcmp ( a->name, b->name) ); | |
| 285 | |
| 286 } | |
| 287 | |
| 288 | |
/*
 * Test whether the string src finishes with the string end.
 *
 * src: NUL-terminated string to inspect.
 * end: NUL-terminated suffix to look for.
 *
 * Returns 1 when the last strlen(end) characters of src equal end (an empty
 * end always matches), 0 otherwise, including when end is longer than src.
 */
int endswith(char *src, char *end){
    int total_len = strlen(src);
    int tail_len = strlen(end);

    /* a suffix cannot be longer than the whole string */
    if (total_len < tail_len)
        return 0;

    return memcmp(src + (total_len - tail_len), end, tail_len) == 0 ? 1 : 0;
}
| 309 | |
/*
 * Remove trailing whitespace from str in place.
 *
 * str: modifiable NUL-terminated string.  Each trailing character for which
 *      isspace() is true is overwritten with a terminator.
 *
 * An empty string, or a string made only of whitespace, ends up empty; the
 * scan never looks before the first character.  Characters are converted to
 * unsigned char before being handed to isspace(), as <ctype.h> requires.
 */
void trimspace(char *str){
    size_t len = strlen(str);

    while (len > 0 && isspace((unsigned char) str[len - 1]))
        str[--len] = 0;
}
