Mercurial > repos > siyuan > prada
comparison pyPRADA_1.2/tools/samtools-0.1.16/sam_header.c @ 0:acc2ca1a3ba4
Uploaded
| author | siyuan |
|---|---|
| date | Thu, 20 Feb 2014 00:44:58 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:acc2ca1a3ba4 |
|---|---|
| 1 #include "sam_header.h" | |
| 2 #include <stdio.h> | |
| 3 #include <string.h> | |
| 4 #include <ctype.h> | |
| 5 #include <stdlib.h> | |
| 6 #include <stdarg.h> | |
| 7 | |
| 8 #include "khash.h" | |
| 9 KHASH_MAP_INIT_STR(str, const char *) | |
| 10 | |
// Node of a singly linked list carrying an untyped payload. The same node type
// is used both for the list of header lines and for each line's list of tags.
struct _HeaderList
{
    struct _HeaderList *last;   // Hack: Used and maintained only by list_append_to_end. Maintained in the root node only.
    struct _HeaderList *next;   // Following node; NULL at the end of the list.
    void *data;                 // Payload pointer; list_free releases the nodes only, never the payload.
};
typedef struct _HeaderList list_t;
typedef list_t HeaderDict;      // A parsed header: a list whose nodes carry HeaderLine pointers.
| 19 | |
// One tag of a header line.
typedef struct
{
    char key[2];    // Two-character tag name, not NUL-terminated; the writers treat two spaces as "no key" (free text).
    char *value;    // NUL-terminated value string, heap-allocated and released in sam_header_line_free.
}
HeaderTag;
| 26 | |
// One header line: its record type plus the tags found on it.
typedef struct
{
    char type[2];   // Two-character record type taken from the text after '@'; not NUL-terminated.
    list_t *tags;   // List whose nodes carry HeaderTag pointers; NULL when the line has no tags.
}
HeaderLine;
| 33 | |
// Tag tables, one NULL-terminated array per record type:
//   o_* .. optional tags, r_* .. required tags, u_* .. tags whose value identifies the line.
const char *o_hd_tags[] = {"SO","GO",NULL};
const char *r_hd_tags[] = {"VN",NULL};

const char *o_sq_tags[] = {"AS","M5","UR","SP",NULL};
const char *r_sq_tags[] = {"SN","LN",NULL};
const char *u_sq_tags[] = {"SN",NULL};

const char *o_rg_tags[] = {"CN","DS","DT","FO","KS","LB","PG","PI","PL","PU","SM",NULL};
const char *r_rg_tags[] = {"ID",NULL};
const char *u_rg_tags[] = {"ID",NULL};

const char *o_pg_tags[] = {"VN","CL",NULL};
const char *r_pg_tags[] = {"ID",NULL};

// Known record types. The three arrays below are indexed by the position of the
// type in types[] (as returned by tag_exists). A type with neither required nor
// optional tags (CO) is handled as free text by the parser and the comparator.
const char *types[] = {"HD","SQ","RG","PG","CO",NULL};
const char **optional_tags[] = {o_hd_tags,o_sq_tags,o_rg_tags,o_pg_tags,NULL,NULL};
const char **required_tags[] = {r_hd_tags,r_sq_tags,r_rg_tags,r_pg_tags,NULL,NULL};
const char **unique_tags[] = {NULL, u_sq_tags,u_rg_tags,NULL,NULL,NULL};
| 52 | |
| 53 | |
// printf-style diagnostic helper: formats the message and writes it to stderr.
static void debug(const char *format, ...)
{
    va_list ap;
    va_start(ap, format);
    vfprintf(stderr, format, ap);
    va_end(ap);
}
| 61 | |
#if 0
// Replaced by list_append_to_end
// Disabled code: puts a new node in front of root and returns it as the new
// root. Note that it leaves the node's `last` member unset.
static list_t *list_prepend(list_t *root, void *data)
{
    list_t *l = malloc(sizeof(list_t));
    l->next = root;
    l->data = data;
    return l;
}
#endif
| 72 | |
| 73 // Relies on the root->last being correct. Do not use with the other list_* | |
| 74 // routines unless they are fixed to modify root->last as well. | |
| 75 static list_t *list_append_to_end(list_t *root, void *data) | |
| 76 { | |
| 77 list_t *l = malloc(sizeof(list_t)); | |
| 78 l->last = l; | |
| 79 l->next = NULL; | |
| 80 l->data = data; | |
| 81 | |
| 82 if ( !root ) | |
| 83 return l; | |
| 84 | |
| 85 root->last->next = l; | |
| 86 root->last = l; | |
| 87 return root; | |
| 88 } | |
| 89 | |
| 90 static list_t *list_append(list_t *root, void *data) | |
| 91 { | |
| 92 list_t *l = root; | |
| 93 while (l && l->next) | |
| 94 l = l->next; | |
| 95 if ( l ) | |
| 96 { | |
| 97 l->next = malloc(sizeof(list_t)); | |
| 98 l = l->next; | |
| 99 } | |
| 100 else | |
| 101 { | |
| 102 l = malloc(sizeof(list_t)); | |
| 103 root = l; | |
| 104 } | |
| 105 l->data = data; | |
| 106 l->next = NULL; | |
| 107 return root; | |
| 108 } | |
| 109 | |
| 110 static void list_free(list_t *root) | |
| 111 { | |
| 112 list_t *l = root; | |
| 113 while (root) | |
| 114 { | |
| 115 l = root; | |
| 116 root = root->next; | |
| 117 free(l); | |
| 118 } | |
| 119 } | |
| 120 | |
| 121 | |
| 122 | |
// Find the two-character tag "XY" in a NULL-terminated array of tag strings.
// Only the first two characters are compared. Returns the index of the first
// match, or -1 when there is no match or when tags is NULL.
static int tag_exists(const char *tag, const char **tags)
{
    int i;

    if ( tags == NULL )
        return -1;

    for (i = 0; tags[i] != NULL; i++)
    {
        const char *candidate = tags[i];
        if ( candidate[0] != tag[0] ) continue;
        if ( candidate[1] == tag[1] ) return i;
    }
    return -1;
}
| 135 | |
| 136 | |
| 137 | |
// Copy the next line of text into *lineptr and return a pointer to the text
// that follows it, in the spirit of getline.
//
//   lineptr .. buffer holding the line without its terminator; allocated or
//              grown as needed and to be freed by the caller
//   n       .. capacity of *lineptr, updated whenever the buffer grows
//   text    .. current position in the NUL-terminated input
//
// Lines may end with "\n", "\r\n" or a lone "\r". Returns NULL when text is
// empty (nothing left to read) or when memory runs out; in the latter case the
// previous buffer is left intact so the caller can still free it.
static const char *nextline(char **lineptr, size_t *n, const char *text)
{
    const char *to = text;
    size_t len;

    if ( !*to ) return NULL;

    while ( *to && *to!='\n' && *to!='\r' ) to++;
    len = (size_t)(to - text) + 1;      // characters of the line plus the terminating NUL

    // Advance past the line terminator for the next call. A '\r' is always
    // consumed, otherwise a lone '\r' would never let the cursor move on.
    if ( *to=='\r' ) to++;
    if ( *to=='\n' ) to++;

    if ( !*lineptr || *n<len )
    {
        // realloc(NULL, len) behaves like malloc(len); keep the old pointer
        // until the call is known to have succeeded.
        char *grown = realloc(*lineptr, len);
        if ( !grown ) {
            fprintf(stderr, "[nextline] Insufficient memory!\n");
            return NULL;
        }
        *lineptr = grown;
        *n = len;
    }

    memcpy(*lineptr, text, len-1);
    (*lineptr)[len-1] = 0;

    return to;
}
| 180 | |
| 181 // name points to "XY", value_from points to the first character of the value string and | |
| 182 // value_to points to the last character of the value string. | |
| 183 static HeaderTag *new_tag(const char *name, const char *value_from, const char *value_to) | |
| 184 { | |
| 185 HeaderTag *tag = malloc(sizeof(HeaderTag)); | |
| 186 int len = value_to-value_from+1; | |
| 187 | |
| 188 tag->key[0] = name[0]; | |
| 189 tag->key[1] = name[1]; | |
| 190 tag->value = malloc(len+1); | |
| 191 memcpy(tag->value,value_from,len+1); | |
| 192 tag->value[len] = 0; | |
| 193 return tag; | |
| 194 } | |
| 195 | |
| 196 static HeaderTag *header_line_has_tag(HeaderLine *hline, const char *key) | |
| 197 { | |
| 198 list_t *tags = hline->tags; | |
| 199 while (tags) | |
| 200 { | |
| 201 HeaderTag *tag = tags->data; | |
| 202 if ( tag->key[0]==key[0] && tag->key[1]==key[1] ) return tag; | |
| 203 tags = tags->next; | |
| 204 } | |
| 205 return NULL; | |
| 206 } | |
| 207 | |
| 208 | |
| 209 // Return codes: | |
| 210 // 0 .. different types or unique tags differ or conflicting tags, cannot be merged | |
| 211 // 1 .. all tags identical -> no need to merge, drop one | |
| 212 // 2 .. the unique tags match and there are some conflicting tags (same tag, different value) -> error, cannot be merged nor duplicated | |
| 213 // 3 .. there are some missing complementary tags and no unique conflict -> can be merged into a single line | |
| 214 static int sam_header_compare_lines(HeaderLine *hline1, HeaderLine *hline2) | |
| 215 { | |
| 216 HeaderTag *t1, *t2; | |
| 217 | |
| 218 if ( hline1->type[0]!=hline2->type[0] || hline1->type[1]!=hline2->type[1] ) | |
| 219 return 0; | |
| 220 | |
| 221 int itype = tag_exists(hline1->type,types); | |
| 222 if ( itype==-1 ) { | |
| 223 debug("[sam_header_compare_lines] Unknown type [%c%c]\n", hline1->type[0],hline1->type[1]); | |
| 224 return -1; // FIXME (lh3): error; I do not know how this will be handled in Petr's code | |
| 225 } | |
| 226 | |
| 227 if ( unique_tags[itype] ) | |
| 228 { | |
| 229 t1 = header_line_has_tag(hline1,unique_tags[itype][0]); | |
| 230 t2 = header_line_has_tag(hline2,unique_tags[itype][0]); | |
| 231 if ( !t1 || !t2 ) // this should never happen, the unique tags are required | |
| 232 return 2; | |
| 233 | |
| 234 if ( strcmp(t1->value,t2->value) ) | |
| 235 return 0; // the unique tags differ, cannot be merged | |
| 236 } | |
| 237 if ( !required_tags[itype] && !optional_tags[itype] ) | |
| 238 { | |
| 239 t1 = hline1->tags->data; | |
| 240 t2 = hline2->tags->data; | |
| 241 if ( !strcmp(t1->value,t2->value) ) return 1; // identical comments | |
| 242 return 0; | |
| 243 } | |
| 244 | |
| 245 int missing=0, itag=0; | |
| 246 while ( required_tags[itype] && required_tags[itype][itag] ) | |
| 247 { | |
| 248 t1 = header_line_has_tag(hline1,required_tags[itype][itag]); | |
| 249 t2 = header_line_has_tag(hline2,required_tags[itype][itag]); | |
| 250 if ( !t1 && !t2 ) | |
| 251 return 2; // this should never happen | |
| 252 else if ( !t1 || !t2 ) | |
| 253 missing = 1; // there is some tag missing in one of the hlines | |
| 254 else if ( strcmp(t1->value,t2->value) ) | |
| 255 { | |
| 256 if ( unique_tags[itype] ) | |
| 257 return 2; // the lines have a matching unique tag but have a conflicting tag | |
| 258 | |
| 259 return 0; // the lines contain conflicting tags, cannot be merged | |
| 260 } | |
| 261 itag++; | |
| 262 } | |
| 263 itag = 0; | |
| 264 while ( optional_tags[itype] && optional_tags[itype][itag] ) | |
| 265 { | |
| 266 t1 = header_line_has_tag(hline1,optional_tags[itype][itag]); | |
| 267 t2 = header_line_has_tag(hline2,optional_tags[itype][itag]); | |
| 268 if ( !t1 && !t2 ) | |
| 269 { | |
| 270 itag++; | |
| 271 continue; | |
| 272 } | |
| 273 if ( !t1 || !t2 ) | |
| 274 missing = 1; // there is some tag missing in one of the hlines | |
| 275 else if ( strcmp(t1->value,t2->value) ) | |
| 276 { | |
| 277 if ( unique_tags[itype] ) | |
| 278 return 2; // the lines have a matching unique tag but have a conflicting tag | |
| 279 | |
| 280 return 0; // the lines contain conflicting tags, cannot be merged | |
| 281 } | |
| 282 itag++; | |
| 283 } | |
| 284 if ( missing ) return 3; // there are some missing complementary tags with no conflicts, can be merged | |
| 285 return 1; | |
| 286 } | |
| 287 | |
| 288 | |
| 289 static HeaderLine *sam_header_line_clone(const HeaderLine *hline) | |
| 290 { | |
| 291 list_t *tags; | |
| 292 HeaderLine *out = malloc(sizeof(HeaderLine)); | |
| 293 out->type[0] = hline->type[0]; | |
| 294 out->type[1] = hline->type[1]; | |
| 295 out->tags = NULL; | |
| 296 | |
| 297 tags = hline->tags; | |
| 298 while (tags) | |
| 299 { | |
| 300 HeaderTag *old = tags->data; | |
| 301 | |
| 302 HeaderTag *new = malloc(sizeof(HeaderTag)); | |
| 303 new->key[0] = old->key[0]; | |
| 304 new->key[1] = old->key[1]; | |
| 305 new->value = strdup(old->value); | |
| 306 out->tags = list_append(out->tags, new); | |
| 307 | |
| 308 tags = tags->next; | |
| 309 } | |
| 310 return out; | |
| 311 } | |
| 312 | |
| 313 static int sam_header_line_merge_with(HeaderLine *out_hline, const HeaderLine *tmpl_hline) | |
| 314 { | |
| 315 list_t *tmpl_tags; | |
| 316 | |
| 317 if ( out_hline->type[0]!=tmpl_hline->type[0] || out_hline->type[1]!=tmpl_hline->type[1] ) | |
| 318 return 0; | |
| 319 | |
| 320 tmpl_tags = tmpl_hline->tags; | |
| 321 while (tmpl_tags) | |
| 322 { | |
| 323 HeaderTag *tmpl_tag = tmpl_tags->data; | |
| 324 HeaderTag *out_tag = header_line_has_tag(out_hline, tmpl_tag->key); | |
| 325 if ( !out_tag ) | |
| 326 { | |
| 327 HeaderTag *tag = malloc(sizeof(HeaderTag)); | |
| 328 tag->key[0] = tmpl_tag->key[0]; | |
| 329 tag->key[1] = tmpl_tag->key[1]; | |
| 330 tag->value = strdup(tmpl_tag->value); | |
| 331 out_hline->tags = list_append(out_hline->tags,tag); | |
| 332 } | |
| 333 tmpl_tags = tmpl_tags->next; | |
| 334 } | |
| 335 return 1; | |
| 336 } | |
| 337 | |
| 338 | |
| 339 static HeaderLine *sam_header_line_parse(const char *headerLine) | |
| 340 { | |
| 341 HeaderLine *hline; | |
| 342 HeaderTag *tag; | |
| 343 const char *from, *to; | |
| 344 from = headerLine; | |
| 345 | |
| 346 if ( *from != '@' ) { | |
| 347 debug("[sam_header_line_parse] expected '@', got [%s]\n", headerLine); | |
| 348 return 0; | |
| 349 } | |
| 350 to = ++from; | |
| 351 | |
| 352 while (*to && *to!='\t') to++; | |
| 353 if ( to-from != 2 ) { | |
| 354 debug("[sam_header_line_parse] expected '@XY', got [%s]\nHint: The header tags must be tab-separated.\n", headerLine); | |
| 355 return 0; | |
| 356 } | |
| 357 | |
| 358 hline = malloc(sizeof(HeaderLine)); | |
| 359 hline->type[0] = from[0]; | |
| 360 hline->type[1] = from[1]; | |
| 361 hline->tags = NULL; | |
| 362 | |
| 363 int itype = tag_exists(hline->type, types); | |
| 364 | |
| 365 from = to; | |
| 366 while (*to && *to=='\t') to++; | |
| 367 if ( to-from != 1 ) { | |
| 368 debug("[sam_header_line_parse] multiple tabs on line [%s] (%d)\n", headerLine,(int)(to-from)); | |
| 369 return 0; | |
| 370 } | |
| 371 from = to; | |
| 372 while (*from) | |
| 373 { | |
| 374 while (*to && *to!='\t') to++; | |
| 375 | |
| 376 if ( !required_tags[itype] && !optional_tags[itype] ) | |
| 377 { | |
| 378 // CO is a special case, it can contain anything, including tabs | |
| 379 if ( *to ) { to++; continue; } | |
| 380 tag = new_tag(" ",from,to-1); | |
| 381 } | |
| 382 else | |
| 383 tag = new_tag(from,from+3,to-1); | |
| 384 | |
| 385 if ( header_line_has_tag(hline,tag->key) ) | |
| 386 debug("The tag '%c%c' present (at least) twice on line [%s]\n", tag->key[0],tag->key[1], headerLine); | |
| 387 hline->tags = list_append(hline->tags, tag); | |
| 388 | |
| 389 from = to; | |
| 390 while (*to && *to=='\t') to++; | |
| 391 if ( *to && to-from != 1 ) { | |
| 392 debug("[sam_header_line_parse] multiple tabs on line [%s] (%d)\n", headerLine,(int)(to-from)); | |
| 393 return 0; | |
| 394 } | |
| 395 | |
| 396 from = to; | |
| 397 } | |
| 398 return hline; | |
| 399 } | |
| 400 | |
| 401 | |
| 402 // Must be of an existing type, all tags must be recognised and all required tags must be present | |
| 403 static int sam_header_line_validate(HeaderLine *hline) | |
| 404 { | |
| 405 list_t *tags; | |
| 406 HeaderTag *tag; | |
| 407 int itype, itag; | |
| 408 | |
| 409 // Is the type correct? | |
| 410 itype = tag_exists(hline->type, types); | |
| 411 if ( itype==-1 ) | |
| 412 { | |
| 413 debug("The type [%c%c] not recognised.\n", hline->type[0],hline->type[1]); | |
| 414 return 0; | |
| 415 } | |
| 416 | |
| 417 // Has all required tags? | |
| 418 itag = 0; | |
| 419 while ( required_tags[itype] && required_tags[itype][itag] ) | |
| 420 { | |
| 421 if ( !header_line_has_tag(hline,required_tags[itype][itag]) ) | |
| 422 { | |
| 423 debug("The tag [%c%c] required for [%c%c] not present.\n", required_tags[itype][itag][0],required_tags[itype][itag][1], | |
| 424 hline->type[0],hline->type[1]); | |
| 425 return 0; | |
| 426 } | |
| 427 itag++; | |
| 428 } | |
| 429 | |
| 430 // Are all tags recognised? | |
| 431 tags = hline->tags; | |
| 432 while ( tags ) | |
| 433 { | |
| 434 tag = tags->data; | |
| 435 if ( !tag_exists(tag->key,required_tags[itype]) && !tag_exists(tag->key,optional_tags[itype]) ) | |
| 436 { | |
| 437 debug("Unknown tag [%c%c] for [%c%c].\n", tag->key[0],tag->key[1], hline->type[0],hline->type[1]); | |
| 438 return 0; | |
| 439 } | |
| 440 tags = tags->next; | |
| 441 } | |
| 442 | |
| 443 return 1; | |
| 444 } | |
| 445 | |
| 446 | |
| 447 static void print_header_line(FILE *fp, HeaderLine *hline) | |
| 448 { | |
| 449 list_t *tags = hline->tags; | |
| 450 HeaderTag *tag; | |
| 451 | |
| 452 fprintf(fp, "@%c%c", hline->type[0],hline->type[1]); | |
| 453 while (tags) | |
| 454 { | |
| 455 tag = tags->data; | |
| 456 | |
| 457 fprintf(fp, "\t"); | |
| 458 if ( tag->key[0]!=' ' || tag->key[1]!=' ' ) | |
| 459 fprintf(fp, "%c%c:", tag->key[0],tag->key[1]); | |
| 460 fprintf(fp, "%s", tag->value); | |
| 461 | |
| 462 tags = tags->next; | |
| 463 } | |
| 464 fprintf(fp,"\n"); | |
| 465 } | |
| 466 | |
| 467 | |
| 468 static void sam_header_line_free(HeaderLine *hline) | |
| 469 { | |
| 470 list_t *tags = hline->tags; | |
| 471 while (tags) | |
| 472 { | |
| 473 HeaderTag *tag = tags->data; | |
| 474 free(tag->value); | |
| 475 free(tag); | |
| 476 tags = tags->next; | |
| 477 } | |
| 478 list_free(hline->tags); | |
| 479 free(hline); | |
| 480 } | |
| 481 | |
| 482 void sam_header_free(void *_header) | |
| 483 { | |
| 484 HeaderDict *header = (HeaderDict*)_header; | |
| 485 list_t *hlines = header; | |
| 486 while (hlines) | |
| 487 { | |
| 488 sam_header_line_free(hlines->data); | |
| 489 hlines = hlines->next; | |
| 490 } | |
| 491 list_free(header); | |
| 492 } | |
| 493 | |
| 494 HeaderDict *sam_header_clone(const HeaderDict *dict) | |
| 495 { | |
| 496 HeaderDict *out = NULL; | |
| 497 while (dict) | |
| 498 { | |
| 499 HeaderLine *hline = dict->data; | |
| 500 out = list_append(out, sam_header_line_clone(hline)); | |
| 501 dict = dict->next; | |
| 502 } | |
| 503 return out; | |
| 504 } | |
| 505 | |
| 506 // Returns a newly allocated string | |
| 507 char *sam_header_write(const void *_header) | |
| 508 { | |
| 509 const HeaderDict *header = (const HeaderDict*)_header; | |
| 510 char *out = NULL; | |
| 511 int len=0, nout=0; | |
| 512 const list_t *hlines; | |
| 513 | |
| 514 // Calculate the length of the string to allocate | |
| 515 hlines = header; | |
| 516 while (hlines) | |
| 517 { | |
| 518 len += 4; // @XY and \n | |
| 519 | |
| 520 HeaderLine *hline = hlines->data; | |
| 521 list_t *tags = hline->tags; | |
| 522 while (tags) | |
| 523 { | |
| 524 HeaderTag *tag = tags->data; | |
| 525 len += strlen(tag->value) + 1; // \t | |
| 526 if ( tag->key[0]!=' ' || tag->key[1]!=' ' ) | |
| 527 len += strlen(tag->value) + 3; // XY: | |
| 528 tags = tags->next; | |
| 529 } | |
| 530 hlines = hlines->next; | |
| 531 } | |
| 532 | |
| 533 nout = 0; | |
| 534 out = malloc(len+1); | |
| 535 hlines = header; | |
| 536 while (hlines) | |
| 537 { | |
| 538 HeaderLine *hline = hlines->data; | |
| 539 | |
| 540 nout += sprintf(out+nout,"@%c%c",hline->type[0],hline->type[1]); | |
| 541 | |
| 542 list_t *tags = hline->tags; | |
| 543 while (tags) | |
| 544 { | |
| 545 HeaderTag *tag = tags->data; | |
| 546 nout += sprintf(out+nout,"\t"); | |
| 547 if ( tag->key[0]!=' ' || tag->key[1]!=' ' ) | |
| 548 nout += sprintf(out+nout,"%c%c:", tag->key[0],tag->key[1]); | |
| 549 nout += sprintf(out+nout,"%s", tag->value); | |
| 550 tags = tags->next; | |
| 551 } | |
| 552 hlines = hlines->next; | |
| 553 nout += sprintf(out+nout,"\n"); | |
| 554 } | |
| 555 out[len] = 0; | |
| 556 return out; | |
| 557 } | |
| 558 | |
| 559 void *sam_header_parse2(const char *headerText) | |
| 560 { | |
| 561 list_t *hlines = NULL; | |
| 562 HeaderLine *hline; | |
| 563 const char *text; | |
| 564 char *buf=NULL; | |
| 565 size_t nbuf = 0; | |
| 566 int tovalidate = 0; | |
| 567 | |
| 568 if ( !headerText ) | |
| 569 return 0; | |
| 570 | |
| 571 text = headerText; | |
| 572 while ( (text=nextline(&buf, &nbuf, text)) ) | |
| 573 { | |
| 574 hline = sam_header_line_parse(buf); | |
| 575 if ( hline && (!tovalidate || sam_header_line_validate(hline)) ) | |
| 576 // With too many (~250,000) reference sequences the header parsing was too slow with list_append. | |
| 577 hlines = list_append_to_end(hlines, hline); | |
| 578 else | |
| 579 { | |
| 580 if (hline) sam_header_line_free(hline); | |
| 581 sam_header_free(hlines); | |
| 582 if ( buf ) free(buf); | |
| 583 return NULL; | |
| 584 } | |
| 585 } | |
| 586 if ( buf ) free(buf); | |
| 587 | |
| 588 return hlines; | |
| 589 } | |
| 590 | |
| 591 void *sam_header2tbl(const void *_dict, char type[2], char key_tag[2], char value_tag[2]) | |
| 592 { | |
| 593 const HeaderDict *dict = (const HeaderDict*)_dict; | |
| 594 const list_t *l = dict; | |
| 595 khash_t(str) *tbl = kh_init(str); | |
| 596 khiter_t k; | |
| 597 int ret; | |
| 598 | |
| 599 if (_dict == 0) return tbl; // return an empty (not null) hash table | |
| 600 while (l) | |
| 601 { | |
| 602 HeaderLine *hline = l->data; | |
| 603 if ( hline->type[0]!=type[0] || hline->type[1]!=type[1] ) | |
| 604 { | |
| 605 l = l->next; | |
| 606 continue; | |
| 607 } | |
| 608 | |
| 609 HeaderTag *key, *value; | |
| 610 key = header_line_has_tag(hline,key_tag); | |
| 611 value = header_line_has_tag(hline,value_tag); | |
| 612 if ( !key || !value ) | |
| 613 { | |
| 614 l = l->next; | |
| 615 continue; | |
| 616 } | |
| 617 | |
| 618 k = kh_get(str, tbl, key->value); | |
| 619 if ( k != kh_end(tbl) ) | |
| 620 debug("[sam_header_lookup_table] They key %s not unique.\n", key->value); | |
| 621 k = kh_put(str, tbl, key->value, &ret); | |
| 622 kh_value(tbl, k) = value->value; | |
| 623 | |
| 624 l = l->next; | |
| 625 } | |
| 626 return tbl; | |
| 627 } | |
| 628 | |
| 629 char **sam_header2list(const void *_dict, char type[2], char key_tag[2], int *_n) | |
| 630 { | |
| 631 const HeaderDict *dict = (const HeaderDict*)_dict; | |
| 632 const list_t *l = dict; | |
| 633 int max, n; | |
| 634 char **ret; | |
| 635 | |
| 636 ret = 0; *_n = max = n = 0; | |
| 637 while (l) | |
| 638 { | |
| 639 HeaderLine *hline = l->data; | |
| 640 if ( hline->type[0]!=type[0] || hline->type[1]!=type[1] ) | |
| 641 { | |
| 642 l = l->next; | |
| 643 continue; | |
| 644 } | |
| 645 | |
| 646 HeaderTag *key; | |
| 647 key = header_line_has_tag(hline,key_tag); | |
| 648 if ( !key ) | |
| 649 { | |
| 650 l = l->next; | |
| 651 continue; | |
| 652 } | |
| 653 | |
| 654 if (n == max) { | |
| 655 max = max? max<<1 : 4; | |
| 656 ret = realloc(ret, max * sizeof(void*)); | |
| 657 } | |
| 658 ret[n++] = key->value; | |
| 659 | |
| 660 l = l->next; | |
| 661 } | |
| 662 *_n = n; | |
| 663 return ret; | |
| 664 } | |
| 665 | |
| 666 const char *sam_tbl_get(void *h, const char *key) | |
| 667 { | |
| 668 khash_t(str) *tbl = (khash_t(str)*)h; | |
| 669 khint_t k; | |
| 670 k = kh_get(str, tbl, key); | |
| 671 return k == kh_end(tbl)? 0 : kh_val(tbl, k); | |
| 672 } | |
| 673 | |
| 674 int sam_tbl_size(void *h) | |
| 675 { | |
| 676 khash_t(str) *tbl = (khash_t(str)*)h; | |
| 677 return h? kh_size(tbl) : 0; | |
| 678 } | |
| 679 | |
| 680 void sam_tbl_destroy(void *h) | |
| 681 { | |
| 682 khash_t(str) *tbl = (khash_t(str)*)h; | |
| 683 kh_destroy(str, tbl); | |
| 684 } | |
| 685 | |
| 686 void *sam_header_merge(int n, const void **_dicts) | |
| 687 { | |
| 688 const HeaderDict **dicts = (const HeaderDict**)_dicts; | |
| 689 HeaderDict *out_dict; | |
| 690 int idict, status; | |
| 691 | |
| 692 if ( n<2 ) return NULL; | |
| 693 | |
| 694 out_dict = sam_header_clone(dicts[0]); | |
| 695 | |
| 696 for (idict=1; idict<n; idict++) | |
| 697 { | |
| 698 const list_t *tmpl_hlines = dicts[idict]; | |
| 699 | |
| 700 while ( tmpl_hlines ) | |
| 701 { | |
| 702 list_t *out_hlines = out_dict; | |
| 703 int inserted = 0; | |
| 704 while ( out_hlines ) | |
| 705 { | |
| 706 status = sam_header_compare_lines(tmpl_hlines->data, out_hlines->data); | |
| 707 if ( status==0 ) | |
| 708 { | |
| 709 out_hlines = out_hlines->next; | |
| 710 continue; | |
| 711 } | |
| 712 | |
| 713 if ( status==2 ) | |
| 714 { | |
| 715 print_header_line(stderr,tmpl_hlines->data); | |
| 716 print_header_line(stderr,out_hlines->data); | |
| 717 debug("Conflicting lines, cannot merge the headers.\n"); | |
| 718 return 0; | |
| 719 } | |
| 720 if ( status==3 ) | |
| 721 sam_header_line_merge_with(out_hlines->data, tmpl_hlines->data); | |
| 722 | |
| 723 inserted = 1; | |
| 724 break; | |
| 725 } | |
| 726 if ( !inserted ) | |
| 727 out_dict = list_append(out_dict, sam_header_line_clone(tmpl_hlines->data)); | |
| 728 | |
| 729 tmpl_hlines = tmpl_hlines->next; | |
| 730 } | |
| 731 } | |
| 732 | |
| 733 return out_dict; | |
| 734 } | |
| 735 | |
| 736 |
