Mercurial > repos > lsong10 > psiclass
comparison PsiCLASS-1.0.2/samtools-0.1.19/sam_header.c @ 0:903fc43d6227 draft default tip
Uploaded
author | lsong10 |
---|---|
date | Fri, 26 Mar 2021 16:52:45 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:903fc43d6227 |
---|---|
1 #include "sam_header.h" | |
2 #include <stdio.h> | |
3 #include <string.h> | |
4 #include <ctype.h> | |
5 #include <stdlib.h> | |
6 #include <stdarg.h> | |
7 | |
8 #include "khash.h" | |
9 KHASH_MAP_INIT_STR(str, const char *) | |
10 | |
11 struct _HeaderList | |
12 { | |
13 struct _HeaderList *last; // Hack: Used and maintained only by list_append_to_end. Maintained in the root node only. | |
14 struct _HeaderList *next; | |
15 void *data; | |
16 }; | |
17 typedef struct _HeaderList list_t; | |
18 typedef list_t HeaderDict; | |
19 | |
20 typedef struct | |
21 { | |
22 char key[2]; | |
23 char *value; | |
24 } | |
25 HeaderTag; | |
26 | |
27 typedef struct | |
28 { | |
29 char type[2]; | |
30 list_t *tags; | |
31 } | |
32 HeaderLine; | |
33 | |
34 const char *o_hd_tags[] = {"SO","GO",NULL}; | |
35 const char *r_hd_tags[] = {"VN",NULL}; | |
36 | |
37 const char *o_sq_tags[] = {"AS","M5","UR","SP",NULL}; | |
38 const char *r_sq_tags[] = {"SN","LN",NULL}; | |
39 const char *u_sq_tags[] = {"SN",NULL}; | |
40 | |
41 const char *o_rg_tags[] = {"CN","DS","DT","FO","KS","LB","PG","PI","PL","PU","SM",NULL}; | |
42 const char *r_rg_tags[] = {"ID",NULL}; | |
43 const char *u_rg_tags[] = {"ID",NULL}; | |
44 | |
45 const char *o_pg_tags[] = {"VN","CL",NULL}; | |
46 const char *r_pg_tags[] = {"ID",NULL}; | |
47 | |
48 const char *types[] = {"HD","SQ","RG","PG","CO",NULL}; | |
49 const char **optional_tags[] = {o_hd_tags,o_sq_tags,o_rg_tags,o_pg_tags,NULL,NULL}; | |
50 const char **required_tags[] = {r_hd_tags,r_sq_tags,r_rg_tags,r_pg_tags,NULL,NULL}; | |
51 const char **unique_tags[] = {NULL, u_sq_tags,u_rg_tags,NULL,NULL,NULL}; | |
52 | |
53 | |
54 static void debug(const char *format, ...) | |
55 { | |
56 va_list ap; | |
57 va_start(ap, format); | |
58 vfprintf(stderr, format, ap); | |
59 va_end(ap); | |
60 } | |
61 | |
62 #if 0 | |
63 // Replaced by list_append_to_end | |
64 static list_t *list_prepend(list_t *root, void *data) | |
65 { | |
66 list_t *l = malloc(sizeof(list_t)); | |
67 l->next = root; | |
68 l->data = data; | |
69 return l; | |
70 } | |
71 #endif | |
72 | |
73 // Relies on the root->last being correct. Do not use with the other list_* | |
74 // routines unless they are fixed to modify root->last as well. | |
75 static list_t *list_append_to_end(list_t *root, void *data) | |
76 { | |
77 list_t *l = malloc(sizeof(list_t)); | |
78 l->last = l; | |
79 l->next = NULL; | |
80 l->data = data; | |
81 | |
82 if ( !root ) | |
83 return l; | |
84 | |
85 root->last->next = l; | |
86 root->last = l; | |
87 return root; | |
88 } | |
89 | |
90 static list_t *list_append(list_t *root, void *data) | |
91 { | |
92 list_t *l = root; | |
93 while (l && l->next) | |
94 l = l->next; | |
95 if ( l ) | |
96 { | |
97 l->next = malloc(sizeof(list_t)); | |
98 l = l->next; | |
99 } | |
100 else | |
101 { | |
102 l = malloc(sizeof(list_t)); | |
103 root = l; | |
104 } | |
105 l->data = data; | |
106 l->next = NULL; | |
107 return root; | |
108 } | |
109 | |
110 static void list_free(list_t *root) | |
111 { | |
112 list_t *l = root; | |
113 while (root) | |
114 { | |
115 l = root; | |
116 root = root->next; | |
117 free(l); | |
118 } | |
119 } | |
120 | |
121 | |
122 | |
123 // Look for a tag "XY" in a predefined const char *[] array. | |
124 static int tag_exists(const char *tag, const char **tags) | |
125 { | |
126 int itag=0; | |
127 if ( !tags ) return -1; | |
128 while ( tags[itag] ) | |
129 { | |
130 if ( tags[itag][0]==tag[0] && tags[itag][1]==tag[1] ) return itag; | |
131 itag++; | |
132 } | |
133 return -1; | |
134 } | |
135 | |
136 | |
137 | |
138 // Mimics the behaviour of getline, except it returns pointer to the next chunk of the text | |
139 // or NULL if everything has been read. The lineptr should be freed by the caller. The | |
140 // newline character is stripped. | |
141 static const char *nextline(char **lineptr, size_t *n, const char *text) | |
142 { | |
143 int len; | |
144 const char *to = text; | |
145 | |
146 if ( !*to ) return NULL; | |
147 | |
148 while ( *to && *to!='\n' && *to!='\r' ) to++; | |
149 len = to - text + 1; | |
150 | |
151 if ( *to ) | |
152 { | |
153 // Advance the pointer for the next call | |
154 if ( *to=='\n' ) to++; | |
155 else if ( *to=='\r' && *(to+1)=='\n' ) to+=2; | |
156 } | |
157 if ( !len ) | |
158 return to; | |
159 | |
160 if ( !*lineptr ) | |
161 { | |
162 *lineptr = malloc(len); | |
163 *n = len; | |
164 } | |
165 else if ( *n<len ) | |
166 { | |
167 *lineptr = realloc(*lineptr, len); | |
168 *n = len; | |
169 } | |
170 if ( !*lineptr ) { | |
171 debug("[nextline] Insufficient memory!\n"); | |
172 return 0; | |
173 } | |
174 | |
175 memcpy(*lineptr,text,len); | |
176 (*lineptr)[len-1] = 0; | |
177 | |
178 return to; | |
179 } | |
180 | |
181 // name points to "XY", value_from points to the first character of the value string and | |
182 // value_to points to the last character of the value string. | |
183 static HeaderTag *new_tag(const char *name, const char *value_from, const char *value_to) | |
184 { | |
185 HeaderTag *tag = malloc(sizeof(HeaderTag)); | |
186 int len = value_to-value_from+1; | |
187 | |
188 tag->key[0] = name[0]; | |
189 tag->key[1] = name[1]; | |
190 tag->value = malloc(len+1); | |
191 memcpy(tag->value,value_from,len+1); | |
192 tag->value[len] = 0; | |
193 return tag; | |
194 } | |
195 | |
196 static HeaderTag *header_line_has_tag(HeaderLine *hline, const char *key) | |
197 { | |
198 list_t *tags = hline->tags; | |
199 while (tags) | |
200 { | |
201 HeaderTag *tag = tags->data; | |
202 if ( tag->key[0]==key[0] && tag->key[1]==key[1] ) return tag; | |
203 tags = tags->next; | |
204 } | |
205 return NULL; | |
206 } | |
207 | |
208 | |
209 // Return codes: | |
210 // 0 .. different types or unique tags differ or conflicting tags, cannot be merged | |
211 // 1 .. all tags identical -> no need to merge, drop one | |
212 // 2 .. the unique tags match and there are some conflicting tags (same tag, different value) -> error, cannot be merged nor duplicated | |
213 // 3 .. there are some missing complementary tags and no unique conflict -> can be merged into a single line | |
214 static int sam_header_compare_lines(HeaderLine *hline1, HeaderLine *hline2) | |
215 { | |
216 HeaderTag *t1, *t2; | |
217 | |
218 if ( hline1->type[0]!=hline2->type[0] || hline1->type[1]!=hline2->type[1] ) | |
219 return 0; | |
220 | |
221 int itype = tag_exists(hline1->type,types); | |
222 if ( itype==-1 ) { | |
223 debug("[sam_header_compare_lines] Unknown type [%c%c]\n", hline1->type[0],hline1->type[1]); | |
224 return -1; // FIXME (lh3): error; I do not know how this will be handled in Petr's code | |
225 } | |
226 | |
227 if ( unique_tags[itype] ) | |
228 { | |
229 t1 = header_line_has_tag(hline1,unique_tags[itype][0]); | |
230 t2 = header_line_has_tag(hline2,unique_tags[itype][0]); | |
231 if ( !t1 || !t2 ) // this should never happen, the unique tags are required | |
232 return 2; | |
233 | |
234 if ( strcmp(t1->value,t2->value) ) | |
235 return 0; // the unique tags differ, cannot be merged | |
236 } | |
237 if ( !required_tags[itype] && !optional_tags[itype] ) | |
238 { | |
239 t1 = hline1->tags->data; | |
240 t2 = hline2->tags->data; | |
241 if ( !strcmp(t1->value,t2->value) ) return 1; // identical comments | |
242 return 0; | |
243 } | |
244 | |
245 int missing=0, itag=0; | |
246 while ( required_tags[itype] && required_tags[itype][itag] ) | |
247 { | |
248 t1 = header_line_has_tag(hline1,required_tags[itype][itag]); | |
249 t2 = header_line_has_tag(hline2,required_tags[itype][itag]); | |
250 if ( !t1 && !t2 ) | |
251 return 2; // this should never happen | |
252 else if ( !t1 || !t2 ) | |
253 missing = 1; // there is some tag missing in one of the hlines | |
254 else if ( strcmp(t1->value,t2->value) ) | |
255 { | |
256 if ( unique_tags[itype] ) | |
257 return 2; // the lines have a matching unique tag but have a conflicting tag | |
258 | |
259 return 0; // the lines contain conflicting tags, cannot be merged | |
260 } | |
261 itag++; | |
262 } | |
263 itag = 0; | |
264 while ( optional_tags[itype] && optional_tags[itype][itag] ) | |
265 { | |
266 t1 = header_line_has_tag(hline1,optional_tags[itype][itag]); | |
267 t2 = header_line_has_tag(hline2,optional_tags[itype][itag]); | |
268 if ( !t1 && !t2 ) | |
269 { | |
270 itag++; | |
271 continue; | |
272 } | |
273 if ( !t1 || !t2 ) | |
274 missing = 1; // there is some tag missing in one of the hlines | |
275 else if ( strcmp(t1->value,t2->value) ) | |
276 { | |
277 if ( unique_tags[itype] ) | |
278 return 2; // the lines have a matching unique tag but have a conflicting tag | |
279 | |
280 return 0; // the lines contain conflicting tags, cannot be merged | |
281 } | |
282 itag++; | |
283 } | |
284 if ( missing ) return 3; // there are some missing complementary tags with no conflicts, can be merged | |
285 return 1; | |
286 } | |
287 | |
288 | |
289 static HeaderLine *sam_header_line_clone(const HeaderLine *hline) | |
290 { | |
291 list_t *tags; | |
292 HeaderLine *out = malloc(sizeof(HeaderLine)); | |
293 out->type[0] = hline->type[0]; | |
294 out->type[1] = hline->type[1]; | |
295 out->tags = NULL; | |
296 | |
297 tags = hline->tags; | |
298 while (tags) | |
299 { | |
300 HeaderTag *old = tags->data; | |
301 | |
302 HeaderTag *new = malloc(sizeof(HeaderTag)); | |
303 new->key[0] = old->key[0]; | |
304 new->key[1] = old->key[1]; | |
305 new->value = strdup(old->value); | |
306 out->tags = list_append(out->tags, new); | |
307 | |
308 tags = tags->next; | |
309 } | |
310 return out; | |
311 } | |
312 | |
313 static int sam_header_line_merge_with(HeaderLine *out_hline, const HeaderLine *tmpl_hline) | |
314 { | |
315 list_t *tmpl_tags; | |
316 | |
317 if ( out_hline->type[0]!=tmpl_hline->type[0] || out_hline->type[1]!=tmpl_hline->type[1] ) | |
318 return 0; | |
319 | |
320 tmpl_tags = tmpl_hline->tags; | |
321 while (tmpl_tags) | |
322 { | |
323 HeaderTag *tmpl_tag = tmpl_tags->data; | |
324 HeaderTag *out_tag = header_line_has_tag(out_hline, tmpl_tag->key); | |
325 if ( !out_tag ) | |
326 { | |
327 HeaderTag *tag = malloc(sizeof(HeaderTag)); | |
328 tag->key[0] = tmpl_tag->key[0]; | |
329 tag->key[1] = tmpl_tag->key[1]; | |
330 tag->value = strdup(tmpl_tag->value); | |
331 out_hline->tags = list_append(out_hline->tags,tag); | |
332 } | |
333 tmpl_tags = tmpl_tags->next; | |
334 } | |
335 return 1; | |
336 } | |
337 | |
338 | |
339 static HeaderLine *sam_header_line_parse(const char *headerLine) | |
340 { | |
341 HeaderLine *hline; | |
342 HeaderTag *tag; | |
343 const char *from, *to; | |
344 from = headerLine; | |
345 | |
346 if ( *from != '@' ) { | |
347 debug("[sam_header_line_parse] expected '@', got [%s]\n", headerLine); | |
348 return 0; | |
349 } | |
350 to = ++from; | |
351 | |
352 while (*to && *to!='\t') to++; | |
353 if ( to-from != 2 ) { | |
354 debug("[sam_header_line_parse] expected '@XY', got [%s]\nHint: The header tags must be tab-separated.\n", headerLine); | |
355 return 0; | |
356 } | |
357 | |
358 hline = malloc(sizeof(HeaderLine)); | |
359 hline->type[0] = from[0]; | |
360 hline->type[1] = from[1]; | |
361 hline->tags = NULL; | |
362 | |
363 int itype = tag_exists(hline->type, types); | |
364 | |
365 from = to; | |
366 while (*to && *to=='\t') to++; | |
367 if ( to-from != 1 ) { | |
368 debug("[sam_header_line_parse] multiple tabs on line [%s] (%d)\n", headerLine,(int)(to-from)); | |
369 free(hline); | |
370 return 0; | |
371 } | |
372 from = to; | |
373 while (*from) | |
374 { | |
375 while (*to && *to!='\t') to++; | |
376 | |
377 if ( !required_tags[itype] && !optional_tags[itype] ) | |
378 { | |
379 // CO is a special case, it can contain anything, including tabs | |
380 if ( *to ) { to++; continue; } | |
381 tag = new_tag(" ",from,to-1); | |
382 } | |
383 else | |
384 tag = new_tag(from,from+3,to-1); | |
385 | |
386 if ( header_line_has_tag(hline,tag->key) ) | |
387 debug("The tag '%c%c' present (at least) twice on line [%s]\n", tag->key[0],tag->key[1], headerLine); | |
388 hline->tags = list_append(hline->tags, tag); | |
389 | |
390 from = to; | |
391 while (*to && *to=='\t') to++; | |
392 if ( *to && to-from != 1 ) { | |
393 debug("[sam_header_line_parse] multiple tabs on line [%s] (%d)\n", headerLine,(int)(to-from)); | |
394 return 0; | |
395 } | |
396 | |
397 from = to; | |
398 } | |
399 return hline; | |
400 } | |
401 | |
402 | |
403 // Must be of an existing type, all tags must be recognised and all required tags must be present | |
404 static int sam_header_line_validate(HeaderLine *hline) | |
405 { | |
406 list_t *tags; | |
407 HeaderTag *tag; | |
408 int itype, itag; | |
409 | |
410 // Is the type correct? | |
411 itype = tag_exists(hline->type, types); | |
412 if ( itype==-1 ) | |
413 { | |
414 debug("The type [%c%c] not recognised.\n", hline->type[0],hline->type[1]); | |
415 return 0; | |
416 } | |
417 | |
418 // Has all required tags? | |
419 itag = 0; | |
420 while ( required_tags[itype] && required_tags[itype][itag] ) | |
421 { | |
422 if ( !header_line_has_tag(hline,required_tags[itype][itag]) ) | |
423 { | |
424 debug("The tag [%c%c] required for [%c%c] not present.\n", required_tags[itype][itag][0],required_tags[itype][itag][1], | |
425 hline->type[0],hline->type[1]); | |
426 return 0; | |
427 } | |
428 itag++; | |
429 } | |
430 | |
431 // Are all tags recognised? | |
432 tags = hline->tags; | |
433 while ( tags ) | |
434 { | |
435 tag = tags->data; | |
436 if ( !tag_exists(tag->key,required_tags[itype]) && !tag_exists(tag->key,optional_tags[itype]) ) | |
437 { | |
438 // Lower case tags are user-defined values. | |
439 if( !(islower(tag->key[0]) || islower(tag->key[1])) ) | |
440 { | |
441 // Neither is lower case, but tag was not recognized. | |
442 debug("Unknown tag [%c%c] for [%c%c].\n", tag->key[0],tag->key[1], hline->type[0],hline->type[1]); | |
443 // return 0; // Even unknown tags are allowed - for forward compatibility with new attributes | |
444 } | |
445 // else - allow user defined tag | |
446 } | |
447 tags = tags->next; | |
448 } | |
449 | |
450 return 1; | |
451 } | |
452 | |
453 | |
454 static void print_header_line(FILE *fp, HeaderLine *hline) | |
455 { | |
456 list_t *tags = hline->tags; | |
457 HeaderTag *tag; | |
458 | |
459 fprintf(fp, "@%c%c", hline->type[0],hline->type[1]); | |
460 while (tags) | |
461 { | |
462 tag = tags->data; | |
463 | |
464 fprintf(fp, "\t"); | |
465 if ( tag->key[0]!=' ' || tag->key[1]!=' ' ) | |
466 fprintf(fp, "%c%c:", tag->key[0],tag->key[1]); | |
467 fprintf(fp, "%s", tag->value); | |
468 | |
469 tags = tags->next; | |
470 } | |
471 fprintf(fp,"\n"); | |
472 } | |
473 | |
474 | |
475 static void sam_header_line_free(HeaderLine *hline) | |
476 { | |
477 list_t *tags = hline->tags; | |
478 while (tags) | |
479 { | |
480 HeaderTag *tag = tags->data; | |
481 free(tag->value); | |
482 free(tag); | |
483 tags = tags->next; | |
484 } | |
485 list_free(hline->tags); | |
486 free(hline); | |
487 } | |
488 | |
489 void sam_header_free(void *_header) | |
490 { | |
491 HeaderDict *header = (HeaderDict*)_header; | |
492 list_t *hlines = header; | |
493 while (hlines) | |
494 { | |
495 sam_header_line_free(hlines->data); | |
496 hlines = hlines->next; | |
497 } | |
498 list_free(header); | |
499 } | |
500 | |
501 HeaderDict *sam_header_clone(const HeaderDict *dict) | |
502 { | |
503 HeaderDict *out = NULL; | |
504 while (dict) | |
505 { | |
506 HeaderLine *hline = dict->data; | |
507 out = list_append(out, sam_header_line_clone(hline)); | |
508 dict = dict->next; | |
509 } | |
510 return out; | |
511 } | |
512 | |
513 // Returns a newly allocated string | |
514 char *sam_header_write(const void *_header) | |
515 { | |
516 const HeaderDict *header = (const HeaderDict*)_header; | |
517 char *out = NULL; | |
518 int len=0, nout=0; | |
519 const list_t *hlines; | |
520 | |
521 // Calculate the length of the string to allocate | |
522 hlines = header; | |
523 while (hlines) | |
524 { | |
525 len += 4; // @XY and \n | |
526 | |
527 HeaderLine *hline = hlines->data; | |
528 list_t *tags = hline->tags; | |
529 while (tags) | |
530 { | |
531 HeaderTag *tag = tags->data; | |
532 len += strlen(tag->value) + 1; // \t | |
533 if ( tag->key[0]!=' ' || tag->key[1]!=' ' ) | |
534 len += strlen(tag->value) + 3; // XY: | |
535 tags = tags->next; | |
536 } | |
537 hlines = hlines->next; | |
538 } | |
539 | |
540 nout = 0; | |
541 out = malloc(len+1); | |
542 hlines = header; | |
543 while (hlines) | |
544 { | |
545 HeaderLine *hline = hlines->data; | |
546 | |
547 nout += sprintf(out+nout,"@%c%c",hline->type[0],hline->type[1]); | |
548 | |
549 list_t *tags = hline->tags; | |
550 while (tags) | |
551 { | |
552 HeaderTag *tag = tags->data; | |
553 nout += sprintf(out+nout,"\t"); | |
554 if ( tag->key[0]!=' ' || tag->key[1]!=' ' ) | |
555 nout += sprintf(out+nout,"%c%c:", tag->key[0],tag->key[1]); | |
556 nout += sprintf(out+nout,"%s", tag->value); | |
557 tags = tags->next; | |
558 } | |
559 hlines = hlines->next; | |
560 nout += sprintf(out+nout,"\n"); | |
561 } | |
562 out[len] = 0; | |
563 return out; | |
564 } | |
565 | |
566 void *sam_header_parse2(const char *headerText) | |
567 { | |
568 list_t *hlines = NULL; | |
569 HeaderLine *hline; | |
570 const char *text; | |
571 char *buf=NULL; | |
572 size_t nbuf = 0; | |
573 int tovalidate = 0; | |
574 | |
575 if ( !headerText ) | |
576 return 0; | |
577 | |
578 text = headerText; | |
579 while ( (text=nextline(&buf, &nbuf, text)) ) | |
580 { | |
581 hline = sam_header_line_parse(buf); | |
582 if ( hline && (!tovalidate || sam_header_line_validate(hline)) ) | |
583 // With too many (~250,000) reference sequences the header parsing was too slow with list_append. | |
584 hlines = list_append_to_end(hlines, hline); | |
585 else | |
586 { | |
587 if (hline) sam_header_line_free(hline); | |
588 sam_header_free(hlines); | |
589 if ( buf ) free(buf); | |
590 return NULL; | |
591 } | |
592 } | |
593 if ( buf ) free(buf); | |
594 | |
595 return hlines; | |
596 } | |
597 | |
598 void *sam_header2tbl(const void *_dict, char type[2], char key_tag[2], char value_tag[2]) | |
599 { | |
600 const HeaderDict *dict = (const HeaderDict*)_dict; | |
601 const list_t *l = dict; | |
602 khash_t(str) *tbl = kh_init(str); | |
603 khiter_t k; | |
604 int ret; | |
605 | |
606 if (_dict == 0) return tbl; // return an empty (not null) hash table | |
607 while (l) | |
608 { | |
609 HeaderLine *hline = l->data; | |
610 if ( hline->type[0]!=type[0] || hline->type[1]!=type[1] ) | |
611 { | |
612 l = l->next; | |
613 continue; | |
614 } | |
615 | |
616 HeaderTag *key, *value; | |
617 key = header_line_has_tag(hline,key_tag); | |
618 value = header_line_has_tag(hline,value_tag); | |
619 if ( !key || !value ) | |
620 { | |
621 l = l->next; | |
622 continue; | |
623 } | |
624 | |
625 k = kh_get(str, tbl, key->value); | |
626 if ( k != kh_end(tbl) ) | |
627 debug("[sam_header_lookup_table] They key %s not unique.\n", key->value); | |
628 k = kh_put(str, tbl, key->value, &ret); | |
629 kh_value(tbl, k) = value->value; | |
630 | |
631 l = l->next; | |
632 } | |
633 return tbl; | |
634 } | |
635 | |
636 char **sam_header2list(const void *_dict, char type[2], char key_tag[2], int *_n) | |
637 { | |
638 const HeaderDict *dict = (const HeaderDict*)_dict; | |
639 const list_t *l = dict; | |
640 int max, n; | |
641 char **ret; | |
642 | |
643 ret = 0; *_n = max = n = 0; | |
644 while (l) | |
645 { | |
646 HeaderLine *hline = l->data; | |
647 if ( hline->type[0]!=type[0] || hline->type[1]!=type[1] ) | |
648 { | |
649 l = l->next; | |
650 continue; | |
651 } | |
652 | |
653 HeaderTag *key; | |
654 key = header_line_has_tag(hline,key_tag); | |
655 if ( !key ) | |
656 { | |
657 l = l->next; | |
658 continue; | |
659 } | |
660 | |
661 if (n == max) { | |
662 max = max? max<<1 : 4; | |
663 ret = realloc(ret, max * sizeof(void*)); | |
664 } | |
665 ret[n++] = key->value; | |
666 | |
667 l = l->next; | |
668 } | |
669 *_n = n; | |
670 return ret; | |
671 } | |
672 | |
673 void *sam_header2key_val(void *iter, const char type[2], const char key_tag[2], const char value_tag[2], const char **_key, const char **_value) | |
674 { | |
675 list_t *l = iter; | |
676 if ( !l ) return NULL; | |
677 | |
678 while (l) | |
679 { | |
680 HeaderLine *hline = l->data; | |
681 if ( hline->type[0]!=type[0] || hline->type[1]!=type[1] ) | |
682 { | |
683 l = l->next; | |
684 continue; | |
685 } | |
686 | |
687 HeaderTag *key, *value; | |
688 key = header_line_has_tag(hline,key_tag); | |
689 value = header_line_has_tag(hline,value_tag); | |
690 if ( !key && !value ) | |
691 { | |
692 l = l->next; | |
693 continue; | |
694 } | |
695 | |
696 *_key = key->value; | |
697 *_value = value->value; | |
698 return l->next; | |
699 } | |
700 return l; | |
701 } | |
702 | |
703 const char *sam_tbl_get(void *h, const char *key) | |
704 { | |
705 khash_t(str) *tbl = (khash_t(str)*)h; | |
706 khint_t k; | |
707 k = kh_get(str, tbl, key); | |
708 return k == kh_end(tbl)? 0 : kh_val(tbl, k); | |
709 } | |
710 | |
711 int sam_tbl_size(void *h) | |
712 { | |
713 khash_t(str) *tbl = (khash_t(str)*)h; | |
714 return h? kh_size(tbl) : 0; | |
715 } | |
716 | |
717 void sam_tbl_destroy(void *h) | |
718 { | |
719 khash_t(str) *tbl = (khash_t(str)*)h; | |
720 kh_destroy(str, tbl); | |
721 } | |
722 | |
723 void *sam_header_merge(int n, const void **_dicts) | |
724 { | |
725 const HeaderDict **dicts = (const HeaderDict**)_dicts; | |
726 HeaderDict *out_dict; | |
727 int idict, status; | |
728 | |
729 if ( n<2 ) return NULL; | |
730 | |
731 out_dict = sam_header_clone(dicts[0]); | |
732 | |
733 for (idict=1; idict<n; idict++) | |
734 { | |
735 const list_t *tmpl_hlines = dicts[idict]; | |
736 | |
737 while ( tmpl_hlines ) | |
738 { | |
739 list_t *out_hlines = out_dict; | |
740 int inserted = 0; | |
741 while ( out_hlines ) | |
742 { | |
743 status = sam_header_compare_lines(tmpl_hlines->data, out_hlines->data); | |
744 if ( status==0 ) | |
745 { | |
746 out_hlines = out_hlines->next; | |
747 continue; | |
748 } | |
749 | |
750 if ( status==2 ) | |
751 { | |
752 print_header_line(stderr,tmpl_hlines->data); | |
753 print_header_line(stderr,out_hlines->data); | |
754 debug("Conflicting lines, cannot merge the headers.\n"); | |
755 return 0; | |
756 } | |
757 if ( status==3 ) | |
758 sam_header_line_merge_with(out_hlines->data, tmpl_hlines->data); | |
759 | |
760 inserted = 1; | |
761 break; | |
762 } | |
763 if ( !inserted ) | |
764 out_dict = list_append(out_dict, sam_header_line_clone(tmpl_hlines->data)); | |
765 | |
766 tmpl_hlines = tmpl_hlines->next; | |
767 } | |
768 } | |
769 | |
770 return out_dict; | |
771 } | |
772 | |
773 char **sam_header2tbl_n(const void *dict, const char type[2], const char *tags[], int *n) | |
774 { | |
775 int nout = 0; | |
776 char **out = NULL; | |
777 | |
778 *n = 0; | |
779 list_t *l = (list_t *)dict; | |
780 if ( !l ) return NULL; | |
781 | |
782 int i, ntags = 0; | |
783 while ( tags[ntags] ) ntags++; | |
784 | |
785 while (l) | |
786 { | |
787 HeaderLine *hline = l->data; | |
788 if ( hline->type[0]!=type[0] || hline->type[1]!=type[1] ) | |
789 { | |
790 l = l->next; | |
791 continue; | |
792 } | |
793 out = (char**) realloc(out, sizeof(char*)*(nout+1)*ntags); | |
794 for (i=0; i<ntags; i++) | |
795 { | |
796 HeaderTag *key = header_line_has_tag(hline, tags[i]); | |
797 if ( !key ) | |
798 { | |
799 out[nout*ntags+i] = NULL; | |
800 continue; | |
801 } | |
802 out[nout*ntags+i] = key->value; | |
803 } | |
804 nout++; | |
805 l = l->next; | |
806 } | |
807 *n = nout; | |
808 return out; | |
809 } | |
810 |