0
|
1 #include "sam_header.h"
|
|
2 #include <stdio.h>
|
|
3 #include <string.h>
|
|
4 #include <ctype.h>
|
|
5 #include <stdlib.h>
|
|
6 #include <stdarg.h>
|
|
7
|
|
8 #include "khash.h"
|
|
9 KHASH_MAP_INIT_STR(str, const char *)
|
|
10
|
|
// Node of a singly linked list. The same node type is used for the list of
// header lines and for the list of tags inside one line.
struct _HeaderList
{
    // Tail of the list; only meaningful in the root node (see list_append_to_end).
    struct _HeaderList *last;
    // Following node, NULL in the final node.
    struct _HeaderList *next;
    // Payload; the list routines never free it.
    void *data;
};
typedef struct _HeaderList list_t;
// A header dictionary is a list whose nodes carry HeaderLine pointers.
typedef list_t HeaderDict;
|
|
19
|
|
// One field of a header line: a two-character key and its value.
typedef struct
{
    // Tag name; exactly two characters, NOT NUL-terminated.
    char key[2];
    // NUL-terminated tag value, heap allocated.
    char *value;
}
HeaderTag;
|
|
26
|
|
27 typedef struct
|
|
28 {
|
|
29 char type[2];
|
|
30 list_t *tags;
|
|
31 }
|
|
32 HeaderLine;
|
|
33
|
|
// Tag tables, one group per record type. Prefixes: o_ = optional tags,
// r_ = required tags, u_ = the unique tag used to tell records apart.
// Every table is terminated by NULL.

// @HD
const char *o_hd_tags[] = { "SO", "GO", NULL };
const char *r_hd_tags[] = { "VN", NULL };

// @SQ
const char *o_sq_tags[] = { "AS", "M5", "UR", "SP", NULL };
const char *r_sq_tags[] = { "SN", "LN", NULL };
const char *u_sq_tags[] = { "SN", NULL };

// @RG
const char *o_rg_tags[] = { "CN", "DS", "DT", "FO", "KS", "LB", "PG", "PI", "PL", "PU", "SM", NULL };
const char *r_rg_tags[] = { "ID", NULL };
const char *u_rg_tags[] = { "ID", NULL };

// @PG
const char *o_pg_tags[] = { "VN", "CL", NULL };
const char *r_pg_tags[] = { "ID", NULL };

// Known record types. The three arrays below are indexed by the position of
// the type in types[]; a NULL entry means the type has no table of that kind.
const char *types[]          = { "HD", "SQ", "RG", "PG", "CO", NULL };
const char **optional_tags[] = { o_hd_tags, o_sq_tags, o_rg_tags, o_pg_tags, NULL, NULL };
const char **required_tags[] = { r_hd_tags, r_sq_tags, r_rg_tags, r_pg_tags, NULL, NULL };
const char **unique_tags[]   = { NULL, u_sq_tags, u_rg_tags, NULL, NULL, NULL };
|
|
52
|
|
53
|
|
// printf-style diagnostics; the formatted message is written to stderr.
static void debug(const char *format, ...)
{
    va_list args;

    va_start(args, format);
    vfprintf(stderr, format, args);
    va_end(args);
}
|
|
61
|
|
#if 0
// Replaced by list_append_to_end
// Disabled code: puts a new node carrying data in front of root and returns
// the new node as the head of the list.
static list_t *list_prepend(list_t *root, void *data)
{
    list_t *head = malloc(sizeof(list_t));

    head->data = data;
    head->next = root;
    return head;
}
#endif
|
|
72
|
|
73 // Relies on the root->last being correct. Do not use with the other list_*
|
|
74 // routines unless they are fixed to modify root->last as well.
|
|
75 static list_t *list_append_to_end(list_t *root, void *data)
|
|
76 {
|
|
77 list_t *l = malloc(sizeof(list_t));
|
|
78 l->last = l;
|
|
79 l->next = NULL;
|
|
80 l->data = data;
|
|
81
|
|
82 if ( !root )
|
|
83 return l;
|
|
84
|
|
85 root->last->next = l;
|
|
86 root->last = l;
|
|
87 return root;
|
|
88 }
|
|
89
|
|
90 static list_t *list_append(list_t *root, void *data)
|
|
91 {
|
|
92 list_t *l = root;
|
|
93 while (l && l->next)
|
|
94 l = l->next;
|
|
95 if ( l )
|
|
96 {
|
|
97 l->next = malloc(sizeof(list_t));
|
|
98 l = l->next;
|
|
99 }
|
|
100 else
|
|
101 {
|
|
102 l = malloc(sizeof(list_t));
|
|
103 root = l;
|
|
104 }
|
|
105 l->data = data;
|
|
106 l->next = NULL;
|
|
107 return root;
|
|
108 }
|
|
109
|
|
110 static void list_free(list_t *root)
|
|
111 {
|
|
112 list_t *l = root;
|
|
113 while (root)
|
|
114 {
|
|
115 l = root;
|
|
116 root = root->next;
|
|
117 free(l);
|
|
118 }
|
|
119 }
|
|
120
|
|
121
|
|
122
|
|
// Looks up the two-character tag "XY" in a NULL-terminated array of strings.
// Only the first two characters of tag and of each entry are compared.
// Returns the index of the first matching entry, or -1 when nothing matches
// or when tags is NULL.
static int tag_exists(const char *tag, const char **tags)
{
    int idx;

    if ( tags == NULL )
        return -1;

    for ( idx = 0; tags[idx] != NULL; idx++ )
    {
        const char *candidate = tags[idx];
        if ( candidate[0]==tag[0] && candidate[1]==tag[1] )
            return idx;
    }
    return -1;
}
|
|
135
|
|
136
|
|
137
|
|
// Mimics the behaviour of getline on an in-memory text: copies the line that
// starts at text into *lineptr (without its line terminator) and returns a
// pointer to the start of the following line.
//
//  lineptr .. buffer owned by the caller; may point to NULL on the first call.
//             It is grown as needed and must be freed by the caller.
//  n       .. capacity of *lineptr in bytes, updated whenever the buffer grows.
//  text    .. NUL-terminated text, positioned at the start of a line.
//
// A line ends at "\n", "\r\n", a lone "\r" or the end of the text. The lone
// "\r" is consumed like the other terminators; it used to be left in place,
// so the following call returned an empty line without making progress.
//
// Returns NULL when text is already at its end or when memory cannot be
// obtained. In the latter case the previous buffer stays valid (it used to
// be leaked) and still has to be freed by the caller.
static const char *nextline(char **lineptr, size_t *n, const char *text)
{
    const char *to = text;
    size_t len;

    if ( !*to ) return NULL;

    while ( *to && *to!='\n' && *to!='\r' ) to++;
    len = (size_t)(to - text) + 1;      // characters of the line plus the terminating NUL

    if ( !*lineptr || *n<len )
    {
        // realloc(NULL, ..) acts as malloc, which covers the first call
        char *grown = realloc(*lineptr, len);
        if ( !grown ) {
            debug("[nextline] Insufficient memory!\n");
            return 0;
        }
        *lineptr = grown;
        *n = len;
    }

    memcpy(*lineptr, text, len-1);
    (*lineptr)[len-1] = 0;

    // Advance the pointer for the next call
    if ( *to=='\r' && *(to+1)=='\n' ) to += 2;
    else if ( *to ) to++;

    return to;
}
|
|
180
|
|
181 // name points to "XY", value_from points to the first character of the value string and
|
|
182 // value_to points to the last character of the value string.
|
|
183 static HeaderTag *new_tag(const char *name, const char *value_from, const char *value_to)
|
|
184 {
|
|
185 HeaderTag *tag = malloc(sizeof(HeaderTag));
|
|
186 int len = value_to-value_from+1;
|
|
187
|
|
188 tag->key[0] = name[0];
|
|
189 tag->key[1] = name[1];
|
|
190 tag->value = malloc(len+1);
|
|
191 memcpy(tag->value,value_from,len+1);
|
|
192 tag->value[len] = 0;
|
|
193 return tag;
|
|
194 }
|
|
195
|
|
196 static HeaderTag *header_line_has_tag(HeaderLine *hline, const char *key)
|
|
197 {
|
|
198 list_t *tags = hline->tags;
|
|
199 while (tags)
|
|
200 {
|
|
201 HeaderTag *tag = tags->data;
|
|
202 if ( tag->key[0]==key[0] && tag->key[1]==key[1] ) return tag;
|
|
203 tags = tags->next;
|
|
204 }
|
|
205 return NULL;
|
|
206 }
|
|
207
|
|
208
|
|
209 // Return codes:
|
|
210 // 0 .. different types or unique tags differ or conflicting tags, cannot be merged
|
|
211 // 1 .. all tags identical -> no need to merge, drop one
|
|
212 // 2 .. the unique tags match and there are some conflicting tags (same tag, different value) -> error, cannot be merged nor duplicated
|
|
213 // 3 .. there are some missing complementary tags and no unique conflict -> can be merged into a single line
|
|
214 static int sam_header_compare_lines(HeaderLine *hline1, HeaderLine *hline2)
|
|
215 {
|
|
216 HeaderTag *t1, *t2;
|
|
217
|
|
218 if ( hline1->type[0]!=hline2->type[0] || hline1->type[1]!=hline2->type[1] )
|
|
219 return 0;
|
|
220
|
|
221 int itype = tag_exists(hline1->type,types);
|
|
222 if ( itype==-1 ) {
|
|
223 debug("[sam_header_compare_lines] Unknown type [%c%c]\n", hline1->type[0],hline1->type[1]);
|
|
224 return -1; // FIXME (lh3): error; I do not know how this will be handled in Petr's code
|
|
225 }
|
|
226
|
|
227 if ( unique_tags[itype] )
|
|
228 {
|
|
229 t1 = header_line_has_tag(hline1,unique_tags[itype][0]);
|
|
230 t2 = header_line_has_tag(hline2,unique_tags[itype][0]);
|
|
231 if ( !t1 || !t2 ) // this should never happen, the unique tags are required
|
|
232 return 2;
|
|
233
|
|
234 if ( strcmp(t1->value,t2->value) )
|
|
235 return 0; // the unique tags differ, cannot be merged
|
|
236 }
|
|
237 if ( !required_tags[itype] && !optional_tags[itype] )
|
|
238 {
|
|
239 t1 = hline1->tags->data;
|
|
240 t2 = hline2->tags->data;
|
|
241 if ( !strcmp(t1->value,t2->value) ) return 1; // identical comments
|
|
242 return 0;
|
|
243 }
|
|
244
|
|
245 int missing=0, itag=0;
|
|
246 while ( required_tags[itype] && required_tags[itype][itag] )
|
|
247 {
|
|
248 t1 = header_line_has_tag(hline1,required_tags[itype][itag]);
|
|
249 t2 = header_line_has_tag(hline2,required_tags[itype][itag]);
|
|
250 if ( !t1 && !t2 )
|
|
251 return 2; // this should never happen
|
|
252 else if ( !t1 || !t2 )
|
|
253 missing = 1; // there is some tag missing in one of the hlines
|
|
254 else if ( strcmp(t1->value,t2->value) )
|
|
255 {
|
|
256 if ( unique_tags[itype] )
|
|
257 return 2; // the lines have a matching unique tag but have a conflicting tag
|
|
258
|
|
259 return 0; // the lines contain conflicting tags, cannot be merged
|
|
260 }
|
|
261 itag++;
|
|
262 }
|
|
263 itag = 0;
|
|
264 while ( optional_tags[itype] && optional_tags[itype][itag] )
|
|
265 {
|
|
266 t1 = header_line_has_tag(hline1,optional_tags[itype][itag]);
|
|
267 t2 = header_line_has_tag(hline2,optional_tags[itype][itag]);
|
|
268 if ( !t1 && !t2 )
|
|
269 {
|
|
270 itag++;
|
|
271 continue;
|
|
272 }
|
|
273 if ( !t1 || !t2 )
|
|
274 missing = 1; // there is some tag missing in one of the hlines
|
|
275 else if ( strcmp(t1->value,t2->value) )
|
|
276 {
|
|
277 if ( unique_tags[itype] )
|
|
278 return 2; // the lines have a matching unique tag but have a conflicting tag
|
|
279
|
|
280 return 0; // the lines contain conflicting tags, cannot be merged
|
|
281 }
|
|
282 itag++;
|
|
283 }
|
|
284 if ( missing ) return 3; // there are some missing complementary tags with no conflicts, can be merged
|
|
285 return 1;
|
|
286 }
|
|
287
|
|
288
|
|
289 static HeaderLine *sam_header_line_clone(const HeaderLine *hline)
|
|
290 {
|
|
291 list_t *tags;
|
|
292 HeaderLine *out = malloc(sizeof(HeaderLine));
|
|
293 out->type[0] = hline->type[0];
|
|
294 out->type[1] = hline->type[1];
|
|
295 out->tags = NULL;
|
|
296
|
|
297 tags = hline->tags;
|
|
298 while (tags)
|
|
299 {
|
|
300 HeaderTag *old = tags->data;
|
|
301
|
|
302 HeaderTag *new = malloc(sizeof(HeaderTag));
|
|
303 new->key[0] = old->key[0];
|
|
304 new->key[1] = old->key[1];
|
|
305 new->value = strdup(old->value);
|
|
306 out->tags = list_append(out->tags, new);
|
|
307
|
|
308 tags = tags->next;
|
|
309 }
|
|
310 return out;
|
|
311 }
|
|
312
|
|
313 static int sam_header_line_merge_with(HeaderLine *out_hline, const HeaderLine *tmpl_hline)
|
|
314 {
|
|
315 list_t *tmpl_tags;
|
|
316
|
|
317 if ( out_hline->type[0]!=tmpl_hline->type[0] || out_hline->type[1]!=tmpl_hline->type[1] )
|
|
318 return 0;
|
|
319
|
|
320 tmpl_tags = tmpl_hline->tags;
|
|
321 while (tmpl_tags)
|
|
322 {
|
|
323 HeaderTag *tmpl_tag = tmpl_tags->data;
|
|
324 HeaderTag *out_tag = header_line_has_tag(out_hline, tmpl_tag->key);
|
|
325 if ( !out_tag )
|
|
326 {
|
|
327 HeaderTag *tag = malloc(sizeof(HeaderTag));
|
|
328 tag->key[0] = tmpl_tag->key[0];
|
|
329 tag->key[1] = tmpl_tag->key[1];
|
|
330 tag->value = strdup(tmpl_tag->value);
|
|
331 out_hline->tags = list_append(out_hline->tags,tag);
|
|
332 }
|
|
333 tmpl_tags = tmpl_tags->next;
|
|
334 }
|
|
335 return 1;
|
|
336 }
|
|
337
|
|
338
|
|
339 static HeaderLine *sam_header_line_parse(const char *headerLine)
|
|
340 {
|
|
341 HeaderLine *hline;
|
|
342 HeaderTag *tag;
|
|
343 const char *from, *to;
|
|
344 from = headerLine;
|
|
345
|
|
346 if ( *from != '@' ) {
|
|
347 debug("[sam_header_line_parse] expected '@', got [%s]\n", headerLine);
|
|
348 return 0;
|
|
349 }
|
|
350 to = ++from;
|
|
351
|
|
352 while (*to && *to!='\t') to++;
|
|
353 if ( to-from != 2 ) {
|
|
354 debug("[sam_header_line_parse] expected '@XY', got [%s]\nHint: The header tags must be tab-separated.\n", headerLine);
|
|
355 return 0;
|
|
356 }
|
|
357
|
|
358 hline = malloc(sizeof(HeaderLine));
|
|
359 hline->type[0] = from[0];
|
|
360 hline->type[1] = from[1];
|
|
361 hline->tags = NULL;
|
|
362
|
|
363 int itype = tag_exists(hline->type, types);
|
|
364
|
|
365 from = to;
|
|
366 while (*to && *to=='\t') to++;
|
|
367 if ( to-from != 1 ) {
|
|
368 debug("[sam_header_line_parse] multiple tabs on line [%s] (%d)\n", headerLine,(int)(to-from));
|
|
369 return 0;
|
|
370 }
|
|
371 from = to;
|
|
372 while (*from)
|
|
373 {
|
|
374 while (*to && *to!='\t') to++;
|
|
375
|
|
376 if ( !required_tags[itype] && !optional_tags[itype] )
|
|
377 {
|
|
378 // CO is a special case, it can contain anything, including tabs
|
|
379 if ( *to ) { to++; continue; }
|
|
380 tag = new_tag(" ",from,to-1);
|
|
381 }
|
|
382 else
|
|
383 tag = new_tag(from,from+3,to-1);
|
|
384
|
|
385 if ( header_line_has_tag(hline,tag->key) )
|
|
386 debug("The tag '%c%c' present (at least) twice on line [%s]\n", tag->key[0],tag->key[1], headerLine);
|
|
387 hline->tags = list_append(hline->tags, tag);
|
|
388
|
|
389 from = to;
|
|
390 while (*to && *to=='\t') to++;
|
|
391 if ( *to && to-from != 1 ) {
|
|
392 debug("[sam_header_line_parse] multiple tabs on line [%s] (%d)\n", headerLine,(int)(to-from));
|
|
393 return 0;
|
|
394 }
|
|
395
|
|
396 from = to;
|
|
397 }
|
|
398 return hline;
|
|
399 }
|
|
400
|
|
401
|
|
402 // Must be of an existing type, all tags must be recognised and all required tags must be present
|
|
403 static int sam_header_line_validate(HeaderLine *hline)
|
|
404 {
|
|
405 list_t *tags;
|
|
406 HeaderTag *tag;
|
|
407 int itype, itag;
|
|
408
|
|
409 // Is the type correct?
|
|
410 itype = tag_exists(hline->type, types);
|
|
411 if ( itype==-1 )
|
|
412 {
|
|
413 debug("The type [%c%c] not recognised.\n", hline->type[0],hline->type[1]);
|
|
414 return 0;
|
|
415 }
|
|
416
|
|
417 // Has all required tags?
|
|
418 itag = 0;
|
|
419 while ( required_tags[itype] && required_tags[itype][itag] )
|
|
420 {
|
|
421 if ( !header_line_has_tag(hline,required_tags[itype][itag]) )
|
|
422 {
|
|
423 debug("The tag [%c%c] required for [%c%c] not present.\n", required_tags[itype][itag][0],required_tags[itype][itag][1],
|
|
424 hline->type[0],hline->type[1]);
|
|
425 return 0;
|
|
426 }
|
|
427 itag++;
|
|
428 }
|
|
429
|
|
430 // Are all tags recognised?
|
|
431 tags = hline->tags;
|
|
432 while ( tags )
|
|
433 {
|
|
434 tag = tags->data;
|
|
435 if ( !tag_exists(tag->key,required_tags[itype]) && !tag_exists(tag->key,optional_tags[itype]) )
|
|
436 {
|
|
437 debug("Unknown tag [%c%c] for [%c%c].\n", tag->key[0],tag->key[1], hline->type[0],hline->type[1]);
|
|
438 return 0;
|
|
439 }
|
|
440 tags = tags->next;
|
|
441 }
|
|
442
|
|
443 return 1;
|
|
444 }
|
|
445
|
|
446
|
|
447 static void print_header_line(FILE *fp, HeaderLine *hline)
|
|
448 {
|
|
449 list_t *tags = hline->tags;
|
|
450 HeaderTag *tag;
|
|
451
|
|
452 fprintf(fp, "@%c%c", hline->type[0],hline->type[1]);
|
|
453 while (tags)
|
|
454 {
|
|
455 tag = tags->data;
|
|
456
|
|
457 fprintf(fp, "\t");
|
|
458 if ( tag->key[0]!=' ' || tag->key[1]!=' ' )
|
|
459 fprintf(fp, "%c%c:", tag->key[0],tag->key[1]);
|
|
460 fprintf(fp, "%s", tag->value);
|
|
461
|
|
462 tags = tags->next;
|
|
463 }
|
|
464 fprintf(fp,"\n");
|
|
465 }
|
|
466
|
|
467
|
|
468 static void sam_header_line_free(HeaderLine *hline)
|
|
469 {
|
|
470 list_t *tags = hline->tags;
|
|
471 while (tags)
|
|
472 {
|
|
473 HeaderTag *tag = tags->data;
|
|
474 free(tag->value);
|
|
475 free(tag);
|
|
476 tags = tags->next;
|
|
477 }
|
|
478 list_free(hline->tags);
|
|
479 free(hline);
|
|
480 }
|
|
481
|
|
482 void sam_header_free(void *_header)
|
|
483 {
|
|
484 HeaderDict *header = (HeaderDict*)_header;
|
|
485 list_t *hlines = header;
|
|
486 while (hlines)
|
|
487 {
|
|
488 sam_header_line_free(hlines->data);
|
|
489 hlines = hlines->next;
|
|
490 }
|
|
491 list_free(header);
|
|
492 }
|
|
493
|
|
494 HeaderDict *sam_header_clone(const HeaderDict *dict)
|
|
495 {
|
|
496 HeaderDict *out = NULL;
|
|
497 while (dict)
|
|
498 {
|
|
499 HeaderLine *hline = dict->data;
|
|
500 out = list_append(out, sam_header_line_clone(hline));
|
|
501 dict = dict->next;
|
|
502 }
|
|
503 return out;
|
|
504 }
|
|
505
|
|
506 // Returns a newly allocated string
|
|
507 char *sam_header_write(const void *_header)
|
|
508 {
|
|
509 const HeaderDict *header = (const HeaderDict*)_header;
|
|
510 char *out = NULL;
|
|
511 int len=0, nout=0;
|
|
512 const list_t *hlines;
|
|
513
|
|
514 // Calculate the length of the string to allocate
|
|
515 hlines = header;
|
|
516 while (hlines)
|
|
517 {
|
|
518 len += 4; // @XY and \n
|
|
519
|
|
520 HeaderLine *hline = hlines->data;
|
|
521 list_t *tags = hline->tags;
|
|
522 while (tags)
|
|
523 {
|
|
524 HeaderTag *tag = tags->data;
|
|
525 len += strlen(tag->value) + 1; // \t
|
|
526 if ( tag->key[0]!=' ' || tag->key[1]!=' ' )
|
|
527 len += strlen(tag->value) + 3; // XY:
|
|
528 tags = tags->next;
|
|
529 }
|
|
530 hlines = hlines->next;
|
|
531 }
|
|
532
|
|
533 nout = 0;
|
|
534 out = malloc(len+1);
|
|
535 hlines = header;
|
|
536 while (hlines)
|
|
537 {
|
|
538 HeaderLine *hline = hlines->data;
|
|
539
|
|
540 nout += sprintf(out+nout,"@%c%c",hline->type[0],hline->type[1]);
|
|
541
|
|
542 list_t *tags = hline->tags;
|
|
543 while (tags)
|
|
544 {
|
|
545 HeaderTag *tag = tags->data;
|
|
546 nout += sprintf(out+nout,"\t");
|
|
547 if ( tag->key[0]!=' ' || tag->key[1]!=' ' )
|
|
548 nout += sprintf(out+nout,"%c%c:", tag->key[0],tag->key[1]);
|
|
549 nout += sprintf(out+nout,"%s", tag->value);
|
|
550 tags = tags->next;
|
|
551 }
|
|
552 hlines = hlines->next;
|
|
553 nout += sprintf(out+nout,"\n");
|
|
554 }
|
|
555 out[len] = 0;
|
|
556 return out;
|
|
557 }
|
|
558
|
|
559 void *sam_header_parse2(const char *headerText)
|
|
560 {
|
|
561 list_t *hlines = NULL;
|
|
562 HeaderLine *hline;
|
|
563 const char *text;
|
|
564 char *buf=NULL;
|
|
565 size_t nbuf = 0;
|
|
566 int tovalidate = 0;
|
|
567
|
|
568 if ( !headerText )
|
|
569 return 0;
|
|
570
|
|
571 text = headerText;
|
|
572 while ( (text=nextline(&buf, &nbuf, text)) )
|
|
573 {
|
|
574 hline = sam_header_line_parse(buf);
|
|
575 if ( hline && (!tovalidate || sam_header_line_validate(hline)) )
|
|
576 // With too many (~250,000) reference sequences the header parsing was too slow with list_append.
|
|
577 hlines = list_append_to_end(hlines, hline);
|
|
578 else
|
|
579 {
|
|
580 if (hline) sam_header_line_free(hline);
|
|
581 sam_header_free(hlines);
|
|
582 if ( buf ) free(buf);
|
|
583 return NULL;
|
|
584 }
|
|
585 }
|
|
586 if ( buf ) free(buf);
|
|
587
|
|
588 return hlines;
|
|
589 }
|
|
590
|
|
591 void *sam_header2tbl(const void *_dict, char type[2], char key_tag[2], char value_tag[2])
|
|
592 {
|
|
593 const HeaderDict *dict = (const HeaderDict*)_dict;
|
|
594 const list_t *l = dict;
|
|
595 khash_t(str) *tbl = kh_init(str);
|
|
596 khiter_t k;
|
|
597 int ret;
|
|
598
|
|
599 if (_dict == 0) return tbl; // return an empty (not null) hash table
|
|
600 while (l)
|
|
601 {
|
|
602 HeaderLine *hline = l->data;
|
|
603 if ( hline->type[0]!=type[0] || hline->type[1]!=type[1] )
|
|
604 {
|
|
605 l = l->next;
|
|
606 continue;
|
|
607 }
|
|
608
|
|
609 HeaderTag *key, *value;
|
|
610 key = header_line_has_tag(hline,key_tag);
|
|
611 value = header_line_has_tag(hline,value_tag);
|
|
612 if ( !key || !value )
|
|
613 {
|
|
614 l = l->next;
|
|
615 continue;
|
|
616 }
|
|
617
|
|
618 k = kh_get(str, tbl, key->value);
|
|
619 if ( k != kh_end(tbl) )
|
|
620 debug("[sam_header_lookup_table] They key %s not unique.\n", key->value);
|
|
621 k = kh_put(str, tbl, key->value, &ret);
|
|
622 kh_value(tbl, k) = value->value;
|
|
623
|
|
624 l = l->next;
|
|
625 }
|
|
626 return tbl;
|
|
627 }
|
|
628
|
|
629 char **sam_header2list(const void *_dict, char type[2], char key_tag[2], int *_n)
|
|
630 {
|
|
631 const HeaderDict *dict = (const HeaderDict*)_dict;
|
|
632 const list_t *l = dict;
|
|
633 int max, n;
|
|
634 char **ret;
|
|
635
|
|
636 ret = 0; *_n = max = n = 0;
|
|
637 while (l)
|
|
638 {
|
|
639 HeaderLine *hline = l->data;
|
|
640 if ( hline->type[0]!=type[0] || hline->type[1]!=type[1] )
|
|
641 {
|
|
642 l = l->next;
|
|
643 continue;
|
|
644 }
|
|
645
|
|
646 HeaderTag *key;
|
|
647 key = header_line_has_tag(hline,key_tag);
|
|
648 if ( !key )
|
|
649 {
|
|
650 l = l->next;
|
|
651 continue;
|
|
652 }
|
|
653
|
|
654 if (n == max) {
|
|
655 max = max? max<<1 : 4;
|
|
656 ret = realloc(ret, max * sizeof(void*));
|
|
657 }
|
|
658 ret[n++] = key->value;
|
|
659
|
|
660 l = l->next;
|
|
661 }
|
|
662 *_n = n;
|
|
663 return ret;
|
|
664 }
|
|
665
|
|
666 const char *sam_tbl_get(void *h, const char *key)
|
|
667 {
|
|
668 khash_t(str) *tbl = (khash_t(str)*)h;
|
|
669 khint_t k;
|
|
670 k = kh_get(str, tbl, key);
|
|
671 return k == kh_end(tbl)? 0 : kh_val(tbl, k);
|
|
672 }
|
|
673
|
|
674 int sam_tbl_size(void *h)
|
|
675 {
|
|
676 khash_t(str) *tbl = (khash_t(str)*)h;
|
|
677 return h? kh_size(tbl) : 0;
|
|
678 }
|
|
679
|
|
680 void sam_tbl_destroy(void *h)
|
|
681 {
|
|
682 khash_t(str) *tbl = (khash_t(str)*)h;
|
|
683 kh_destroy(str, tbl);
|
|
684 }
|
|
685
|
|
686 void *sam_header_merge(int n, const void **_dicts)
|
|
687 {
|
|
688 const HeaderDict **dicts = (const HeaderDict**)_dicts;
|
|
689 HeaderDict *out_dict;
|
|
690 int idict, status;
|
|
691
|
|
692 if ( n<2 ) return NULL;
|
|
693
|
|
694 out_dict = sam_header_clone(dicts[0]);
|
|
695
|
|
696 for (idict=1; idict<n; idict++)
|
|
697 {
|
|
698 const list_t *tmpl_hlines = dicts[idict];
|
|
699
|
|
700 while ( tmpl_hlines )
|
|
701 {
|
|
702 list_t *out_hlines = out_dict;
|
|
703 int inserted = 0;
|
|
704 while ( out_hlines )
|
|
705 {
|
|
706 status = sam_header_compare_lines(tmpl_hlines->data, out_hlines->data);
|
|
707 if ( status==0 )
|
|
708 {
|
|
709 out_hlines = out_hlines->next;
|
|
710 continue;
|
|
711 }
|
|
712
|
|
713 if ( status==2 )
|
|
714 {
|
|
715 print_header_line(stderr,tmpl_hlines->data);
|
|
716 print_header_line(stderr,out_hlines->data);
|
|
717 debug("Conflicting lines, cannot merge the headers.\n");
|
|
718 return 0;
|
|
719 }
|
|
720 if ( status==3 )
|
|
721 sam_header_line_merge_with(out_hlines->data, tmpl_hlines->data);
|
|
722
|
|
723 inserted = 1;
|
|
724 break;
|
|
725 }
|
|
726 if ( !inserted )
|
|
727 out_dict = list_append(out_dict, sam_header_line_clone(tmpl_hlines->data));
|
|
728
|
|
729 tmpl_hlines = tmpl_hlines->next;
|
|
730 }
|
|
731 }
|
|
732
|
|
733 return out_dict;
|
|
734 }
|
|
735
|
|
736
|