0
|
1 /*
|
|
2 Copyright (c) 2013 Genome Research Ltd.
|
|
3 Author: James Bonfield <jkb@sanger.ac.uk>
|
|
4
|
|
5 Redistribution and use in source and binary forms, with or without
|
|
6 modification, are permitted provided that the following conditions are met:
|
|
7
|
|
8 1. Redistributions of source code must retain the above copyright notice,
|
|
9 this list of conditions and the following disclaimer.
|
|
10
|
|
11 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
12 this list of conditions and the following disclaimer in the documentation
|
|
13 and/or other materials provided with the distribution.
|
|
14
|
|
15 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
|
|
16 Institute nor the names of its contributors may be used to endorse or promote
|
|
17 products derived from this software without specific prior written permission.
|
|
18
|
|
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
|
|
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
|
|
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
29 */
|
|
30
|
|
31 #ifdef HAVE_CONFIG_H
|
|
32 #include "io_lib_config.h"
|
|
33 #endif
|
|
34
|
|
35 #include <string.h>
|
|
36 #include <assert.h>
|
|
37
|
|
38 #include "cram/sam_header.h"
|
|
39 #include "cram/string_alloc.h"
|
|
40
|
|
41 static void sam_hdr_error(char *msg, char *line, int len, int lno) {
|
|
42 int j;
|
|
43
|
|
44 for (j = 0; j < len && line[j] != '\n'; j++)
|
|
45 ;
|
|
46 fprintf(stderr, "%s at line %d: \"%.*s\"\n", msg, lno, j, line);
|
|
47 }
|
|
48
|
|
49 void sam_hdr_dump(SAM_hdr *hdr) {
|
|
50 khint_t k;
|
|
51 int i;
|
|
52
|
|
53 printf("===DUMP===\n");
|
|
54 for (k = kh_begin(hdr->h); k != kh_end(hdr->h); k++) {
|
|
55 SAM_hdr_type *t1, *t2;
|
|
56 char c[2];
|
|
57
|
|
58 if (!kh_exist(hdr->h, k))
|
|
59 continue;
|
|
60
|
|
61 t1 = t2 = kh_val(hdr->h, k);
|
|
62 c[0] = kh_key(hdr->h, k)>>8;
|
|
63 c[1] = kh_key(hdr->h, k)&0xff;
|
|
64 printf("Type %.2s, count %d\n", c, t1->prev->order+1);
|
|
65
|
|
66 do {
|
|
67 SAM_hdr_tag *tag;
|
|
68 printf(">>>%d ", t1->order);
|
|
69 for (tag = t1->tag; tag; tag=tag->next) {
|
|
70 printf("\"%.2s\":\"%.*s\"\t",
|
|
71 tag->str, tag->len-3, tag->str+3);
|
|
72 }
|
|
73 putchar('\n');
|
|
74 t1 = t1->next;
|
|
75 } while (t1 != t2);
|
|
76 }
|
|
77
|
|
78 /* Dump out PG chains */
|
|
79 printf("\n@PG chains:\n");
|
|
80 for (i = 0; i < hdr->npg_end; i++) {
|
|
81 int j;
|
|
82 printf(" %d:", i);
|
|
83 for (j = hdr->pg_end[i]; j != -1; j = hdr->pg[j].prev_id) {
|
|
84 printf("%s%d(%.*s)",
|
|
85 j == hdr->pg_end[i] ? " " : "->",
|
|
86 j, hdr->pg[j].name_len, hdr->pg[j].name);
|
|
87 }
|
|
88 printf("\n");
|
|
89 }
|
|
90
|
|
91 puts("===END DUMP===");
|
|
92 }
|
|
93
|
|
94 /* Updates the hash tables in the SAM_hdr structure.
|
|
95 *
|
|
96 * Returns 0 on success;
|
|
97 * -1 on failure
|
|
98 */
|
|
99 static int sam_hdr_update_hashes(SAM_hdr *sh,
|
|
100 int type,
|
|
101 SAM_hdr_type *h_type) {
|
|
102 /* Add to reference hash? */
|
|
103 if ((type>>8) == 'S' && (type&0xff) == 'Q') {
|
|
104 SAM_hdr_tag *tag;
|
|
105 int nref = sh->nref;
|
|
106
|
|
107 sh->ref = realloc(sh->ref, (sh->nref+1)*sizeof(*sh->ref));
|
|
108 if (!sh->ref)
|
|
109 return -1;
|
|
110
|
|
111 tag = h_type->tag;
|
|
112 sh->ref[nref].name = NULL;
|
|
113 sh->ref[nref].len = 0;
|
|
114 sh->ref[nref].ty = h_type;
|
|
115 sh->ref[nref].tag = tag;
|
|
116
|
|
117 while (tag) {
|
|
118 if (tag->str[0] == 'S' && tag->str[1] == 'N') {
|
|
119 if (!(sh->ref[nref].name = malloc(tag->len)))
|
|
120 return -1;
|
|
121 strncpy(sh->ref[nref].name, tag->str+3, tag->len-3);
|
|
122 sh->ref[nref].name[tag->len-3] = 0;
|
|
123 } else if (tag->str[0] == 'L' && tag->str[1] == 'N') {
|
|
124 sh->ref[nref].len = atoi(tag->str+3);
|
|
125 }
|
|
126 tag = tag->next;
|
|
127 }
|
|
128
|
|
129 if (sh->ref[nref].name) {
|
|
130 khint_t k;
|
|
131 int r;
|
|
132 k = kh_put(m_s2i, sh->ref_hash, sh->ref[nref].name, &r);
|
|
133 if (-1 == r) return -1;
|
|
134 kh_val(sh->ref_hash, k) = nref;
|
|
135 }
|
|
136
|
|
137 sh->nref++;
|
|
138 }
|
|
139
|
|
140 /* Add to read-group hash? */
|
|
141 if ((type>>8) == 'R' && (type&0xff) == 'G') {
|
|
142 SAM_hdr_tag *tag;
|
|
143 int nrg = sh->nrg;
|
|
144
|
|
145 sh->rg = realloc(sh->rg, (sh->nrg+1)*sizeof(*sh->rg));
|
|
146 if (!sh->rg)
|
|
147 return -1;
|
|
148
|
|
149 tag = h_type->tag;
|
|
150 sh->rg[nrg].name = NULL;
|
|
151 sh->rg[nrg].name_len = 0;
|
|
152 sh->rg[nrg].ty = h_type;
|
|
153 sh->rg[nrg].tag = tag;
|
|
154 sh->rg[nrg].id = nrg;
|
|
155
|
|
156 while (tag) {
|
|
157 if (tag->str[0] == 'I' && tag->str[1] == 'D') {
|
|
158 if (!(sh->rg[nrg].name = malloc(tag->len)))
|
|
159 return -1;
|
|
160 strncpy(sh->rg[nrg].name, tag->str+3, tag->len-3);
|
|
161 sh->rg[nrg].name[tag->len-3] = 0;
|
|
162 sh->rg[nrg].name_len = strlen(sh->rg[nrg].name);
|
|
163 }
|
|
164 tag = tag->next;
|
|
165 }
|
|
166
|
|
167 if (sh->rg[nrg].name) {
|
|
168 khint_t k;
|
|
169 int r;
|
|
170 k = kh_put(m_s2i, sh->rg_hash, sh->rg[nrg].name, &r);
|
|
171 if (-1 == r) return -1;
|
|
172 kh_val(sh->rg_hash, k) = nrg;
|
|
173 }
|
|
174
|
|
175 sh->nrg++;
|
|
176 }
|
|
177
|
|
178 /* Add to program hash? */
|
|
179 if ((type>>8) == 'P' && (type&0xff) == 'G') {
|
|
180 SAM_hdr_tag *tag;
|
|
181 int npg = sh->npg;
|
|
182
|
|
183 sh->pg = realloc(sh->pg, (sh->npg+1)*sizeof(*sh->pg));
|
|
184 if (!sh->pg)
|
|
185 return -1;
|
|
186
|
|
187 tag = h_type->tag;
|
|
188 sh->pg[npg].name = NULL;
|
|
189 sh->pg[npg].name_len = 0;
|
|
190 sh->pg[npg].ty = h_type;
|
|
191 sh->pg[npg].tag = tag;
|
|
192 sh->pg[npg].id = npg;
|
|
193 sh->pg[npg].prev_id = -1;
|
|
194
|
|
195 while (tag) {
|
|
196 if (tag->str[0] == 'I' && tag->str[1] == 'D') {
|
|
197 if (!(sh->pg[npg].name = malloc(tag->len)))
|
|
198 return -1;
|
|
199 strncpy(sh->pg[npg].name, tag->str+3, tag->len-3);
|
|
200 sh->pg[npg].name[tag->len-3] = 0;
|
|
201 sh->pg[npg].name_len = strlen(sh->pg[npg].name);
|
|
202 } else if (tag->str[0] == 'P' && tag->str[1] == 'P') {
|
|
203 // Resolve later if needed
|
|
204 khint_t k;
|
|
205 char tmp = tag->str[tag->len]; tag->str[tag->len] = 0;
|
|
206 k = kh_get(m_s2i, sh->pg_hash, tag->str+3);
|
|
207 tag->str[tag->len] = tmp;
|
|
208
|
|
209 if (k != kh_end(sh->pg_hash)) {
|
|
210 int p_id = kh_val(sh->pg_hash, k);
|
|
211 sh->pg[npg].prev_id = sh->pg[p_id].id;
|
|
212
|
|
213 /* Unmark previous entry as a PG termination */
|
|
214 if (sh->npg_end > 0 &&
|
|
215 sh->pg_end[sh->npg_end-1] == p_id) {
|
|
216 sh->npg_end--;
|
|
217 } else {
|
|
218 int i;
|
|
219 for (i = 0; i < sh->npg_end; i++) {
|
|
220 if (sh->pg_end[i] == p_id) {
|
|
221 memmove(&sh->pg_end[i], &sh->pg_end[i+1],
|
|
222 (sh->npg_end-i-1)*sizeof(*sh->pg_end));
|
|
223 sh->npg_end--;
|
|
224 }
|
|
225 }
|
|
226 }
|
|
227 } else {
|
|
228 sh->pg[npg].prev_id = -1;
|
|
229 }
|
|
230 }
|
|
231 tag = tag->next;
|
|
232 }
|
|
233
|
|
234 if (sh->pg[npg].name) {
|
|
235 khint_t k;
|
|
236 int r;
|
|
237 k = kh_put(m_s2i, sh->pg_hash, sh->pg[npg].name, &r);
|
|
238 if (-1 == r) return -1;
|
|
239 kh_val(sh->pg_hash, k) = npg;
|
|
240 }
|
|
241
|
|
242 /* Add to npg_end[] array. Remove later if we find a PP line */
|
|
243 if (sh->npg_end >= sh->npg_end_alloc) {
|
|
244 sh->npg_end_alloc = sh->npg_end_alloc
|
|
245 ? sh->npg_end_alloc*2
|
|
246 : 4;
|
|
247 sh->pg_end = realloc(sh->pg_end,
|
|
248 sh->npg_end_alloc * sizeof(int));
|
|
249 if (!sh->pg_end)
|
|
250 return -1;
|
|
251 }
|
|
252 sh->pg_end[sh->npg_end++] = npg;
|
|
253
|
|
254 sh->npg++;
|
|
255 }
|
|
256
|
|
257 return 0;
|
|
258 }
|
|
259
|
|
260 /*
|
|
261 * Appends a formatted line to an existing SAM header.
|
|
262 * Line is a full SAM header record, eg "@SQ\tSN:foo\tLN:100", with
|
|
263 * optional new-line. If it contains more than 1 line then multiple lines
|
|
264 * will be added in order.
|
|
265 *
|
|
266 * Len is the length of the text data, or 0 if unknown (in which case
|
|
267 * it should be null terminated).
|
|
268 *
|
|
269 * Returns 0 on success
|
|
270 * -1 on failure
|
|
271 */
|
|
272 int sam_hdr_add_lines(SAM_hdr *sh, const char *lines, int len) {
|
|
273 int i, lno = 1, text_offset;
|
|
274 char *hdr;
|
|
275
|
|
276 if (!len)
|
|
277 len = strlen(lines);
|
|
278
|
|
279 text_offset = ks_len(&sh->text);
|
|
280 if (EOF == kputsn(lines, len, &sh->text))
|
|
281 return -1;
|
|
282 hdr = ks_str(&sh->text) + text_offset;
|
|
283
|
|
284 for (i = 0; i < len; i++) {
|
|
285 khint32_t type;
|
|
286 khint_t k;
|
|
287
|
|
288 int l_start = i, new;
|
|
289 SAM_hdr_type *h_type;
|
|
290 SAM_hdr_tag *h_tag, *last;
|
|
291
|
|
292 if (hdr[i] != '@') {
|
|
293 int j;
|
|
294 for (j = i; j < len && hdr[j] != '\n'; j++)
|
|
295 ;
|
|
296 sam_hdr_error("Header line does not start with '@'",
|
|
297 &hdr[l_start], len - l_start, lno);
|
|
298 return -1;
|
|
299 }
|
|
300
|
|
301 type = (hdr[i+1]<<8) | hdr[i+2];
|
|
302 if (hdr[i+1] < 'A' || hdr[i+1] > 'z' ||
|
|
303 hdr[i+2] < 'A' || hdr[i+2] > 'z') {
|
|
304 sam_hdr_error("Header line does not have a two character key",
|
|
305 &hdr[l_start], len - l_start, lno);
|
|
306 return -1;
|
|
307 }
|
|
308
|
|
309 i += 3;
|
|
310 if (hdr[i] == '\n')
|
|
311 continue;
|
|
312
|
|
313 // Add the header line type
|
|
314 if (!(h_type = pool_alloc(sh->type_pool)))
|
|
315 return -1;
|
|
316 if (-1 == (k = kh_put(sam_hdr, sh->h, type, &new)))
|
|
317 return -1;
|
|
318
|
|
319 // Form the ring, either with self or other lines of this type
|
|
320 if (!new) {
|
|
321 SAM_hdr_type *t = kh_val(sh->h, k), *p;
|
|
322 p = t->prev;
|
|
323
|
|
324 assert(p->next = t);
|
|
325 p->next = h_type;
|
|
326 h_type->prev = p;
|
|
327
|
|
328 t->prev = h_type;
|
|
329 h_type->next = t;
|
|
330 h_type->order = p->order+1;
|
|
331 } else {
|
|
332 kh_val(sh->h, k) = h_type;
|
|
333 h_type->prev = h_type->next = h_type;
|
|
334 h_type->order = 0;
|
|
335 }
|
|
336
|
|
337 // Parse the tags on this line
|
|
338 last = NULL;
|
|
339 if ((type>>8) == 'C' && (type&0xff) == 'O') {
|
|
340 int j;
|
|
341 if (hdr[i] != '\t') {
|
|
342 sam_hdr_error("Missing tab",
|
|
343 &hdr[l_start], len - l_start, lno);
|
|
344 return -1;
|
|
345 }
|
|
346
|
|
347 for (j = ++i; j < len && hdr[j] != '\n'; j++)
|
|
348 ;
|
|
349
|
|
350 if (!(h_type->tag = h_tag = pool_alloc(sh->tag_pool)))
|
|
351 return -1;
|
|
352 h_tag->str = string_ndup(sh->str_pool, &hdr[i], j-i);
|
|
353 h_tag->len = j-i;
|
|
354 h_tag->next = NULL;
|
|
355 if (!h_tag->str)
|
|
356 return -1;
|
|
357
|
|
358 i = j;
|
|
359
|
|
360 } else {
|
|
361 do {
|
|
362 int j;
|
|
363 if (hdr[i] != '\t') {
|
|
364 sam_hdr_error("Missing tab",
|
|
365 &hdr[l_start], len - l_start, lno);
|
|
366 return -1;
|
|
367 }
|
|
368
|
|
369 for (j = ++i; j < len && hdr[j] != '\n' && hdr[j] != '\t'; j++)
|
|
370 ;
|
|
371
|
|
372 if (!(h_tag = pool_alloc(sh->tag_pool)))
|
|
373 return -1;
|
|
374 h_tag->str = string_ndup(sh->str_pool, &hdr[i], j-i);
|
|
375 h_tag->len = j-i;
|
|
376 h_tag->next = NULL;
|
|
377 if (!h_tag->str)
|
|
378 return -1;
|
|
379
|
|
380 if (h_tag->len < 3 || h_tag->str[2] != ':') {
|
|
381 sam_hdr_error("Malformed key:value pair",
|
|
382 &hdr[l_start], len - l_start, lno);
|
|
383 return -1;
|
|
384 }
|
|
385
|
|
386 if (last)
|
|
387 last->next = h_tag;
|
|
388 else
|
|
389 h_type->tag = h_tag;
|
|
390
|
|
391 last = h_tag;
|
|
392 i = j;
|
|
393 } while (i < len && hdr[i] != '\n');
|
|
394 }
|
|
395
|
|
396 /* Update RG/SQ hashes */
|
|
397 if (-1 == sam_hdr_update_hashes(sh, type, h_type))
|
|
398 return -1;
|
|
399 }
|
|
400
|
|
401 return 0;
|
|
402 }
|
|
403
|
|
404 /*
|
|
405 * Adds a single line to a SAM header.
|
|
406 * Specify type and one or more key,value pairs, ending with the NULL key.
|
|
407 * Eg. sam_hdr_add(h, "SQ", "ID", "foo", "LN", "100", NULL).
|
|
408 *
|
|
409 * Returns index for specific entry on success (eg 2nd SQ, 4th RG)
|
|
410 * -1 on failure
|
|
411 */
|
|
412 int sam_hdr_add(SAM_hdr *sh, const char *type, ...) {
|
|
413 va_list args;
|
|
414 va_start(args, type);
|
|
415 return sam_hdr_vadd(sh, type, args, NULL);
|
|
416 }
|
|
417
|
|
418 int sam_hdr_vadd(SAM_hdr *sh, const char *type, va_list ap, ...) {
|
|
419 va_list args;
|
|
420 SAM_hdr_type *h_type;
|
|
421 SAM_hdr_tag *h_tag, *last;
|
|
422 int new;
|
|
423 khint32_t type_i = (type[0]<<8) | type[1], k;
|
|
424
|
|
425 #if defined(HAVE_VA_COPY)
|
|
426 va_list ap_local;
|
|
427 #endif
|
|
428
|
|
429 if (EOF == kputc_('@', &sh->text))
|
|
430 return -1;
|
|
431 if (EOF == kputsn(type, 2, &sh->text))
|
|
432 return -1;
|
|
433
|
|
434 if (!(h_type = pool_alloc(sh->type_pool)))
|
|
435 return -1;
|
|
436 if (-1 == (k = kh_put(sam_hdr, sh->h, type_i, &new)))
|
|
437 return -1;
|
|
438 kh_val(sh->h, k) = h_type;
|
|
439
|
|
440 // Form the ring, either with self or other lines of this type
|
|
441 if (!new) {
|
|
442 SAM_hdr_type *t = kh_val(sh->h, k), *p;
|
|
443 p = t->prev;
|
|
444
|
|
445 assert(p->next = t);
|
|
446 p->next = h_type;
|
|
447 h_type->prev = p;
|
|
448
|
|
449 t->prev = h_type;
|
|
450 h_type->next = t;
|
|
451 h_type->order = p->order + 1;
|
|
452 } else {
|
|
453 h_type->prev = h_type->next = h_type;
|
|
454 h_type->order = 0;
|
|
455 }
|
|
456
|
|
457 last = NULL;
|
|
458
|
|
459 // Any ... varargs
|
|
460 va_start(args, ap);
|
|
461 for (;;) {
|
|
462 char *k, *v;
|
|
463 int idx;
|
|
464
|
|
465 if (!(k = (char *)va_arg(args, char *)))
|
|
466 break;
|
|
467 v = va_arg(args, char *);
|
|
468
|
|
469 if (EOF == kputc_('\t', &sh->text))
|
|
470 return -1;
|
|
471
|
|
472 if (!(h_tag = pool_alloc(sh->tag_pool)))
|
|
473 return -1;
|
|
474 idx = ks_len(&sh->text);
|
|
475
|
|
476 if (EOF == kputs(k, &sh->text))
|
|
477 return -1;
|
|
478 if (EOF == kputc_(':', &sh->text))
|
|
479 return -1;
|
|
480 if (EOF == kputs(v, &sh->text))
|
|
481 return -1;
|
|
482
|
|
483 h_tag->len = ks_len(&sh->text) - idx;
|
|
484 h_tag->str = string_ndup(sh->str_pool,
|
|
485 ks_str(&sh->text) + idx,
|
|
486 h_tag->len);
|
|
487 h_tag->next = NULL;
|
|
488 if (!h_tag->str)
|
|
489 return -1;
|
|
490
|
|
491 if (last)
|
|
492 last->next = h_tag;
|
|
493 else
|
|
494 h_type->tag = h_tag;
|
|
495
|
|
496 last = h_tag;
|
|
497 }
|
|
498 va_end(args);
|
|
499
|
|
500 #if defined(HAVE_VA_COPY)
|
|
501 va_copy(ap_local, ap);
|
|
502 # define ap ap_local
|
|
503 #endif
|
|
504
|
|
505 // Plus the specified va_list params
|
|
506 for (;;) {
|
|
507 char *k, *v;
|
|
508 int idx;
|
|
509
|
|
510 if (!(k = (char *)va_arg(ap, char *)))
|
|
511 break;
|
|
512 v = va_arg(ap, char *);
|
|
513
|
|
514 if (EOF == kputc_('\t', &sh->text))
|
|
515 return -1;
|
|
516
|
|
517 if (!(h_tag = pool_alloc(sh->tag_pool)))
|
|
518 return -1;
|
|
519 idx = ks_len(&sh->text);
|
|
520
|
|
521 if (EOF == kputs(k, &sh->text))
|
|
522 return -1;
|
|
523 if (EOF == kputc_(':', &sh->text))
|
|
524 return -1;
|
|
525 if (EOF == kputs(v, &sh->text))
|
|
526 return -1;
|
|
527
|
|
528 h_tag->len = ks_len(&sh->text) - idx;
|
|
529 h_tag->str = string_ndup(sh->str_pool,
|
|
530 ks_str(&sh->text) + idx,
|
|
531 h_tag->len);
|
|
532 h_tag->next = NULL;
|
|
533 if (!h_tag->str)
|
|
534 return -1;
|
|
535
|
|
536 if (last)
|
|
537 last->next = h_tag;
|
|
538 else
|
|
539 h_type->tag = h_tag;
|
|
540
|
|
541 last = h_tag;
|
|
542 }
|
|
543 va_end(ap);
|
|
544
|
|
545 if (EOF == kputc('\n', &sh->text))
|
|
546 return -1;
|
|
547
|
|
548 int itype = (type[0]<<8) | type[1];
|
|
549 if (-1 == sam_hdr_update_hashes(sh, itype, h_type))
|
|
550 return -1;
|
|
551
|
|
552 return h_type->order;
|
|
553 }
|
|
554
|
|
555 /*
|
|
556 * Returns the first header item matching 'type'. If ID is non-NULL it checks
|
|
557 * for the tag ID: and compares against the specified ID.
|
|
558 *
|
|
559 * Returns NULL if no type/ID is found
|
|
560 */
|
|
561 SAM_hdr_type *sam_hdr_find(SAM_hdr *hdr, char *type,
|
|
562 char *ID_key, char *ID_value) {
|
|
563 SAM_hdr_type *t1, *t2;
|
|
564 int itype = (type[0]<<8)|(type[1]);
|
|
565 khint_t k;
|
|
566
|
|
567 /* Special case for types we have prebuilt hashes on */
|
|
568 if (ID_key) {
|
|
569 if (type[0] == 'S' && type[1] == 'Q' &&
|
|
570 ID_key[0] == 'S' && ID_key[1] == 'N') {
|
|
571 k = kh_get(m_s2i, hdr->ref_hash, ID_value);
|
|
572 return k != kh_end(hdr->ref_hash)
|
|
573 ? hdr->ref[kh_val(hdr->ref_hash, k)].ty
|
|
574 : NULL;
|
|
575 }
|
|
576
|
|
577 if (type[0] == 'R' && type[1] == 'G' &&
|
|
578 ID_key[0] == 'I' && ID_key[1] == 'D') {
|
|
579 k = kh_get(m_s2i, hdr->rg_hash, ID_value);
|
|
580 return k != kh_end(hdr->rg_hash)
|
|
581 ? hdr->rg[kh_val(hdr->rg_hash, k)].ty
|
|
582 : NULL;
|
|
583 }
|
|
584
|
|
585 if (type[0] == 'P' && type[1] == 'G' &&
|
|
586 ID_key[0] == 'I' && ID_key[1] == 'D') {
|
|
587 k = kh_get(m_s2i, hdr->pg_hash, ID_value);
|
|
588 return k != kh_end(hdr->pg_hash)
|
|
589 ? hdr->pg[kh_val(hdr->pg_hash, k)].ty
|
|
590 : NULL;
|
|
591 }
|
|
592 }
|
|
593
|
|
594 k = kh_get(sam_hdr, hdr->h, itype);
|
|
595 if (k == kh_end(hdr->h))
|
|
596 return NULL;
|
|
597
|
|
598 if (!ID_key)
|
|
599 return kh_val(hdr->h, k);
|
|
600
|
|
601 t1 = t2 = kh_val(hdr->h, k);
|
|
602 do {
|
|
603 SAM_hdr_tag *tag;
|
|
604 for (tag = t1->tag; tag; tag = tag->next) {
|
|
605 if (tag->str[0] == ID_key[0] && tag->str[1] == ID_key[1]) {
|
|
606 char *cp1 = tag->str+3;
|
|
607 char *cp2 = ID_value;
|
|
608 while (*cp1 && *cp1 == *cp2)
|
|
609 cp1++, cp2++;
|
|
610 if (*cp2 || *cp1)
|
|
611 continue;
|
|
612 return t1;
|
|
613 }
|
|
614 }
|
|
615 t1 = t1->next;
|
|
616 } while (t1 != t2);
|
|
617
|
|
618 return NULL;
|
|
619 }
|
|
620
|
|
621 /*
|
|
622 * As per SAM_hdr_type, but returns a complete line of formatted text
|
|
623 * for a specific head type/ID combination. If ID is NULL then it returns
|
|
624 * the first line of the specified type.
|
|
625 *
|
|
626 * The returned string is malloced and should be freed by the calling
|
|
627 * function with free().
|
|
628 *
|
|
629 * Returns NULL if no type/ID is found.
|
|
630 */
|
|
631 char *sam_hdr_find_line(SAM_hdr *hdr, char *type,
|
|
632 char *ID_key, char *ID_value) {
|
|
633 SAM_hdr_type *ty = sam_hdr_find(hdr, type, ID_key, ID_value);
|
|
634 kstring_t ks = KS_INITIALIZER;
|
|
635 SAM_hdr_tag *tag;
|
|
636 int r = 0;
|
|
637
|
|
638 if (!ty)
|
|
639 return NULL;
|
|
640
|
|
641 // Paste together the line from the hashed copy
|
|
642 r |= (kputc_('@', &ks) == EOF);
|
|
643 r |= (kputs(type, &ks) == EOF);
|
|
644 for (tag = ty->tag; tag; tag = tag->next) {
|
|
645 r |= (kputc_('\t', &ks) == EOF);
|
|
646 r |= (kputsn(tag->str, tag->len, &ks) == EOF);
|
|
647 }
|
|
648
|
|
649 if (r) {
|
|
650 KS_FREE(&ks);
|
|
651 return NULL;
|
|
652 }
|
|
653
|
|
654 return ks_str(&ks);
|
|
655 }
|
|
656
|
|
657
|
|
658 /*
|
|
659 * Looks for a specific key in a single sam header line.
|
|
660 * If prev is non-NULL it also fills this out with the previous tag, to
|
|
661 * permit use in key removal. *prev is set to NULL when the tag is the first
|
|
662 * key in the list. When a tag isn't found, prev (if non NULL) will be the last
|
|
663 * tag in the existing list.
|
|
664 *
|
|
665 * Returns the tag pointer on success
|
|
666 * NULL on failure
|
|
667 */
|
|
668 SAM_hdr_tag *sam_hdr_find_key(SAM_hdr *sh,
|
|
669 SAM_hdr_type *type,
|
|
670 char *key,
|
|
671 SAM_hdr_tag **prev) {
|
|
672 SAM_hdr_tag *tag, *p = NULL;
|
|
673
|
|
674 for (tag = type->tag; tag; p = tag, tag = tag->next) {
|
|
675 if (tag->str[0] == key[0] && tag->str[1] == key[1]) {
|
|
676 if (prev)
|
|
677 *prev = p;
|
|
678 return tag;
|
|
679 }
|
|
680 }
|
|
681
|
|
682 if (prev)
|
|
683 *prev = p;
|
|
684
|
|
685 return NULL;
|
|
686 }
|
|
687
|
|
688
|
|
689 /*
|
|
690 * Adds or updates tag key,value pairs in a header line.
|
|
691 * Eg for adding M5 tags to @SQ lines or updating sort order for the
|
|
692 * @HD line (although use the sam_hdr_sort_order() function for
|
|
693 * HD manipulation, which is a wrapper around this funuction).
|
|
694 *
|
|
695 * Specify multiple key,value pairs ending in NULL.
|
|
696 *
|
|
697 * Returns 0 on success
|
|
698 * -1 on failure
|
|
699 */
|
|
700 int sam_hdr_update(SAM_hdr *hdr, SAM_hdr_type *type, ...) {
|
|
701 va_list ap;
|
|
702
|
|
703 va_start(ap, type);
|
|
704
|
|
705 for (;;) {
|
|
706 char *k, *v;
|
|
707 int idx;
|
|
708 SAM_hdr_tag *tag, *prev;
|
|
709
|
|
710 if (!(k = (char *)va_arg(ap, char *)))
|
|
711 break;
|
|
712 v = va_arg(ap, char *);
|
|
713
|
|
714 tag = sam_hdr_find_key(hdr, type, k, &prev);
|
|
715 if (!tag) {
|
|
716 if (!(tag = pool_alloc(hdr->tag_pool)))
|
|
717 return -1;
|
|
718 if (prev)
|
|
719 prev->next = tag;
|
|
720 else
|
|
721 type->tag = tag;
|
|
722
|
|
723 tag->next = NULL;
|
|
724 }
|
|
725
|
|
726 idx = ks_len(&hdr->text);
|
|
727 if (ksprintf(&hdr->text, "%2.2s:%s", k, v) < 0)
|
|
728 return -1;
|
|
729 tag->len = ks_len(&hdr->text) - idx;
|
|
730 tag->str = string_ndup(hdr->str_pool,
|
|
731 ks_str(&hdr->text) + idx,
|
|
732 tag->len);
|
|
733 if (!tag->str)
|
|
734 return -1;
|
|
735 }
|
|
736
|
|
737 va_end(ap);
|
|
738
|
|
739 return 0;
|
|
740 }
|
|
741
|
|
742 #define K(a) (((a)[0]<<8)|((a)[1]))
|
|
743
|
|
744 /*
|
|
745 * Reconstructs the kstring from the header hash table.
|
|
746 * Returns 0 on success
|
|
747 * -1 on failure
|
|
748 */
|
|
749 int sam_hdr_rebuild(SAM_hdr *hdr) {
|
|
750 /* Order: HD then others */
|
|
751 kstring_t ks = KS_INITIALIZER;
|
|
752 khint_t k;
|
|
753
|
|
754
|
|
755 k = kh_get(sam_hdr, hdr->h, K("HD"));
|
|
756 if (k != kh_end(hdr->h)) {
|
|
757 SAM_hdr_type *ty = kh_val(hdr->h, k);
|
|
758 SAM_hdr_tag *tag;
|
|
759 if (EOF == kputs("@HD", &ks))
|
|
760 return -1;
|
|
761 for (tag = ty->tag; tag; tag = tag->next) {
|
|
762 if (EOF == kputc_('\t', &ks))
|
|
763 return -1;
|
|
764 if (EOF == kputsn_(tag->str, tag->len, &ks))
|
|
765 return -1;
|
|
766 }
|
|
767 if (EOF == kputc('\n', &ks))
|
|
768 return -1;
|
|
769 }
|
|
770
|
|
771 for (k = kh_begin(hdr->h); k != kh_end(hdr->h); k++) {
|
|
772 SAM_hdr_type *t1, *t2;
|
|
773
|
|
774 if (!kh_exist(hdr->h, k))
|
|
775 continue;
|
|
776
|
|
777 if (kh_key(hdr->h, k) == K("HD"))
|
|
778 continue;
|
|
779
|
|
780 t1 = t2 = kh_val(hdr->h, k);
|
|
781 do {
|
|
782 SAM_hdr_tag *tag;
|
|
783 char c[2];
|
|
784
|
|
785 if (EOF == kputc_('@', &ks))
|
|
786 return -1;
|
|
787 c[0] = kh_key(hdr->h, k)>>8;
|
|
788 c[1] = kh_key(hdr->h, k)&0xff;
|
|
789 if (EOF == kputsn_(c, 2, &ks))
|
|
790 return -1;
|
|
791 for (tag = t1->tag; tag; tag=tag->next) {
|
|
792 if (EOF == kputc_('\t', &ks))
|
|
793 return -1;
|
|
794 if (EOF == kputsn_(tag->str, tag->len, &ks))
|
|
795 return -1;
|
|
796 }
|
|
797 if (EOF == kputc('\n', &ks))
|
|
798 return -1;
|
|
799 t1 = t1->next;
|
|
800 } while (t1 != t2);
|
|
801 }
|
|
802
|
|
803 if (ks_str(&hdr->text))
|
|
804 KS_FREE(&hdr->text);
|
|
805
|
|
806 hdr->text = ks;
|
|
807
|
|
808 return 0;
|
|
809 }
|
|
810
|
|
811
|
|
812 /*
|
|
813 * Creates an empty SAM header, ready to be populated.
|
|
814 *
|
|
815 * Returns a SAM_hdr struct on success (free with sam_hdr_free())
|
|
816 * NULL on failure
|
|
817 */
|
|
818 SAM_hdr *sam_hdr_new() {
|
|
819 SAM_hdr *sh = calloc(1, sizeof(*sh));
|
|
820
|
|
821 if (!sh)
|
|
822 return NULL;
|
|
823
|
|
824 sh->h = kh_init(sam_hdr);
|
|
825 if (!sh->h)
|
|
826 goto err;
|
|
827
|
|
828 sh->ID_cnt = 1;
|
|
829 sh->ref_count = 1;
|
|
830
|
|
831 sh->nref = 0;
|
|
832 sh->ref = NULL;
|
|
833 if (!(sh->ref_hash = kh_init(m_s2i)))
|
|
834 goto err;
|
|
835
|
|
836 sh->nrg = 0;
|
|
837 sh->rg = NULL;
|
|
838 if (!(sh->rg_hash = kh_init(m_s2i)))
|
|
839 goto err;
|
|
840
|
|
841 sh->npg = 0;
|
|
842 sh->pg = NULL;
|
|
843 sh->npg_end = sh->npg_end_alloc = 0;
|
|
844 sh->pg_end = NULL;
|
|
845 if (!(sh->pg_hash = kh_init(m_s2i)))
|
|
846 goto err;
|
|
847
|
|
848 KS_INIT(&sh->text);
|
|
849
|
|
850 if (!(sh->tag_pool = pool_create(sizeof(SAM_hdr_tag))))
|
|
851 goto err;
|
|
852
|
|
853 if (!(sh->type_pool = pool_create(sizeof(SAM_hdr_type))))
|
|
854 goto err;
|
|
855
|
|
856 if (!(sh->str_pool = string_pool_create(8192)))
|
|
857 goto err;
|
|
858
|
|
859 return sh;
|
|
860
|
|
861 err:
|
|
862 if (sh->h)
|
|
863 kh_destroy(sam_hdr, sh->h);
|
|
864
|
|
865 if (sh->tag_pool)
|
|
866 pool_destroy(sh->tag_pool);
|
|
867
|
|
868 if (sh->type_pool)
|
|
869 pool_destroy(sh->type_pool);
|
|
870
|
|
871 if (sh->str_pool)
|
|
872 string_pool_destroy(sh->str_pool);
|
|
873
|
|
874 free(sh);
|
|
875
|
|
876 return NULL;
|
|
877 }
|
|
878
|
|
879
|
|
880 /*
|
|
881 * Tokenises a SAM header into a hash table.
|
|
882 * Also extracts a few bits on specific data types, such as @RG lines.
|
|
883 *
|
|
884 * Returns a SAM_hdr struct on success (free with sam_hdr_free())
|
|
885 * NULL on failure
|
|
886 */
|
|
887 SAM_hdr *sam_hdr_parse_(const char *hdr, int len) {
|
|
888 /* Make an empty SAM_hdr */
|
|
889 SAM_hdr *sh;
|
|
890
|
|
891 sh = sam_hdr_new();
|
|
892 if (NULL == sh) return NULL;
|
|
893
|
|
894 if (NULL == hdr) return sh; // empty header is permitted
|
|
895
|
|
896 /* Parse the header, line by line */
|
|
897 if (-1 == sam_hdr_add_lines(sh, hdr, len)) {
|
|
898 sam_hdr_free(sh);
|
|
899 return NULL;
|
|
900 }
|
|
901
|
|
902 //sam_hdr_dump(sh);
|
|
903 //sam_hdr_add(sh, "RG", "ID", "foo", "SM", "bar", NULL);
|
|
904 //sam_hdr_rebuild(sh);
|
|
905 //printf(">>%s<<", ks_str(sh->text));
|
|
906
|
|
907 //parse_references(sh);
|
|
908 //parse_read_groups(sh);
|
|
909
|
|
910 sam_hdr_link_pg(sh);
|
|
911 //sam_hdr_dump(sh);
|
|
912
|
|
913 return sh;
|
|
914 }
|
|
915
|
|
916 /*
|
|
917 * Produces a duplicate copy of hdr and returns it.
|
|
918 * Returns NULL on failure
|
|
919 */
|
|
920 SAM_hdr *sam_hdr_dup(SAM_hdr *hdr) {
|
|
921 if (-1 == sam_hdr_rebuild(hdr))
|
|
922 return NULL;
|
|
923
|
|
924 return sam_hdr_parse_(sam_hdr_str(hdr), sam_hdr_length(hdr));
|
|
925 }
|
|
926
|
|
927 /*! Increments a reference count on hdr.
|
|
928 *
|
|
929 * This permits multiple files to share the same header, all calling
|
|
930 * sam_hdr_free when done, without causing errors for other open files.
|
|
931 */
|
|
932 void sam_hdr_incr_ref(SAM_hdr *hdr) {
|
|
933 hdr->ref_count++;
|
|
934 }
|
|
935
|
|
936 /*! Increments a reference count on hdr.
|
|
937 *
|
|
938 * This permits multiple files to share the same header, all calling
|
|
939 * sam_hdr_free when done, without causing errors for other open files.
|
|
940 *
|
|
941 * If the reference count hits zero then the header is automatically
|
|
942 * freed. This makes it a synonym for sam_hdr_free().
|
|
943 */
|
|
944 void sam_hdr_decr_ref(SAM_hdr *hdr) {
|
|
945 sam_hdr_free(hdr);
|
|
946 }
|
|
947
|
|
948 /*! Deallocates all storage used by a SAM_hdr struct.
|
|
949 *
|
|
950 * This also decrements the header reference count. If after decrementing
|
|
951 * it is still non-zero then the header is assumed to be in use by another
|
|
952 * caller and the free is not done.
|
|
953 *
|
|
954 * This is a synonym for sam_hdr_dec_ref().
|
|
955 */
|
|
956 void sam_hdr_free(SAM_hdr *hdr) {
|
|
957 if (!hdr)
|
|
958 return;
|
|
959
|
|
960 if (--hdr->ref_count > 0)
|
|
961 return;
|
|
962
|
|
963 if (ks_str(&hdr->text))
|
|
964 KS_FREE(&hdr->text);
|
|
965
|
|
966 if (hdr->h)
|
|
967 kh_destroy(sam_hdr, hdr->h);
|
|
968
|
|
969 if (hdr->ref_hash)
|
|
970 kh_destroy(m_s2i, hdr->ref_hash);
|
|
971
|
|
972 if (hdr->ref) {
|
|
973 int i;
|
|
974 for (i = 0; i < hdr->nref; i++)
|
|
975 if (hdr->ref[i].name)
|
|
976 free(hdr->ref[i].name);
|
|
977 free(hdr->ref);
|
|
978 }
|
|
979
|
|
980 if (hdr->rg_hash)
|
|
981 kh_destroy(m_s2i, hdr->rg_hash);
|
|
982
|
|
983 if (hdr->rg) {
|
|
984 int i;
|
|
985 for (i = 0; i < hdr->nrg; i++)
|
|
986 if (hdr->rg[i].name)
|
|
987 free(hdr->rg[i].name);
|
|
988 free(hdr->rg);
|
|
989 }
|
|
990
|
|
991 if (hdr->pg_hash)
|
|
992 kh_destroy(m_s2i, hdr->pg_hash);
|
|
993
|
|
994 if (hdr->pg) {
|
|
995 int i;
|
|
996 for (i = 0; i < hdr->npg; i++)
|
|
997 if (hdr->pg[i].name)
|
|
998 free(hdr->pg[i].name);
|
|
999 free(hdr->pg);
|
|
1000 }
|
|
1001
|
|
1002 if (hdr->pg_end)
|
|
1003 free(hdr->pg_end);
|
|
1004
|
|
1005 if (hdr->type_pool)
|
|
1006 pool_destroy(hdr->type_pool);
|
|
1007
|
|
1008 if (hdr->tag_pool)
|
|
1009 pool_destroy(hdr->tag_pool);
|
|
1010
|
|
1011 if (hdr->str_pool)
|
|
1012 string_pool_destroy(hdr->str_pool);
|
|
1013
|
|
1014 free(hdr);
|
|
1015 }
|
|
1016
|
|
1017 int sam_hdr_length(SAM_hdr *hdr) {
|
|
1018 return ks_len(&hdr->text);
|
|
1019 }
|
|
1020
|
|
1021 char *sam_hdr_str(SAM_hdr *hdr) {
|
|
1022 return ks_str(&hdr->text);
|
|
1023 }
|
|
1024
|
|
1025 /*
|
|
1026 * Looks up a reference sequence by name and returns the numerical ID.
|
|
1027 * Returns -1 if unknown reference.
|
|
1028 */
|
|
1029 int sam_hdr_name2ref(SAM_hdr *hdr, const char *ref) {
|
|
1030 khint_t k = kh_get(m_s2i, hdr->ref_hash, ref);
|
|
1031 return k == kh_end(hdr->ref_hash) ? -1 : kh_val(hdr->ref_hash, k);
|
|
1032 }
|
|
1033
|
|
1034 /*
|
|
1035 * Looks up a read-group by name and returns a pointer to the start of the
|
|
1036 * associated tag list.
|
|
1037 *
|
|
1038 * Returns NULL on failure
|
|
1039 */
|
|
1040 SAM_RG *sam_hdr_find_rg(SAM_hdr *hdr, const char *rg) {
|
|
1041 khint_t k = kh_get(m_s2i, hdr->rg_hash, rg);
|
|
1042 return k == kh_end(hdr->rg_hash)
|
|
1043 ? NULL
|
|
1044 : &hdr->rg[kh_val(hdr->rg_hash, k)];
|
|
1045 }
|
|
1046
|
|
1047
|
|
1048 /*
|
|
1049 * Fixes any PP links in @PG headers.
|
|
1050 * If the entries are in order then this doesn't need doing, but incase
|
|
1051 * our header is out of order this goes through the sh->pg[] array
|
|
1052 * setting the prev_id field.
|
|
1053 *
|
|
1054 * Note we can have multiple complete chains. This code should identify the
|
|
1055 * tails of these chains as these are the entries we have to link to in
|
|
1056 * subsequent PP records.
|
|
1057 *
|
|
1058 * Returns 0 on sucess
|
|
1059 * -1 on failure (indicating broken PG/PP records)
|
|
1060 */
|
|
1061 int sam_hdr_link_pg(SAM_hdr *hdr) {
|
|
1062 int i, j, ret = 0;
|
|
1063
|
|
1064 hdr->npg_end_alloc = hdr->npg;
|
|
1065 hdr->pg_end = realloc(hdr->pg_end, hdr->npg * sizeof(*hdr->pg_end));
|
|
1066 if (!hdr->pg_end)
|
|
1067 return -1;
|
|
1068
|
|
1069 for (i = 0; i < hdr->npg; i++)
|
|
1070 hdr->pg_end[i] = i;
|
|
1071
|
|
1072 for (i = 0; i < hdr->npg; i++) {
|
|
1073 khint_t k;
|
|
1074 SAM_hdr_tag *tag;
|
|
1075 char tmp;
|
|
1076
|
|
1077 for (tag = hdr->pg[i].tag; tag; tag = tag->next) {
|
|
1078 if (tag->str[0] == 'P' && tag->str[1] == 'P')
|
|
1079 break;
|
|
1080 }
|
|
1081 if (!tag) {
|
|
1082 /* Chain start points */
|
|
1083 continue;
|
|
1084 }
|
|
1085
|
|
1086 tmp = tag->str[tag->len]; tag->str[tag->len] = 0;
|
|
1087 k = kh_get(m_s2i, hdr->pg_hash, tag->str+3);
|
|
1088 tag->str[tag->len] = tmp;
|
|
1089
|
|
1090 if (k == kh_end(hdr->pg_hash)) {
|
|
1091 ret = -1;
|
|
1092 continue;
|
|
1093 }
|
|
1094
|
|
1095 hdr->pg[i].prev_id = hdr->pg[kh_val(hdr->pg_hash, k)].id;
|
|
1096 hdr->pg_end[kh_val(hdr->pg_hash, k)] = -1;
|
|
1097 }
|
|
1098
|
|
1099 for (i = j = 0; i < hdr->npg; i++) {
|
|
1100 if (hdr->pg_end[i] != -1)
|
|
1101 hdr->pg_end[j++] = hdr->pg_end[i];
|
|
1102 }
|
|
1103 hdr->npg_end = j;
|
|
1104
|
|
1105 return ret;
|
|
1106 }
|
|
1107
|
|
1108 /*
|
|
1109 * Returns a unique ID from a base name.
|
|
1110 *
|
|
1111 * The value returned is valid until the next call to
|
|
1112 * this function.
|
|
1113 */
|
|
1114 const char *sam_hdr_PG_ID(SAM_hdr *sh, const char *name) {
|
|
1115 khint_t k = kh_get(m_s2i, sh->pg_hash, name);
|
|
1116 if (k == kh_end(sh->pg_hash))
|
|
1117 return name;
|
|
1118
|
|
1119 do {
|
|
1120 sprintf(sh->ID_buf, "%.1000s.%d", name, sh->ID_cnt++);
|
|
1121 k = kh_get(m_s2i, sh->pg_hash, sh->ID_buf);
|
|
1122 } while (k == kh_end(sh->pg_hash));
|
|
1123
|
|
1124 return sh->ID_buf;
|
|
1125 }
|
|
1126
|
|
1127 /*
|
|
1128 * Add an @PG line.
|
|
1129 *
|
|
1130 * If we wish complete control over this use sam_hdr_add() directly. This
|
|
1131 * function uses that, but attempts to do a lot of tedious house work for
|
|
1132 * you too.
|
|
1133 *
|
|
1134 * - It will generate a suitable ID if the supplied one clashes.
|
|
1135 * - It will generate multiple @PG records if we have multiple PG chains.
|
|
1136 *
|
|
1137 * Call it as per sam_hdr_add() with a series of key,value pairs ending
|
|
1138 * in NULL.
|
|
1139 *
|
|
1140 * Returns 0 on success
|
|
1141 * -1 on failure
|
|
1142 */
|
|
1143 int sam_hdr_add_PG(SAM_hdr *sh, const char *name, ...) {
|
|
1144 va_list args;
|
|
1145 va_start(args, name);
|
|
1146
|
|
1147 if (sh->npg_end) {
|
|
1148 /* Copy ends array to avoid us looping while modifying it */
|
|
1149 int *end = malloc(sh->npg_end * sizeof(int));
|
|
1150 int i, nends = sh->npg_end;
|
|
1151
|
|
1152 if (!end)
|
|
1153 return -1;
|
|
1154
|
|
1155 memcpy(end, sh->pg_end, nends * sizeof(*end));
|
|
1156
|
|
1157 for (i = 0; i < nends; i++) {
|
|
1158 if (-1 == sam_hdr_vadd(sh, "PG", args,
|
|
1159 "ID", sam_hdr_PG_ID(sh, name),
|
|
1160 "PN", name,
|
|
1161 "PP", sh->pg[end[i]].name,
|
|
1162 NULL)) {
|
|
1163 free(end);
|
|
1164 return -1;
|
|
1165 }
|
|
1166 }
|
|
1167
|
|
1168 free(end);
|
|
1169 } else {
|
|
1170 if (-1 == sam_hdr_vadd(sh, "PG", args,
|
|
1171 "ID", sam_hdr_PG_ID(sh, name),
|
|
1172 "PN", name,
|
|
1173 NULL))
|
|
1174 return -1;
|
|
1175 }
|
|
1176
|
|
1177 //sam_hdr_dump(sh);
|
|
1178
|
|
1179 return 0;
|
|
1180 }
|
|
1181
|
|
1182 /*
|
|
1183 * A function to help with construction of CL tags in @PG records.
|
|
1184 * Takes an argc, argv pair and returns a single space-separated string.
|
|
1185 * This string should be deallocated by the calling function.
|
|
1186 *
|
|
1187 * Returns malloced char * on success
|
|
1188 * NULL on failure
|
|
1189 */
|
|
1190 char *stringify_argv(int argc, char *argv[]) {
|
|
1191 char *str, *cp;
|
|
1192 size_t nbytes = 1;
|
|
1193 int i, j;
|
|
1194
|
|
1195 /* Allocate */
|
|
1196 for (i = 0; i < argc; i++) {
|
|
1197 nbytes += strlen(argv[i]) + 1;
|
|
1198 }
|
|
1199 if (!(str = malloc(nbytes)))
|
|
1200 return NULL;
|
|
1201
|
|
1202 /* Copy */
|
|
1203 cp = str;
|
|
1204 for (i = 0; i < argc; i++) {
|
|
1205 j = 0;
|
|
1206 while (argv[i][j]) {
|
|
1207 if (argv[i][j] == '\t')
|
|
1208 *cp++ = ' ';
|
|
1209 else
|
|
1210 *cp++ = argv[i][j];
|
|
1211 j++;
|
|
1212 }
|
|
1213 *cp++ = ' ';
|
|
1214 }
|
|
1215 *cp++ = 0;
|
|
1216
|
|
1217 return str;
|
|
1218 }
|