Mercurial > repos > lsong10 > psiclass
comparison PsiCLASS-1.0.2/samtools-0.1.19/sample.c @ 0:903fc43d6227 draft default tip
Uploaded
author | lsong10 |
---|---|
date | Fri, 26 Mar 2021 16:52:45 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:903fc43d6227 |
---|---|
1 #include <stdlib.h> | |
2 #include <string.h> | |
3 #include "sample.h" | |
4 #include "khash.h" | |
5 KHASH_MAP_INIT_STR(sm, int) | |
6 | |
7 bam_sample_t *bam_smpl_init(void) | |
8 { | |
9 bam_sample_t *s; | |
10 s = calloc(1, sizeof(bam_sample_t)); | |
11 s->rg2smid = kh_init(sm); | |
12 s->sm2id = kh_init(sm); | |
13 return s; | |
14 } | |
15 | |
16 void bam_smpl_destroy(bam_sample_t *sm) | |
17 { | |
18 int i; | |
19 khint_t k; | |
20 khash_t(sm) *rg2smid = (khash_t(sm)*)sm->rg2smid; | |
21 if (sm == 0) return; | |
22 for (i = 0; i < sm->n; ++i) free(sm->smpl[i]); | |
23 free(sm->smpl); | |
24 for (k = kh_begin(rg2smid); k != kh_end(rg2smid); ++k) | |
25 if (kh_exist(rg2smid, k)) free((char*)kh_key(rg2smid, k)); | |
26 kh_destroy(sm, sm->rg2smid); | |
27 kh_destroy(sm, sm->sm2id); | |
28 free(sm); | |
29 } | |
30 | |
31 static void add_pair(bam_sample_t *sm, khash_t(sm) *sm2id, const char *key, const char *val) | |
32 { | |
33 khint_t k_rg, k_sm; | |
34 int ret; | |
35 khash_t(sm) *rg2smid = (khash_t(sm)*)sm->rg2smid; | |
36 k_rg = kh_get(sm, rg2smid, key); | |
37 if (k_rg != kh_end(rg2smid)) return; // duplicated @RG-ID | |
38 k_rg = kh_put(sm, rg2smid, strdup(key), &ret); | |
39 k_sm = kh_get(sm, sm2id, val); | |
40 if (k_sm == kh_end(sm2id)) { // absent | |
41 if (sm->n == sm->m) { | |
42 sm->m = sm->m? sm->m<<1 : 1; | |
43 sm->smpl = realloc(sm->smpl, sizeof(void*) * sm->m); | |
44 } | |
45 sm->smpl[sm->n] = strdup(val); | |
46 k_sm = kh_put(sm, sm2id, sm->smpl[sm->n], &ret); | |
47 kh_val(sm2id, k_sm) = sm->n++; | |
48 } | |
49 kh_val(rg2smid, k_rg) = kh_val(sm2id, k_sm); | |
50 } | |
51 | |
52 int bam_smpl_add(bam_sample_t *sm, const char *fn, const char *txt) | |
53 { | |
54 const char *p = txt, *q, *r; | |
55 kstring_t buf, first_sm; | |
56 int n = 0; | |
57 khash_t(sm) *sm2id = (khash_t(sm)*)sm->sm2id; | |
58 if (txt == 0) { | |
59 add_pair(sm, sm2id, fn, fn); | |
60 return 0; | |
61 } | |
62 memset(&buf, 0, sizeof(kstring_t)); | |
63 memset(&first_sm, 0, sizeof(kstring_t)); | |
64 while ((q = strstr(p, "@RG")) != 0) { | |
65 p = q + 3; | |
66 r = q = 0; | |
67 if ((q = strstr(p, "\tID:")) != 0) q += 4; | |
68 if ((r = strstr(p, "\tSM:")) != 0) r += 4; | |
69 if (r && q) { | |
70 char *u, *v; | |
71 int oq, or; | |
72 for (u = (char*)q; *u && *u != '\t' && *u != '\n'; ++u); | |
73 for (v = (char*)r; *v && *v != '\t' && *v != '\n'; ++v); | |
74 oq = *u; or = *v; *u = *v = '\0'; | |
75 buf.l = 0; kputs(fn, &buf); kputc('/', &buf); kputs(q, &buf); | |
76 add_pair(sm, sm2id, buf.s, r); | |
77 if ( !first_sm.s ) | |
78 kputs(r,&first_sm); | |
79 *u = oq; *v = or; | |
80 } else break; | |
81 p = q > r? q : r; | |
82 ++n; | |
83 } | |
84 if (n == 0) add_pair(sm, sm2id, fn, fn); | |
85 // If there is only one RG tag present in the header and reads are not annotated, don't refuse to work but | |
86 // use the tag instead. | |
87 else if ( n==1 && first_sm.s ) | |
88 add_pair(sm,sm2id,fn,first_sm.s); | |
89 if ( first_sm.s ) | |
90 free(first_sm.s); | |
91 | |
92 // add_pair(sm, sm2id, fn, fn); | |
93 free(buf.s); | |
94 return 0; | |
95 } | |
96 | |
97 int bam_smpl_rg2smid(const bam_sample_t *sm, const char *fn, const char *rg, kstring_t *str) | |
98 { | |
99 khint_t k; | |
100 khash_t(sm) *rg2smid = (khash_t(sm)*)sm->rg2smid; | |
101 if (rg) { | |
102 str->l = 0; | |
103 kputs(fn, str); kputc('/', str); kputs(rg, str); | |
104 k = kh_get(sm, rg2smid, str->s); | |
105 } else k = kh_get(sm, rg2smid, fn); | |
106 return k == kh_end(rg2smid)? -1 : kh_val(rg2smid, k); | |
107 } |