| 0 | 1 #include <stdlib.h> | 
|  | 2 #include <string.h> | 
|  | 3 #include "sample.h" | 
|  | 4 #include "khash.h" | 
|  | 5 KHASH_MAP_INIT_STR(sm, int) | 
|  | 6 | 
|  | 7 bam_sample_t *bam_smpl_init(void) | 
|  | 8 { | 
|  | 9 	bam_sample_t *s; | 
|  | 10 	s = calloc(1, sizeof(bam_sample_t)); | 
|  | 11 	s->rg2smid = kh_init(sm); | 
|  | 12 	s->sm2id = kh_init(sm); | 
|  | 13 	return s; | 
|  | 14 } | 
|  | 15 | 
|  | 16 void bam_smpl_destroy(bam_sample_t *sm) | 
|  | 17 { | 
|  | 18 	int i; | 
|  | 19 	khint_t k; | 
|  | 20 	khash_t(sm) *rg2smid = (khash_t(sm)*)sm->rg2smid; | 
|  | 21 	if (sm == 0) return; | 
|  | 22 	for (i = 0; i < sm->n; ++i) free(sm->smpl[i]); | 
|  | 23 	free(sm->smpl); | 
|  | 24 	for (k = kh_begin(rg2smid); k != kh_end(rg2smid); ++k) | 
|  | 25 		if (kh_exist(rg2smid, k)) free((char*)kh_key(rg2smid, k)); | 
|  | 26 	kh_destroy(sm, sm->rg2smid); | 
|  | 27 	kh_destroy(sm, sm->sm2id); | 
|  | 28 	free(sm); | 
|  | 29 } | 
|  | 30 | 
|  | 31 static void add_pair(bam_sample_t *sm, khash_t(sm) *sm2id, const char *key, const char *val) | 
|  | 32 { | 
|  | 33 	khint_t k_rg, k_sm; | 
|  | 34 	int ret; | 
|  | 35 	khash_t(sm) *rg2smid = (khash_t(sm)*)sm->rg2smid; | 
|  | 36 	k_rg = kh_get(sm, rg2smid, key); | 
|  | 37 	if (k_rg != kh_end(rg2smid)) return; // duplicated @RG-ID | 
|  | 38 	k_rg = kh_put(sm, rg2smid, strdup(key), &ret); | 
|  | 39 	k_sm = kh_get(sm, sm2id, val); | 
|  | 40 	if (k_sm == kh_end(sm2id)) { // absent | 
|  | 41 		if (sm->n == sm->m) { | 
|  | 42 			sm->m = sm->m? sm->m<<1 : 1; | 
|  | 43 			sm->smpl = realloc(sm->smpl, sizeof(void*) * sm->m); | 
|  | 44 		} | 
|  | 45 		sm->smpl[sm->n] = strdup(val); | 
|  | 46 		k_sm = kh_put(sm, sm2id, sm->smpl[sm->n], &ret); | 
|  | 47 		kh_val(sm2id, k_sm) = sm->n++; | 
|  | 48 	} | 
|  | 49 	kh_val(rg2smid, k_rg) = kh_val(sm2id, k_sm); | 
|  | 50 } | 
|  | 51 | 
|  | 52 int bam_smpl_add(bam_sample_t *sm, const char *fn, const char *txt) | 
|  | 53 { | 
|  | 54 	const char *p = txt, *q, *r; | 
|  | 55 	kstring_t buf; | 
|  | 56 	int n = 0; | 
|  | 57 	khash_t(sm) *sm2id = (khash_t(sm)*)sm->sm2id; | 
|  | 58 	memset(&buf, 0, sizeof(kstring_t)); | 
|  | 59 	while ((q = strstr(p, "@RG")) != 0) { | 
|  | 60 		p = q + 3; | 
|  | 61 		r = q = 0; | 
|  | 62 		if ((q = strstr(p, "\tID:")) != 0) q += 4; | 
|  | 63 		if ((r = strstr(p, "\tSM:")) != 0) r += 4; | 
|  | 64 		if (r && q) { | 
|  | 65 			char *u, *v; | 
|  | 66 			int oq, or; | 
|  | 67 			for (u = (char*)q; *u && *u != '\t' && *u != '\n'; ++u); | 
|  | 68 			for (v = (char*)r; *v && *v != '\t' && *v != '\n'; ++v); | 
|  | 69 			oq = *u; or = *v; *u = *v = '\0'; | 
|  | 70 			buf.l = 0; kputs(fn, &buf); kputc('/', &buf); kputs(q, &buf); | 
|  | 71 			add_pair(sm, sm2id, buf.s, r); | 
|  | 72 			*u = oq; *v = or; | 
|  | 73 		} else break; | 
|  | 74 		p = q > r? q : r; | 
|  | 75 		++n; | 
|  | 76 	} | 
|  | 77 	if (n == 0) add_pair(sm, sm2id, fn, fn); | 
|  | 78 //	add_pair(sm, sm2id, fn, fn); | 
|  | 79 	free(buf.s); | 
|  | 80 	return 0; | 
|  | 81 } | 
|  | 82 | 
|  | 83 int bam_smpl_rg2smid(const bam_sample_t *sm, const char *fn, const char *rg, kstring_t *str) | 
|  | 84 { | 
|  | 85 	khint_t k; | 
|  | 86 	khash_t(sm) *rg2smid = (khash_t(sm)*)sm->rg2smid; | 
|  | 87 	if (rg) { | 
|  | 88 		str->l = 0; | 
|  | 89 		kputs(fn, str); kputc('/', str); kputs(rg, str); | 
|  | 90 		k = kh_get(sm, rg2smid, str->s); | 
|  | 91 	} else k = kh_get(sm, rg2smid, fn); | 
|  | 92 	return k == kh_end(rg2smid)? -1 : kh_val(rg2smid, k); | 
|  | 93 } |