Mercurial > repos > lsong10 > psiclass
comparison PsiCLASS-1.0.2/samtools-0.1.19/sam.c @ 0:903fc43d6227 draft default tip
Uploaded
author | lsong10 |
---|---|
date | Fri, 26 Mar 2021 16:52:45 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:903fc43d6227 |
---|---|
1 #include <string.h> | |
2 #include <unistd.h> | |
3 #include "faidx.h" | |
4 #include "sam.h" | |
5 | |
6 #define TYPE_BAM 1 | |
7 #define TYPE_READ 2 | |
8 | |
9 bam_header_t *bam_header_dup(const bam_header_t *h0) | |
10 { | |
11 bam_header_t *h; | |
12 int i; | |
13 h = bam_header_init(); | |
14 *h = *h0; | |
15 h->hash = h->dict = h->rg2lib = 0; | |
16 h->text = (char*)calloc(h->l_text + 1, 1); | |
17 memcpy(h->text, h0->text, h->l_text); | |
18 h->target_len = (uint32_t*)calloc(h->n_targets, 4); | |
19 h->target_name = (char**)calloc(h->n_targets, sizeof(void*)); | |
20 for (i = 0; i < h->n_targets; ++i) { | |
21 h->target_len[i] = h0->target_len[i]; | |
22 h->target_name[i] = strdup(h0->target_name[i]); | |
23 } | |
24 return h; | |
25 } | |
26 static void append_header_text(bam_header_t *header, char* text, int len) | |
27 { | |
28 int x = header->l_text + 1; | |
29 int y = header->l_text + len + 1; // 1 byte null | |
30 if (text == 0) return; | |
31 kroundup32(x); | |
32 kroundup32(y); | |
33 if (x < y) header->text = (char*)realloc(header->text, y); | |
34 strncpy(header->text + header->l_text, text, len); // we cannot use strcpy() here. | |
35 header->l_text += len; | |
36 header->text[header->l_text] = 0; | |
37 } | |
38 | |
39 int samthreads(samfile_t *fp, int n_threads, int n_sub_blks) | |
40 { | |
41 if (!(fp->type&1) || (fp->type&2)) return -1; | |
42 bgzf_mt(fp->x.bam, n_threads, n_sub_blks); | |
43 return 0; | |
44 } | |
45 | |
46 samfile_t *samopen(const char *fn, const char *mode, const void *aux) | |
47 { | |
48 samfile_t *fp; | |
49 fp = (samfile_t*)calloc(1, sizeof(samfile_t)); | |
50 if (strchr(mode, 'r')) { // read | |
51 fp->type |= TYPE_READ; | |
52 if (strchr(mode, 'b')) { // binary | |
53 fp->type |= TYPE_BAM; | |
54 fp->x.bam = strcmp(fn, "-")? bam_open(fn, "r") : bam_dopen(fileno(stdin), "r"); | |
55 if (fp->x.bam == 0) goto open_err_ret; | |
56 fp->header = bam_header_read(fp->x.bam); | |
57 } else { // text | |
58 fp->x.tamr = sam_open(fn); | |
59 if (fp->x.tamr == 0) goto open_err_ret; | |
60 fp->header = sam_header_read(fp->x.tamr); | |
61 if (fp->header->n_targets == 0) { // no @SQ fields | |
62 if (aux) { // check if aux is present | |
63 bam_header_t *textheader = fp->header; | |
64 fp->header = sam_header_read2((const char*)aux); | |
65 if (fp->header == 0) goto open_err_ret; | |
66 append_header_text(fp->header, textheader->text, textheader->l_text); | |
67 bam_header_destroy(textheader); | |
68 } | |
69 if (fp->header->n_targets == 0 && bam_verbose >= 1) | |
70 fprintf(stderr, "[samopen] no @SQ lines in the header.\n"); | |
71 } else if (bam_verbose >= 2) fprintf(stderr, "[samopen] SAM header is present: %d sequences.\n", fp->header->n_targets); | |
72 } | |
73 } else if (strchr(mode, 'w')) { // write | |
74 fp->header = bam_header_dup((const bam_header_t*)aux); | |
75 if (strchr(mode, 'b')) { // binary | |
76 char bmode[3]; | |
77 int i, compress_level = -1; | |
78 for (i = 0; mode[i]; ++i) if (mode[i] >= '0' && mode[i] <= '9') break; | |
79 if (mode[i]) compress_level = mode[i] - '0'; | |
80 if (strchr(mode, 'u')) compress_level = 0; | |
81 bmode[0] = 'w'; bmode[1] = compress_level < 0? 0 : compress_level + '0'; bmode[2] = 0; | |
82 fp->type |= TYPE_BAM; | |
83 fp->x.bam = strcmp(fn, "-")? bam_open(fn, bmode) : bam_dopen(fileno(stdout), bmode); | |
84 if (fp->x.bam == 0) goto open_err_ret; | |
85 bam_header_write(fp->x.bam, fp->header); | |
86 } else { // text | |
87 // open file | |
88 fp->x.tamw = strcmp(fn, "-")? fopen(fn, "w") : stdout; | |
89 if (fp->x.tamw == 0) goto open_err_ret; | |
90 if (strchr(mode, 'X')) fp->type |= BAM_OFSTR<<2; | |
91 else if (strchr(mode, 'x')) fp->type |= BAM_OFHEX<<2; | |
92 else fp->type |= BAM_OFDEC<<2; | |
93 // write header | |
94 if (strchr(mode, 'h')) { | |
95 int i; | |
96 bam_header_t *alt; | |
97 // parse the header text | |
98 alt = bam_header_init(); | |
99 alt->l_text = fp->header->l_text; alt->text = fp->header->text; | |
100 sam_header_parse(alt); | |
101 alt->l_text = 0; alt->text = 0; | |
102 // check if there are @SQ lines in the header | |
103 fwrite(fp->header->text, 1, fp->header->l_text, fp->x.tamw); // FIXME: better to skip the trailing NULL | |
104 if (alt->n_targets) { // then write the header text without dumping ->target_{name,len} | |
105 if (alt->n_targets != fp->header->n_targets && bam_verbose >= 1) | |
106 fprintf(stderr, "[samopen] inconsistent number of target sequences. Output the text header.\n"); | |
107 } else { // then dump ->target_{name,len} | |
108 for (i = 0; i < fp->header->n_targets; ++i) | |
109 fprintf(fp->x.tamw, "@SQ\tSN:%s\tLN:%d\n", fp->header->target_name[i], fp->header->target_len[i]); | |
110 } | |
111 bam_header_destroy(alt); | |
112 } | |
113 } | |
114 } | |
115 return fp; | |
116 | |
117 open_err_ret: | |
118 free(fp); | |
119 return 0; | |
120 } | |
121 | |
122 void samclose(samfile_t *fp) | |
123 { | |
124 if (fp == 0) return; | |
125 if (fp->header) bam_header_destroy(fp->header); | |
126 if (fp->type & TYPE_BAM) bam_close(fp->x.bam); | |
127 else if (fp->type & TYPE_READ) sam_close(fp->x.tamr); | |
128 else fclose(fp->x.tamw); | |
129 free(fp); | |
130 } | |
131 | |
132 int samread(samfile_t *fp, bam1_t *b) | |
133 { | |
134 if (fp == 0 || !(fp->type & TYPE_READ)) return -1; // not open for reading | |
135 if (fp->type & TYPE_BAM) return bam_read1(fp->x.bam, b); | |
136 else return sam_read1(fp->x.tamr, fp->header, b); | |
137 } | |
138 | |
139 int samwrite(samfile_t *fp, const bam1_t *b) | |
140 { | |
141 if (fp == 0 || (fp->type & TYPE_READ)) return -1; // not open for writing | |
142 if (fp->type & TYPE_BAM) return bam_write1(fp->x.bam, b); | |
143 else { | |
144 char *s = bam_format1_core(fp->header, b, fp->type>>2&3); | |
145 int l = strlen(s); | |
146 fputs(s, fp->x.tamw); fputc('\n', fp->x.tamw); | |
147 free(s); | |
148 return l + 1; | |
149 } | |
150 } | |
151 | |
152 int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *func_data) | |
153 { | |
154 bam_plbuf_t *buf; | |
155 int ret; | |
156 bam1_t *b; | |
157 b = bam_init1(); | |
158 buf = bam_plbuf_init(func, func_data); | |
159 bam_plbuf_set_mask(buf, mask); | |
160 while ((ret = samread(fp, b)) >= 0) | |
161 bam_plbuf_push(b, buf); | |
162 bam_plbuf_push(0, buf); | |
163 bam_plbuf_destroy(buf); | |
164 bam_destroy1(b); | |
165 return 0; | |
166 } | |
167 | |
168 char *samfaipath(const char *fn_ref) | |
169 { | |
170 char *fn_list = 0; | |
171 if (fn_ref == 0) return 0; | |
172 fn_list = calloc(strlen(fn_ref) + 5, 1); | |
173 strcat(strcpy(fn_list, fn_ref), ".fai"); | |
174 if (access(fn_list, R_OK) == -1) { // fn_list is unreadable | |
175 if (access(fn_ref, R_OK) == -1) { | |
176 fprintf(stderr, "[samfaipath] fail to read file %s.\n", fn_ref); | |
177 } else { | |
178 if (bam_verbose >= 3) fprintf(stderr, "[samfaipath] build FASTA index...\n"); | |
179 if (fai_build(fn_ref) == -1) { | |
180 fprintf(stderr, "[samfaipath] fail to build FASTA index.\n"); | |
181 free(fn_list); fn_list = 0; | |
182 } | |
183 } | |
184 } | |
185 return fn_list; | |
186 } |