annotate SNV/SNVMix2_source/SNVMix2-v0.12.1-rc1/samtools-0.1.6/sam.c @ 7:351b3acadd17 default tip

Uploaded
author ryanmorin
date Tue, 18 Oct 2011 18:33:15 -0400
parents 74f5ea818cea
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1 #include <string.h>
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
2 #include <unistd.h>
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
3 #include "faidx.h"
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
4 #include "sam.h"
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
5
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
/*
 * Bit flags kept in the low two bits of samfile_t::type.
 *   TYPE_BAM  - the handle wraps a binary (BAM) stream rather than text.
 *   TYPE_READ - the handle was opened for reading rather than writing.
 * In this file the bits from position 2 upwards carry the text output
 * format (a BAM_OF* value shifted left by 2).
 */
#define TYPE_BAM  1
#define TYPE_READ 2
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
8
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
9 bam_header_t *bam_header_dup(const bam_header_t *h0)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
10 {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
11 bam_header_t *h;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
12 int i;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
13 h = bam_header_init();
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
14 *h = *h0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
15 h->hash = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
16 h->text = (char*)calloc(h->l_text + 1, 1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
17 memcpy(h->text, h0->text, h->l_text);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
18 h->target_len = (uint32_t*)calloc(h->n_targets, 4);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
19 h->target_name = (char**)calloc(h->n_targets, sizeof(void*));
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
20 for (i = 0; i < h->n_targets; ++i) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
21 h->target_len[i] = h0->target_len[i];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
22 h->target_name[i] = strdup(h0->target_name[i]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
23 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
24 if (h0->rg2lib) h->rg2lib = bam_strmap_dup(h0->rg2lib);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
25 return h;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
26 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
27 static void append_header_text(bam_header_t *header, char* text, int len)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
28 {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
29 int x = header->l_text + 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
30 int y = header->l_text + len + 1; // 1 byte null
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
31 if (text == 0) return;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
32 kroundup32(x);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
33 kroundup32(y);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
34 if (x < y) header->text = (char*)realloc(header->text, y);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
35 strncpy(header->text + header->l_text, text, len); // we cannot use strcpy() here.
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
36 header->l_text += len;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
37 header->text[header->l_text] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
38 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
39
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
40 samfile_t *samopen(const char *fn, const char *mode, const void *aux)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
41 {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
42 samfile_t *fp;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
43 fp = (samfile_t*)calloc(1, sizeof(samfile_t));
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
44 if (mode[0] == 'r') { // read
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
45 fp->type |= TYPE_READ;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
46 if (mode[1] == 'b') { // binary
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
47 fp->type |= TYPE_BAM;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
48 fp->x.bam = strcmp(fn, "-")? bam_open(fn, "r") : bam_dopen(fileno(stdin), "r");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
49 if (fp->x.bam == 0) goto open_err_ret;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
50 fp->header = bam_header_read(fp->x.bam);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
51 } else { // text
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
52 fp->x.tamr = sam_open(fn);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
53 if (fp->x.tamr == 0) goto open_err_ret;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
54 fp->header = sam_header_read(fp->x.tamr);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
55 if (fp->header->n_targets == 0) { // no @SQ fields
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
56 if (aux) { // check if aux is present
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
57 bam_header_t *textheader = fp->header;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
58 fp->header = sam_header_read2((const char*)aux);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
59 append_header_text(fp->header, textheader->text, textheader->l_text);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
60 bam_header_destroy(textheader);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
61 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
62 if (fp->header->n_targets == 0)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
63 fprintf(stderr, "[samopen] no @SQ lines in the header.\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
64 } else fprintf(stderr, "[samopen] SAM header is present: %d sequences.\n", fp->header->n_targets);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
65 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
66 sam_header_parse_rg(fp->header);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
67 } else if (mode[0] == 'w') { // write
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
68 fp->header = bam_header_dup((const bam_header_t*)aux);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
69 if (mode[1] == 'b') { // binary
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
70 char bmode[3];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
71 bmode[0] = 'w'; bmode[1] = strstr(mode, "u")? 'u' : 0; bmode[2] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
72 fp->type |= TYPE_BAM;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
73 fp->x.bam = strcmp(fn, "-")? bam_open(fn, bmode) : bam_dopen(fileno(stdout), bmode);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
74 if (fp->x.bam == 0) goto open_err_ret;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
75 bam_header_write(fp->x.bam, fp->header);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
76 } else { // text
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
77 // open file
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
78 fp->x.tamw = strcmp(fn, "-")? fopen(fn, "w") : stdout;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
79 if (fp->x.tamr == 0) goto open_err_ret;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
80 if (strstr(mode, "X")) fp->type |= BAM_OFSTR<<2;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
81 else if (strstr(mode, "x")) fp->type |= BAM_OFHEX<<2;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
82 else fp->type |= BAM_OFDEC<<2;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
83 // write header
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
84 if (strstr(mode, "h")) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
85 int i;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
86 bam_header_t *alt;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
87 // parse the header text
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
88 alt = bam_header_init();
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
89 alt->l_text = fp->header->l_text; alt->text = fp->header->text;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
90 sam_header_parse(alt);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
91 alt->l_text = 0; alt->text = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
92 // check if there are @SQ lines in the header
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
93 fwrite(fp->header->text, 1, fp->header->l_text, fp->x.tamw);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
94 if (alt->n_targets) { // then write the header text without dumping ->target_{name,len}
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
95 if (alt->n_targets != fp->header->n_targets)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
96 fprintf(stderr, "[samopen] inconsistent number of target sequences.\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
97 } else { // then dump ->target_{name,len}
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
98 for (i = 0; i < fp->header->n_targets; ++i)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
99 fprintf(fp->x.tamw, "@SQ\tSN:%s\tLN:%d\n", fp->header->target_name[i], fp->header->target_len[i]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
100 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
101 bam_header_destroy(alt);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
102 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
103 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
104 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
105 return fp;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
106
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
107 open_err_ret:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
108 free(fp);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
109 return 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
110 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
111
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
112 void samclose(samfile_t *fp)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
113 {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
114 if (fp == 0) return;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
115 if (fp->header) bam_header_destroy(fp->header);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
116 if (fp->type & TYPE_BAM) bam_close(fp->x.bam);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
117 else if (fp->type & TYPE_READ) sam_close(fp->x.tamr);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
118 else fclose(fp->x.tamw);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
119 free(fp);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
120 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
121
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
122 int samread(samfile_t *fp, bam1_t *b)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
123 {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
124 if (fp == 0 || !(fp->type & TYPE_READ)) return -1; // not open for reading
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
125 if (fp->type & TYPE_BAM) return bam_read1(fp->x.bam, b);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
126 else return sam_read1(fp->x.tamr, fp->header, b);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
127 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
128
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
129 int samwrite(samfile_t *fp, const bam1_t *b)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
130 {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
131 if (fp == 0 || (fp->type & TYPE_READ)) return -1; // not open for writing
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
132 if (fp->type & TYPE_BAM) return bam_write1(fp->x.bam, b);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
133 else {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
134 char *s = bam_format1_core(fp->header, b, fp->type>>2&3);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
135 int l = strlen(s);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
136 fputs(s, fp->x.tamw); fputc('\n', fp->x.tamw);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
137 free(s);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
138 return l + 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
139 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
140 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
141
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
142 int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *func_data)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
143 {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
144 bam_plbuf_t *buf;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
145 int ret;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
146 bam1_t *b;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
147 b = bam_init1();
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
148 buf = bam_plbuf_init(func, func_data);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
149 bam_plbuf_set_mask(buf, mask);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
150 while ((ret = samread(fp, b)) >= 0)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
151 bam_plbuf_push(b, buf);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
152 bam_plbuf_push(0, buf);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
153 bam_plbuf_destroy(buf);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
154 bam_destroy1(b);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
155 return 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
156 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
157
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
/**
 * Build the path of the FASTA index ("<fn_ref>.fai") for a reference file,
 * creating the index with fai_build() when it is not readable yet.
 *
 * @param fn_ref  path of the reference FASTA file; may be NULL
 * @return        heap-allocated "<fn_ref>.fai" that the caller must free(), or
 *                NULL when fn_ref is NULL, memory cannot be allocated, or
 *                fai_build() fails.  When neither the index nor fn_ref is
 *                readable a message is printed but the path is still returned.
 */
char *samfaipath(const char *fn_ref)
{
	static const char suffix[] = ".fai";
	char *fn_list;
	size_t ref_len;

	if (fn_ref == 0) return 0;
	ref_len = strlen(fn_ref);
	/* sizeof suffix already accounts for the terminating NUL */
	fn_list = (char*)calloc(ref_len + sizeof suffix, 1);
	if (fn_list == 0) {
		fprintf(stderr, "[samfaipath] fail to allocate memory.\n");
		return 0;
	}
	memcpy(fn_list, fn_ref, ref_len);
	memcpy(fn_list + ref_len, suffix, sizeof suffix);

	if (access(fn_list, R_OK) == -1) { // fn_list is unreadable
		if (access(fn_ref, R_OK) == -1) {
			fprintf(stderr, "[samfaipath] fail to read file %s.\n", fn_ref);
		} else {
			fprintf(stderr, "[samfaipath] build FASTA index...\n");
			if (fai_build(fn_ref) == -1) {
				fprintf(stderr, "[samfaipath] fail to build FASTA index.\n");
				free(fn_list); fn_list = 0;
			}
		}
	}
	return fn_list;
}