annotate pyPRADA_1.2/tools/samtools-0.1.16/bam.h @ 0:acc2ca1a3ba4

Uploaded
author siyuan
date Thu, 20 Feb 2014 00:44:58 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
1 /* The MIT License
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
2
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
3 Copyright (c) 2008-2010 Genome Research Ltd (GRL).
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
4
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
5 Permission is hereby granted, free of charge, to any person obtaining
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
6 a copy of this software and associated documentation files (the
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
7 "Software"), to deal in the Software without restriction, including
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
8 without limitation the rights to use, copy, modify, merge, publish,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
9 distribute, sublicense, and/or sell copies of the Software, and to
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
10 permit persons to whom the Software is furnished to do so, subject to
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
11 the following conditions:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
12
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
13 The above copyright notice and this permission notice shall be
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
14 included in all copies or substantial portions of the Software.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
15
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
23 SOFTWARE.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
24 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
25
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
26 /* Contact: Heng Li <lh3@sanger.ac.uk> */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
27
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
28 #ifndef BAM_BAM_H
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
29 #define BAM_BAM_H
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
30
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
31 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
32 @header
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
33
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
34 BAM library provides I/O and various operations on manipulating files
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
35 in the BAM (Binary Alignment/Mapping) or SAM (Sequence Alignment/Map)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
36 format. It now supports importing from or exporting to SAM, sorting,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
37 merging, generating pileup, and quick retrieval of reads overlapping
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
38 with a specified region.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
39
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
40 @copyright Genome Research Ltd.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
41 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
42
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
43 #define BAM_VERSION "0.1.16 (r963:234)"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
44
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
45 #include <stdint.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
46 #include <stdlib.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
47 #include <string.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
48 #include <stdio.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
49
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
50 #ifndef BAM_LITE
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
51 #define BAM_VIRTUAL_OFFSET16
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
52 #include "bgzf.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
53 /*! @abstract BAM file handler */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
54 typedef BGZF *bamFile;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
55 #define bam_open(fn, mode) bgzf_open(fn, mode)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
56 #define bam_dopen(fd, mode) bgzf_fdopen(fd, mode)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
57 #define bam_close(fp) bgzf_close(fp)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
58 #define bam_read(fp, buf, size) bgzf_read(fp, buf, size)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
59 #define bam_write(fp, buf, size) bgzf_write(fp, buf, size)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
60 #define bam_tell(fp) bgzf_tell(fp)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
61 #define bam_seek(fp, pos, dir) bgzf_seek(fp, pos, dir)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
62 #else
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
63 #define BAM_TRUE_OFFSET
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
64 #include <zlib.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
65 typedef gzFile bamFile;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
66 #define bam_open(fn, mode) gzopen(fn, mode)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
67 #define bam_dopen(fd, mode) gzdopen(fd, mode)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
68 #define bam_close(fp) gzclose(fp)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
69 #define bam_read(fp, buf, size) gzread(fp, buf, size)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
70 /* no bam_write/bam_tell/bam_seek() here */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
71 #endif
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
72
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
73 /*! @typedef
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
74 @abstract Structure for the alignment header.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
75 @field n_targets number of reference sequences
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
76 @field target_name names of the reference sequences
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
77 @field target_len lengths of the reference sequences
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
78 @field dict header dictionary
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
79 @field hash hash table for fast name lookup
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
80 @field rg2lib hash table for @RG-ID -> LB lookup
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
81 @field l_text length of the plain text in the header
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
82 @field text plain text
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
83
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
84 @discussion Field hash points to null by default. It is a private
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
85 member.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
86 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
87 typedef struct {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
88 int32_t n_targets;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
89 char **target_name;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
90 uint32_t *target_len;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
91 void *dict, *hash, *rg2lib;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
92 size_t l_text, n_text;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
93 char *text;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
94 } bam_header_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
95
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
96 /*! @abstract the read is paired in sequencing, no matter whether it is mapped in a pair */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
97 #define BAM_FPAIRED 1
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
98 /*! @abstract the read is mapped in a proper pair */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
99 #define BAM_FPROPER_PAIR 2
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
100 /*! @abstract the read itself is unmapped; conflictive with BAM_FPROPER_PAIR */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
101 #define BAM_FUNMAP 4
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
102 /*! @abstract the mate is unmapped */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
103 #define BAM_FMUNMAP 8
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
104 /*! @abstract the read is mapped to the reverse strand */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
105 #define BAM_FREVERSE 16
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
106 /*! @abstract the mate is mapped to the reverse strand */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
107 #define BAM_FMREVERSE 32
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
108 /*! @abstract this is read1 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
109 #define BAM_FREAD1 64
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
110 /*! @abstract this is read2 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
111 #define BAM_FREAD2 128
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
112 /*! @abstract not primary alignment */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
113 #define BAM_FSECONDARY 256
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
114 /*! @abstract QC failure */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
115 #define BAM_FQCFAIL 512
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
116 /*! @abstract optical or PCR duplicate */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
117 #define BAM_FDUP 1024
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
118
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
119 #define BAM_OFDEC 0
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
120 #define BAM_OFHEX 1
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
121 #define BAM_OFSTR 2
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
122
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
123 /*! @abstract default mask for pileup */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
124 #define BAM_DEF_MASK (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
125
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
126 #define BAM_CORE_SIZE sizeof(bam1_core_t)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
127
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
128 /**
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
129 * Describing how CIGAR operation/length is packed in a 32-bit integer.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
130 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
131 #define BAM_CIGAR_SHIFT 4
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
132 #define BAM_CIGAR_MASK ((1 << BAM_CIGAR_SHIFT) - 1)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
133
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
134 /*
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
135 CIGAR operations.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
136 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
137 /*! @abstract CIGAR: match */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
138 #define BAM_CMATCH 0
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
139 /*! @abstract CIGAR: insertion to the reference */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
140 #define BAM_CINS 1
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
141 /*! @abstract CIGAR: deletion from the reference */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
142 #define BAM_CDEL 2
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
143 /*! @abstract CIGAR: skip on the reference (e.g. spliced alignment) */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
144 #define BAM_CREF_SKIP 3
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
145 /*! @abstract CIGAR: clip on the read with clipped sequence present in qseq */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
146 #define BAM_CSOFT_CLIP 4
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
147 /*! @abstract CIGAR: clip on the read with clipped sequence trimmed off */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
148 #define BAM_CHARD_CLIP 5
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
149 /*! @abstract CIGAR: padding */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
150 #define BAM_CPAD 6
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
151
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
152 /*! @typedef
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
153 @abstract Structure for core alignment information.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
154 @field tid chromosome ID, defined by bam_header_t
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
155 @field pos 0-based leftmost coordinate
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
156 @field strand strand; 0 for forward and 1 otherwise (not a struct member; derived from flag, see bam1_strand())
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
157 @field bin bin calculated by bam_reg2bin()
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
158 @field qual mapping quality
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
159 @field l_qname length of the query name
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
160 @field flag bitwise flag
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
161 @field n_cigar number of CIGAR operations
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
162 @field l_qseq length of the query sequence (read)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
163 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
164 typedef struct {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
165 int32_t tid;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
166 int32_t pos;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
167 uint32_t bin:16, qual:8, l_qname:8;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
168 uint32_t flag:16, n_cigar:16;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
169 int32_t l_qseq;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
170 int32_t mtid;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
171 int32_t mpos;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
172 int32_t isize;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
173 } bam1_core_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
174
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
175 /*! @typedef
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
176 @abstract Structure for one alignment.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
177 @field core core information about the alignment
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
178 @field l_aux length of auxiliary data
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
179 @field data_len current length of bam1_t::data
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
180 @field m_data maximum length of bam1_t::data
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
181 @field data all variable-length data, concatenated; structure: qname-cigar-seq-qual-aux
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
182
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
183 @discussion Notes:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
184
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
185 1. qname is NUL-terminated and core.l_qname includes the trailing '\0'.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
186 2. l_qseq is calculated from the total length of an alignment block
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
187 on reading or from CIGAR.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
188 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
189 typedef struct {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
190 bam1_core_t core;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
191 int l_aux, data_len, m_data;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
192 uint8_t *data;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
193 } bam1_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
194
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
195 typedef struct __bam_iter_t *bam_iter_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
196
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
197 #define bam1_strand(b) (((b)->core.flag&BAM_FREVERSE) != 0)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
198 #define bam1_mstrand(b) (((b)->core.flag&BAM_FMREVERSE) != 0)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
199
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
200 /*! @function
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
201 @abstract Get the CIGAR array
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
202 @param b pointer to an alignment
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
203 @return pointer to the CIGAR array
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
204
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
205 @discussion In the CIGAR array, each element is a 32-bit integer. The
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
206 lower 4 bits give a CIGAR operation and the higher 28 bits keep the
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
207 length of a CIGAR.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
208 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
209 #define bam1_cigar(b) ((uint32_t*)((b)->data + (b)->core.l_qname))
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
210
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
211 /*! @function
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
212 @abstract Get the name of the query
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
213 @param b pointer to an alignment
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
214 @return pointer to the name string, null terminated
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
215 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
216 #define bam1_qname(b) ((char*)((b)->data))
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
217
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
218 /*! @function
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
219 @abstract Get query sequence
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
220 @param b pointer to an alignment
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
221 @return pointer to sequence
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
222
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
223 @discussion Each base is encoded in 4 bits: 1 for A, 2 for C, 4 for G,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
224 8 for T and 15 for N. Two bases are packed in one byte with the base
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
225 at the higher 4 bits having smaller coordinate on the read. It is
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
226 recommended to use bam1_seqi() macro to get the base.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
227 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
228 #define bam1_seq(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
229
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
230 /*! @function
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
231 @abstract Get query quality
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
232 @param b pointer to an alignment
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
233 @return pointer to quality string
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
234 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
235 #define bam1_qual(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + (((b)->core.l_qseq + 1)>>1))
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
236
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
237 /*! @function
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
238 @abstract Get a base on read
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
239 @param s Query sequence returned by bam1_seq()
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
240 @param i The i-th position, 0-based
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
241 @return 4-bit integer representing the base.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
242 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
243 #define bam1_seqi(s, i) ((s)[(i)/2] >> 4*(1-(i)%2) & 0xf)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
244
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
245 /*! @function
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
246 @abstract Get auxiliary data
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
247 @param b pointer to an alignment
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
248 @return pointer to the concatenated auxiliary data
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
249 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
250 #define bam1_aux(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + (b)->core.l_qseq + ((b)->core.l_qseq + 1)/2)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
251
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
252 #ifndef kroundup32
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
253 /*! @function
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
254 @abstract Round an integer up to the nearest power of two.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
255 @param x integer to be rounded (in place)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
256 @discussion x will be modified.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
257 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
258 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
259 #endif
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
260
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
261 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
262 @abstract Whether the machine is big-endian; modified only in
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
263 bam_header_init().
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
264 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
265 extern int bam_is_be;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
266
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
267 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
268 @abstract Verbose level between 0 and 3; 0 is supposed to disable all
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
269 debugging information, though this may not have been implemented.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
270 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
271 extern int bam_verbose;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
272
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
273 /*! @abstract Table for converting a nucleotide character to the 4-bit encoding. */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
274 extern unsigned char bam_nt16_table[256];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
275
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
276 /*! @abstract Table for converting a 4-bit encoded nucleotide to a letter. */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
277 extern char *bam_nt16_rev_table;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
278
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
279 extern char bam_nt16_nt4_table[];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
280
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
281 #ifdef __cplusplus
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
282 extern "C" {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
283 #endif
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
284
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
285 /*********************
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
286 * Low-level SAM I/O *
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
287 *********************/
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
288
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
289 /*! @abstract TAM file handler */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
290 typedef struct __tamFile_t *tamFile;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
291
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
292 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
293 @abstract Open a SAM file for reading, either uncompressed or compressed by gzip/zlib.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
294 @param fn SAM file name
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
295 @return SAM file handler
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
296 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
297 tamFile sam_open(const char *fn);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
298
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
299 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
300 @abstract Close a SAM file handler
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
301 @param fp SAM file handler
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
302 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
303 void sam_close(tamFile fp);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
304
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
305 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
306 @abstract Read one alignment from a SAM file handler
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
307 @param fp SAM file handler
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
308 @param header header information (ordered names of chromosomes)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
309 @param b read alignment; all members in b will be updated
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
310 @return 0 if successful; otherwise negative
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
311 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
312 int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
313
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
314 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
315 @abstract Read header information from a TAB-delimited list file.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
316 @param fn_list file name for the list
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
317 @return a pointer to the header structure
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
318
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
319 @discussion Each line in this file consists of chromosome name and
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
320 the length of chromosome.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
321 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
322 bam_header_t *sam_header_read2(const char *fn_list);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
323
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
324 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
325 @abstract Read header from a SAM file (if present)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
326 @param fp SAM file handler
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
327 @return pointer to header struct; 0 if no @SQ lines available
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
328 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
329 bam_header_t *sam_header_read(tamFile fp);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
330
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
331 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
332 @abstract Parse @SQ lines and update a header struct
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
333 @param h pointer to the header struct to be updated
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
334 @return number of target sequences
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
335
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
336 @discussion bam_header_t::{n_targets,target_len,target_name} will
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
337 be destroyed in the first place.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
338 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
339 int sam_header_parse(bam_header_t *h);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
340 int32_t bam_get_tid(const bam_header_t *header, const char *seq_name);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
341
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
342 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
343 @abstract Parse @RG lines and update a header struct
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
344 @param h pointer to the header struct to be updated
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
345 @return number of @RG lines
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
346
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
347 @discussion bam_header_t::rg2lib will be destroyed in the first
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
348 place.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
349 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
350 int sam_header_parse_rg(bam_header_t *h);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
351
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
352 #define sam_write1(header, b) bam_view1(header, b)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
353
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
354
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
355 /********************************
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
356 * APIs for string dictionaries *
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
357 ********************************/
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
358
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
359 int bam_strmap_put(void *strmap, const char *rg, const char *lib);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
360 const char *bam_strmap_get(const void *strmap, const char *rg);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
361 void *bam_strmap_dup(const void*);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
362 void *bam_strmap_init();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
363 void bam_strmap_destroy(void *strmap);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
364
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
365
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
366 /*********************
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
367 * Low-level BAM I/O *
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
368 *********************/
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
369
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
370 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
371 @abstract Initialize a header structure.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
372 @return the pointer to the header structure
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
373
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
374 @discussion This function also modifies the global variable
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
375 bam_is_be.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
376 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
377 bam_header_t *bam_header_init();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
378
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
379 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
380 @abstract Destroy a header structure.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
381 @param header pointer to the header
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
382 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
383 void bam_header_destroy(bam_header_t *header);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
384
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
385 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
386 @abstract Read a header structure from BAM.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
387 @param fp BAM file handler, opened by bam_open()
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
388 @return pointer to the header structure
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
389
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
390 @discussion The file position indicator must be placed at the
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
391 beginning of the file. Upon success, the position indicator will
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
392 be set at the start of the first alignment.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
393 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
394 bam_header_t *bam_header_read(bamFile fp);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
395
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
396 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
397 @abstract Write a header structure to BAM.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
398 @param fp BAM file handler
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
399 @param header pointer to the header structure
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
400 @return always 0 currently
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
401 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
402 int bam_header_write(bamFile fp, const bam_header_t *header);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
403
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
404 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
405 @abstract Read an alignment from BAM.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
406 @param fp BAM file handler
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
407 @param b read alignment; all members are updated.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
408 @return number of bytes read from the file
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
409
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
410 @discussion The file position indicator must be
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
411 placed right before an alignment. Upon success, this function
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
412 will set the position indicator to the start of the next
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
413 alignment. This function is not affected by the machine
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
414 endianness.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
415 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
416 int bam_read1(bamFile fp, bam1_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
417
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
418 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
419 @abstract Write an alignment to BAM.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
420 @param fp BAM file handler
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
421 @param c pointer to the bam1_core_t structure
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
422 @param data_len total length of variable size data related to
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
423 the alignment
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
424 @param data pointer to the concatenated data
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
425 @return number of bytes written to the file
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
426
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
427 @discussion This function is not affected by the machine
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
428 endianness.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
429 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
430 int bam_write1_core(bamFile fp, const bam1_core_t *c, int data_len, uint8_t *data);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
431
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
432 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
433 @abstract Write an alignment to BAM.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
434 @param fp BAM file handler
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
435 @param b alignment to write
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
436 @return number of bytes written to the file
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
437
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
438 @discussion It is equivalent to:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
439 bam_write1_core(fp, &b->core, b->data_len, b->data)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
440 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
441 int bam_write1(bamFile fp, const bam1_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
442
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
443 /*! @function
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
444 @abstract Allocate a zero-initialized bam1_t struct and return a pointer to it
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
445 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
446 #define bam_init1() ((bam1_t*)calloc(1, sizeof(bam1_t)))
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
447
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
448 /*! @function
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
449 @abstract Free the memory allocated for an alignment.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
450 @param b pointer to an alignment
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
451 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
452 #define bam_destroy1(b) do { \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
453 if (b) { free((b)->data); free(b); } \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
454 } while (0)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
455
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
456 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
457 @abstract Format a BAM record in the SAM format
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
458 @param header pointer to the header structure
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
459 @param b alignment to print
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
460 @return a pointer to the SAM string
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
461 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
462 char *bam_format1(const bam_header_t *header, const bam1_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
463
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
464 char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
465
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
466 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
467 @abstract Check whether a BAM record is plausibly valid
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
468 @param header associated header structure, or NULL if unavailable
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
469 @param b alignment to validate
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
470 @return 0 if the alignment is invalid; non-zero otherwise
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
471
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
472 @discussion Simple consistency check of some of the fields of the
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
473 alignment record. If the header is provided, several additional checks
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
474 are made. Not all fields are checked, so a non-zero result is not a
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
475 guarantee that the record is valid. However it is usually good enough
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
476 to detect when bam_seek() has been called with a virtual file offset
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
477 that is not the offset of an alignment record.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
478 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
479 int bam_validate1(const bam_header_t *header, const bam1_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
480
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
481 const char *bam_get_library(bam_header_t *header, const bam1_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
482
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
483
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
484 /***************
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
485 * pileup APIs *
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
486 ***************/
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
487
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
488 /*! @typedef
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
489 @abstract Structure for one alignment covering the pileup position.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
490 @field b pointer to the alignment
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
491 @field qpos position of the read base at the pileup site, 0-based
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
492 @field indel indel length; 0 for no indel, positive for ins and negative for del
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
493 @field is_del 1 iff the base on the padded read is a deletion
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
494 @field level the level of the read in the "viewer" mode
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
495
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
496 @discussion See also bam_plbuf_push() and bam_lplbuf_push(). The
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
497 difference between the two functions is that the former does not
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
498 set bam_pileup1_t::level, while the latter does. Level helps the
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
499 implementation of alignment viewers, but calculating this has some
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
500 overhead.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
501 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
502 typedef struct {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
503 bam1_t *b;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
504 int32_t qpos;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
505 int indel, level;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
506 uint32_t is_del:1, is_head:1, is_tail:1, is_refskip:1, aux:28;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
507 } bam_pileup1_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
508
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
509 typedef int (*bam_plp_auto_f)(void *data, bam1_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
510
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
511 struct __bam_plp_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
512 typedef struct __bam_plp_t *bam_plp_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
513
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
514 bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
515 int bam_plp_push(bam_plp_t iter, const bam1_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
516 const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
517 const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
518 void bam_plp_set_mask(bam_plp_t iter, int mask);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
519 void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
520 void bam_plp_reset(bam_plp_t iter);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
521 void bam_plp_destroy(bam_plp_t iter);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
522
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
523 struct __bam_mplp_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
524 typedef struct __bam_mplp_t *bam_mplp_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
525
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
526 bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
527 void bam_mplp_destroy(bam_mplp_t iter);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
528 void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
529 int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
530
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
531 /*! @typedef
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
532 @abstract Type of function to be called by bam_plbuf_push().
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
533 @param tid chromosome ID as is defined in the header
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
534 @param pos start coordinate of the alignment, 0-based
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
535 @param n number of elements in pl array
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
536 @param pl array of alignments
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
537 @param data user provided data
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
538 @discussion See also bam_plbuf_push(), bam_plbuf_init() and bam_pileup1_t.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
539 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
540 typedef int (*bam_pileup_f)(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
541
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
542 typedef struct {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
543 bam_plp_t iter;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
544 bam_pileup_f func;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
545 void *data;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
546 } bam_plbuf_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
547
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
548 void bam_plbuf_set_mask(bam_plbuf_t *buf, int mask);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
549 void bam_plbuf_reset(bam_plbuf_t *buf);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
550 bam_plbuf_t *bam_plbuf_init(bam_pileup_f func, void *data);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
551 void bam_plbuf_destroy(bam_plbuf_t *buf);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
552 int bam_plbuf_push(const bam1_t *b, bam_plbuf_t *buf);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
553
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
554 int bam_pileup_file(bamFile fp, int mask, bam_pileup_f func, void *func_data);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
555
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
556 struct __bam_lplbuf_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
557 typedef struct __bam_lplbuf_t bam_lplbuf_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
558
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
559 void bam_lplbuf_reset(bam_lplbuf_t *buf);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
560
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
561 /*! @abstract bam_plbuf_init() equivalent with level calculated. */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
562 bam_lplbuf_t *bam_lplbuf_init(bam_pileup_f func, void *data);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
563
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
564 /*! @abstract bam_plbuf_destroy() equivalent with level calculated. */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
565 void bam_lplbuf_destroy(bam_lplbuf_t *tv);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
566
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
567 /*! @abstract bam_plbuf_push() equivalent with level calculated. */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
568 int bam_lplbuf_push(const bam1_t *b, bam_lplbuf_t *buf);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
569
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
570
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
571 /*********************
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
572 * BAM indexing APIs *
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
573 *********************/
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
574
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
575 struct __bam_index_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
576 typedef struct __bam_index_t bam_index_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
577
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
578 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
579 @abstract Build index for a BAM file.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
580 @discussion Index file "fn.bai" will be created.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
581 @param fn name of the BAM file
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
582 @return always 0 currently
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
583 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
584 int bam_index_build(const char *fn);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
585
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
586 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
587 @abstract Load index from file "fn.bai".
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
588 @param fn name of the BAM file (NOT the index file)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
589 @return pointer to the index structure
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
590 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
591 bam_index_t *bam_index_load(const char *fn);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
592
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
593 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
594 @abstract Destroy an index structure.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
595 @param idx pointer to the index structure
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
596 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
597 void bam_index_destroy(bam_index_t *idx);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
598
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
599 /*! @typedef
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
600 @abstract Type of function to be called by bam_fetch().
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
601 @param b the alignment
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
602 @param data user provided data
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
603 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
604 typedef int (*bam_fetch_f)(const bam1_t *b, void *data);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
605
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
606 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
607 @abstract Retrieve the alignments that overlap the
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
608 specified region.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
609
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
610 @discussion A user defined function will be called for each
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
611 retrieved alignment ordered by its start position.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
612
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
613 @param fp BAM file handler
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
614 @param idx pointer to the alignment index
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
615 @param tid chromosome ID as is defined in the header
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
616 @param beg start coordinate, 0-based
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
617 @param end end coordinate, 0-based
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
618 @param data user provided data (will be transferred to func)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
619 @param func user defined function
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
620 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
621 int bam_fetch(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
622
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
623 bam_iter_t bam_iter_query(const bam_index_t *idx, int tid, int beg, int end);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
624 int bam_iter_read(bamFile fp, bam_iter_t iter, bam1_t *b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
625 void bam_iter_destroy(bam_iter_t iter);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
626
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
627 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
628 @abstract Parse a region in the format: "chr2:100,000-200,000".
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
629 @discussion bam_header_t::hash will be initialized if empty.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
630 @param header pointer to the header structure
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
631 @param str string to be parsed
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
632 @param ref_id the returned chromosome ID
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
633 @param begin the returned start coordinate
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
634 @param end the returned end coordinate
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
635 @return 0 on success; -1 on failure
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
636 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
637 int bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *begin, int *end);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
638
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
639
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
640 /**************************
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
641 * APIs for optional tags *
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
642 **************************/
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
643
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
644 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
645 @abstract Retrieve data of a tag
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
646 @param b pointer to an alignment struct
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
647 @param tag two-character tag to be retrieved
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
648
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
649 @return pointer to the type and data. The first character is the
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
650 type that can be 'iIsScCdfAZH'.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
651
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
652 @discussion Use bam_aux2?() series to convert the returned data to
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
653 the corresponding type.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
654 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
655 uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
656
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
657 int32_t bam_aux2i(const uint8_t *s);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
658 float bam_aux2f(const uint8_t *s);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
659 double bam_aux2d(const uint8_t *s);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
660 char bam_aux2A(const uint8_t *s);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
661 char *bam_aux2Z(const uint8_t *s);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
662
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
663 int bam_aux_del(bam1_t *b, uint8_t *s);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
664 void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
665 uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]); // an alias of bam_aux_get()
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
666
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
667
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
668 /*****************
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
669 * Miscellaneous *
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
670 *****************/
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
671
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
672 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
673 @abstract Calculate the rightmost coordinate of an alignment on the
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
674 reference genome.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
675
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
676 @param c pointer to the bam1_core_t structure
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
677 @param cigar the corresponding CIGAR array (from bam1_t::cigar)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
678 @return the rightmost coordinate, 0-based
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
679 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
680 uint32_t bam_calend(const bam1_core_t *c, const uint32_t *cigar);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
681
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
682 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
683 @abstract Calculate the length of the query sequence from CIGAR.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
684 @param c pointer to the bam1_core_t structure
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
685 @param cigar the corresponding CIGAR array (from bam1_t::cigar)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
686 @return length of the query sequence
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
687 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
688 int32_t bam_cigar2qlen(const bam1_core_t *c, const uint32_t *cigar);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
689
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
690 #ifdef __cplusplus
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
691 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
692 #endif
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
693
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
694 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
695 @abstract Calculate the minimum bin that contains a region [beg,end).
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
696 @param beg start of the region, 0-based
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
697 @param end end of the region, 0-based
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
698 @return bin
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
699 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
700 static inline int bam_reg2bin(uint32_t beg, uint32_t end)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
701 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
702 --end;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
703 if (beg>>14 == end>>14) return 4681 + (beg>>14);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
704 if (beg>>17 == end>>17) return 585 + (beg>>17);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
705 if (beg>>20 == end>>20) return 73 + (beg>>20);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
706 if (beg>>23 == end>>23) return 9 + (beg>>23);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
707 if (beg>>26 == end>>26) return 1 + (beg>>26);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
708 return 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
709 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
710
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
711 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
712 @abstract Copy an alignment
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
713 @param bdst destination alignment struct
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
714 @param bsrc source alignment struct
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
715 @return pointer to the destination alignment struct
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
716 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
717 static inline bam1_t *bam_copy1(bam1_t *bdst, const bam1_t *bsrc)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
718 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
719 uint8_t *data = bdst->data;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
720 int m_data = bdst->m_data; // backup data and m_data
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
721 if (m_data < bsrc->data_len) { // double the capacity
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
722 m_data = bsrc->data_len; kroundup32(m_data);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
723 data = (uint8_t*)realloc(data, m_data);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
724 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
725 memcpy(data, bsrc->data, bsrc->data_len); // copy var-len data
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
726 *bdst = *bsrc; // copy the rest
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
727 // restore the backup
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
728 bdst->m_data = m_data;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
729 bdst->data = data;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
730 return bdst;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
731 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
732
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
733 /*!
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
734 @abstract Duplicate an alignment
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
735 @param src source alignment struct
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
736 @return pointer to the destination alignment struct
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
737 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
738 static inline bam1_t *bam_dup1(const bam1_t *src)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
739 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
740 bam1_t *b;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
741 b = bam_init1();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
742 *b = *src;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
743 b->m_data = b->data_len;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
744 b->data = (uint8_t*)calloc(b->data_len, 1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
745 memcpy(b->data, src->data, b->data_len);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
746 return b;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
747 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
748
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
/*!
  @abstract  Map a one-character type code to its size
  @param  x  type code
  @return    1 for 'A', 'c' and 'C'; 2 for 's' and 'S'; 4 for 'i', 'I' and
             'f'; 0 for any other code
 */
static inline int bam_aux_type2size(int x)
{
	switch (x) {
	case 'A':
	case 'c':
	case 'C':
		return 1;
	case 's':
	case 'S':
		return 2;
	case 'i':
	case 'I':
	case 'f':
		return 4;
	default:
		return 0;
	}
}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
756
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
757
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
758 #endif