annotate PsiCLASS-1.0.2/samtools-0.1.19/bam.h @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1 /* The MIT License
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3 Copyright (c) 2008-2010 Genome Research Ltd (GRL).
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
4
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
5 Permission is hereby granted, free of charge, to any person obtaining
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
6 a copy of this software and associated documentation files (the
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
7 "Software"), to deal in the Software without restriction, including
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
8 without limitation the rights to use, copy, modify, merge, publish,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
9 distribute, sublicense, and/or sell copies of the Software, and to
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
10 permit persons to whom the Software is furnished to do so, subject to
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
11 the following conditions:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
12
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
13 The above copyright notice and this permission notice shall be
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
14 included in all copies or substantial portions of the Software.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
15
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
23 SOFTWARE.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
24 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
25
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
26 /* Contact: Heng Li <lh3@sanger.ac.uk> */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
27
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
28 #ifndef BAM_BAM_H
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
29 #define BAM_BAM_H
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
30
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
31 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
32 @header
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
33
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
34 BAM library provides I/O and various operations on manipulating files
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
35 in the BAM (Binary Alignment/Mapping) or SAM (Sequence Alignment/Map)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
36 format. It now supports importing from or exporting to SAM, sorting,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
37 merging, generating pileup, and quick retrieval of reads overlapped
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
38 with a specified region.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
39
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
40 @copyright Genome Research Ltd.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
41 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
42
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
43 #define BAM_VERSION "0.1.19-44428cd"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
44
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
45 #include <stdint.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
46 #include <stdlib.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
47 #include <string.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
48 #include <stdio.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
49
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
50 #ifndef BAM_LITE
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
51 #define BAM_VIRTUAL_OFFSET16
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
52 #include "bgzf.h"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
53 /*! @abstract BAM file handler */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
54 typedef BGZF *bamFile;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
55 #define bam_open(fn, mode) bgzf_open(fn, mode)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
56 #define bam_dopen(fd, mode) bgzf_fdopen(fd, mode)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
57 #define bam_close(fp) bgzf_close(fp)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
58 #define bam_read(fp, buf, size) bgzf_read(fp, buf, size)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
59 #define bam_write(fp, buf, size) bgzf_write(fp, buf, size)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
60 #define bam_tell(fp) bgzf_tell(fp)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
61 #define bam_seek(fp, pos, dir) bgzf_seek(fp, pos, dir)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
62 #else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
63 #define BAM_TRUE_OFFSET
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
64 #include <zlib.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
65 typedef gzFile bamFile;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
66 #define bam_open(fn, mode) gzopen(fn, mode)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
67 #define bam_dopen(fd, mode) gzdopen(fd, mode)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
68 #define bam_close(fp) gzclose(fp)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
69 #define bam_read(fp, buf, size) gzread(fp, buf, size)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
70 /* no bam_write/bam_tell/bam_seek() here */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
71 #endif
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
72
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
73 /*! @typedef
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
74 @abstract Structure for the alignment header.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
75 @field n_targets number of reference sequences
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
76 @field target_name names of the reference sequences
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
77 @field target_len lengths of the reference sequences
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
78 @field dict header dictionary
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
79 @field hash hash table for fast name lookup
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
80 @field rg2lib hash table for @RG-ID -> LB lookup
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
81 @field l_text length of the plain text in the header
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
82 @field text plain text
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
83
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
84 @discussion Field hash points to null by default. It is a private
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
85 member.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
86 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
87 typedef struct {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
88 int32_t n_targets;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
89 char **target_name;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
90 uint32_t *target_len;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
91 void *dict, *hash, *rg2lib;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
92 uint32_t l_text, n_text;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
93 char *text;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
94 } bam_header_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
95
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
96 /*! @abstract the read is paired in sequencing, no matter whether it is mapped in a pair */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
97 #define BAM_FPAIRED 1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
98 /*! @abstract the read is mapped in a proper pair */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
99 #define BAM_FPROPER_PAIR 2
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
100 /*! @abstract the read itself is unmapped; conflicts with BAM_FPROPER_PAIR */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
101 #define BAM_FUNMAP 4
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
102 /*! @abstract the mate is unmapped */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
103 #define BAM_FMUNMAP 8
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
104 /*! @abstract the read is mapped to the reverse strand */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
105 #define BAM_FREVERSE 16
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
106 /*! @abstract the mate is mapped to the reverse strand */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
107 #define BAM_FMREVERSE 32
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
108 /*! @abstract this is read1 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
109 #define BAM_FREAD1 64
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
110 /*! @abstract this is read2 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
111 #define BAM_FREAD2 128
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
112 /*! @abstract not primary alignment */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
113 #define BAM_FSECONDARY 256
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
114 /*! @abstract QC failure */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
115 #define BAM_FQCFAIL 512
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
116 /*! @abstract optical or PCR duplicate */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
117 #define BAM_FDUP 1024
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
118
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
119 #define BAM_OFDEC 0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
120 #define BAM_OFHEX 1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
121 #define BAM_OFSTR 2
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
122
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
123 /*! @abstract default mask for pileup */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
124 #define BAM_DEF_MASK (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
125
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
126 #define BAM_CORE_SIZE sizeof(bam1_core_t)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
127
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
128 /**
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
129 * Describing how CIGAR operation/length is packed in a 32-bit integer.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
130 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
131 #define BAM_CIGAR_SHIFT 4
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
132 #define BAM_CIGAR_MASK ((1 << BAM_CIGAR_SHIFT) - 1)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
133
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
134 /*
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
135 CIGAR operations.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
136 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
137 /*! @abstract CIGAR: M = match or mismatch*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
138 #define BAM_CMATCH 0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
139 /*! @abstract CIGAR: I = insertion to the reference */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
140 #define BAM_CINS 1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
141 /*! @abstract CIGAR: D = deletion from the reference */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
142 #define BAM_CDEL 2
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
143 /*! @abstract CIGAR: N = skip on the reference (e.g. spliced alignment) */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
144 #define BAM_CREF_SKIP 3
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
145 /*! @abstract CIGAR: S = clip on the read with clipped sequence
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
146 present in qseq */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
147 #define BAM_CSOFT_CLIP 4
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
148 /*! @abstract CIGAR: H = clip on the read with clipped sequence trimmed off */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
149 #define BAM_CHARD_CLIP 5
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
150 /*! @abstract CIGAR: P = padding */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
151 #define BAM_CPAD 6
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
152 /*! @abstract CIGAR: equals = match */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
153 #define BAM_CEQUAL 7
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
154 /*! @abstract CIGAR: X = mismatch */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
155 #define BAM_CDIFF 8
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
156 #define BAM_CBACK 9
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
157
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
158 #define BAM_CIGAR_STR "MIDNSHP=XB"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
159 #define BAM_CIGAR_TYPE 0x3C1A7
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
160
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
161 #define bam_cigar_op(c) ((c)&BAM_CIGAR_MASK)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
162 #define bam_cigar_oplen(c) ((c)>>BAM_CIGAR_SHIFT)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
163 #define bam_cigar_opchr(c) (BAM_CIGAR_STR[bam_cigar_op(c)])
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
164 #define bam_cigar_gen(l, o) ((l)<<BAM_CIGAR_SHIFT|(o))
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
165 #define bam_cigar_type(o) (BAM_CIGAR_TYPE>>((o)<<1)&3) // bit 1: consume query; bit 2: consume reference
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
166
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
167 /*! @typedef
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
168 @abstract Structure for core alignment information.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
169 @field tid chromosome ID, defined by bam_header_t
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
170 @field pos 0-based leftmost coordinate
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
171 @field bin bin calculated by bam_reg2bin()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
172 @field qual mapping quality
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
173 @field l_qname length of the query name
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
174 @field flag bitwise flag
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
175 @field n_cigar number of CIGAR operations
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
176 @field l_qseq length of the query sequence (read)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
177 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
178 typedef struct {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
179 int32_t tid;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
180 int32_t pos;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
181 uint32_t bin:16, qual:8, l_qname:8;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
182 uint32_t flag:16, n_cigar:16;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
183 int32_t l_qseq;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
184 int32_t mtid;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
185 int32_t mpos;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
186 int32_t isize;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
187 } bam1_core_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
188
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
189 /*! @typedef
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
190 @abstract Structure for one alignment.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
191 @field core core information about the alignment
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
192 @field l_aux length of auxiliary data
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
193 @field data_len current length of bam1_t::data
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
194 @field m_data maximum length of bam1_t::data
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
195 @field data all variable-length data, concatenated; structure: qname-cigar-seq-qual-aux
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
196
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
197 @discussion Notes:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
198
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
199 1. qname is NUL-terminated and core.l_qname includes the trailing '\0'.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
200 2. l_qseq is calculated from the total length of an alignment block
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
201 on reading or from CIGAR.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
202 3. cigar data is encoded 4 bytes per CIGAR operation.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
203 4. seq is nybble-encoded according to bam_nt16_table.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
204 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
205 typedef struct {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
206 bam1_core_t core;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
207 int l_aux, data_len, m_data;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
208 uint8_t *data;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
209 } bam1_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
210
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
211 typedef struct __bam_iter_t *bam_iter_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
212
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
213 #define bam1_strand(b) (((b)->core.flag&BAM_FREVERSE) != 0)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
214 #define bam1_mstrand(b) (((b)->core.flag&BAM_FMREVERSE) != 0)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
215
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
216 /*! @function
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
217 @abstract Get the CIGAR array
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
218 @param b pointer to an alignment
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
219 @return pointer to the CIGAR array
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
220
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
221 @discussion In the CIGAR array, each element is a 32-bit integer. The
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
222 lower 4 bits give a CIGAR operation and the higher 28 bits keep the
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
223 length of a CIGAR.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
224 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
225 #define bam1_cigar(b) ((uint32_t*)((b)->data + (b)->core.l_qname))
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
226
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
227 /*! @function
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
228 @abstract Get the name of the query
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
229 @param b pointer to an alignment
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
230 @return pointer to the name string, null terminated
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
231 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
232 #define bam1_qname(b) ((char*)((b)->data))
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
233
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
234 /*! @function
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
235 @abstract Get query sequence
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
236 @param b pointer to an alignment
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
237 @return pointer to sequence
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
238
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
239 @discussion Each base is encoded in 4 bits: 1 for A, 2 for C, 4 for G,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
240 8 for T and 15 for N. Two bases are packed in one byte with the base
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
241 at the higher 4 bits having smaller coordinate on the read. It is
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
242 recommended to use bam1_seqi() macro to get the base.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
243 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
244 #define bam1_seq(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
245
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
246 /*! @function
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
247 @abstract Get query quality
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
248 @param b pointer to an alignment
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
249 @return pointer to quality string
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
250 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
251 #define bam1_qual(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + (((b)->core.l_qseq + 1)>>1))
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
252
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
253 /*! @function
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
254 @abstract Get a base on read
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
255 @param s Query sequence returned by bam1_seq()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
256 @param i The i-th position, 0-based
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
257 @return 4-bit integer representing the base.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
258 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
259 //#define bam1_seqi(s, i) ((s)[(i)/2] >> 4*(1-(i)%2) & 0xf)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
260 #define bam1_seqi(s, i) ((s)[(i)>>1] >> ((~(i)&1)<<2) & 0xf)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
261
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
262 #define bam1_seq_seti(s, i, c) ( (s)[(i)>>1] = ((s)[(i)>>1] & 0xf<<(((i)&1)<<2)) | (c)<<((~(i)&1)<<2) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
263
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
264 /*! @function
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
265 @abstract Get auxiliary data
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
266 @param b pointer to an alignment
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
267 @return pointer to the concatenated auxiliary data
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
268 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
269 #define bam1_aux(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + (b)->core.l_qseq + ((b)->core.l_qseq + 1)/2)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
270
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
271 #ifndef kroundup32
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
272 /*! @function
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
273 @abstract Round an integer up to the nearest power of 2 (a power of 2 is left unchanged).
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
274 @param x integer to be rounded (in place)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
275 @discussion x will be modified.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
276 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
277 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
278 #endif
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
279
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
280 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
281 @abstract Whether the machine is big-endian; modified only in
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
282 bam_header_init().
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
283 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
284 extern int bam_is_be;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
285
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
286 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
287 @abstract Verbose level between 0 and 3; 0 is supposed to disable all
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
288 debugging information, though this may not have been implemented.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
289 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
290 extern int bam_verbose;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
291
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
292 extern int bam_no_B;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
293
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
294 /*! @abstract Table for converting a nucleotide character to the 4-bit encoding. */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
295 extern unsigned char bam_nt16_table[256];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
296
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
297 /*! @abstract Table for converting a 4-bit encoded nucleotide to a letter. */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
298 extern char *bam_nt16_rev_table;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
299
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
300 extern char bam_nt16_nt4_table[];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
301
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
302 #ifdef __cplusplus
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
303 extern "C" {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
304 #endif
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
305
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
306 /*********************
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
307 * Low-level SAM I/O *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
308 *********************/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
309
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
310 /*! @abstract TAM file handler */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
311 typedef struct __tamFile_t *tamFile;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
312
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
313 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
314 @abstract Open a SAM file for reading, either uncompressed or compressed by gzip/zlib.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
315 @param fn SAM file name
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
316 @return SAM file handler
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
317 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
318 tamFile sam_open(const char *fn);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
319
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
320 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
321 @abstract Close a SAM file handler
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
322 @param fp SAM file handler
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
323 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
324 void sam_close(tamFile fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
325
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
326 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
327 @abstract Read one alignment from a SAM file handler
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
328 @param fp SAM file handler
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
329 @param header header information (ordered names of chromosomes)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
330 @param b read alignment; all members in b will be updated
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
331 @return 0 if successful; otherwise negative
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
332 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
333 int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
334
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
335 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
336 @abstract Read header information from a TAB-delimited list file.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
337 @param fn_list file name for the list
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
338 @return a pointer to the header structure
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
339
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
340 @discussion Each line in this file consists of chromosome name and
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
341 the length of chromosome.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
342 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
343 bam_header_t *sam_header_read2(const char *fn_list);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
344
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
345 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
346 @abstract Read header from a SAM file (if present)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
347 @param fp SAM file handler
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
348 @return pointer to header struct; 0 if no @SQ lines available
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
349 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
350 bam_header_t *sam_header_read(tamFile fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
351
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
352 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
353 @abstract Parse @SQ lines and update a header struct
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
354 @param h pointer to the header struct to be updated
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
355 @return number of target sequences
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
356
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
357 @discussion bam_header_t::{n_targets,target_len,target_name} will
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
358 be destroyed in the first place.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
359 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
360 int sam_header_parse(bam_header_t *h);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
361 int32_t bam_get_tid(const bam_header_t *header, const char *seq_name);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
362
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
363 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
364 @abstract Parse @RG lines and update a header struct
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
365 @param h pointer to the header struct to be updated
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
366 @return number of @RG lines
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
367
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
368 @discussion bam_header_t::rg2lib will be destroyed in the first
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
369 place.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
370 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
371 int sam_header_parse_rg(bam_header_t *h);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
372
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
373 #define sam_write1(header, b) bam_view1(header, b)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
374
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
375
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
376 /********************************
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
377 * APIs for string dictionaries *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
378 ********************************/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
379
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
380 int bam_strmap_put(void *strmap, const char *rg, const char *lib);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
381 const char *bam_strmap_get(const void *strmap, const char *rg);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
382 void *bam_strmap_dup(const void*);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
383 void *bam_strmap_init();
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
384 void bam_strmap_destroy(void *strmap);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
385
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
386
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
387 /*********************
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
388 * Low-level BAM I/O *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
389 *********************/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
390
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
391 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
392 @abstract Initialize a header structure.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
393 @return the pointer to the header structure
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
394
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
395 @discussion This function also modifies the global variable
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
396 bam_is_be.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
397 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
398 bam_header_t *bam_header_init();
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
399
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
400 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
401 @abstract Destroy a header structure.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
402 @param header pointer to the header
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
403 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
404 void bam_header_destroy(bam_header_t *header);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
405
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
406 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
407 @abstract Read a header structure from BAM.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
408 @param fp BAM file handler, opened by bam_open()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
409 @return pointer to the header structure
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
410
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
411 @discussion The file position indicator must be placed at the
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
412 beginning of the file. Upon success, the position indicator will
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
413 be set at the start of the first alignment.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
414 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
415 bam_header_t *bam_header_read(bamFile fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
416
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
417 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
418 @abstract Write a header structure to BAM.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
419 @param fp BAM file handler
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
420 @param header pointer to the header structure
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
421 @return always 0 currently
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
422 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
423 int bam_header_write(bamFile fp, const bam_header_t *header);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
424
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
425 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
426 @abstract Read an alignment from BAM.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
427 @param fp BAM file handler
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
428 @param b read alignment; all members are updated.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
429 @return number of bytes read from the file
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
430
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
431 @discussion The file position indicator must be
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
432 placed right before an alignment. Upon success, this function
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
433 will set the position indicator to the start of the next
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
434 alignment. This function is not affected by the machine
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
435 endianness.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
436 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
437 int bam_read1(bamFile fp, bam1_t *b);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
438
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
439 int bam_remove_B(bam1_t *b);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
440
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
441 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
442 @abstract Write an alignment to BAM.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
443 @param fp BAM file handler
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
444 @param c pointer to the bam1_core_t structure
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
445 @param data_len total length of variable size data related to
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
446 the alignment
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
447 @param data pointer to the concatenated data
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
448 @return number of bytes written to the file
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
449
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
450 @discussion This function is not affected by the machine
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
451 endianness.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
452 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
453 int bam_write1_core(bamFile fp, const bam1_core_t *c, int data_len, uint8_t *data);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
454
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
455 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
456 @abstract Write an alignment to BAM.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
457 @param fp BAM file handler
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
458 @param b alignment to write
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
459 @return number of bytes written to the file
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
460
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
461 @discussion It is equivalent to:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
462 bam_write1_core(fp, &b->core, b->data_len, b->data)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
463 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
464 int bam_write1(bamFile fp, const bam1_t *b);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
465
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
466 /*! @function
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
467 @abstract Allocate a zero-initialized bam1_t struct and return a pointer to it
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
468 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
469 #define bam_init1() ((bam1_t*)calloc(1, sizeof(bam1_t)))
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
470
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
471 /*! @function
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
472 @abstract Free the memory allocated for an alignment.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
473 @param b pointer to an alignment
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
474 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
475 #define bam_destroy1(b) do { \
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
476 if (b) { free((b)->data); free(b); } \
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
477 } while (0)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
478
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
479 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
480 @abstract Format a BAM record in the SAM format
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
481 @param header pointer to the header structure
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
482 @param b alignment to print
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
483 @return a pointer to the SAM string
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
484 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
485 char *bam_format1(const bam_header_t *header, const bam1_t *b);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
486
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
487 char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
488
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
489 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
490 @abstract Check whether a BAM record is plausibly valid
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
491 @param header associated header structure, or NULL if unavailable
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
492 @param b alignment to validate
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
493 @return 0 if the alignment is invalid; non-zero otherwise
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
494
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
495 @discussion Simple consistency check of some of the fields of the
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
496 alignment record. If the header is provided, several additional checks
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
497 are made. Not all fields are checked, so a non-zero result is not a
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
498 guarantee that the record is valid. However it is usually good enough
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
499 to detect when bam_seek() has been called with a virtual file offset
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
500 that is not the offset of an alignment record.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
501 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
502 int bam_validate1(const bam_header_t *header, const bam1_t *b);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
503
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
504 const char *bam_get_library(bam_header_t *header, const bam1_t *b);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
505
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
506
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
507 /***************
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
508 * pileup APIs *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
509 ***************/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
510
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
511 /*! @typedef
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
512 @abstract Structure for one alignment covering the pileup position.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
513 @field b pointer to the alignment
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
514 @field qpos position of the read base at the pileup site, 0-based
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
515 @field indel indel length; 0 for no indel, positive for ins and negative for del
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
516 @field is_del 1 iff the base on the padded read is a deletion
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
517 @field level the level of the read in the "viewer" mode
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
518
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
519 @discussion See also bam_plbuf_push() and bam_lplbuf_push(). The
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
520 difference between the two functions is that the former does not
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
521 set bam_pileup1_t::level, while the latter does. Level helps the
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
522 implementation of alignment viewers, but calculating this has some
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
523 overhead.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
524 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
525 typedef struct {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
526 bam1_t *b;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
527 int32_t qpos;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
528 int indel, level;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
529 uint32_t is_del:1, is_head:1, is_tail:1, is_refskip:1, aux:28;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
530 } bam_pileup1_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
531
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
532 typedef int (*bam_plp_auto_f)(void *data, bam1_t *b);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
533
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
534 struct __bam_plp_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
535 typedef struct __bam_plp_t *bam_plp_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
536
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
537 bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
538 int bam_plp_push(bam_plp_t iter, const bam1_t *b);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
539 const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
540 const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
541 void bam_plp_set_mask(bam_plp_t iter, int mask);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
542 void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
543 void bam_plp_reset(bam_plp_t iter);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
544 void bam_plp_destroy(bam_plp_t iter);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
545
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
546 struct __bam_mplp_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
547 typedef struct __bam_mplp_t *bam_mplp_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
548
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
549 bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
550 void bam_mplp_destroy(bam_mplp_t iter);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
551 void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
552 int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
553
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
554 /*! @typedef
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
555 @abstract Type of function to be called by bam_plbuf_push().
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
556 @param tid chromosome ID as is defined in the header
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
557 @param pos start coordinate of the alignment, 0-based
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
558 @param n number of elements in pl array
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
559 @param pl array of alignments
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
560 @param data user provided data
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
561 @discussion See also bam_plbuf_push(), bam_plbuf_init() and bam_pileup1_t.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
562 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
563 typedef int (*bam_pileup_f)(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
564
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
565 typedef struct {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
566 bam_plp_t iter;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
567 bam_pileup_f func;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
568 void *data;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
569 } bam_plbuf_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
570
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
571 void bam_plbuf_set_mask(bam_plbuf_t *buf, int mask);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
572 void bam_plbuf_reset(bam_plbuf_t *buf);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
573 bam_plbuf_t *bam_plbuf_init(bam_pileup_f func, void *data);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
574 void bam_plbuf_destroy(bam_plbuf_t *buf);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
575 int bam_plbuf_push(const bam1_t *b, bam_plbuf_t *buf);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
576
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
577 int bam_pileup_file(bamFile fp, int mask, bam_pileup_f func, void *func_data);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
578
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
579 struct __bam_lplbuf_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
580 typedef struct __bam_lplbuf_t bam_lplbuf_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
581
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
582 void bam_lplbuf_reset(bam_lplbuf_t *buf);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
583
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
584 /*! @abstract bam_plbuf_init() equivalent with level calculated. */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
585 bam_lplbuf_t *bam_lplbuf_init(bam_pileup_f func, void *data);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
586
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
587 /*! @abstract bam_plbuf_destroy() equivalent with level calculated. */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
588 void bam_lplbuf_destroy(bam_lplbuf_t *tv);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
589
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
590 /*! @abstract bam_plbuf_push() equivalent with level calculated. */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
591 int bam_lplbuf_push(const bam1_t *b, bam_lplbuf_t *buf);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
592
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
593
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
594 /*********************
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
595 * BAM indexing APIs *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
596 *********************/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
597
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
598 struct __bam_index_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
599 typedef struct __bam_index_t bam_index_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
600
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
601 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
602 @abstract Build index for a BAM file.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
603 @discussion Index file "fn.bai" will be created.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
604 @param fn name of the BAM file
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
605 @return always 0 currently
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
606 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
607 int bam_index_build(const char *fn);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
608
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
609 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
610 @abstract Load index from file "fn.bai".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
611 @param fn name of the BAM file (NOT the index file)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
612 @return pointer to the index structure
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
613 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
614 bam_index_t *bam_index_load(const char *fn);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
615
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
616 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
617 @abstract Destroy an index structure.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
618 @param idx pointer to the index structure
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
619 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
620 void bam_index_destroy(bam_index_t *idx);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
621
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
622 /*! @typedef
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
623 @abstract Type of function to be called by bam_fetch().
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
624 @param b the alignment
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
625 @param data user provided data
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
626 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
627 typedef int (*bam_fetch_f)(const bam1_t *b, void *data);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
628
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
629 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
630 @abstract Retrieve the alignments that overlap the
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
631 specified region.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
632
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
633 @discussion A user defined function will be called for each
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
634 retrieved alignment ordered by its start position.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
635
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
636 @param fp BAM file handler
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
637 @param idx pointer to the alignment index
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
638 @param tid chromosome ID as is defined in the header
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
639 @param beg start coordinate, 0-based
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
640 @param end end coordinate, 0-based
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
641 @param data user provided data (will be transferred to func)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
642 @param func user defined function
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
643 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
644 int bam_fetch(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
645
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
646 bam_iter_t bam_iter_query(const bam_index_t *idx, int tid, int beg, int end);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
647 int bam_iter_read(bamFile fp, bam_iter_t iter, bam1_t *b);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
648 void bam_iter_destroy(bam_iter_t iter);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
649
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
650 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
651 @abstract Parse a region in the format: "chr2:100,000-200,000".
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
652 @discussion bam_header_t::hash will be initialized if empty.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
653 @param header pointer to the header structure
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
654 @param str string to be parsed
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
655 @param ref_id the returned chromosome ID
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
656 @param begin the returned start coordinate
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
657 @param end the returned end coordinate
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
658 @return 0 on success; -1 on failure
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
659 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
660 int bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *begin, int *end);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
661
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
662
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
663 /**************************
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
664 * APIs for optional tags *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
665 **************************/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
666
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
667 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
668 @abstract Retrieve data of a tag
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
669 @param b pointer to an alignment struct
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
670 @param tag two-character tag to be retrieved
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
671
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
672 @return pointer to the type and data. The first character is the
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
673 type that can be 'iIsScCdfAZH'.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
674
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
675 @discussion Use bam_aux2?() series to convert the returned data to
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
676 the corresponding type.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
677 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
678 uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
679
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
680 int32_t bam_aux2i(const uint8_t *s);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
681 float bam_aux2f(const uint8_t *s);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
682 double bam_aux2d(const uint8_t *s);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
683 char bam_aux2A(const uint8_t *s);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
684 char *bam_aux2Z(const uint8_t *s);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
685
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
686 int bam_aux_del(bam1_t *b, uint8_t *s);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
687 void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
688 uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]); // an alias of bam_aux_get()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
689
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
690
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
691 /*****************
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
692 * Miscellaneous *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
693 *****************/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
694
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
695 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
696 @abstract Calculate the rightmost coordinate of an alignment on the
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
697 reference genome.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
698
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
699 @param c pointer to the bam1_core_t structure
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
700 @param cigar the corresponding CIGAR array (from bam1_t::cigar)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
701 @return the rightmost coordinate, 0-based
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
702 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
703 uint32_t bam_calend(const bam1_core_t *c, const uint32_t *cigar);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
704
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
705 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
706 @abstract Calculate the length of the query sequence from CIGAR.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
707 @param c pointer to the bam1_core_t structure
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
708 @param cigar the corresponding CIGAR array (from bam1_t::cigar)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
709 @return length of the query sequence
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
710 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
711 int32_t bam_cigar2qlen(const bam1_core_t *c, const uint32_t *cigar);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
712
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
713 #ifdef __cplusplus
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
714 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
715 #endif
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
716
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
/*!
  @abstract     Find the smallest bin that wholly contains the half-open
                region [beg,end).
  @param  beg   first position of the region, 0-based
  @param  end   position just past the last one covered, 0-based
  @return       bin number; 0 when none of the finer levels can hold the
                whole region
 */
static inline int bam_reg2bin(uint32_t beg, uint32_t end)
{
	/* One row per level, finest first: {shift, number of the first bin}. */
	static const int level[5][2] = {
		{14, 4681}, {17, 585}, {20, 73}, {23, 9}, {26, 1}
	};
	/* Last position actually covered; wraps to UINT32_MAX when end is 0. */
	const uint32_t last = end - 1;
	int i;

	for (i = 0; i < 5; ++i) {
		const int shift = level[i][0];
		/* Both ends fall into the same bin at this level: done. */
		if ((beg >> shift) == (last >> shift))
			return level[i][1] + (beg >> shift);
	}
	return 0;
}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
733
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
734 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
735 @abstract Copy an alignment
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
736 @param bdst destination alignment struct
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
737 @param bsrc source alignment struct
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
738 @return pointer to the destination alignment struct
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
739 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
740 static inline bam1_t *bam_copy1(bam1_t *bdst, const bam1_t *bsrc)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
741 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
742 uint8_t *data = bdst->data;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
743 int m_data = bdst->m_data; // backup data and m_data
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
744 if (m_data < bsrc->data_len) { // double the capacity
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
745 m_data = bsrc->data_len; kroundup32(m_data);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
746 data = (uint8_t*)realloc(data, m_data);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
747 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
748 memcpy(data, bsrc->data, bsrc->data_len); // copy var-len data
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
749 *bdst = *bsrc; // copy the rest
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
750 // restore the backup
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
751 bdst->m_data = m_data;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
752 bdst->data = data;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
753 return bdst;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
754 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
755
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
756 /*!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
757 @abstract Duplicate an alignment
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
758 @param src source alignment struct
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
759 @return pointer to the destination alignment struct
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
760 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
761 static inline bam1_t *bam_dup1(const bam1_t *src)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
762 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
763 bam1_t *b;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
764 b = bam_init1();
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
765 *b = *src;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
766 b->m_data = b->data_len;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
767 b->data = (uint8_t*)calloc(b->data_len, 1);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
768 memcpy(b->data, src->data, b->data_len);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
769 return b;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
770 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
771
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
/*!
  @abstract  Look up the size this header associates with a
             single-character type code.
  @param  x  type code character
  @return    1 for 'A', 'c', 'C'; 2 for 's', 'S'; 4 for 'i', 'I', 'f',
             'F'; 0 for any other value
 */
static inline int bam_aux_type2size(int x)
{
	switch (x) {
	case 'A': case 'c': case 'C':
		return 1;
	case 's': case 'S':
		return 2;
	case 'i': case 'I': case 'f': case 'F':
		return 4;
	default:
		return 0;
	}
}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
779
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
780 /*********************************
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
781 *** Compatibility with htslib ***
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
782 *********************************/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
783
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
784 typedef bam_header_t bam_hdr_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
785
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
786 #define bam_get_qname(b) bam1_qname(b)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
787 #define bam_get_cigar(b) bam1_cigar(b)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
788
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
789 #define bam_hdr_read(fp) bam_header_read(fp)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
790 #define bam_hdr_write(fp, h) bam_header_write(fp, h)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
791 #define bam_hdr_destroy(fp) bam_header_destroy(fp)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
792
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
793 #endif