comparison pyPRADA_1.2/tools/samtools-0.1.16/kseq.h @ 0:acc2ca1a3ba4

Uploaded
author siyuan
date Thu, 20 Feb 2014 00:44:58 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:acc2ca1a3ba4
1 /* The MIT License
2
3 Copyright (c) 2008 Genome Research Ltd (GRL).
4
5 Permission is hereby granted, free of charge, to any person obtaining
6 a copy of this software and associated documentation files (the
7 "Software"), to deal in the Software without restriction, including
8 without limitation the rights to use, copy, modify, merge, publish,
9 distribute, sublicense, and/or sell copies of the Software, and to
10 permit persons to whom the Software is furnished to do so, subject to
11 the following conditions:
12
13 The above copyright notice and this permission notice shall be
14 included in all copies or substantial portions of the Software.
15
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 SOFTWARE.
24 */
25
26 /* Contact: Heng Li <lh3@sanger.ac.uk> */
27
28 /*
29 2009-07-16 (lh3): in kstream_t, change "char*" to "unsigned char*"
30 */
31
32 /* Last Modified: 12APR2009 */
33
34 #ifndef AC_KSEQ_H
35 #define AC_KSEQ_H
36
37 #include <ctype.h>
38 #include <string.h>
39 #include <stdlib.h>
40
/* Reserved delimiter codes accepted by ks_getuntil(). Any delimiter value
 * greater than KS_SEP_MAX is matched as a literal character instead. */
#define KS_SEP_SPACE 0 /* split on any character for which isspace() is true */
#define KS_SEP_TAB   1 /* split on isspace() characters other than ' ' */
#define KS_SEP_MAX   1 /* largest reserved delimiter code */
44
/* Declares kstream_t, a buffered reader on top of a handle of type `type_t`.
 *   buf    - read buffer
 *   begin  - index of the next unread byte in buf
 *   end    - number of bytes the last read placed in buf
 *   is_eof - set once a read returned fewer bytes than were requested
 *   f      - the underlying handle handed to the read function
 */
#define __KS_TYPE(type_t) \
	typedef struct __kstream_t { \
		unsigned char *buf; \
		int begin; \
		int end; \
		int is_eof; \
		type_t f; \
	} kstream_t;
51
/* Non-zero when the stream has hit end-of-file AND the buffer is drained. */
#define ks_eof(ks) ((ks)->is_eof && !((ks)->begin < (ks)->end))
/* Resets the buffer cursors and the EOF flag; the expression evaluates to 0.
 * Only the kstream_t state is touched, not the underlying handle. */
#define ks_rewind(ks) ((ks)->end = 0, (ks)->begin = 0, (ks)->is_eof = 0)
54
/* Generates the constructor/destructor pair for kstream_t.
 *
 * ks_init(f):     allocates a zeroed stream with a `__bufsize`-byte buffer
 *                 bound to handle `f`. Returns NULL if either allocation
 *                 fails (nothing is leaked in that case).
 * ks_destroy(ks): frees the buffer and the stream; NULL is accepted.
 *                 The handle `f` itself is not closed here.
 */
#define __KS_BASIC(type_t, __bufsize) \
	static inline kstream_t *ks_init(type_t f) \
	{ \
		kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
		if (ks == NULL) return NULL; \
		/* explicit cast keeps the header usable from C++ as well */ \
		ks->buf = (unsigned char*)malloc(__bufsize); \
		if (ks->buf == NULL) { \
			free(ks); \
			return NULL; \
		} \
		ks->f = f; \
		return ks; \
	} \
	static inline void ks_destroy(kstream_t *ks) \
	{ \
		if (ks) { \
			free(ks->buf); \
			free(ks); \
		} \
	}
70
/* Generates ks_getc(): return the next byte of the stream as an int in
 * 0..255, or -1 when no more data is available.
 *
 * `__read(f, buf, n)` must return the number of bytes placed in buf. A short
 * read marks the stream as EOF. A zero or negative result (the latter being
 * how readers such as read()/gzread() report errors) ends the stream instead
 * of letting a negative `end` slip through and yield a stale byte from buf.
 */
#define __KS_GETC(__read, __bufsize) \
	static inline int ks_getc(kstream_t *ks) \
	{ \
		if (ks->is_eof && ks->begin >= ks->end) return -1; \
		if (ks->begin >= ks->end) { /* buffer drained: refill it */ \
			ks->begin = 0; \
			ks->end = __read(ks->f, ks->buf, __bufsize); \
			if (ks->end < (__bufsize)) ks->is_eof = 1; \
			if (ks->end <= 0) { /* nothing read, or the reader failed */ \
				ks->end = 0; \
				return -1; \
			} \
		} \
		return (int)ks->buf[ks->begin++]; \
	}
83
#ifndef KSTRING_T
#define KSTRING_T kstring_t
/* Growable character buffer shared by the k* utilities. */
typedef struct __kstring_t {
	size_t l; /* number of characters currently stored in s */
	size_t m; /* number of bytes allocated for s */
	char *s;  /* heap buffer; NULL until first allocated */
} kstring_t;
#endif
91
#ifndef kroundup32
/* Rounds the lvalue x up, in place, to the next power of two (values that
 * already are a power of two are kept). The bit-smearing covers 32 bits only.
 * x is evaluated several times, so it must be free of side effects. */
#define kroundup32(x) \
	(--(x), \
	 (x) |= (x) >> 1, \
	 (x) |= (x) >> 2, \
	 (x) |= (x) >> 4, \
	 (x) |= (x) >> 8, \
	 (x) |= (x) >> 16, \
	 ++(x))
#endif
95
/* Generates ks_getuntil(): read one delimiter-terminated field.
 *
 *   ks        - stream to read from
 *   delimiter - KS_SEP_SPACE, KS_SEP_TAB, or a literal character code
 *               greater than KS_SEP_MAX
 *   str       - receives the field (delimiter excluded); its buffer is
 *               reused and grown with realloc() as needed
 *   dret      - if non-NULL, receives the delimiter that ended the field,
 *               or 0 when the field was ended by end of input
 *
 * Returns -1 if the stream was already exhausted on entry (str->l is reset
 * to 0, str->s is left untouched); otherwise the field length, with str->s
 * NUL-terminated.
 *
 * An empty field no longer replaces an existing str->s with a fresh
 * calloc() block (which leaked the old buffer); a new block is only
 * allocated when str->s is still NULL. A zero or negative result from
 * `__read` ends the input.
 *
 * NOTE(review): the realloc()/calloc() results are not checked.
 */
#define __KS_GETUNTIL(__read, __bufsize) \
	static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
	{ \
		if (dret) *dret = 0; \
		str->l = 0; \
		if (ks->begin >= ks->end && ks->is_eof) return -1; \
		for (;;) { \
			int i; \
			if (ks->begin >= ks->end) { /* buffer drained: refill or stop */ \
				if (ks->is_eof) break; \
				ks->begin = 0; \
				ks->end = __read(ks->f, ks->buf, __bufsize); \
				if (ks->end < (__bufsize)) ks->is_eof = 1; \
				if (ks->end <= 0) { /* nothing read, or the reader failed */ \
					ks->end = 0; \
					break; \
				} \
			} \
			/* find the first delimiter in the unread part of the buffer */ \
			if (delimiter > KS_SEP_MAX) { \
				for (i = ks->begin; i < ks->end; ++i) \
					if (ks->buf[i] == delimiter) break; \
			} else if (delimiter == KS_SEP_SPACE) { \
				for (i = ks->begin; i < ks->end; ++i) \
					if (isspace(ks->buf[i])) break; \
			} else if (delimiter == KS_SEP_TAB) { \
				for (i = ks->begin; i < ks->end; ++i) \
					if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
			} else i = 0; /* never come to here! */ \
			/* make room for the new chunk plus the terminating NUL */ \
			if (str->m - str->l < (size_t)(i - ks->begin + 1)) { \
				str->m = str->l + (i - ks->begin) + 1; \
				kroundup32(str->m); \
				str->s = (char*)realloc(str->s, str->m); \
			} \
			memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
			str->l = str->l + (i - ks->begin); \
			ks->begin = i + 1; /* step over the delimiter */ \
			if (i < ks->end) { /* delimiter found inside the buffer */ \
				if (dret) *dret = ks->buf[i]; \
				break; \
			} \
		} \
		if (str->s == NULL) { /* nothing was ever stored: hand back "" */ \
			str->m = 1; \
			str->s = (char*)calloc(1, 1); \
		} \
		str->s[str->l] = '\0'; \
		return (int)str->l; \
	}
142
/* Instantiates the complete kstream_t API for one handle type:
 *   type_t    - type of the underlying handle
 *   __read    - function or macro called as __read(handle, buffer, nbytes)
 *   __bufsize - size in bytes of the internal read buffer
 * Expands to the kstream_t type plus ks_init, ks_destroy, ks_getc and
 * ks_getuntil.
 */
#define KSTREAM_INIT(type_t, __read, __bufsize) \
	__KS_TYPE(type_t) \
	__KS_BASIC(type_t, __bufsize) \
	__KS_GETC(__read, __bufsize) \
	__KS_GETUNTIL(__read, __bufsize)
148
/* Generates the lifecycle helpers for kseq_t.
 *
 * kseq_init(fd):    allocates a zeroed record reader bound to a new stream
 *                   over `fd`. Returns NULL if an allocation fails.
 * kseq_rewind(ks):  clears the pending header character and the stream's
 *                   buffer state; the underlying handle is not repositioned
 *                   here.
 * kseq_destroy(ks): frees the four string buffers, the stream and the
 *                   reader; NULL is accepted. The handle is not closed.
 */
#define __KSEQ_BASIC(type_t) \
	static inline kseq_t *kseq_init(type_t fd) \
	{ \
		kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
		if (s == NULL) return NULL; \
		s->f = ks_init(fd); \
		if (s->f == NULL) { /* no stream: do not hand out a half-built reader */ \
			free(s); \
			return NULL; \
		} \
		return s; \
	} \
	static inline void kseq_rewind(kseq_t *ks) \
	{ \
		ks->last_char = 0; \
		ks->f->is_eof = ks->f->begin = ks->f->end = 0; \
	} \
	static inline void kseq_destroy(kseq_t *ks) \
	{ \
		if (!ks) return; \
		free(ks->name.s); \
		free(ks->comment.s); \
		free(ks->seq.s); \
		free(ks->qual.s); \
		ks_destroy(ks->f); \
		free(ks); \
	}
168
/* Generates kseq_read(): parse the next FASTA/FASTQ record into `seq`.
 *
 * On success seq->name, seq->comment and seq->seq hold the record (and
 * seq->qual the quality string for FASTQ); the buffers are reused between
 * calls.
 *
 * Return value:
 *   >=0  length of the sequence (normal)
 *   -1   end-of-file
 *   -2   truncated quality string
 *
 * The sequence buffer is now guaranteed to exist before it is
 * NUL-terminated, so a record with an empty sequence read into a fresh
 * kseq_t no longer writes through a NULL pointer.
 *
 * NOTE(review): realloc() results are not checked.
 */
#define __KSEQ_READ \
	static int kseq_read(kseq_t *seq) \
	{ \
		int c; \
		kstream_t *ks = seq->f; \
		if (seq->last_char == 0) { /* then jump to the next header line */ \
			while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
			if (c == -1) return -1; /* end of file */ \
			seq->last_char = c; \
		} /* the first header char has been read */ \
		seq->comment.l = seq->seq.l = seq->qual.l = 0; \
		if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; \
		if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0); \
		while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
			if (isgraph(c)) { /* printable non-space character */ \
				if (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */ \
					seq->seq.m = seq->seq.l + 2; \
					kroundup32(seq->seq.m); /* rounded to next closest 2^k */ \
					seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
				} \
				seq->seq.s[seq->seq.l++] = (char)c; \
			} \
		} \
		if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
		if (seq->seq.l + 1 >= seq->seq.m) { /* empty sequence: buffer may still be NULL */ \
			seq->seq.m = seq->seq.l + 2; \
			kroundup32(seq->seq.m); \
			seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
		} \
		seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \
		if (c != '+') return (int)seq->seq.l; /* FASTA */ \
		if (seq->qual.m < seq->seq.m) { /* allocate enough memory */ \
			seq->qual.m = seq->seq.m; \
			seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
		} \
		while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
		if (c == -1) return -2; /* we should not stop here */ \
		while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l) \
			if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c; \
		seq->qual.s[seq->qual.l] = 0; /* null terminated string */ \
		seq->last_char = 0; /* we have not come to the next header line */ \
		if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \
		return (int)seq->seq.l; \
	}
213
/* Declares kseq_t, the state of one FASTA/FASTQ reader.
 *   name, comment, seq, qual - buffers filled for the current record
 *   last_char - header character ('>' or '@') already consumed for the
 *               next record, or 0 if none is pending
 *   f         - the buffered input stream
 * The `type_t` argument is not used by the expansion.
 */
#define __KSEQ_TYPE(type_t) \
	typedef struct { \
		kstring_t name; \
		kstring_t comment; \
		kstring_t seq; \
		kstring_t qual; \
		int last_char; \
		kstream_t *f; \
	} kseq_t;
220
/* Instantiates the whole FASTA/FASTQ reader for one handle type:
 *   type_t - type of the underlying handle
 *   __read - function or macro called as __read(handle, buffer, nbytes)
 * The stream layer is created with a 4096-byte read buffer.
 */
#define KSEQ_INIT(type_t, __read) \
	KSTREAM_INIT(type_t, __read, 4096) \
	__KSEQ_TYPE(type_t) \
	__KSEQ_BASIC(type_t) \
	__KSEQ_READ
226
227 #endif