comparison ezBAMQC/src/htslib/kstring.c @ 0:dfa3745e5fd8

Uploaded
author youngkim
date Thu, 24 Mar 2016 17:12:52 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:dfa3745e5fd8
1 /* The MIT License
2
3 Copyright (C) 2011 by Attractive Chaos <attractor@live.co.uk>
4
5 Permission is hereby granted, free of charge, to any person obtaining
6 a copy of this software and associated documentation files (the
7 "Software"), to deal in the Software without restriction, including
8 without limitation the rights to use, copy, modify, merge, publish,
9 distribute, sublicense, and/or sell copies of the Software, and to
10 permit persons to whom the Software is furnished to do so, subject to
11 the following conditions:
12
13 The above copyright notice and this permission notice shall be
14 included in all copies or substantial portions of the Software.
15
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 SOFTWARE.
24 */
25
26 #include <stdarg.h>
27 #include <stdio.h>
28 #include <ctype.h>
29 #include <string.h>
30 #include <stdint.h>
31 #include "htslib/kstring.h"
32
33 int kvsprintf(kstring_t *s, const char *fmt, va_list ap)
34 {
35 va_list args;
36 int l;
37 va_copy(args, ap);
38 l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args); // This line does not work with glibc 2.0. See `man snprintf'.
39 va_end(args);
40 if (l + 1 > s->m - s->l) {
41 s->m = s->l + l + 2;
42 kroundup32(s->m);
43 s->s = (char*)realloc(s->s, s->m);
44 va_copy(args, ap);
45 l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args);
46 va_end(args);
47 }
48 s->l += l;
49 return l;
50 }
51
52 int ksprintf(kstring_t *s, const char *fmt, ...)
53 {
54 va_list ap;
55 int l;
56 va_start(ap, fmt);
57 l = kvsprintf(s, fmt, ap);
58 va_end(ap);
59 return l;
60 }
61
62 char *kstrtok(const char *str, const char *sep, ks_tokaux_t *aux)
63 {
64 const char *p, *start;
65 if (sep) { // set up the table
66 if (str == 0 && (aux->tab[0]&1)) return 0; // no need to set up if we have finished
67 aux->finished = 0;
68 if (sep[1]) {
69 aux->sep = -1;
70 aux->tab[0] = aux->tab[1] = aux->tab[2] = aux->tab[3] = 0;
71 for (p = sep; *p; ++p) aux->tab[*p>>6] |= 1ull<<(*p&0x3f);
72 } else aux->sep = sep[0];
73 }
74 if (aux->finished) return 0;
75 else if (str) aux->p = str - 1, aux->finished = 0;
76 if (aux->sep < 0) {
77 for (p = start = aux->p + 1; *p; ++p)
78 if (aux->tab[*p>>6]>>(*p&0x3f)&1) break;
79 } else {
80 for (p = start = aux->p + 1; *p; ++p)
81 if (*p == aux->sep) break;
82 }
83 aux->p = p; // end of token
84 if (*p == 0) aux->finished = 1; // no more tokens
85 return (char*)start;
86 }
87
88 // s MUST BE a null terminated string; l = strlen(s)
89 int ksplit_core(char *s, int delimiter, int *_max, int **_offsets)
90 {
91 int i, n, max, last_char, last_start, *offsets, l;
92 n = 0; max = *_max; offsets = *_offsets;
93 l = strlen(s);
94
95 #define __ksplit_aux do { \
96 if (_offsets) { \
97 s[i] = 0; \
98 if (n == max) { \
99 int *tmp; \
100 max = max? max<<1 : 2; \
101 if ((tmp = (int*)realloc(offsets, sizeof(int) * max))) { \
102 offsets = tmp; \
103 } else { \
104 free(offsets); \
105 *_offsets = NULL; \
106 return 0; \
107 } \
108 } \
109 offsets[n++] = last_start; \
110 } else ++n; \
111 } while (0)
112
113 for (i = 0, last_char = last_start = 0; i <= l; ++i) {
114 if (delimiter == 0) {
115 if (isspace(s[i]) || s[i] == 0) {
116 if (isgraph(last_char)) __ksplit_aux; // the end of a field
117 } else {
118 if (isspace(last_char) || last_char == 0) last_start = i;
119 }
120 } else {
121 if (s[i] == delimiter || s[i] == 0) {
122 if (last_char != 0 && last_char != delimiter) __ksplit_aux; // the end of a field
123 } else {
124 if (last_char == delimiter || last_char == 0) last_start = i;
125 }
126 }
127 last_char = s[i];
128 }
129 *_max = max; *_offsets = offsets;
130 return n;
131 }
132
133 /**********************
134 * Boyer-Moore search *
135 **********************/
136
137 typedef unsigned char ubyte_t;
138
139 // reference: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html
140 static int *ksBM_prep(const ubyte_t *pat, int m)
141 {
142 int i, *suff, *prep, *bmGs, *bmBc;
143 prep = (int*)calloc(m + 256, sizeof(int));
144 bmGs = prep; bmBc = prep + m;
145 { // preBmBc()
146 for (i = 0; i < 256; ++i) bmBc[i] = m;
147 for (i = 0; i < m - 1; ++i) bmBc[pat[i]] = m - i - 1;
148 }
149 suff = (int*)calloc(m, sizeof(int));
150 { // suffixes()
151 int f = 0, g;
152 suff[m - 1] = m;
153 g = m - 1;
154 for (i = m - 2; i >= 0; --i) {
155 if (i > g && suff[i + m - 1 - f] < i - g)
156 suff[i] = suff[i + m - 1 - f];
157 else {
158 if (i < g) g = i;
159 f = i;
160 while (g >= 0 && pat[g] == pat[g + m - 1 - f]) --g;
161 suff[i] = f - g;
162 }
163 }
164 }
165 { // preBmGs()
166 int j = 0;
167 for (i = 0; i < m; ++i) bmGs[i] = m;
168 for (i = m - 1; i >= 0; --i)
169 if (suff[i] == i + 1)
170 for (; j < m - 1 - i; ++j)
171 if (bmGs[j] == m)
172 bmGs[j] = m - 1 - i;
173 for (i = 0; i <= m - 2; ++i)
174 bmGs[m - 1 - suff[i]] = m - 1 - i;
175 }
176 free(suff);
177 return prep;
178 }
179
180 void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep)
181 {
182 int i, j, *prep = 0, *bmGs, *bmBc;
183 const ubyte_t *str, *pat;
184 str = (const ubyte_t*)_str; pat = (const ubyte_t*)_pat;
185 prep = (_prep == 0 || *_prep == 0)? ksBM_prep(pat, m) : *_prep;
186 if (_prep && *_prep == 0) *_prep = prep;
187 bmGs = prep; bmBc = prep + m;
188 j = 0;
189 while (j <= n - m) {
190 for (i = m - 1; i >= 0 && pat[i] == str[i+j]; --i);
191 if (i >= 0) {
192 int max = bmBc[str[i+j]] - m + 1 + i;
193 if (max < bmGs[i]) max = bmGs[i];
194 j += max;
195 } else return (void*)(str + j);
196 }
197 if (_prep == 0) free(prep);
198 return 0;
199 }
200
201 char *kstrstr(const char *str, const char *pat, int **_prep)
202 {
203 return (char*)kmemmem(str, strlen(str), pat, strlen(pat), _prep);
204 }
205
206 char *kstrnstr(const char *str, const char *pat, int n, int **_prep)
207 {
208 return (char*)kmemmem(str, n, pat, strlen(pat), _prep);
209 }
210
211 /***********************
212 * The main() function *
213 ***********************/
214
215 #ifdef KSTRING_MAIN
216 #include <stdio.h>
217 int main()
218 {
219 kstring_t *s;
220 int *fields, n, i;
221 ks_tokaux_t aux;
222 char *p;
223 s = (kstring_t*)calloc(1, sizeof(kstring_t));
224 // test ksprintf()
225 ksprintf(s, " abcdefg: %d ", 100);
226 printf("'%s'\n", s->s);
227 // test ksplit()
228 fields = ksplit(s, 0, &n);
229 for (i = 0; i < n; ++i)
230 printf("field[%d] = '%s'\n", i, s->s + fields[i]);
231 // test kstrtok()
232 s->l = 0;
233 for (p = kstrtok("ab:cde:fg/hij::k", ":/", &aux); p; p = kstrtok(0, 0, &aux)) {
234 kputsn(p, aux.p - p, s);
235 kputc('\n', s);
236 }
237 printf("%s", s->s);
238 // free
239 free(s->s); free(s); free(fields);
240
241 {
242 static char *str = "abcdefgcdgcagtcakcdcd";
243 static char *pat = "cd";
244 char *ret, *s = str;
245 int *prep = 0;
246 while ((ret = kstrstr(s, pat, &prep)) != 0) {
247 printf("match: %s\n", ret);
248 s = ret + prep[0];
249 }
250 free(prep);
251 }
252 return 0;
253 }
254 #endif