annotate pyPRADA_1.2/tools/bwa-0.5.7-mh/is.c @ 3:f17965495ec9 draft default tip

Uploaded
author siyuan
date Tue, 11 Mar 2014 12:14:01 -0400
parents acc2ca1a3ba4
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
1 /*
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
2 * sais.c for sais-lite
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
3 * Copyright (c) 2008 Yuta Mori All Rights Reserved.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
4 *
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
5 * Permission is hereby granted, free of charge, to any person
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
6 * obtaining a copy of this software and associated documentation
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
7 * files (the "Software"), to deal in the Software without
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
8 * restriction, including without limitation the rights to use,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
9 * copy, modify, merge, publish, distribute, sublicense, and/or sell
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
10 * copies of the Software, and to permit persons to whom the
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
11 * Software is furnished to do so, subject to the following
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
12 * conditions:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
13 *
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
14 * The above copyright notice and this permission notice shall be
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
15 * included in all copies or substantial portions of the Software.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
16 *
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
19 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
21 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
22 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
24 * OTHER DEALINGS IN THE SOFTWARE.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
25 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
26
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
27 #include <stdlib.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
28
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
29 typedef unsigned char ubyte_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
30 #define chr(i) (cs == sizeof(int) ? ((const int *)T)[i]:((const unsigned char *)T)[i])
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
31
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
32 /* find the start or end of each bucket */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
33 static void getCounts(const unsigned char *T, int *C, int n, int k, int cs)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
34 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
35 int i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
36 for (i = 0; i < k; ++i) C[i] = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
37 for (i = 0; i < n; ++i) ++C[chr(i)];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
38 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
39 static void getBuckets(const int *C, int *B, int k, int end)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
40 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
41 int i, sum = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
42 if (end) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
43 for (i = 0; i < k; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
44 sum += C[i];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
45 B[i] = sum;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
46 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
47 } else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
48 for (i = 0; i < k; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
49 sum += C[i];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
50 B[i] = sum - C[i];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
51 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
52 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
53 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
54
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
55 /* compute SA */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
56 static void induceSA(const unsigned char *T, int *SA, int *C, int *B, int n, int k, int cs)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
57 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
58 int *b, i, j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
59 int c0, c1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
60 /* compute SAl */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
61 if (C == B) getCounts(T, C, n, k, cs);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
62 getBuckets(C, B, k, 0); /* find starts of buckets */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
63 j = n - 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
64 b = SA + B[c1 = chr(j)];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
65 *b++ = ((0 < j) && (chr(j - 1) < c1)) ? ~j : j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
66 for (i = 0; i < n; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
67 j = SA[i], SA[i] = ~j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
68 if (0 < j) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
69 --j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
70 if ((c0 = chr(j)) != c1) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
71 B[c1] = b - SA;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
72 b = SA + B[c1 = c0];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
73 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
74 *b++ = ((0 < j) && (chr(j - 1) < c1)) ? ~j : j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
75 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
76 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
77 /* compute SAs */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
78 if (C == B) getCounts(T, C, n, k, cs);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
79 getBuckets(C, B, k, 1); /* find ends of buckets */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
80 for (i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
81 if (0 < (j = SA[i])) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
82 --j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
83 if ((c0 = chr(j)) != c1) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
84 B[c1] = b - SA;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
85 b = SA + B[c1 = c0];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
86 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
87 *--b = ((j == 0) || (chr(j - 1) > c1)) ? ~j : j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
88 } else SA[i] = ~j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
89 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
90 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
91
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
92 /*
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
93 * find the suffix array SA of T[0..n-1] in {0..k-1}^n use a working
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
94 * space (excluding T and SA) of at most 2n+O(1) for a constant alphabet
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
95 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
96 static int sais_main(const unsigned char *T, int *SA, int fs, int n, int k, int cs)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
97 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
98 int *C, *B, *RA;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
99 int i, j, c, m, p, q, plen, qlen, name;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
100 int c0, c1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
101 int diff;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
102
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
103 /* stage 1: reduce the problem by at least 1/2 sort all the
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
104 * S-substrings */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
105 if (k <= fs) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
106 C = SA + n;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
107 B = (k <= (fs - k)) ? C + k : C;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
108 } else if ((C = B = (int *) malloc(k * sizeof(int))) == NULL) return -2;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
109 getCounts(T, C, n, k, cs);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
110 getBuckets(C, B, k, 1); /* find ends of buckets */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
111 for (i = 0; i < n; ++i) SA[i] = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
112 for (i = n - 2, c = 0, c1 = chr(n - 1); 0 <= i; --i, c1 = c0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
113 if ((c0 = chr(i)) < (c1 + c)) c = 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
114 else if (c != 0) SA[--B[c1]] = i + 1, c = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
115 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
116 induceSA(T, SA, C, B, n, k, cs);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
117 if (fs < k) free(C);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
118 /* compact all the sorted substrings into the first m items of SA
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
119 * 2*m must be not larger than n (proveable) */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
120 for (i = 0, m = 0; i < n; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
121 p = SA[i];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
122 if ((0 < p) && (chr(p - 1) > (c0 = chr(p)))) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
123 for (j = p + 1; (j < n) && (c0 == (c1 = chr(j))); ++j);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
124 if ((j < n) && (c0 < c1)) SA[m++] = p;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
125 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
126 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
127 for (i = m; i < n; ++i) SA[i] = 0; /* init the name array buffer */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
128 /* store the length of all substrings */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
129 for (i = n - 2, j = n, c = 0, c1 = chr(n - 1); 0 <= i; --i, c1 = c0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
130 if ((c0 = chr(i)) < (c1 + c)) c = 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
131 else if (c != 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
132 SA[m + ((i + 1) >> 1)] = j - i - 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
133 j = i + 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
134 c = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
135 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
136 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
137 /* find the lexicographic names of all substrings */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
138 for (i = 0, name = 0, q = n, qlen = 0; i < m; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
139 p = SA[i], plen = SA[m + (p >> 1)], diff = 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
140 if (plen == qlen) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
141 for (j = 0; (j < plen) && (chr(p + j) == chr(q + j)); j++);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
142 if (j == plen) diff = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
143 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
144 if (diff != 0) ++name, q = p, qlen = plen;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
145 SA[m + (p >> 1)] = name;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
146 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
147
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
148 /* stage 2: solve the reduced problem recurse if names are not yet
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
149 * unique */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
150 if (name < m) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
151 RA = SA + n + fs - m;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
152 for (i = n - 1, j = m - 1; m <= i; --i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
153 if (SA[i] != 0) RA[j--] = SA[i] - 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
154 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
155 if (sais_main((unsigned char *) RA, SA, fs + n - m * 2, m, name, sizeof(int)) != 0) return -2;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
156 for (i = n - 2, j = m - 1, c = 0, c1 = chr(n - 1); 0 <= i; --i, c1 = c0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
157 if ((c0 = chr(i)) < (c1 + c)) c = 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
158 else if (c != 0) RA[j--] = i + 1, c = 0; /* get p1 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
159 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
160 for (i = 0; i < m; ++i) SA[i] = RA[SA[i]]; /* get index */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
161 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
162 /* stage 3: induce the result for the original problem */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
163 if (k <= fs) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
164 C = SA + n;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
165 B = (k <= (fs - k)) ? C + k : C;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
166 } else if ((C = B = (int *) malloc(k * sizeof(int))) == NULL) return -2;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
167 /* put all left-most S characters into their buckets */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
168 getCounts(T, C, n, k, cs);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
169 getBuckets(C, B, k, 1); /* find ends of buckets */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
170 for (i = m; i < n; ++i) SA[i] = 0; /* init SA[m..n-1] */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
171 for (i = m - 1; 0 <= i; --i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
172 j = SA[i], SA[i] = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
173 SA[--B[chr(j)]] = j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
174 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
175 induceSA(T, SA, C, B, n, k, cs);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
176 if (fs < k) free(C);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
177 return 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
178 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
179
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
180 /**
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
181 * Constructs the suffix array of a given string.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
182 * @param T[0..n-1] The input string.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
183 * @param SA[0..n] The output array of suffixes.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
184 * @param n The length of the given string.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
185 * @return 0 if no error occurred
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
186 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
187 int is_sa(const ubyte_t *T, int *SA, int n)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
188 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
189 if ((T == NULL) || (SA == NULL) || (n < 0)) return -1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
190 SA[0] = n;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
191 if (n <= 1) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
192 if (n == 1) SA[1] = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
193 return 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
194 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
195 return sais_main(T, SA+1, 0, n, 256, 1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
196 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
197
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
198 /**
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
199 * Constructs the burrows-wheeler transformed string of a given string.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
200 * @param T[0..n-1] The input string.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
201 * @param n The length of the given string.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
202 * @return The primary index if no error occurred, -1 or -2 otherwise.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
203 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
204 int is_bwt(ubyte_t *T, int n)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
205 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
206 int *SA, i, primary = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
207 SA = (int*)calloc(n+1, sizeof(int));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
208 is_sa(T, SA, n);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
209
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
210 for (i = 0; i <= n; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
211 if (SA[i] == 0) primary = i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
212 else SA[i] = T[SA[i] - 1];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
213 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
214 for (i = 0; i < primary; ++i) T[i] = SA[i];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
215 for (; i < n; ++i) T[i] = SA[i + 1];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
216 free(SA);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
217 return primary;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
218 }