annotate bwa-0.7.9a/bwtindex.c @ 0:ce5a8082bbb8 draft

Uploaded
author xilinxu
date Thu, 14 Aug 2014 02:16:48 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
1 /* The MIT License
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
2
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
3 Copyright (c) 2008 Genome Research Ltd (GRL).
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
4
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
5 Permission is hereby granted, free of charge, to any person obtaining
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
6 a copy of this software and associated documentation files (the
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
7 "Software"), to deal in the Software without restriction, including
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
8 without limitation the rights to use, copy, modify, merge, publish,
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
9 distribute, sublicense, and/or sell copies of the Software, and to
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
10 permit persons to whom the Software is furnished to do so, subject to
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
11 the following conditions:
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
12
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
13 The above copyright notice and this permission notice shall be
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
14 included in all copies or substantial portions of the Software.
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
15
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
23 SOFTWARE.
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
24 */
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
25
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
26 /* Contact: Heng Li <lh3@sanger.ac.uk> */
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
27
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
28 #include <stdio.h>
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
29 #include <stdlib.h>
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
30 #include <string.h>
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
31 #include <unistd.h>
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
32 #include <time.h>
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
33 #include <zlib.h>
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
34 #include "bntseq.h"
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
35 #include "bwt.h"
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
36 #include "utils.h"
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
37
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
38 #ifdef _DIVBWT
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
39 #include "divsufsort.h"
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
40 #endif
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
41
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
42 #ifdef USE_MALLOC_WRAPPERS
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
43 # include "malloc_wrap.h"
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
44 #endif
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
45
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
46
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
47 int is_bwt(ubyte_t *T, int n);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
48
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
49 int64_t bwa_seq_len(const char *fn_pac)
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
50 {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
51 FILE *fp;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
52 int64_t pac_len;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
53 ubyte_t c;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
54 fp = xopen(fn_pac, "rb");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
55 err_fseek(fp, -1, SEEK_END);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
56 pac_len = err_ftell(fp);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
57 err_fread_noeof(&c, 1, 1, fp);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
58 err_fclose(fp);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
59 return (pac_len - 1) * 4 + (int)c;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
60 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
61
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
62 bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is)
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
63 {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
64 bwt_t *bwt;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
65 ubyte_t *buf, *buf2;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
66 int i, pac_size;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
67 FILE *fp;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
68
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
69 // initialization
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
70 bwt = (bwt_t*)calloc(1, sizeof(bwt_t));
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
71 bwt->seq_len = bwa_seq_len(fn_pac);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
72 bwt->bwt_size = (bwt->seq_len + 15) >> 4;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
73 fp = xopen(fn_pac, "rb");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
74
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
75 // prepare sequence
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
76 pac_size = (bwt->seq_len>>2) + ((bwt->seq_len&3) == 0? 0 : 1);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
77 buf2 = (ubyte_t*)calloc(pac_size, 1);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
78 err_fread_noeof(buf2, 1, pac_size, fp);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
79 err_fclose(fp);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
80 memset(bwt->L2, 0, 5 * 4);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
81 buf = (ubyte_t*)calloc(bwt->seq_len + 1, 1);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
82 for (i = 0; i < bwt->seq_len; ++i) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
83 buf[i] = buf2[i>>2] >> ((3 - (i&3)) << 1) & 3;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
84 ++bwt->L2[1+buf[i]];
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
85 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
86 for (i = 2; i <= 4; ++i) bwt->L2[i] += bwt->L2[i-1];
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
87 free(buf2);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
88
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
89 // Burrows-Wheeler Transform
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
90 if (use_is) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
91 bwt->primary = is_bwt(buf, bwt->seq_len);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
92 } else {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
93 #ifdef _DIVBWT
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
94 bwt->primary = divbwt(buf, buf, 0, bwt->seq_len);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
95 #else
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
96 err_fatal_simple("libdivsufsort is not compiled in.");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
97 #endif
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
98 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
99 bwt->bwt = (u_int32_t*)calloc(bwt->bwt_size, 4);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
100 for (i = 0; i < bwt->seq_len; ++i)
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
101 bwt->bwt[i>>4] |= buf[i] << ((15 - (i&15)) << 1);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
102 free(buf);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
103 return bwt;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
104 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
105
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
106 int bwa_pac2bwt(int argc, char *argv[]) // the "pac2bwt" command; IMPORTANT: bwt generated at this step CANNOT be used with BWA. bwtupdate is required!
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
107 {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
108 bwt_t *bwt;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
109 int c, use_is = 1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
110 while ((c = getopt(argc, argv, "d")) >= 0) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
111 switch (c) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
112 case 'd': use_is = 0; break;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
113 default: return 1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
114 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
115 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
116 if (optind + 2 > argc) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
117 fprintf(stderr, "Usage: bwa pac2bwt [-d] <in.pac> <out.bwt>\n");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
118 return 1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
119 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
120 bwt = bwt_pac2bwt(argv[optind], use_is);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
121 bwt_dump_bwt(argv[optind+1], bwt);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
122 bwt_destroy(bwt);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
123 return 0;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
124 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
125
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
126 #define bwt_B00(b, k) ((b)->bwt[(k)>>4]>>((~(k)&0xf)<<1)&3)
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
127
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
128 void bwt_bwtupdate_core(bwt_t *bwt)
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
129 {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
130 bwtint_t i, k, c[4], n_occ;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
131 uint32_t *buf;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
132
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
133 n_occ = (bwt->seq_len + OCC_INTERVAL - 1) / OCC_INTERVAL + 1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
134 bwt->bwt_size += n_occ * sizeof(bwtint_t); // the new size
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
135 buf = (uint32_t*)calloc(bwt->bwt_size, 4); // will be the new bwt
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
136 c[0] = c[1] = c[2] = c[3] = 0;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
137 for (i = k = 0; i < bwt->seq_len; ++i) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
138 if (i % OCC_INTERVAL == 0) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
139 memcpy(buf + k, c, sizeof(bwtint_t) * 4);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
140 k += sizeof(bwtint_t); // in fact: sizeof(bwtint_t)=4*(sizeof(bwtint_t)/4)
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
141 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
142 if (i % 16 == 0) buf[k++] = bwt->bwt[i/16]; // 16 == sizeof(uint32_t)/2
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
143 ++c[bwt_B00(bwt, i)];
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
144 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
145 // the last element
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
146 memcpy(buf + k, c, sizeof(bwtint_t) * 4);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
147 xassert(k + sizeof(bwtint_t) == bwt->bwt_size, "inconsistent bwt_size");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
148 // update bwt
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
149 free(bwt->bwt); bwt->bwt = buf;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
150 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
151
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
152 int bwa_bwtupdate(int argc, char *argv[]) // the "bwtupdate" command
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
153 {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
154 bwt_t *bwt;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
155 if (argc < 2) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
156 fprintf(stderr, "Usage: bwa bwtupdate <the.bwt>\n");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
157 return 1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
158 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
159 bwt = bwt_restore_bwt(argv[1]);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
160 bwt_bwtupdate_core(bwt);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
161 bwt_dump_bwt(argv[1], bwt);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
162 bwt_destroy(bwt);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
163 return 0;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
164 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
165
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
166 int bwa_bwt2sa(int argc, char *argv[]) // the "bwt2sa" command
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
167 {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
168 bwt_t *bwt;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
169 int c, sa_intv = 32;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
170 while ((c = getopt(argc, argv, "i:")) >= 0) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
171 switch (c) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
172 case 'i': sa_intv = atoi(optarg); break;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
173 default: return 1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
174 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
175 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
176 if (optind + 2 > argc) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
177 fprintf(stderr, "Usage: bwa bwt2sa [-i %d] <in.bwt> <out.sa>\n", sa_intv);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
178 return 1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
179 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
180 bwt = bwt_restore_bwt(argv[optind]);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
181 bwt_cal_sa(bwt, sa_intv);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
182 bwt_dump_sa(argv[optind+1], bwt);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
183 bwt_destroy(bwt);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
184 return 0;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
185 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
186
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
187 int bwa_index(int argc, char *argv[]) // the "index" command
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
188 {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
189 extern void bwa_pac_rev_core(const char *fn, const char *fn_rev);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
190
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
191 char *prefix = 0, *str, *str2, *str3;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
192 int c, algo_type = 0, is_64 = 0;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
193 clock_t t;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
194 int64_t l_pac;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
195
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
196 while ((c = getopt(argc, argv, "6a:p:")) >= 0) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
197 switch (c) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
198 case 'a': // if -a is not set, algo_type will be determined later
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
199 if (strcmp(optarg, "div") == 0) algo_type = 1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
200 else if (strcmp(optarg, "bwtsw") == 0) algo_type = 2;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
201 else if (strcmp(optarg, "is") == 0) algo_type = 3;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
202 else err_fatal(__func__, "unknown algorithm: '%s'.", optarg);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
203 break;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
204 case 'p': prefix = strdup(optarg); break;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
205 case '6': is_64 = 1; break;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
206 default: return 1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
207 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
208 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
209
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
210 if (optind + 1 > argc) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
211 fprintf(stderr, "\n");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
212 fprintf(stderr, "Usage: bwa index [-a bwtsw|is] [-c] <in.fasta>\n\n");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
213 fprintf(stderr, "Options: -a STR BWT construction algorithm: bwtsw or is [auto]\n");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
214 fprintf(stderr, " -p STR prefix of the index [same as fasta name]\n");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
215 fprintf(stderr, " -6 index files named as <in.fasta>.64.* instead of <in.fasta>.* \n");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
216 fprintf(stderr, "\n");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
217 fprintf(stderr, "Warning: `-a bwtsw' does not work for short genomes, while `-a is' and\n");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
218 fprintf(stderr, " `-a div' do not work not for long genomes. Please choose `-a'\n");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
219 fprintf(stderr, " according to the length of the genome.\n\n");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
220 return 1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
221 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
222 if (prefix == 0) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
223 prefix = malloc(strlen(argv[optind]) + 4);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
224 strcpy(prefix, argv[optind]);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
225 if (is_64) strcat(prefix, ".64");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
226 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
227 str = (char*)calloc(strlen(prefix) + 10, 1);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
228 str2 = (char*)calloc(strlen(prefix) + 10, 1);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
229 str3 = (char*)calloc(strlen(prefix) + 10, 1);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
230
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
231 { // nucleotide indexing
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
232 gzFile fp = xzopen(argv[optind], "r");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
233 t = clock();
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
234 fprintf(stderr, "[bwa_index] Pack FASTA... ");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
235 l_pac = bns_fasta2bntseq(fp, prefix, 0);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
236 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
237 err_gzclose(fp);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
238 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
239 if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
240 {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
241 strcpy(str, prefix); strcat(str, ".pac");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
242 strcpy(str2, prefix); strcat(str2, ".bwt");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
243 t = clock();
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
244 fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
245 if (algo_type == 2) bwt_bwtgen(str, str2);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
246 else if (algo_type == 1 || algo_type == 3) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
247 bwt_t *bwt;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
248 bwt = bwt_pac2bwt(str, algo_type == 3);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
249 bwt_dump_bwt(str2, bwt);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
250 bwt_destroy(bwt);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
251 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
252 fprintf(stderr, "[bwa_index] %.2f seconds elapse.\n", (float)(clock() - t) / CLOCKS_PER_SEC);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
253 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
254 {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
255 bwt_t *bwt;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
256 strcpy(str, prefix); strcat(str, ".bwt");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
257 t = clock();
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
258 fprintf(stderr, "[bwa_index] Update BWT... ");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
259 bwt = bwt_restore_bwt(str);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
260 bwt_bwtupdate_core(bwt);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
261 bwt_dump_bwt(str, bwt);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
262 bwt_destroy(bwt);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
263 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
264 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
265 {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
266 gzFile fp = xzopen(argv[optind], "r");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
267 t = clock();
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
268 fprintf(stderr, "[bwa_index] Pack forward-only FASTA... ");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
269 l_pac = bns_fasta2bntseq(fp, prefix, 1);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
270 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
271 err_gzclose(fp);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
272 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
273 {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
274 bwt_t *bwt;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
275 strcpy(str, prefix); strcat(str, ".bwt");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
276 strcpy(str3, prefix); strcat(str3, ".sa");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
277 t = clock();
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
278 fprintf(stderr, "[bwa_index] Construct SA from BWT and Occ... ");
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
279 bwt = bwt_restore_bwt(str);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
280 bwt_cal_sa(bwt, 32);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
281 bwt_dump_sa(str3, bwt);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
282 bwt_destroy(bwt);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
283 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
284 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
285 free(str3); free(str2); free(str); free(prefix);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
286 return 0;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
287 }