annotate pyPRADA_1.2/tools/bwa-0.5.7-mh/bwtindex.c @ 0:acc2ca1a3ba4

Uploaded
author siyuan
date Thu, 20 Feb 2014 00:44:58 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
1 /* The MIT License
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
2
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
3 Copyright (c) 2008 Genome Research Ltd (GRL).
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
4
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
5 Permission is hereby granted, free of charge, to any person obtaining
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
6 a copy of this software and associated documentation files (the
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
7 "Software"), to deal in the Software without restriction, including
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
8 without limitation the rights to use, copy, modify, merge, publish,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
9 distribute, sublicense, and/or sell copies of the Software, and to
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
10 permit persons to whom the Software is furnished to do so, subject to
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
11 the following conditions:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
12
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
13 The above copyright notice and this permission notice shall be
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
14 included in all copies or substantial portions of the Software.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
15
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
23 SOFTWARE.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
24 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
25
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
26 /* Contact: Heng Li <lh3@sanger.ac.uk> */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
27
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
28 #include <stdio.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
29 #include <stdlib.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
30 #include <string.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
31 #include <unistd.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
32 #include <time.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
33 #include <zlib.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
34 #include "bntseq.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
35 #include "bwt.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
36 #include "main.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
37 #include "utils.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
38
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
39 bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
40 void bwa_pac_rev_core(const char *fn, const char *fn_rev);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
41
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
42 int bwa_index(int argc, char *argv[])
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
43 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
44 char *prefix = 0, *str, *str2, *str3;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
45 int c, algo_type = 3, is_color = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
46 clock_t t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
47
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
48 while ((c = getopt(argc, argv, "ca:p:")) >= 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
49 switch (c) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
50 case 'a':
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
51 if (strcmp(optarg, "div") == 0) algo_type = 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
52 else if (strcmp(optarg, "bwtsw") == 0) algo_type = 2;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
53 else if (strcmp(optarg, "is") == 0) algo_type = 3;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
54 else err_fatal(__func__, "unknown algorithm: '%s'.", optarg);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
55 break;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
56 case 'p': prefix = strdup(optarg); break;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
57 case 'c': is_color = 1; break;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
58 default: return 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
59 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
60 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
61
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
62 if (optind + 1 > argc) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
63 fprintf(stderr, "\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
64 fprintf(stderr, "Usage: bwa index [-a bwtsw|div|is] [-c] <in.fasta>\n\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
65 fprintf(stderr, "Options: -a STR BWT construction algorithm: bwtsw or is [is]\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
66 fprintf(stderr, " -p STR prefix of the index [same as fasta name]\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
67 fprintf(stderr, " -c build color-space index\n\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
68 fprintf(stderr, "Warning: `-a bwtsw' does not work for short genomes, while `-a is' and\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
69 fprintf(stderr, " `-a div' do not work not for long genomes. Please choose `-a'\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
70 fprintf(stderr, " according to the length of the genome.\n\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
71 return 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
72 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
73 if (prefix == 0) prefix = strdup(argv[optind]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
74 str = (char*)calloc(strlen(prefix) + 10, 1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
75 str2 = (char*)calloc(strlen(prefix) + 10, 1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
76 str3 = (char*)calloc(strlen(prefix) + 10, 1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
77
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
78 if (is_color == 0) { // nucleotide indexing
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
79 gzFile fp = xzopen(argv[optind], "r");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
80 t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
81 fprintf(stderr, "[bwa_index] Pack FASTA... ");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
82 bns_fasta2bntseq(fp, prefix);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
83 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
84 gzclose(fp);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
85 } else { // color indexing
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
86 gzFile fp = xzopen(argv[optind], "r");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
87 strcat(strcpy(str, prefix), ".nt");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
88 t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
89 fprintf(stderr, "[bwa_index] Pack nucleotide FASTA... ");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
90 bns_fasta2bntseq(fp, str);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
91 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
92 gzclose(fp);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
93 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
94 char *tmp_argv[3];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
95 tmp_argv[0] = argv[0]; tmp_argv[1] = str; tmp_argv[2] = prefix;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
96 t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
97 fprintf(stderr, "[bwa_index] Convert nucleotide PAC to color PAC... ");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
98 bwa_pac2cspac(3, tmp_argv);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
99 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
100 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
101 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
102 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
103 strcpy(str, prefix); strcat(str, ".pac");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
104 strcpy(str2, prefix); strcat(str2, ".rpac");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
105 t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
106 fprintf(stderr, "[bwa_index] Reverse the packed sequence... ");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
107 bwa_pac_rev_core(str, str2);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
108 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
109 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
110 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
111 strcpy(str, prefix); strcat(str, ".pac");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
112 strcpy(str2, prefix); strcat(str2, ".bwt");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
113 t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
114 fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
115 if (algo_type == 2) bwt_bwtgen(str, str2);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
116 else if (algo_type == 1 || algo_type == 3) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
117 bwt_t *bwt;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
118 bwt = bwt_pac2bwt(str, algo_type == 3);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
119 bwt_dump_bwt(str2, bwt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
120 bwt_destroy(bwt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
121 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
122 fprintf(stderr, "[bwa_index] %.2f seconds elapse.\n", (float)(clock() - t) / CLOCKS_PER_SEC);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
123 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
124 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
125 strcpy(str, prefix); strcat(str, ".rpac");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
126 strcpy(str2, prefix); strcat(str2, ".rbwt");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
127 t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
128 fprintf(stderr, "[bwa_index] Construct BWT for the reverse packed sequence...\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
129 if (algo_type == 2) bwt_bwtgen(str, str2);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
130 else if (algo_type == 1 || algo_type == 3) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
131 bwt_t *bwt;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
132 bwt = bwt_pac2bwt(str, algo_type == 3);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
133 bwt_dump_bwt(str2, bwt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
134 bwt_destroy(bwt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
135 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
136 fprintf(stderr, "[bwa_index] %.2f seconds elapse.\n", (float)(clock() - t) / CLOCKS_PER_SEC);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
137 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
138 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
139 bwt_t *bwt;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
140 strcpy(str, prefix); strcat(str, ".bwt");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
141 t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
142 fprintf(stderr, "[bwa_index] Update BWT... ");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
143 bwt = bwt_restore_bwt(str);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
144 bwt_bwtupdate_core(bwt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
145 bwt_dump_bwt(str, bwt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
146 bwt_destroy(bwt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
147 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
148 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
149 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
150 bwt_t *bwt;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
151 strcpy(str, prefix); strcat(str, ".rbwt");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
152 t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
153 fprintf(stderr, "[bwa_index] Update reverse BWT... ");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
154 bwt = bwt_restore_bwt(str);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
155 bwt_bwtupdate_core(bwt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
156 bwt_dump_bwt(str, bwt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
157 bwt_destroy(bwt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
158 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
159 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
160 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
161 bwt_t *bwt;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
162 strcpy(str, prefix); strcat(str, ".bwt");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
163 strcpy(str3, prefix); strcat(str3, ".sa");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
164 t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
165 fprintf(stderr, "[bwa_index] Construct SA from BWT and Occ... ");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
166 bwt = bwt_restore_bwt(str);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
167 bwt_cal_sa(bwt, 32);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
168 bwt_dump_sa(str3, bwt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
169 bwt_destroy(bwt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
170 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
171 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
172 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
173 bwt_t *bwt;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
174 strcpy(str, prefix); strcat(str, ".rbwt");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
175 strcpy(str3, prefix); strcat(str3, ".rsa");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
176 t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
177 fprintf(stderr, "[bwa_index] Construct SA from reverse BWT and Occ... ");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
178 bwt = bwt_restore_bwt(str);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
179 bwt_cal_sa(bwt, 32);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
180 bwt_dump_sa(str3, bwt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
181 bwt_destroy(bwt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
182 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
183 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
184 free(str3); free(str2); free(str); free(prefix);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
185 return 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
186 }