0
|
1 #include <string.h>
|
|
2 #include <stdlib.h>
|
|
3 #include <sys/stat.h>
|
|
4 #include <unistd.h>
|
|
5 #include "knetfile.h"
|
|
6 #include "bcf.h"
|
|
7
|
|
8 #include "kseq.h"
|
|
9 KSTREAM_INIT(gzFile, gzread, 0x10000)
|
|
10
|
|
11 int bcfview(int argc, char *argv[]);
|
|
12 int bcf_main_index(int argc, char *argv[]);
|
|
13
|
|
14 #define BUF_SIZE 0x10000
|
|
15
|
|
16 int bcf_cat(int n, char * const *fn)
|
|
17 {
|
|
18 int i;
|
|
19 bcf_t *out;
|
|
20 uint8_t *buf;
|
|
21 buf = malloc(BUF_SIZE);
|
|
22 out = bcf_open("-", "w");
|
|
23 for (i = 0; i < n; ++i) {
|
|
24 bcf_t *in;
|
|
25 bcf_hdr_t *h;
|
|
26 off_t end;
|
|
27 struct stat s;
|
|
28 in = bcf_open(fn[i], "r");
|
|
29 h = bcf_hdr_read(in);
|
|
30 if (i == 0) bcf_hdr_write(out, h);
|
|
31 bcf_hdr_destroy(h);
|
|
32 #ifdef _USE_KNETFILE
|
|
33 fstat(knet_fileno((knetFile*)in->fp->fp), &s);
|
|
34 end = s.st_size - 28;
|
|
35 while (knet_tell((knetFile*)in->fp->fp) < end) {
|
|
36 int size = knet_tell((knetFile*)in->fp->fp) + BUF_SIZE < end? BUF_SIZE : end - knet_tell((knetFile*)in->fp->fp);
|
|
37 knet_read(in->fp->fp, buf, size);
|
|
38 fwrite(buf, 1, size, out->fp->fp);
|
|
39 }
|
|
40 #else
|
|
41 abort(); // FIXME: not implemented
|
|
42 #endif
|
|
43 bcf_close(in);
|
|
44 }
|
|
45 bcf_close(out);
|
|
46 free(buf);
|
|
47 return 0;
|
|
48 }
|
|
49
|
|
50 extern double bcf_pair_freq(const bcf1_t *b0, const bcf1_t *b1, double f[4]);
|
|
51
|
|
52 int bcf_main_ldpair(int argc, char *argv[])
|
|
53 {
|
|
54 bcf_t *fp;
|
|
55 bcf_hdr_t *h;
|
|
56 bcf1_t *b0, *b1;
|
|
57 bcf_idx_t *idx;
|
|
58 kstring_t str;
|
|
59 void *str2id;
|
|
60 gzFile fplist;
|
|
61 kstream_t *ks;
|
|
62 int dret, lineno = 0;
|
|
63 if (argc < 3) {
|
|
64 fprintf(stderr, "Usage: bcftools ldpair <in.bcf> <in.list>\n");
|
|
65 return 1;
|
|
66 }
|
|
67 fplist = gzopen(argv[2], "rb");
|
|
68 ks = ks_init(fplist);
|
|
69 memset(&str, 0, sizeof(kstring_t));
|
|
70 fp = bcf_open(argv[1], "rb");
|
|
71 h = bcf_hdr_read(fp);
|
|
72 str2id = bcf_build_refhash(h);
|
|
73 idx = bcf_idx_load(argv[1]);
|
|
74 if (idx == 0) {
|
|
75 fprintf(stderr, "[%s] No bcf index is found. Abort!\n", __func__);
|
|
76 return 1;
|
|
77 }
|
|
78 b0 = calloc(1, sizeof(bcf1_t));
|
|
79 b1 = calloc(1, sizeof(bcf1_t));
|
|
80 while (ks_getuntil(ks, '\n', &str, &dret) >= 0) {
|
|
81 char *p, *q;
|
|
82 int k;
|
|
83 int tid0 = -1, tid1 = -1, pos0 = -1, pos1 = -1;
|
|
84 ++lineno;
|
|
85 for (p = q = str.s, k = 0; *p; ++p) {
|
|
86 if (*p == ' ' || *p == '\t') {
|
|
87 *p = '\0';
|
|
88 if (k == 0) tid0 = bcf_str2id(str2id, q);
|
|
89 else if (k == 1) pos0 = atoi(q) - 1;
|
|
90 else if (k == 2) tid1 = strcmp(q, "=")? bcf_str2id(str2id, q) : tid0;
|
|
91 else if (k == 3) pos1 = atoi(q) - 1;
|
|
92 q = p + 1;
|
|
93 ++k;
|
|
94 }
|
|
95 }
|
|
96 if (k == 3) pos1 = atoi(q) - 1;
|
|
97 if (tid0 >= 0 && tid1 >= 0 && pos0 >= 0 && pos1 >= 0) {
|
|
98 uint64_t off;
|
|
99 double r, f[4];
|
|
100 off = bcf_idx_query(idx, tid0, pos0);
|
|
101 bgzf_seek(fp->fp, off, SEEK_SET);
|
|
102 while (bcf_read(fp, h, b0) >= 0 && b0->pos != pos0);
|
|
103 off = bcf_idx_query(idx, tid1, pos1);
|
|
104 bgzf_seek(fp->fp, off, SEEK_SET);
|
|
105 while (bcf_read(fp, h, b1) >= 0 && b1->pos != pos1);
|
|
106 r = bcf_pair_freq(b0, b1, f);
|
|
107 r *= r;
|
|
108 printf("%s\t%d\t%s\t%d\t%.4g\t%.4g\t%.4g\t%.4g\t%.4g\n", h->ns[tid0], pos0+1, h->ns[tid1], pos1+1,
|
|
109 r, f[0], f[1], f[2], f[3]);
|
|
110 } //else fprintf(stderr, "[%s] Parse error at line %d.\n", __func__, lineno);
|
|
111 }
|
|
112 bcf_destroy(b0); bcf_destroy(b1);
|
|
113 bcf_idx_destroy(idx);
|
|
114 bcf_str2id_destroy(str2id);
|
|
115 bcf_hdr_destroy(h);
|
|
116 bcf_close(fp);
|
|
117 free(str.s);
|
|
118 ks_destroy(ks);
|
|
119 gzclose(fplist);
|
|
120 return 0;
|
|
121 }
|
|
122
|
|
123 int bcf_main_ld(int argc, char *argv[])
|
|
124 {
|
|
125 bcf_t *fp;
|
|
126 bcf_hdr_t *h;
|
|
127 bcf1_t **b, *b0;
|
|
128 int i, j, m, n;
|
|
129 double f[4];
|
|
130 if (argc == 1) {
|
|
131 fprintf(stderr, "Usage: bcftools ld <in.bcf>\n");
|
|
132 return 1;
|
|
133 }
|
|
134 fp = bcf_open(argv[1], "rb");
|
|
135 h = bcf_hdr_read(fp);
|
|
136 // read the entire BCF
|
|
137 m = n = 0; b = 0;
|
|
138 b0 = calloc(1, sizeof(bcf1_t));
|
|
139 while (bcf_read(fp, h, b0) >= 0) {
|
|
140 if (m == n) {
|
|
141 m = m? m<<1 : 16;
|
|
142 b = realloc(b, sizeof(void*) * m);
|
|
143 }
|
|
144 b[n] = calloc(1, sizeof(bcf1_t));
|
|
145 bcf_cpy(b[n++], b0);
|
|
146 }
|
|
147 bcf_destroy(b0);
|
|
148 // compute pair-wise r^2
|
|
149 printf("%d\n", n); // the number of loci
|
|
150 for (i = 0; i < n; ++i) {
|
|
151 printf("%s:%d", h->ns[b[i]->tid], b[i]->pos + 1);
|
|
152 for (j = 0; j < i; ++j) {
|
|
153 double r = bcf_pair_freq(b[i], b[j], f);
|
|
154 printf("\t%.3f", r*r);
|
|
155 }
|
|
156 printf("\t1.000\n");
|
|
157 }
|
|
158 // free
|
|
159 for (i = 0; i < n; ++i) bcf_destroy(b[i]);
|
|
160 free(b);
|
|
161 bcf_hdr_destroy(h);
|
|
162 bcf_close(fp);
|
|
163 return 0;
|
|
164 }
|
|
165
|
|
166 int main(int argc, char *argv[])
|
|
167 {
|
|
168 if (argc == 1) {
|
|
169 fprintf(stderr, "\n");
|
|
170 fprintf(stderr, "Program: bcftools (Tools for data in the VCF/BCF formats)\n");
|
|
171 fprintf(stderr, "Version: %s\n\n", BCF_VERSION);
|
|
172 fprintf(stderr, "Usage: bcftools <command> <arguments>\n\n");
|
|
173 fprintf(stderr, "Command: view print, extract, convert and call SNPs from BCF\n");
|
|
174 fprintf(stderr, " index index BCF\n");
|
|
175 fprintf(stderr, " cat concatenate BCFs\n");
|
|
176 fprintf(stderr, " ld compute all-pair r^2\n");
|
|
177 fprintf(stderr, " ldpair compute r^2 between requested pairs\n");
|
|
178 fprintf(stderr, "\n");
|
|
179 return 1;
|
|
180 }
|
|
181 if (strcmp(argv[1], "view") == 0) return bcfview(argc-1, argv+1);
|
|
182 else if (strcmp(argv[1], "index") == 0) return bcf_main_index(argc-1, argv+1);
|
|
183 else if (strcmp(argv[1], "ld") == 0) return bcf_main_ld(argc-1, argv+1);
|
|
184 else if (strcmp(argv[1], "ldpair") == 0) return bcf_main_ldpair(argc-1, argv+1);
|
|
185 else if (strcmp(argv[1], "cat") == 0) return bcf_cat(argc-2, argv+2); // cat is different ...
|
|
186 else {
|
|
187 fprintf(stderr, "[main] Unrecognized command.\n");
|
|
188 return 1;
|
|
189 }
|
|
190 return 0;
|
|
191 }
|