annotate pyPRADA_1.2/tools/samtools-0.1.16/bcftools/main.c @ 0:acc2ca1a3ba4

Uploaded
author siyuan
date Thu, 20 Feb 2014 00:44:58 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
1 #include <string.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
2 #include <stdlib.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
3 #include <sys/stat.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
4 #include <unistd.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
5 #include "bcf.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
6
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
7 #include "kseq.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
8 KSTREAM_INIT(gzFile, gzread, 0x10000)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
9
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
10 int bcfview(int argc, char *argv[]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
11 int bcf_main_index(int argc, char *argv[]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
12
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
13 #define BUF_SIZE 0x10000
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
14
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
15 int bcf_cat(int n, char * const *fn)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
16 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
17 int i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
18 bcf_t *out;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
19 uint8_t *buf;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
20 buf = malloc(BUF_SIZE);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
21 out = bcf_open("-", "w");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
22 for (i = 0; i < n; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
23 bcf_t *in;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
24 bcf_hdr_t *h;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
25 off_t end;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
26 struct stat s;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
27 in = bcf_open(fn[i], "r");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
28 h = bcf_hdr_read(in);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
29 if (i == 0) bcf_hdr_write(out, h);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
30 bcf_hdr_destroy(h);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
31 #ifdef _USE_KNETFILE
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
32 fstat(knet_fileno(in->fp->x.fpr), &s);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
33 end = s.st_size - 28;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
34 while (knet_tell(in->fp->x.fpr) < end) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
35 int size = knet_tell(in->fp->x.fpr) + BUF_SIZE < end? BUF_SIZE : end - knet_tell(in->fp->x.fpr);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
36 knet_read(in->fp->x.fpr, buf, size);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
37 fwrite(buf, 1, size, out->fp->x.fpw);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
38 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
39 #else
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
40 abort(); // FIXME: not implemented
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
41 #endif
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
42 bcf_close(in);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
43 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
44 bcf_close(out);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
45 free(buf);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
46 return 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
47 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
48
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
49 extern double bcf_pair_freq(const bcf1_t *b0, const bcf1_t *b1, double f[4]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
50
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
51 int bcf_main_ldpair(int argc, char *argv[])
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
52 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
53 bcf_t *fp;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
54 bcf_hdr_t *h;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
55 bcf1_t *b0, *b1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
56 bcf_idx_t *idx;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
57 kstring_t str;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
58 void *str2id;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
59 gzFile fplist;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
60 kstream_t *ks;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
61 int dret, lineno = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
62 if (argc < 3) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
63 fprintf(stderr, "Usage: bcftools ldpair <in.bcf> <in.list>\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
64 return 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
65 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
66 fplist = gzopen(argv[2], "rb");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
67 ks = ks_init(fplist);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
68 memset(&str, 0, sizeof(kstring_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
69 fp = bcf_open(argv[1], "rb");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
70 h = bcf_hdr_read(fp);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
71 str2id = bcf_build_refhash(h);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
72 idx = bcf_idx_load(argv[1]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
73 if (idx == 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
74 fprintf(stderr, "[%s] No bcf index is found. Abort!\n", __func__);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
75 return 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
76 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
77 b0 = calloc(1, sizeof(bcf1_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
78 b1 = calloc(1, sizeof(bcf1_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
79 while (ks_getuntil(ks, '\n', &str, &dret) >= 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
80 char *p, *q;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
81 int k;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
82 int tid0 = -1, tid1 = -1, pos0 = -1, pos1 = -1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
83 ++lineno;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
84 for (p = q = str.s, k = 0; *p; ++p) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
85 if (*p == ' ' || *p == '\t') {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
86 *p = '\0';
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
87 if (k == 0) tid0 = bcf_str2id(str2id, q);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
88 else if (k == 1) pos0 = atoi(q) - 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
89 else if (k == 2) tid1 = strcmp(q, "=")? bcf_str2id(str2id, q) : tid0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
90 else if (k == 3) pos1 = atoi(q) - 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
91 q = p + 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
92 ++k;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
93 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
94 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
95 if (k == 3) pos1 = atoi(q) - 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
96 if (tid0 >= 0 && tid1 >= 0 && pos0 >= 0 && pos1 >= 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
97 uint64_t off;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
98 double r, f[4];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
99 off = bcf_idx_query(idx, tid0, pos0);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
100 bgzf_seek(fp->fp, off, SEEK_SET);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
101 while (bcf_read(fp, h, b0) >= 0 && b0->pos != pos0);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
102 off = bcf_idx_query(idx, tid1, pos1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
103 bgzf_seek(fp->fp, off, SEEK_SET);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
104 while (bcf_read(fp, h, b1) >= 0 && b1->pos != pos1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
105 r = bcf_pair_freq(b0, b1, f);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
106 r *= r;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
107 printf("%s\t%d\t%s\t%d\t%.4g\t%.4g\t%.4g\t%.4g\t%.4g\n", h->ns[tid0], pos0+1, h->ns[tid1], pos1+1,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
108 r, f[0], f[1], f[2], f[3]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
109 } //else fprintf(stderr, "[%s] Parse error at line %d.\n", __func__, lineno);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
110 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
111 bcf_destroy(b0); bcf_destroy(b1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
112 bcf_idx_destroy(idx);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
113 bcf_str2id_destroy(str2id);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
114 bcf_hdr_destroy(h);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
115 bcf_close(fp);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
116 free(str.s);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
117 ks_destroy(ks);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
118 gzclose(fplist);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
119 return 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
120 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
121
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
122 int bcf_main_ld(int argc, char *argv[])
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
123 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
124 bcf_t *fp;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
125 bcf_hdr_t *h;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
126 bcf1_t **b, *b0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
127 int i, j, m, n;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
128 double f[4];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
129 if (argc == 1) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
130 fprintf(stderr, "Usage: bcftools ld <in.bcf>\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
131 return 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
132 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
133 fp = bcf_open(argv[1], "rb");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
134 h = bcf_hdr_read(fp);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
135 // read the entire BCF
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
136 m = n = 0; b = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
137 b0 = calloc(1, sizeof(bcf1_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
138 while (bcf_read(fp, h, b0) >= 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
139 if (m == n) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
140 m = m? m<<1 : 16;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
141 b = realloc(b, sizeof(void*) * m);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
142 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
143 b[n] = calloc(1, sizeof(bcf1_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
144 bcf_cpy(b[n++], b0);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
145 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
146 bcf_destroy(b0);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
147 // compute pair-wise r^2
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
148 printf("%d\n", n); // the number of loci
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
149 for (i = 0; i < n; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
150 printf("%s:%d", h->ns[b[i]->tid], b[i]->pos + 1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
151 for (j = 0; j < i; ++j) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
152 double r = bcf_pair_freq(b[i], b[j], f);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
153 printf("\t%.3f", r*r);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
154 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
155 printf("\t1.000\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
156 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
157 // free
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
158 for (i = 0; i < n; ++i) bcf_destroy(b[i]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
159 free(b);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
160 bcf_hdr_destroy(h);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
161 bcf_close(fp);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
162 return 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
163 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
164
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
165 int main(int argc, char *argv[])
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
166 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
167 if (argc == 1) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
168 fprintf(stderr, "\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
169 fprintf(stderr, "Usage: bcftools <command> <arguments>\n\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
170 fprintf(stderr, "Command: view print, extract, convert and call SNPs from BCF\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
171 fprintf(stderr, " index index BCF\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
172 fprintf(stderr, " cat concatenate BCFs\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
173 fprintf(stderr, " ld compute all-pair r^2\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
174 fprintf(stderr, " ldpair compute r^2 between requested pairs\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
175 fprintf(stderr, "\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
176 return 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
177 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
178 if (strcmp(argv[1], "view") == 0) return bcfview(argc-1, argv+1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
179 else if (strcmp(argv[1], "index") == 0) return bcf_main_index(argc-1, argv+1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
180 else if (strcmp(argv[1], "ld") == 0) return bcf_main_ld(argc-1, argv+1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
181 else if (strcmp(argv[1], "ldpair") == 0) return bcf_main_ldpair(argc-1, argv+1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
182 else if (strcmp(argv[1], "cat") == 0) return bcf_cat(argc-2, argv+2); // cat is different ...
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
183 else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
184 fprintf(stderr, "[main] Unrecognized command.\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
185 return 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
186 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
187 return 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
188 }