Mercurial > repos > lsong10 > psiclass
comparison PsiCLASS-1.0.2/samtools-0.1.19/bcftools/main.c @ 0:903fc43d6227 draft default tip
Uploaded
author | lsong10 |
---|---|
date | Fri, 26 Mar 2021 16:52:45 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:903fc43d6227 |
---|---|
1 #include <string.h> | |
2 #include <stdlib.h> | |
3 #include <sys/stat.h> | |
4 #include <unistd.h> | |
5 #include "knetfile.h" | |
6 #include "bcf.h" | |
7 | |
8 #include "kseq.h" | |
9 KSTREAM_INIT(gzFile, gzread, 0x10000) | |
10 | |
11 int bcfview(int argc, char *argv[]); | |
12 int bcf_main_index(int argc, char *argv[]); | |
13 | |
14 #define BUF_SIZE 0x10000 | |
15 | |
16 int bcf_cat(int n, char * const *fn) | |
17 { | |
18 int i; | |
19 bcf_t *out; | |
20 uint8_t *buf; | |
21 buf = malloc(BUF_SIZE); | |
22 out = bcf_open("-", "w"); | |
23 for (i = 0; i < n; ++i) { | |
24 bcf_t *in; | |
25 bcf_hdr_t *h; | |
26 off_t end; | |
27 struct stat s; | |
28 in = bcf_open(fn[i], "r"); | |
29 h = bcf_hdr_read(in); | |
30 if (i == 0) bcf_hdr_write(out, h); | |
31 bcf_hdr_destroy(h); | |
32 #ifdef _USE_KNETFILE | |
33 fstat(knet_fileno((knetFile*)in->fp->fp), &s); | |
34 end = s.st_size - 28; | |
35 while (knet_tell((knetFile*)in->fp->fp) < end) { | |
36 int size = knet_tell((knetFile*)in->fp->fp) + BUF_SIZE < end? BUF_SIZE : end - knet_tell((knetFile*)in->fp->fp); | |
37 knet_read(in->fp->fp, buf, size); | |
38 fwrite(buf, 1, size, out->fp->fp); | |
39 } | |
40 #else | |
41 abort(); // FIXME: not implemented | |
42 #endif | |
43 bcf_close(in); | |
44 } | |
45 bcf_close(out); | |
46 free(buf); | |
47 return 0; | |
48 } | |
49 | |
50 extern double bcf_pair_freq(const bcf1_t *b0, const bcf1_t *b1, double f[4]); | |
51 | |
52 int bcf_main_ldpair(int argc, char *argv[]) | |
53 { | |
54 bcf_t *fp; | |
55 bcf_hdr_t *h; | |
56 bcf1_t *b0, *b1; | |
57 bcf_idx_t *idx; | |
58 kstring_t str; | |
59 void *str2id; | |
60 gzFile fplist; | |
61 kstream_t *ks; | |
62 int dret, lineno = 0; | |
63 if (argc < 3) { | |
64 fprintf(stderr, "Usage: bcftools ldpair <in.bcf> <in.list>\n"); | |
65 return 1; | |
66 } | |
67 fplist = gzopen(argv[2], "rb"); | |
68 ks = ks_init(fplist); | |
69 memset(&str, 0, sizeof(kstring_t)); | |
70 fp = bcf_open(argv[1], "rb"); | |
71 h = bcf_hdr_read(fp); | |
72 str2id = bcf_build_refhash(h); | |
73 idx = bcf_idx_load(argv[1]); | |
74 if (idx == 0) { | |
75 fprintf(stderr, "[%s] No bcf index is found. Abort!\n", __func__); | |
76 return 1; | |
77 } | |
78 b0 = calloc(1, sizeof(bcf1_t)); | |
79 b1 = calloc(1, sizeof(bcf1_t)); | |
80 while (ks_getuntil(ks, '\n', &str, &dret) >= 0) { | |
81 char *p, *q; | |
82 int k; | |
83 int tid0 = -1, tid1 = -1, pos0 = -1, pos1 = -1; | |
84 ++lineno; | |
85 for (p = q = str.s, k = 0; *p; ++p) { | |
86 if (*p == ' ' || *p == '\t') { | |
87 *p = '\0'; | |
88 if (k == 0) tid0 = bcf_str2id(str2id, q); | |
89 else if (k == 1) pos0 = atoi(q) - 1; | |
90 else if (k == 2) tid1 = strcmp(q, "=")? bcf_str2id(str2id, q) : tid0; | |
91 else if (k == 3) pos1 = atoi(q) - 1; | |
92 q = p + 1; | |
93 ++k; | |
94 } | |
95 } | |
96 if (k == 3) pos1 = atoi(q) - 1; | |
97 if (tid0 >= 0 && tid1 >= 0 && pos0 >= 0 && pos1 >= 0) { | |
98 uint64_t off; | |
99 double r, f[4]; | |
100 off = bcf_idx_query(idx, tid0, pos0); | |
101 bgzf_seek(fp->fp, off, SEEK_SET); | |
102 while (bcf_read(fp, h, b0) >= 0 && b0->pos != pos0); | |
103 off = bcf_idx_query(idx, tid1, pos1); | |
104 bgzf_seek(fp->fp, off, SEEK_SET); | |
105 while (bcf_read(fp, h, b1) >= 0 && b1->pos != pos1); | |
106 r = bcf_pair_freq(b0, b1, f); | |
107 r *= r; | |
108 printf("%s\t%d\t%s\t%d\t%.4g\t%.4g\t%.4g\t%.4g\t%.4g\n", h->ns[tid0], pos0+1, h->ns[tid1], pos1+1, | |
109 r, f[0], f[1], f[2], f[3]); | |
110 } //else fprintf(stderr, "[%s] Parse error at line %d.\n", __func__, lineno); | |
111 } | |
112 bcf_destroy(b0); bcf_destroy(b1); | |
113 bcf_idx_destroy(idx); | |
114 bcf_str2id_destroy(str2id); | |
115 bcf_hdr_destroy(h); | |
116 bcf_close(fp); | |
117 free(str.s); | |
118 ks_destroy(ks); | |
119 gzclose(fplist); | |
120 return 0; | |
121 } | |
122 | |
123 int bcf_main_ld(int argc, char *argv[]) | |
124 { | |
125 bcf_t *fp; | |
126 bcf_hdr_t *h; | |
127 bcf1_t **b, *b0; | |
128 int i, j, m, n; | |
129 double f[4]; | |
130 if (argc == 1) { | |
131 fprintf(stderr, "Usage: bcftools ld <in.bcf>\n"); | |
132 return 1; | |
133 } | |
134 fp = bcf_open(argv[1], "rb"); | |
135 h = bcf_hdr_read(fp); | |
136 // read the entire BCF | |
137 m = n = 0; b = 0; | |
138 b0 = calloc(1, sizeof(bcf1_t)); | |
139 while (bcf_read(fp, h, b0) >= 0) { | |
140 if (m == n) { | |
141 m = m? m<<1 : 16; | |
142 b = realloc(b, sizeof(void*) * m); | |
143 } | |
144 b[n] = calloc(1, sizeof(bcf1_t)); | |
145 bcf_cpy(b[n++], b0); | |
146 } | |
147 bcf_destroy(b0); | |
148 // compute pair-wise r^2 | |
149 printf("%d\n", n); // the number of loci | |
150 for (i = 0; i < n; ++i) { | |
151 printf("%s:%d", h->ns[b[i]->tid], b[i]->pos + 1); | |
152 for (j = 0; j < i; ++j) { | |
153 double r = bcf_pair_freq(b[i], b[j], f); | |
154 printf("\t%.3f", r*r); | |
155 } | |
156 printf("\t1.000\n"); | |
157 } | |
158 // free | |
159 for (i = 0; i < n; ++i) bcf_destroy(b[i]); | |
160 free(b); | |
161 bcf_hdr_destroy(h); | |
162 bcf_close(fp); | |
163 return 0; | |
164 } | |
165 | |
166 int main(int argc, char *argv[]) | |
167 { | |
168 if (argc == 1) { | |
169 fprintf(stderr, "\n"); | |
170 fprintf(stderr, "Program: bcftools (Tools for data in the VCF/BCF formats)\n"); | |
171 fprintf(stderr, "Version: %s\n\n", BCF_VERSION); | |
172 fprintf(stderr, "Usage: bcftools <command> <arguments>\n\n"); | |
173 fprintf(stderr, "Command: view print, extract, convert and call SNPs from BCF\n"); | |
174 fprintf(stderr, " index index BCF\n"); | |
175 fprintf(stderr, " cat concatenate BCFs\n"); | |
176 fprintf(stderr, " ld compute all-pair r^2\n"); | |
177 fprintf(stderr, " ldpair compute r^2 between requested pairs\n"); | |
178 fprintf(stderr, "\n"); | |
179 return 1; | |
180 } | |
181 if (strcmp(argv[1], "view") == 0) return bcfview(argc-1, argv+1); | |
182 else if (strcmp(argv[1], "index") == 0) return bcf_main_index(argc-1, argv+1); | |
183 else if (strcmp(argv[1], "ld") == 0) return bcf_main_ld(argc-1, argv+1); | |
184 else if (strcmp(argv[1], "ldpair") == 0) return bcf_main_ldpair(argc-1, argv+1); | |
185 else if (strcmp(argv[1], "cat") == 0) return bcf_cat(argc-2, argv+2); // cat is different ... | |
186 else { | |
187 fprintf(stderr, "[main] Unrecognized command.\n"); | |
188 return 1; | |
189 } | |
190 return 0; | |
191 } |