| 0 | 1 #include <string.h> | 
|  | 2 #include <stdlib.h> | 
|  | 3 #include <sys/stat.h> | 
|  | 4 #include <unistd.h> | 
|  | 5 #include "knetfile.h" | 
|  | 6 #include "bcf.h" | 
|  | 7 | 
|  | 8 #include "kseq.h" | 
|  | 9 KSTREAM_INIT(gzFile, gzread, 0x10000) | 
|  | 10 | 
|  | 11 int bcfview(int argc, char *argv[]); | 
|  | 12 int bcf_main_index(int argc, char *argv[]); | 
|  | 13 | 
|  | 14 #define BUF_SIZE 0x10000 | 
|  | 15 | 
|  | 16 int bcf_cat(int n, char * const *fn) | 
|  | 17 { | 
|  | 18 	int i; | 
|  | 19 	bcf_t *out; | 
|  | 20 	uint8_t *buf; | 
|  | 21 	buf = malloc(BUF_SIZE); | 
|  | 22 	out = bcf_open("-", "w"); | 
|  | 23 	for (i = 0; i < n; ++i) { | 
|  | 24 		bcf_t *in; | 
|  | 25 		bcf_hdr_t *h; | 
|  | 26 		off_t end; | 
|  | 27 		struct stat s; | 
|  | 28 		in = bcf_open(fn[i], "r"); | 
|  | 29 		h = bcf_hdr_read(in); | 
|  | 30 		if (i == 0) bcf_hdr_write(out, h); | 
|  | 31 		bcf_hdr_destroy(h); | 
|  | 32 #ifdef _USE_KNETFILE | 
|  | 33 		fstat(knet_fileno((knetFile*)in->fp->fp), &s); | 
|  | 34 		end = s.st_size - 28; | 
|  | 35 		while (knet_tell((knetFile*)in->fp->fp) < end) { | 
|  | 36 			int size = knet_tell((knetFile*)in->fp->fp) + BUF_SIZE < end? BUF_SIZE : end - knet_tell((knetFile*)in->fp->fp); | 
|  | 37 			knet_read(in->fp->fp, buf, size); | 
|  | 38 			fwrite(buf, 1, size, out->fp->fp); | 
|  | 39 		} | 
|  | 40 #else | 
|  | 41 		abort(); // FIXME: not implemented | 
|  | 42 #endif | 
|  | 43 		bcf_close(in); | 
|  | 44 	} | 
|  | 45 	bcf_close(out); | 
|  | 46 	free(buf); | 
|  | 47 	return 0; | 
|  | 48 } | 
|  | 49 | 
|  | 50 extern double bcf_pair_freq(const bcf1_t *b0, const bcf1_t *b1, double f[4]); | 
|  | 51 | 
|  | 52 int bcf_main_ldpair(int argc, char *argv[]) | 
|  | 53 { | 
|  | 54 	bcf_t *fp; | 
|  | 55 	bcf_hdr_t *h; | 
|  | 56 	bcf1_t *b0, *b1; | 
|  | 57 	bcf_idx_t *idx; | 
|  | 58 	kstring_t str; | 
|  | 59 	void *str2id; | 
|  | 60 	gzFile fplist; | 
|  | 61 	kstream_t *ks; | 
|  | 62 	int dret, lineno = 0; | 
|  | 63 	if (argc < 3) { | 
|  | 64 		fprintf(stderr, "Usage: bcftools ldpair <in.bcf> <in.list>\n"); | 
|  | 65 		return 1; | 
|  | 66 	} | 
|  | 67 	fplist = gzopen(argv[2], "rb"); | 
|  | 68 	ks = ks_init(fplist); | 
|  | 69 	memset(&str, 0, sizeof(kstring_t)); | 
|  | 70 	fp = bcf_open(argv[1], "rb"); | 
|  | 71 	h = bcf_hdr_read(fp); | 
|  | 72 	str2id = bcf_build_refhash(h); | 
|  | 73 	idx = bcf_idx_load(argv[1]); | 
|  | 74 	if (idx == 0) { | 
|  | 75 		fprintf(stderr, "[%s] No bcf index is found. Abort!\n", __func__); | 
|  | 76 		return 1; | 
|  | 77 	} | 
|  | 78 	b0 = calloc(1, sizeof(bcf1_t)); | 
|  | 79 	b1 = calloc(1, sizeof(bcf1_t)); | 
|  | 80 	while (ks_getuntil(ks, '\n', &str, &dret) >= 0) { | 
|  | 81 		char *p, *q; | 
|  | 82 		int k; | 
|  | 83 		int tid0 = -1, tid1 = -1, pos0 = -1, pos1 = -1; | 
|  | 84 		++lineno; | 
|  | 85 		for (p = q = str.s, k = 0; *p; ++p) { | 
|  | 86 			if (*p == ' ' || *p == '\t') { | 
|  | 87 				*p = '\0'; | 
|  | 88 				if (k == 0) tid0 = bcf_str2id(str2id, q); | 
|  | 89 				else if (k == 1) pos0 = atoi(q) - 1; | 
|  | 90 				else if (k == 2) tid1 = strcmp(q, "=")? bcf_str2id(str2id, q) : tid0; | 
|  | 91 				else if (k == 3) pos1 = atoi(q) - 1; | 
|  | 92 				q = p + 1; | 
|  | 93 				++k; | 
|  | 94 			} | 
|  | 95 		} | 
|  | 96 		if (k == 3) pos1 = atoi(q) - 1; | 
|  | 97 		if (tid0 >= 0 && tid1 >= 0 && pos0 >= 0 && pos1 >= 0) { | 
|  | 98 			uint64_t off; | 
|  | 99 			double r, f[4]; | 
|  | 100 			off = bcf_idx_query(idx, tid0, pos0); | 
|  | 101 			bgzf_seek(fp->fp, off, SEEK_SET); | 
|  | 102 			while (bcf_read(fp, h, b0) >= 0 && b0->pos != pos0); | 
|  | 103 			off = bcf_idx_query(idx, tid1, pos1); | 
|  | 104 			bgzf_seek(fp->fp, off, SEEK_SET); | 
|  | 105 			while (bcf_read(fp, h, b1) >= 0 && b1->pos != pos1); | 
|  | 106 			r = bcf_pair_freq(b0, b1, f); | 
|  | 107 			r *= r; | 
|  | 108 			printf("%s\t%d\t%s\t%d\t%.4g\t%.4g\t%.4g\t%.4g\t%.4g\n", h->ns[tid0], pos0+1, h->ns[tid1], pos1+1, | 
|  | 109 				r, f[0], f[1], f[2], f[3]); | 
|  | 110 		} //else fprintf(stderr, "[%s] Parse error at line %d.\n", __func__, lineno); | 
|  | 111 	} | 
|  | 112 	bcf_destroy(b0); bcf_destroy(b1); | 
|  | 113 	bcf_idx_destroy(idx); | 
|  | 114 	bcf_str2id_destroy(str2id); | 
|  | 115 	bcf_hdr_destroy(h); | 
|  | 116 	bcf_close(fp); | 
|  | 117 	free(str.s); | 
|  | 118 	ks_destroy(ks); | 
|  | 119 	gzclose(fplist); | 
|  | 120 	return 0; | 
|  | 121 } | 
|  | 122 | 
|  | 123 int bcf_main_ld(int argc, char *argv[]) | 
|  | 124 { | 
|  | 125 	bcf_t *fp; | 
|  | 126 	bcf_hdr_t *h; | 
|  | 127 	bcf1_t **b, *b0; | 
|  | 128 	int i, j, m, n; | 
|  | 129 	double f[4]; | 
|  | 130 	if (argc == 1) { | 
|  | 131 		fprintf(stderr, "Usage: bcftools ld <in.bcf>\n"); | 
|  | 132 		return 1; | 
|  | 133 	} | 
|  | 134 	fp = bcf_open(argv[1], "rb"); | 
|  | 135 	h = bcf_hdr_read(fp); | 
|  | 136 	// read the entire BCF | 
|  | 137 	m = n = 0; b = 0; | 
|  | 138 	b0 = calloc(1, sizeof(bcf1_t)); | 
|  | 139 	while (bcf_read(fp, h, b0) >= 0) { | 
|  | 140 		if (m == n) { | 
|  | 141 			m = m? m<<1 : 16; | 
|  | 142 			b = realloc(b, sizeof(void*) * m); | 
|  | 143 		} | 
|  | 144 		b[n] = calloc(1, sizeof(bcf1_t)); | 
|  | 145 		bcf_cpy(b[n++], b0); | 
|  | 146 	} | 
|  | 147 	bcf_destroy(b0); | 
|  | 148 	// compute pair-wise r^2 | 
|  | 149 	printf("%d\n", n); // the number of loci | 
|  | 150 	for (i = 0; i < n; ++i) { | 
|  | 151 		printf("%s:%d", h->ns[b[i]->tid], b[i]->pos + 1); | 
|  | 152 		for (j = 0; j < i; ++j) { | 
|  | 153 			double r = bcf_pair_freq(b[i], b[j], f); | 
|  | 154 			printf("\t%.3f", r*r); | 
|  | 155 		} | 
|  | 156 		printf("\t1.000\n"); | 
|  | 157 	} | 
|  | 158 	// free | 
|  | 159 	for (i = 0; i < n; ++i) bcf_destroy(b[i]); | 
|  | 160 	free(b); | 
|  | 161 	bcf_hdr_destroy(h); | 
|  | 162 	bcf_close(fp); | 
|  | 163 	return 0; | 
|  | 164 } | 
|  | 165 | 
|  | 166 int main(int argc, char *argv[]) | 
|  | 167 { | 
|  | 168 	if (argc == 1) { | 
|  | 169 		fprintf(stderr, "\n"); | 
|  | 170 		fprintf(stderr, "Program: bcftools (Tools for data in the VCF/BCF formats)\n"); | 
|  | 171 		fprintf(stderr, "Version: %s\n\n", BCF_VERSION); | 
|  | 172 		fprintf(stderr, "Usage:   bcftools <command> <arguments>\n\n"); | 
|  | 173 		fprintf(stderr, "Command: view      print, extract, convert and call SNPs from BCF\n"); | 
|  | 174 		fprintf(stderr, "         index     index BCF\n"); | 
|  | 175 		fprintf(stderr, "         cat       concatenate BCFs\n"); | 
|  | 176 		fprintf(stderr, "         ld        compute all-pair r^2\n"); | 
|  | 177 		fprintf(stderr, "         ldpair    compute r^2 between requested pairs\n"); | 
|  | 178 		fprintf(stderr, "\n"); | 
|  | 179 		return 1; | 
|  | 180 	} | 
|  | 181 	if (strcmp(argv[1], "view") == 0) return bcfview(argc-1, argv+1); | 
|  | 182 	else if (strcmp(argv[1], "index") == 0) return bcf_main_index(argc-1, argv+1); | 
|  | 183 	else if (strcmp(argv[1], "ld") == 0) return bcf_main_ld(argc-1, argv+1); | 
|  | 184 	else if (strcmp(argv[1], "ldpair") == 0) return bcf_main_ldpair(argc-1, argv+1); | 
|  | 185 	else if (strcmp(argv[1], "cat") == 0) return bcf_cat(argc-2, argv+2); // cat is different ... | 
|  | 186 	else { | 
|  | 187 		fprintf(stderr, "[main] Unrecognized command.\n"); | 
|  | 188 		return 1; | 
|  | 189 	} | 
|  | 190 	return 0; | 
|  | 191 } |