Mercurial > repos > lsong10 > psiclass
diff PsiCLASS-1.0.2/samtools-0.1.19/bcftools/main.c @ 0:903fc43d6227 draft default tip
Uploaded
author | lsong10 |
---|---|
date | Fri, 26 Mar 2021 16:52:45 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PsiCLASS-1.0.2/samtools-0.1.19/bcftools/main.c Fri Mar 26 16:52:45 2021 +0000 @@ -0,0 +1,191 @@ +#include <string.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <unistd.h> +#include "knetfile.h" +#include "bcf.h" + +#include "kseq.h" +KSTREAM_INIT(gzFile, gzread, 0x10000) + +int bcfview(int argc, char *argv[]); +int bcf_main_index(int argc, char *argv[]); + +#define BUF_SIZE 0x10000 + +int bcf_cat(int n, char * const *fn) +{ + int i; + bcf_t *out; + uint8_t *buf; + buf = malloc(BUF_SIZE); + out = bcf_open("-", "w"); + for (i = 0; i < n; ++i) { + bcf_t *in; + bcf_hdr_t *h; + off_t end; + struct stat s; + in = bcf_open(fn[i], "r"); + h = bcf_hdr_read(in); + if (i == 0) bcf_hdr_write(out, h); + bcf_hdr_destroy(h); +#ifdef _USE_KNETFILE + fstat(knet_fileno((knetFile*)in->fp->fp), &s); + end = s.st_size - 28; + while (knet_tell((knetFile*)in->fp->fp) < end) { + int size = knet_tell((knetFile*)in->fp->fp) + BUF_SIZE < end? BUF_SIZE : end - knet_tell((knetFile*)in->fp->fp); + knet_read(in->fp->fp, buf, size); + fwrite(buf, 1, size, out->fp->fp); + } +#else + abort(); // FIXME: not implemented +#endif + bcf_close(in); + } + bcf_close(out); + free(buf); + return 0; +} + +extern double bcf_pair_freq(const bcf1_t *b0, const bcf1_t *b1, double f[4]); + +int bcf_main_ldpair(int argc, char *argv[]) +{ + bcf_t *fp; + bcf_hdr_t *h; + bcf1_t *b0, *b1; + bcf_idx_t *idx; + kstring_t str; + void *str2id; + gzFile fplist; + kstream_t *ks; + int dret, lineno = 0; + if (argc < 3) { + fprintf(stderr, "Usage: bcftools ldpair <in.bcf> <in.list>\n"); + return 1; + } + fplist = gzopen(argv[2], "rb"); + ks = ks_init(fplist); + memset(&str, 0, sizeof(kstring_t)); + fp = bcf_open(argv[1], "rb"); + h = bcf_hdr_read(fp); + str2id = bcf_build_refhash(h); + idx = bcf_idx_load(argv[1]); + if (idx == 0) { + fprintf(stderr, "[%s] No bcf index is found. Abort!\n", __func__); + return 1; + } + b0 = calloc(1, sizeof(bcf1_t)); + b1 = calloc(1, sizeof(bcf1_t)); + while (ks_getuntil(ks, '\n', &str, &dret) >= 0) { + char *p, *q; + int k; + int tid0 = -1, tid1 = -1, pos0 = -1, pos1 = -1; + ++lineno; + for (p = q = str.s, k = 0; *p; ++p) { + if (*p == ' ' || *p == '\t') { + *p = '\0'; + if (k == 0) tid0 = bcf_str2id(str2id, q); + else if (k == 1) pos0 = atoi(q) - 1; + else if (k == 2) tid1 = strcmp(q, "=")? bcf_str2id(str2id, q) : tid0; + else if (k == 3) pos1 = atoi(q) - 1; + q = p + 1; + ++k; + } + } + if (k == 3) pos1 = atoi(q) - 1; + if (tid0 >= 0 && tid1 >= 0 && pos0 >= 0 && pos1 >= 0) { + uint64_t off; + double r, f[4]; + off = bcf_idx_query(idx, tid0, pos0); + bgzf_seek(fp->fp, off, SEEK_SET); + while (bcf_read(fp, h, b0) >= 0 && b0->pos != pos0); + off = bcf_idx_query(idx, tid1, pos1); + bgzf_seek(fp->fp, off, SEEK_SET); + while (bcf_read(fp, h, b1) >= 0 && b1->pos != pos1); + r = bcf_pair_freq(b0, b1, f); + r *= r; + printf("%s\t%d\t%s\t%d\t%.4g\t%.4g\t%.4g\t%.4g\t%.4g\n", h->ns[tid0], pos0+1, h->ns[tid1], pos1+1, + r, f[0], f[1], f[2], f[3]); + } //else fprintf(stderr, "[%s] Parse error at line %d.\n", __func__, lineno); + } + bcf_destroy(b0); bcf_destroy(b1); + bcf_idx_destroy(idx); + bcf_str2id_destroy(str2id); + bcf_hdr_destroy(h); + bcf_close(fp); + free(str.s); + ks_destroy(ks); + gzclose(fplist); + return 0; +} + +int bcf_main_ld(int argc, char *argv[]) +{ + bcf_t *fp; + bcf_hdr_t *h; + bcf1_t **b, *b0; + int i, j, m, n; + double f[4]; + if (argc == 1) { + fprintf(stderr, "Usage: bcftools ld <in.bcf>\n"); + return 1; + } + fp = bcf_open(argv[1], "rb"); + h = bcf_hdr_read(fp); + // read the entire BCF + m = n = 0; b = 0; + b0 = calloc(1, sizeof(bcf1_t)); + while (bcf_read(fp, h, b0) >= 0) { + if (m == n) { + m = m? m<<1 : 16; + b = realloc(b, sizeof(void*) * m); + } + b[n] = calloc(1, sizeof(bcf1_t)); + bcf_cpy(b[n++], b0); + } + bcf_destroy(b0); + // compute pair-wise r^2 + printf("%d\n", n); // the number of loci + for (i = 0; i < n; ++i) { + printf("%s:%d", h->ns[b[i]->tid], b[i]->pos + 1); + for (j = 0; j < i; ++j) { + double r = bcf_pair_freq(b[i], b[j], f); + printf("\t%.3f", r*r); + } + printf("\t1.000\n"); + } + // free + for (i = 0; i < n; ++i) bcf_destroy(b[i]); + free(b); + bcf_hdr_destroy(h); + bcf_close(fp); + return 0; +} + +int main(int argc, char *argv[]) +{ + if (argc == 1) { + fprintf(stderr, "\n"); + fprintf(stderr, "Program: bcftools (Tools for data in the VCF/BCF formats)\n"); + fprintf(stderr, "Version: %s\n\n", BCF_VERSION); + fprintf(stderr, "Usage: bcftools <command> <arguments>\n\n"); + fprintf(stderr, "Command: view print, extract, convert and call SNPs from BCF\n"); + fprintf(stderr, " index index BCF\n"); + fprintf(stderr, " cat concatenate BCFs\n"); + fprintf(stderr, " ld compute all-pair r^2\n"); + fprintf(stderr, " ldpair compute r^2 between requested pairs\n"); + fprintf(stderr, "\n"); + return 1; + } + if (strcmp(argv[1], "view") == 0) return bcfview(argc-1, argv+1); + else if (strcmp(argv[1], "index") == 0) return bcf_main_index(argc-1, argv+1); + else if (strcmp(argv[1], "ld") == 0) return bcf_main_ld(argc-1, argv+1); + else if (strcmp(argv[1], "ldpair") == 0) return bcf_main_ldpair(argc-1, argv+1); + else if (strcmp(argv[1], "cat") == 0) return bcf_cat(argc-2, argv+2); // cat is different ... + else { + fprintf(stderr, "[main] Unrecognized command.\n"); + return 1; + } + return 0; +}