Mercurial > repos > lsong10 > psiclass
comparison PsiCLASS-1.0.2/samtools-0.1.19/bamshuf.c @ 0:903fc43d6227 draft default tip
Uploaded
author | lsong10 |
---|---|
date | Fri, 26 Mar 2021 16:52:45 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:903fc43d6227 |
---|---|
1 #include <unistd.h> | |
2 #include <stdio.h> | |
3 #include <stdlib.h> | |
4 #include <string.h> | |
5 #include <assert.h> | |
6 #include "sam.h" | |
7 #include "ksort.h" | |
8 | |
9 #define DEF_CLEVEL 1 | |
10 | |
11 static inline unsigned hash_Wang(unsigned key) | |
12 { | |
13 key += ~(key << 15); | |
14 key ^= (key >> 10); | |
15 key += (key << 3); | |
16 key ^= (key >> 6); | |
17 key += ~(key << 11); | |
18 key ^= (key >> 16); | |
19 return key; | |
20 } | |
21 | |
22 static inline unsigned hash_X31_Wang(const char *s) | |
23 { | |
24 unsigned h = *s; | |
25 if (h) { | |
26 for (++s ; *s; ++s) h = (h << 5) - h + *s; | |
27 return hash_Wang(h); | |
28 } else return 0; | |
29 } | |
30 | |
31 typedef struct { | |
32 unsigned key; | |
33 bam1_t *b; | |
34 } elem_t; | |
35 | |
36 static inline int elem_lt(elem_t x, elem_t y) | |
37 { | |
38 if (x.key < y.key) return 1; | |
39 if (x.key == y.key) { | |
40 int t; | |
41 t = strcmp(bam_get_qname(x.b), bam_get_qname(y.b)); | |
42 if (t < 0) return 1; | |
43 return (t == 0 && ((x.b->core.flag>>6&3) < (y.b->core.flag>>6&3))); | |
44 } else return 0; | |
45 } | |
46 | |
47 KSORT_INIT(bamshuf, elem_t, elem_lt) | |
48 | |
49 static void bamshuf(const char *fn, int n_files, const char *pre, int clevel, int is_stdout) | |
50 { | |
51 BGZF *fp, *fpw, **fpt; | |
52 char **fnt, modew[8]; | |
53 bam1_t *b; | |
54 int i, l; | |
55 bam_hdr_t *h; | |
56 int64_t *cnt; | |
57 | |
58 // split | |
59 fp = strcmp(fn, "-")? bgzf_open(fn, "r") : bgzf_dopen(fileno(stdin), "r"); | |
60 assert(fp); | |
61 h = bam_hdr_read(fp); | |
62 fnt = (char**)calloc(n_files, sizeof(void*)); | |
63 fpt = (BGZF**)calloc(n_files, sizeof(void*)); | |
64 cnt = (int64_t*)calloc(n_files, 8); | |
65 l = strlen(pre); | |
66 for (i = 0; i < n_files; ++i) { | |
67 fnt[i] = (char*)calloc(l + 10, 1); | |
68 sprintf(fnt[i], "%s.%.4d.bam", pre, i); | |
69 fpt[i] = bgzf_open(fnt[i], "w1"); | |
70 bam_hdr_write(fpt[i], h); | |
71 } | |
72 b = bam_init1(); | |
73 while (bam_read1(fp, b) >= 0) { | |
74 uint32_t x; | |
75 x = hash_X31_Wang(bam_get_qname(b)) % n_files; | |
76 bam_write1(fpt[x], b); | |
77 ++cnt[x]; | |
78 } | |
79 bam_destroy1(b); | |
80 for (i = 0; i < n_files; ++i) bgzf_close(fpt[i]); | |
81 free(fpt); | |
82 bgzf_close(fp); | |
83 // merge | |
84 sprintf(modew, "w%d", (clevel >= 0 && clevel <= 9)? clevel : DEF_CLEVEL); | |
85 if (!is_stdout) { // output to a file | |
86 char *fnw = (char*)calloc(l + 5, 1); | |
87 sprintf(fnw, "%s.bam", pre); | |
88 fpw = bgzf_open(fnw, modew); | |
89 free(fnw); | |
90 } else fpw = bgzf_dopen(fileno(stdout), modew); // output to stdout | |
91 bam_hdr_write(fpw, h); | |
92 bam_hdr_destroy(h); | |
93 for (i = 0; i < n_files; ++i) { | |
94 int64_t j, c = cnt[i]; | |
95 elem_t *a; | |
96 fp = bgzf_open(fnt[i], "r"); | |
97 bam_hdr_destroy(bam_hdr_read(fp)); | |
98 a = (elem_t*)calloc(c, sizeof(elem_t)); | |
99 for (j = 0; j < c; ++j) { | |
100 a[j].b = bam_init1(); | |
101 assert(bam_read1(fp, a[j].b) >= 0); | |
102 a[j].key = hash_X31_Wang(bam_get_qname(a[j].b)); | |
103 } | |
104 bgzf_close(fp); | |
105 unlink(fnt[i]); | |
106 free(fnt[i]); | |
107 ks_introsort(bamshuf, c, a); | |
108 for (j = 0; j < c; ++j) { | |
109 bam_write1(fpw, a[j].b); | |
110 bam_destroy1(a[j].b); | |
111 } | |
112 free(a); | |
113 } | |
114 bgzf_close(fpw); | |
115 free(fnt); free(cnt); | |
116 } | |
117 | |
118 int main_bamshuf(int argc, char *argv[]) | |
119 { | |
120 int c, n_files = 64, clevel = DEF_CLEVEL, is_stdout = 0, is_un = 0; | |
121 while ((c = getopt(argc, argv, "n:l:uO")) >= 0) { | |
122 switch (c) { | |
123 case 'n': n_files = atoi(optarg); break; | |
124 case 'l': clevel = atoi(optarg); break; | |
125 case 'u': is_un = 1; break; | |
126 case 'O': is_stdout = 1; break; | |
127 } | |
128 } | |
129 if (is_un) clevel = 0; | |
130 if (optind + 2 > argc) { | |
131 fprintf(stderr, "\nUsage: bamshuf [-Ou] [-n nFiles] [-c cLevel] <in.bam> <out.prefix>\n\n"); | |
132 fprintf(stderr, "Options: -O output to stdout\n"); | |
133 fprintf(stderr, " -u uncompressed BAM output\n"); | |
134 fprintf(stderr, " -l INT compression level [%d]\n", DEF_CLEVEL); | |
135 fprintf(stderr, " -n INT number of temporary files [%d]\n", n_files); | |
136 fprintf(stderr, "\n"); | |
137 return 1; | |
138 } | |
139 bamshuf(argv[optind], n_files, argv[optind+1], clevel, is_stdout); | |
140 return 0; | |
141 } |