annotate PsiCLASS-1.0.2/samtools-0.1.19/bam_cat.c @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1 /*
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3 bam_cat -- efficiently concatenates bam files
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
4
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
5 bam_cat can be used to concatenate BAM files. Under special
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
6 circumstances, it can be used as an alternative to 'samtools merge' to
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
7 concatenate multiple sorted files into a single sorted file. For this
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
8 to work each file must be sorted, and the sorted files must be given
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
9 as command line arguments in order such that the final read in file i
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
10 is less than or equal to the first read in file i+1.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
11
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
12 This code is derived from the bam_reheader function in samtools 0.1.8
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
13 and modified to perform concatenation by Chris Saunders on behalf of
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
14 Illumina.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
15
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
16
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
17 ########## License:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
18
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
19 The MIT License
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
20
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
21 Original SAMtools work copyright (c) 2008-2009 Genome Research Ltd.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
22 Modified SAMtools work copyright (c) 2010 Illumina, Inc.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
23
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
24 Permission is hereby granted, free of charge, to any person obtaining a copy
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
25 of this software and associated documentation files (the "Software"), to deal
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
26 in the Software without restriction, including without limitation the rights
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
27 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
28 copies of the Software, and to permit persons to whom the Software is
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
29 furnished to do so, subject to the following conditions:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
30
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
31 The above copyright notice and this permission notice shall be included in
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
32 all copies or substantial portions of the Software.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
33
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
34 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
35 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
36 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
37 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
38 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
39 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
40 THE SOFTWARE.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
41
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
42 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
43
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
44
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
45 /*
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
46 makefile:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
47 """
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
48 CC=gcc
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
49 CFLAGS+=-g -Wall -O2 -D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE -I$(SAMTOOLS_DIR)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
50 LDFLAGS+=-L$(SAMTOOLS_DIR)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
51 LDLIBS+=-lbam -lz
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
52
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
53 all:bam_cat
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
54 """
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
55 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
56
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
57
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "knetfile.h"
#include "bgzf.h"
#include "bam.h"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
65
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
66 #define BUF_SIZE 0x10000
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
67
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
68 #define GZIPID1 31
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
69 #define GZIPID2 139
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
70
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
71 #define BGZF_EMPTY_BLOCK_SIZE 28
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
72
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
73
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
74 int bam_cat(int nfn, char * const *fn, const bam_header_t *h, const char* outbam)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
75 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
76 BGZF *fp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
77 FILE* fp_file;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
78 uint8_t *buf;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
79 uint8_t ebuf[BGZF_EMPTY_BLOCK_SIZE];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
80 const int es=BGZF_EMPTY_BLOCK_SIZE;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
81 int i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
82
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
83 fp = strcmp(outbam, "-")? bgzf_open(outbam, "w") : bgzf_fdopen(fileno(stdout), "w");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
84 if (fp == 0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
85 fprintf(stderr, "[%s] ERROR: fail to open output file '%s'.\n", __func__, outbam);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
86 return 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
87 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
88 if (h) bam_header_write(fp, h);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
89
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
90 buf = (uint8_t*) malloc(BUF_SIZE);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
91 for(i = 0; i < nfn; ++i){
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
92 BGZF *in;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
93 bam_header_t *old;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
94 int len,j;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
95
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
96 in = strcmp(fn[i], "-")? bam_open(fn[i], "r") : bam_dopen(fileno(stdin), "r");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
97 if (in == 0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
98 fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
99 return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
100 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
101 if (in->is_write) return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
102
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
103 old = bam_header_read(in);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
104 if (h == 0 && i == 0) bam_header_write(fp, old);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
105
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
106 if (in->block_offset < in->block_length) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
107 bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
108 bgzf_flush(fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
109 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
110
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
111 j=0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
112 #ifdef _USE_KNETFILE
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
113 fp_file = fp->fp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
114 while ((len = knet_read(in->fp, buf, BUF_SIZE)) > 0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
115 #else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
116 fp_file = fp->fp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
117 while (!feof(in->file) && (len = fread(buf, 1, BUF_SIZE, in->file)) > 0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
118 #endif
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
119 if(len<es){
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
120 int diff=es-len;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
121 if(j==0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
122 fprintf(stderr, "[%s] ERROR: truncated file?: '%s'.\n", __func__, fn[i]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
123 return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
124 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
125 fwrite(ebuf, 1, len, fp_file);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
126 memcpy(ebuf,ebuf+len,diff);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
127 memcpy(ebuf+diff,buf,len);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
128 } else {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
129 if(j!=0) fwrite(ebuf, 1, es, fp_file);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
130 len-= es;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
131 memcpy(ebuf,buf+len,es);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
132 fwrite(buf, 1, len, fp_file);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
133 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
134 j=1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
135 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
136
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
137 /* check final gzip block */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
138 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
139 const uint8_t gzip1=ebuf[0];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
140 const uint8_t gzip2=ebuf[1];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
141 const uint32_t isize=*((uint32_t*)(ebuf+es-4));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
142 if(((gzip1!=GZIPID1) || (gzip2!=GZIPID2)) || (isize!=0)) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
143 fprintf(stderr, "[%s] WARNING: Unexpected block structure in file '%s'.", __func__, fn[i]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
144 fprintf(stderr, " Possible output corruption.\n");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
145 fwrite(ebuf, 1, es, fp_file);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
146 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
147 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
148 bam_header_destroy(old);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
149 bgzf_close(in);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
150 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
151 free(buf);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
152 bgzf_close(fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
153 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
154 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
155
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
156
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
157
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
158 int main_cat(int argc, char *argv[])
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
159 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
160 bam_header_t *h = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
161 char *outfn = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
162 int c, ret;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
163 while ((c = getopt(argc, argv, "h:o:")) >= 0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
164 switch (c) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
165 case 'h': {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
166 tamFile fph = sam_open(optarg);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
167 if (fph == 0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
168 fprintf(stderr, "[%s] ERROR: fail to read the header from '%s'.\n", __func__, argv[1]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
169 return 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
170 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
171 h = sam_header_read(fph);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
172 sam_close(fph);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
173 break;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
174 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
175 case 'o': outfn = strdup(optarg); break;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
176 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
177 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
178 if (argc - optind < 2) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
179 fprintf(stderr, "Usage: samtools cat [-h header.sam] [-o out.bam] <in1.bam> <in2.bam> [...]\n");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
180 return 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
181 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
182 ret = bam_cat(argc - optind, argv + optind, h, outfn? outfn : "-");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
183 free(outfn);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
184 return ret;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
185 }