comparison PsiCLASS-1.0.2/samtools-0.1.19/bam_cat.c @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:903fc43d6227
1 /*
2
3 bam_cat -- efficiently concatenates bam files
4
5 bam_cat can be used to concatenate BAM files. Under special
6 circumstances, it can be used as an alternative to 'samtools merge' to
7 concatenate multiple sorted files into a single sorted file. For this
8 to work each file must be sorted, and the sorted files must be given
9 as command line arguments in order such that the final read in file i
10 is less than or equal to the first read in file i+1.
11
12 This code is derived from the bam_reheader function in samtools 0.1.8
13 and modified to perform concatenation by Chris Saunders on behalf of
14 Illumina.
15
16
17 ########## License:
18
19 The MIT License
20
21 Original SAMtools work copyright (c) 2008-2009 Genome Research Ltd.
22 Modified SAMtools work copyright (c) 2010 Illumina, Inc.
23
24 Permission is hereby granted, free of charge, to any person obtaining a copy
25 of this software and associated documentation files (the "Software"), to deal
26 in the Software without restriction, including without limitation the rights
27 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
28 copies of the Software, and to permit persons to whom the Software is
29 furnished to do so, subject to the following conditions:
30
31 The above copyright notice and this permission notice shall be included in
32 all copies or substantial portions of the Software.
33
34 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
35 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
36 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
37 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
38 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
39 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
40 THE SOFTWARE.
41
42 */
43
44
45 /*
46 makefile:
47 """
48 CC=gcc
49 CFLAGS+=-g -Wall -O2 -D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE -I$(SAMTOOLS_DIR)
50 LDFLAGS+=-L$(SAMTOOLS_DIR)
51 LDLIBS+=-lbam -lz
52
53 all:bam_cat
54 """
55 */
56
57
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <unistd.h>
61
62 #include "knetfile.h"
63 #include "bgzf.h"
64 #include "bam.h"
65
66 #define BUF_SIZE 0x10000
67
68 #define GZIPID1 31
69 #define GZIPID2 139
70
71 #define BGZF_EMPTY_BLOCK_SIZE 28
72
73
74 int bam_cat(int nfn, char * const *fn, const bam_header_t *h, const char* outbam)
75 {
76 BGZF *fp;
77 FILE* fp_file;
78 uint8_t *buf;
79 uint8_t ebuf[BGZF_EMPTY_BLOCK_SIZE];
80 const int es=BGZF_EMPTY_BLOCK_SIZE;
81 int i;
82
83 fp = strcmp(outbam, "-")? bgzf_open(outbam, "w") : bgzf_fdopen(fileno(stdout), "w");
84 if (fp == 0) {
85 fprintf(stderr, "[%s] ERROR: fail to open output file '%s'.\n", __func__, outbam);
86 return 1;
87 }
88 if (h) bam_header_write(fp, h);
89
90 buf = (uint8_t*) malloc(BUF_SIZE);
91 for(i = 0; i < nfn; ++i){
92 BGZF *in;
93 bam_header_t *old;
94 int len,j;
95
96 in = strcmp(fn[i], "-")? bam_open(fn[i], "r") : bam_dopen(fileno(stdin), "r");
97 if (in == 0) {
98 fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]);
99 return -1;
100 }
101 if (in->is_write) return -1;
102
103 old = bam_header_read(in);
104 if (h == 0 && i == 0) bam_header_write(fp, old);
105
106 if (in->block_offset < in->block_length) {
107 bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
108 bgzf_flush(fp);
109 }
110
111 j=0;
112 #ifdef _USE_KNETFILE
113 fp_file = fp->fp;
114 while ((len = knet_read(in->fp, buf, BUF_SIZE)) > 0) {
115 #else
116 fp_file = fp->fp;
117 while (!feof(in->file) && (len = fread(buf, 1, BUF_SIZE, in->file)) > 0) {
118 #endif
119 if(len<es){
120 int diff=es-len;
121 if(j==0) {
122 fprintf(stderr, "[%s] ERROR: truncated file?: '%s'.\n", __func__, fn[i]);
123 return -1;
124 }
125 fwrite(ebuf, 1, len, fp_file);
126 memcpy(ebuf,ebuf+len,diff);
127 memcpy(ebuf+diff,buf,len);
128 } else {
129 if(j!=0) fwrite(ebuf, 1, es, fp_file);
130 len-= es;
131 memcpy(ebuf,buf+len,es);
132 fwrite(buf, 1, len, fp_file);
133 }
134 j=1;
135 }
136
137 /* check final gzip block */
138 {
139 const uint8_t gzip1=ebuf[0];
140 const uint8_t gzip2=ebuf[1];
141 const uint32_t isize=*((uint32_t*)(ebuf+es-4));
142 if(((gzip1!=GZIPID1) || (gzip2!=GZIPID2)) || (isize!=0)) {
143 fprintf(stderr, "[%s] WARNING: Unexpected block structure in file '%s'.", __func__, fn[i]);
144 fprintf(stderr, " Possible output corruption.\n");
145 fwrite(ebuf, 1, es, fp_file);
146 }
147 }
148 bam_header_destroy(old);
149 bgzf_close(in);
150 }
151 free(buf);
152 bgzf_close(fp);
153 return 0;
154 }
155
156
157
158 int main_cat(int argc, char *argv[])
159 {
160 bam_header_t *h = 0;
161 char *outfn = 0;
162 int c, ret;
163 while ((c = getopt(argc, argv, "h:o:")) >= 0) {
164 switch (c) {
165 case 'h': {
166 tamFile fph = sam_open(optarg);
167 if (fph == 0) {
168 fprintf(stderr, "[%s] ERROR: fail to read the header from '%s'.\n", __func__, argv[1]);
169 return 1;
170 }
171 h = sam_header_read(fph);
172 sam_close(fph);
173 break;
174 }
175 case 'o': outfn = strdup(optarg); break;
176 }
177 }
178 if (argc - optind < 2) {
179 fprintf(stderr, "Usage: samtools cat [-h header.sam] [-o out.bam] <in1.bam> <in2.bam> [...]\n");
180 return 1;
181 }
182 ret = bam_cat(argc - optind, argv + optind, h, outfn? outfn : "-");
183 free(outfn);
184 return ret;
185 }