comparison py/biopython_modified_bgzf.py @ 0:6e75a84e9338 draft

planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
author thondeboer
date Tue, 15 May 2018 02:39:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:6e75a84e9338
1 #!/usr/bin/env python
2 # Copyright 2010-2013 by Peter Cock.
3 # All rights reserved.
4 # This code is part of the Biopython distribution and governed by its
5 # license. Please see the LICENSE file that should have been included
6 # as part of this package.
7
8 """ ############################################################################
9 ####### #######
10 ####### 06/02/2015: #######
11 ####### - I picked out the bits and pieces of code needed #######
12 ####### to write BAM files, removed python 3.0 compatibility #######
13 ####### #######
14 ############################################################################ """
15
16 import zlib
17 import struct
18
# Fixed 16-byte start of every BGZF block: gzip magic (1f 8b), DEFLATE method,
# FEXTRA flag, zeroed MTIME/XFL, OS=0xff (unknown), XLEN=6, then the "BC"
# extra-subfield header with SLEN=2.  The 2-byte BSIZE value follows it.
_bgzf_header = b"\x1f\x8b\x08\x04\x00\x00\x00\x00\x00\xff\x06\x00\x42\x43\x02\x00"
# A complete 28-byte BGZF block holding no data, written as the end-of-file marker.
_bgzf_eof = b"\x1f\x8b\x08\x04\x00\x00\x00\x00\x00\xff\x06\x00\x42\x43\x02\x00\x1b\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00"


class BgzfWriter(object):
    """Write binary data to a handle as a sequence of BGZF blocks.

    Data passed to write() is buffered and emitted in blocks of at most
    65536 uncompressed bytes.  close() flushes what is left, appends the
    28-byte empty EOF block and closes the underlying handle.  Instances can
    be used as context managers.
    """

    # Largest number of uncompressed bytes placed in a single block.
    _MAX_BLOCK_DATA = 65536
    # BSIZE (total block length minus one) is an unsigned 16-bit field and is
    # written as len(compressed) + 25, so the deflate stream must not exceed
    # 65535 - 25 bytes.
    _MAX_COMPRESSED = 65535 - 25

    def __init__(self, filename=None, mode="w", fileobj=None, compresslevel=6):
        """Open *filename* for writing/appending, or wrap an open *fileobj*.

        filename      -- path to open in binary mode (ignored when fileobj is given).
        mode          -- must contain "w" or "a" (case-insensitive) when a
                         filename is used; "a" appends, otherwise truncates.
        fileobj       -- already-open binary handle to write to instead.
        compresslevel -- zlib compression level used for every block.

        Raises ValueError if both filename and fileobj are supplied, or if
        the mode is neither a write nor an append mode.
        """
        lowered = mode.lower()
        if fileobj is not None:
            # Was an assert, which vanishes under "python -O".
            if filename is not None:
                raise ValueError("Supply either filename or fileobj, not both")
            handle = fileobj
        else:
            if "w" not in lowered and "a" not in lowered:
                raise ValueError("Must use write or append mode, not %r" % mode)
            if "a" in lowered:
                handle = open(filename, "ab")
            else:
                handle = open(filename, "wb")
        self._text = "b" not in lowered
        self._handle = handle
        self._buffer = b""
        self.compresslevel = compresslevel

    def _write_block(self, block):
        """Compress *block* (at most 65536 bytes) and write it as BGZF block(s).

        Normally exactly one block is written.  If the data is incompressible
        and its deflate stream would not fit the 16-bit BSIZE field, the data
        is split in half and each half is written as its own block.
        """
        if len(block) > self._MAX_BLOCK_DATA:
            raise ValueError("BGZF blocks hold at most %i bytes, got %i"
                             % (self._MAX_BLOCK_DATA, len(block)))
        # Giving a negative window bits means no gzip/zlib headers, -15 used in samtools
        compressor = zlib.compressobj(self.compresslevel,
                                      zlib.DEFLATED,
                                      -15,
                                      zlib.DEF_MEM_LEVEL,
                                      0)
        compressed = compressor.compress(block) + compressor.flush()
        del compressor
        if len(compressed) > self._MAX_COMPRESSED:
            # Random or already-compressed input can grow when deflated.  Only
            # large blocks can get here, so halving always makes progress.
            middle = len(block) // 2
            self._write_block(block[:middle])
            self._write_block(block[middle:])
            return
        bsize = struct.pack("<H", len(compressed) + 25)  # includes -1
        # Masking gives the same unsigned value whether crc32 is signed or not.
        crc = struct.pack("<I", zlib.crc32(block) & 0xffffffff)
        uncompressed_length = struct.pack("<I", len(block))
        self._handle.write(_bgzf_header + bsize + compressed + crc + uncompressed_length)

    def _write_full_blocks(self):
        """Write every complete 65536-byte chunk held in the buffer."""
        pending = self._buffer
        size = self._MAX_BLOCK_DATA
        start = 0
        try:
            # Walk an offset instead of re-slicing the whole buffer per block,
            # which was quadratic for large writes.
            while len(pending) - start >= size:
                self._write_block(pending[start:start + size])
                start += size
        finally:
            # Even on error, drop only what was actually written.
            if start:
                self._buffer = pending[start:]

    def write(self, data):
        """Buffer *data* (bytes) and write out any complete blocks."""
        self._buffer += data
        self._write_full_blocks()

    def flush(self):
        """Write all buffered data as BGZF block(s) and flush the handle.

        The remaining partial buffer is always written as a block, so an
        empty buffer still produces one (empty) block.
        """
        self._write_full_blocks()
        self._write_block(self._buffer)
        self._buffer = b""
        self._handle.flush()

    def close(self):
        """Flush data, write 28 bytes empty BGZF EOF marker, and close the BGZF file."""
        try:
            if self._buffer:
                self.flush()
            # samtools will look for a magic EOF marker, just a 28 byte empty BGZF block,
            # and if it is missing warns the BAM file may be truncated. In addition to
            # samtools writing this block, so too does bgzip - so we should too.
            self._handle.write(_bgzf_eof)
            self._handle.flush()
        finally:
            # Release the handle even if writing failed.
            self._handle.close()

    def __enter__(self):
        """Return the writer itself for use in a with-statement."""
        return self

    def __exit__(self, type, value, traceback):
        """Close the writer when the with-block ends."""
        self.close()
100
101
if __name__ == "__main__":
    # This module is a library only; nothing happens when it is run directly.
    pass