Mercurial > repos > thondeboer > neat_genreads
comparison py/biopython_modified_bgzf.py @ 0:6e75a84e9338 draft
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
| author | thondeboer |
|---|---|
| date | Tue, 15 May 2018 02:39:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:6e75a84e9338 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # Copyright 2010-2013 by Peter Cock. | |
| 3 # All rights reserved. | |
| 4 # This code is part of the Biopython distribution and governed by its | |
| 5 # license. Please see the LICENSE file that should have been included | |
| 6 # as part of this package. | |
| 7 | |
| 8 """ ############################################################################ | |
| 9 ####### ####### | |
| 10 ####### 06/02/2015: ####### | |
| 11 ####### - I picked out the bits and pieces of code needed ####### | |
| 12 ####### to write BAM files, removed python 3.0 compatibility ####### | |
| 13 ####### ####### | |
| 14 ############################################################################ """ | |
| 15 | |
| 16 import zlib | |
| 17 import struct | |
| 18 | |
# Fixed 16-byte gzip member header that starts every BGZF block: gzip magic,
# CM=8 (DEFLATE), FLG=4 (FEXTRA), zero MTIME/XFL, OS=0xff, XLEN=6, then the
# "BC" extra-subfield id with SLEN=2.  The 2-byte BSIZE value follows it.
_bgzf_header = b"\x1f\x8b\x08\x04\x00\x00\x00\x00\x00\xff\x06\x00\x42\x43\x02\x00"

# Complete 28-byte empty BGZF block used as the end-of-file marker.
_bgzf_eof = b"\x1f\x8b\x08\x04\x00\x00\x00\x00\x00\xff\x06\x00\x42\x43\x02\x00\x1b\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00"

# Largest amount of uncompressed data stored in a single block.
_bgzf_block_size = 65536

# A block is header (16) + BSIZE (2) + deflate data + CRC32 (4) + ISIZE (4).
# BSIZE holds "total block size - 1" in an unsigned 16-bit field, so the
# whole block may be at most 65536 bytes, leaving this much for deflate data.
_bgzf_max_deflated = 65536 - 26


class BgzfWriter(object):
    """Write-only file-like object producing BGZF (blocked gzip) output.

    Data passed to :meth:`write` is buffered and emitted as independent
    gzip members ("blocks") of at most 64 KiB of uncompressed data each.
    :meth:`close` appends the 28-byte empty block that samtools and bgzip
    use as an end-of-file marker.

    The object can be used as a context manager; leaving the ``with`` block
    calls :meth:`close`.
    """

    # Class-level default so close() is safe even if __init__ did not finish.
    _closed = False

    def __init__(self, filename=None, mode="w", fileobj=None, compresslevel=6):
        """Open *filename* for writing, or wrap the already open *fileobj*.

        Args:
            filename: Path to open in binary write/append mode.  Must be
                None when *fileobj* is given.
            mode: Must contain "w" or "a" when *filename* is used; "a"
                appends to an existing file.
            fileobj: Binary handle providing ``write``, ``flush`` and
                ``close``.  It is closed by :meth:`close`.
            compresslevel: zlib compression level used for every block.

        Raises:
            ValueError: If both *filename* and *fileobj* are given, or if
                *mode* is neither a write nor an append mode.
        """
        if fileobj is not None:
            if filename is not None:
                raise ValueError("Supply either filename or fileobj, not both")
            handle = fileobj
        else:
            lowered = mode.lower()
            if "w" not in lowered and "a" not in lowered:
                raise ValueError("Must use write or append mode, not %r" % mode)
            handle = open(filename, "ab" if "a" in lowered else "wb")
        self._text = "b" not in mode.lower()
        self._handle = handle
        self._buffer = b""
        self.compresslevel = compresslevel
        self._closed = False

    def _write_block(self, block):
        """Compress *block* and write it to the handle as BGZF block(s).

        Normally exactly one block is written.  If the data is so
        incompressible that the deflate stream would not fit into a single
        BGZF block, the data is halved and each half is written as its own
        block instead of failing.

        Raises:
            ValueError: If *block* holds more than 65536 bytes.
        """
        if len(block) > _bgzf_block_size:
            raise ValueError("Block of %i bytes exceeds the BGZF limit of %i"
                             % (len(block), _bgzf_block_size))
        # Negative window bits means a raw deflate stream without gzip/zlib
        # headers; -15 is what samtools uses.
        deflater = zlib.compressobj(self.compresslevel,
                                    zlib.DEFLATED,
                                    -15,
                                    zlib.DEF_MEM_LEVEL,
                                    0)
        compressed = deflater.compress(block) + deflater.flush()
        if len(compressed) > _bgzf_max_deflated:
            # Only reachable for (nearly) incompressible input, so both
            # halves hold at least one byte and the recursion terminates.
            middle = len(block) // 2
            self._write_block(block[:middle])
            self._write_block(block[middle:])
            return
        bsize = struct.pack("<H", len(compressed) + 25)  # total size - 1
        # Mask so the CRC is unsigned on every Python version/platform.
        crc = struct.pack("<I", zlib.crc32(block) & 0xffffffff)
        uncompressed_length = struct.pack("<I", len(block))
        self._handle.write(
            _bgzf_header + bsize + compressed + crc + uncompressed_length)

    def _drain_full_blocks(self):
        """Write out every complete 64 KiB chunk currently in the buffer."""
        while len(self._buffer) >= _bgzf_block_size:
            self._write_block(self._buffer[:_bgzf_block_size])
            self._buffer = self._buffer[_bgzf_block_size:]

    def write(self, data):
        """Buffer *data* (bytes), writing blocks whenever 64 KiB is reached."""
        self._buffer += data
        self._drain_full_blocks()

    def flush(self):
        """Write all buffered data as a block and flush the handle.

        A block is written even when the buffer is empty.
        """
        self._drain_full_blocks()
        self._write_block(self._buffer)
        self._buffer = b""
        self._handle.flush()

    def close(self):
        """Flush data, write 28 bytes empty BGZF EOF marker, and close the BGZF file.

        Calling close() again afterwards does nothing.
        """
        if self._closed:
            return
        try:
            if self._buffer:
                self.flush()
            # samtools looks for a magic EOF marker, a 28 byte empty BGZF
            # block, and warns that the BAM file may be truncated if it is
            # missing.  bgzip writes this block too.
            self._handle.write(_bgzf_eof)
            self._handle.flush()
        finally:
            # Release the handle even if flushing failed.
            self._closed = True
            self._handle.close()

    def __enter__(self):
        """Return the writer itself for use in a ``with`` statement."""
        return self

    def __exit__(self, type, value, traceback):
        """Close the writer when the ``with`` block is left."""
        self.close()
| 100 | |
| 101 | |
def _main():
    """Script entry point; running this module directly performs no work."""


if __name__ == "__main__":
    _main()
