Mercurial > repos > thondeboer > neat_genreads
diff utilities/validateBam.py @ 0:6e75a84e9338 draft
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
| author | thondeboer |
|---|---|
| date | Tue, 15 May 2018 02:39:53 -0400 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utilities/validateBam.py Tue May 15 02:39:53 2018 -0400 @@ -0,0 +1,84 @@ +#!/usr/bin/env python + +import sys +import os +import gzip +from struct import unpack + +BAM_EOF = ['1f', '8b', '08', '04', '00', '00', '00', '00', '00', 'ff', '06', '00', '42', '43', '02', '00', '1b', '00', '03', '00', '00', '00', '00', '00', '00', '00', '00', '00'] + +def getBytes(fmt,amt): + if fmt == '<i' or fmt == '<I': + mySize = 4 + elif fmt == '<c' or fmt == '<b' or fmt == '<B': + mySize = 1 + else: + print '\nError, unknown format:',fmt,'\n' + exit(1) + if amt == 1: + fread = f.read(mySize) + if not fread: + return None + return unpack(fmt,fread)[0] + else: + fread = f.read(mySize*amt) + if not fread: + return None + return unpack(fmt,fread) + +# check eof +IN_BAM = sys.argv[1] +f = open(IN_BAM,'rb') +f.seek(os.path.getsize(IN_BAM)-28) +EOF = [format(ord(n),'02x') for n in f.read()] +print 'EOF_MARKER: ', ' '.join(EOF) +if EOF != BAM_EOF: + print '\nWARNING: BAM EOF DOES NOT MATCH EXPECTED STRING.\n' +f.close() + +# check other stuff +f = gzip.open(IN_BAM,'rb') + +print 'MAGIC STRING:', f.read(4) +l_text = getBytes('<i',1) +print 'l_text: ', l_text +print 'text: \n', f.read(l_text) +n_ref = getBytes('<i',1) +print 'n_ref: ', n_ref + +for i in xrange(n_ref): + l_name = getBytes('<i',1) + print 'ref'+str(i)+' - l_name:', l_name + print 'ref'+str(i)+' - name: ', f.read(l_name) + print 'ref'+str(i)+' - l_ref: ', getBytes('<i',1) + +print '\nEXAMINING ALIGNMENT DATA:\n' +aln_N = 0 +while True: + aln_N += 1 + blockSize = getBytes('<i',1) + if blockSize == None: + break + print '['+str(aln_N)+']:', 'blockSize:', blockSize + print '-- refID:', getBytes('<i',1) + print '-- pos: ', getBytes('<i',1) + bmqnl = getBytes('<I',1) + binv = (bmqnl>>16)&65535 + mapq = (bmqnl>>8)&255 + lrn = bmqnl&255 + print '-- bmqnl:', bmqnl, '(bin='+str(binv)+', mapq='+str(mapq)+', l_readname+1='+str(lrn)+')' + flgnc = getBytes('<I',1) + flag = (flgnc>>16)&65535 + ncig = flgnc&65535 + print '-- flgnc:', flgnc, '(flag='+str(flag)+', ncig='+str(ncig)+')' + print '-- l_seq:', getBytes('<i',1) + print '-- nxtID:', getBytes('<i',1) + print '-- nxtPo:', getBytes('<i',1) + print '-- tlen: ', getBytes('<i',1) + print '-- rname:', str([f.read(lrn)])[1:-1] + + f.read(blockSize-32-lrn) + #print [block] + + +f.close()
