Mercurial > repos > thondeboer > neat_genreads
comparison utilities/validateBam.py @ 0:6e75a84e9338 draft
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
| author | thondeboer |
|---|---|
| date | Tue, 15 May 2018 02:39:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:6e75a84e9338 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import sys | |
| 4 import os | |
| 5 import gzip | |
| 6 from struct import unpack | |
| 7 | |
| 8 BAM_EOF = ['1f', '8b', '08', '04', '00', '00', '00', '00', '00', 'ff', '06', '00', '42', '43', '02', '00', '1b', '00', '03', '00', '00', '00', '00', '00', '00', '00', '00', '00'] | |
| 9 | |
| 10 def getBytes(fmt,amt): | |
| 11 if fmt == '<i' or fmt == '<I': | |
| 12 mySize = 4 | |
| 13 elif fmt == '<c' or fmt == '<b' or fmt == '<B': | |
| 14 mySize = 1 | |
| 15 else: | |
| 16 print '\nError, unknown format:',fmt,'\n' | |
| 17 exit(1) | |
| 18 if amt == 1: | |
| 19 fread = f.read(mySize) | |
| 20 if not fread: | |
| 21 return None | |
| 22 return unpack(fmt,fread)[0] | |
| 23 else: | |
| 24 fread = f.read(mySize*amt) | |
| 25 if not fread: | |
| 26 return None | |
| 27 return unpack(fmt,fread) | |
| 28 | |
| 29 # check eof | |
| 30 IN_BAM = sys.argv[1] | |
| 31 f = open(IN_BAM,'rb') | |
| 32 f.seek(os.path.getsize(IN_BAM)-28) | |
| 33 EOF = [format(ord(n),'02x') for n in f.read()] | |
| 34 print 'EOF_MARKER: ', ' '.join(EOF) | |
| 35 if EOF != BAM_EOF: | |
| 36 print '\nWARNING: BAM EOF DOES NOT MATCH EXPECTED STRING.\n' | |
| 37 f.close() | |
| 38 | |
| 39 # check other stuff | |
| 40 f = gzip.open(IN_BAM,'rb') | |
| 41 | |
| 42 print 'MAGIC STRING:', f.read(4) | |
| 43 l_text = getBytes('<i',1) | |
| 44 print 'l_text: ', l_text | |
| 45 print 'text: \n', f.read(l_text) | |
| 46 n_ref = getBytes('<i',1) | |
| 47 print 'n_ref: ', n_ref | |
| 48 | |
| 49 for i in xrange(n_ref): | |
| 50 l_name = getBytes('<i',1) | |
| 51 print 'ref'+str(i)+' - l_name:', l_name | |
| 52 print 'ref'+str(i)+' - name: ', f.read(l_name) | |
| 53 print 'ref'+str(i)+' - l_ref: ', getBytes('<i',1) | |
| 54 | |
| 55 print '\nEXAMINING ALIGNMENT DATA:\n' | |
| 56 aln_N = 0 | |
| 57 while True: | |
| 58 aln_N += 1 | |
| 59 blockSize = getBytes('<i',1) | |
| 60 if blockSize == None: | |
| 61 break | |
| 62 print '['+str(aln_N)+']:', 'blockSize:', blockSize | |
| 63 print '-- refID:', getBytes('<i',1) | |
| 64 print '-- pos: ', getBytes('<i',1) | |
| 65 bmqnl = getBytes('<I',1) | |
| 66 binv = (bmqnl>>16)&65535 | |
| 67 mapq = (bmqnl>>8)&255 | |
| 68 lrn = bmqnl&255 | |
| 69 print '-- bmqnl:', bmqnl, '(bin='+str(binv)+', mapq='+str(mapq)+', l_readname+1='+str(lrn)+')' | |
| 70 flgnc = getBytes('<I',1) | |
| 71 flag = (flgnc>>16)&65535 | |
| 72 ncig = flgnc&65535 | |
| 73 print '-- flgnc:', flgnc, '(flag='+str(flag)+', ncig='+str(ncig)+')' | |
| 74 print '-- l_seq:', getBytes('<i',1) | |
| 75 print '-- nxtID:', getBytes('<i',1) | |
| 76 print '-- nxtPo:', getBytes('<i',1) | |
| 77 print '-- tlen: ', getBytes('<i',1) | |
| 78 print '-- rname:', str([f.read(lrn)])[1:-1] | |
| 79 | |
| 80 f.read(blockSize-32-lrn) | |
| 81 #print [block] | |
| 82 | |
| 83 | |
| 84 f.close() |
