comparison utilities/validateBam.py @ 0:6e75a84e9338 draft

planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
author thondeboer
date Tue, 15 May 2018 02:39:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:6e75a84e9338
1 #!/usr/bin/env python
2
3 import sys
4 import os
5 import gzip
6 from struct import unpack
7
8 BAM_EOF = ['1f', '8b', '08', '04', '00', '00', '00', '00', '00', 'ff', '06', '00', '42', '43', '02', '00', '1b', '00', '03', '00', '00', '00', '00', '00', '00', '00', '00', '00']
9
10 def getBytes(fmt,amt):
11 if fmt == '<i' or fmt == '<I':
12 mySize = 4
13 elif fmt == '<c' or fmt == '<b' or fmt == '<B':
14 mySize = 1
15 else:
16 print '\nError, unknown format:',fmt,'\n'
17 exit(1)
18 if amt == 1:
19 fread = f.read(mySize)
20 if not fread:
21 return None
22 return unpack(fmt,fread)[0]
23 else:
24 fread = f.read(mySize*amt)
25 if not fread:
26 return None
27 return unpack(fmt,fread)
28
29 # check eof
30 IN_BAM = sys.argv[1]
31 f = open(IN_BAM,'rb')
32 f.seek(os.path.getsize(IN_BAM)-28)
33 EOF = [format(ord(n),'02x') for n in f.read()]
34 print 'EOF_MARKER: ', ' '.join(EOF)
35 if EOF != BAM_EOF:
36 print '\nWARNING: BAM EOF DOES NOT MATCH EXPECTED STRING.\n'
37 f.close()
38
39 # check other stuff
40 f = gzip.open(IN_BAM,'rb')
41
42 print 'MAGIC STRING:', f.read(4)
43 l_text = getBytes('<i',1)
44 print 'l_text: ', l_text
45 print 'text: \n', f.read(l_text)
46 n_ref = getBytes('<i',1)
47 print 'n_ref: ', n_ref
48
49 for i in xrange(n_ref):
50 l_name = getBytes('<i',1)
51 print 'ref'+str(i)+' - l_name:', l_name
52 print 'ref'+str(i)+' - name: ', f.read(l_name)
53 print 'ref'+str(i)+' - l_ref: ', getBytes('<i',1)
54
55 print '\nEXAMINING ALIGNMENT DATA:\n'
56 aln_N = 0
57 while True:
58 aln_N += 1
59 blockSize = getBytes('<i',1)
60 if blockSize == None:
61 break
62 print '['+str(aln_N)+']:', 'blockSize:', blockSize
63 print '-- refID:', getBytes('<i',1)
64 print '-- pos: ', getBytes('<i',1)
65 bmqnl = getBytes('<I',1)
66 binv = (bmqnl>>16)&65535
67 mapq = (bmqnl>>8)&255
68 lrn = bmqnl&255
69 print '-- bmqnl:', bmqnl, '(bin='+str(binv)+', mapq='+str(mapq)+', l_readname+1='+str(lrn)+')'
70 flgnc = getBytes('<I',1)
71 flag = (flgnc>>16)&65535
72 ncig = flgnc&65535
73 print '-- flgnc:', flgnc, '(flag='+str(flag)+', ncig='+str(ncig)+')'
74 print '-- l_seq:', getBytes('<i',1)
75 print '-- nxtID:', getBytes('<i',1)
76 print '-- nxtPo:', getBytes('<i',1)
77 print '-- tlen: ', getBytes('<i',1)
78 print '-- rname:', str([f.read(lrn)])[1:-1]
79
80 f.read(blockSize-32-lrn)
81 #print [block]
82
83
84 f.close()