0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import os
|
|
4 import sys
|
|
5 import argparse
|
|
6
|
|
7 #from Bio import Blast
|
|
8
|
|
9 from Bio.Blast import NCBIXML
|
|
10 # from Blast import NCBIXML
|
|
11
|
|
12
|
|
13 def doStuff(args):
|
|
14 blast_records = NCBIXML.parse(open(args.blastXMLInput))
|
|
15
|
|
16 with open(args.blastHitsTSV, 'wb') as out:
|
|
17 for blast_record in blast_records:
|
|
18 qlen = blast_record.query_length
|
|
19 qid = blast_record.query
|
|
20 # hits[qid] = []
|
|
21 for alignment in blast_record.alignments:
|
|
22 sid = alignment.title.split()[1]
|
|
23 tid = sid.split('|')[0]
|
|
24 for hsp in alignment.hsps:
|
|
25 if hsp.expect >= args.evalue:
|
|
26 continue
|
|
27 qcov = (hsp.query_end - hsp.query_start + 1.0) / qlen
|
|
28 if qcov < args.min_query_coverage:
|
|
29 continue
|
|
30 identity = hsp.identities / float(hsp.align_length)
|
|
31 if identity < args.min_identity:
|
|
32 continue
|
|
33
|
|
34 hit = (tid, sid, hsp.expect, qcov, qlen, hsp.align_length, identity)
|
|
35 out.write('\t'.join([qid] + map(str, hit)) + '\n')
|
|
36 pass
|
|
37
|
|
38
|
|
39 def main(argv):
|
|
40
|
|
41 descr = ''
|
|
42 parser = argparse.ArgumentParser(description=descr)
|
|
43 parser.add_argument('--evalue', type=float, default=1e-10)
|
|
44 parser.add_argument('--min-identity', type=float, default=0.75)
|
|
45 parser.add_argument('--min-query-coverage', type=float, default=0.75)
|
|
46 parser.add_argument('blastXMLInput', type=str)
|
|
47 parser.add_argument('blastHitsTSV', type=str)
|
|
48
|
|
49 try:
|
|
50 args = parser.parse_args()
|
|
51 except:
|
|
52 sys.exit(1)
|
|
53
|
|
54 if not os.path.exists(args.blastXMLInput):
|
|
55 sys.stderr.write('Input file (%s) is missing.\n' % args.blastXMLInput)
|
|
56 sys.exit(1)
|
|
57
|
|
58 doStuff(args)
|
|
59
|
|
60 pass
|
|
61
|
|
62
|
|
63 if __name__ == '__main__': main(sys.argv[1:])
|