comparison synteny_parse.py @ 0:a512d17b12c9 draft

Uploaded
author cschu
date Wed, 01 Apr 2015 05:34:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:a512d17b12c9
1 #!/usr/bin/env python
2
3 import os
4 import sys
5 import argparse
6
7 #from Bio import Blast
8
9 from Bio.Blast import NCBIXML
10 # from Blast import NCBIXML
11
12
13 def doStuff(args):
14 blast_records = NCBIXML.parse(open(args.blastXMLInput))
15
16 with open(args.blastHitsTSV, 'wb') as out:
17 for blast_record in blast_records:
18 qlen = blast_record.query_length
19 qid = blast_record.query
20 # hits[qid] = []
21 for alignment in blast_record.alignments:
22 sid = alignment.title.split()[1]
23 tid = sid.split('|')[0]
24 for hsp in alignment.hsps:
25 if hsp.expect >= args.evalue:
26 continue
27 qcov = (hsp.query_end - hsp.query_start + 1.0) / qlen
28 if qcov < args.min_query_coverage:
29 continue
30 identity = hsp.identities / float(hsp.align_length)
31 if identity < args.min_identity:
32 continue
33
34 hit = (tid, sid, hsp.expect, qcov, qlen, hsp.align_length, identity)
35 out.write('\t'.join([qid] + map(str, hit)) + '\n')
36 pass
37
38
39 def main(argv):
40
41 descr = ''
42 parser = argparse.ArgumentParser(description=descr)
43 parser.add_argument('--evalue', type=float, default=1e-10)
44 parser.add_argument('--min-identity', type=float, default=0.75)
45 parser.add_argument('--min-query-coverage', type=float, default=0.75)
46 parser.add_argument('blastXMLInput', type=str)
47 parser.add_argument('blastHitsTSV', type=str)
48
49 try:
50 args = parser.parse_args()
51 except:
52 sys.exit(1)
53
54 if not os.path.exists(args.blastXMLInput):
55 sys.stderr.write('Input file (%s) is missing.\n' % args.blastXMLInput)
56 sys.exit(1)
57
58 doStuff(args)
59
60 pass
61
62
63 if __name__ == '__main__': main(sys.argv[1:])