annotate synteny_parse.py @ 2:ea73705ac0f6 draft default tip

Uploaded
author cschu
date Wed, 01 Apr 2015 05:36:10 -0400
parents a512d17b12c9
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
1 #!/usr/bin/env python
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
2
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
3 import os
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
4 import sys
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
5 import argparse
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
6
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
7 #from Bio import Blast
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
8
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
9 from Bio.Blast import NCBIXML
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
10 # from Blast import NCBIXML
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
11
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
12
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
13 def doStuff(args):
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
14 blast_records = NCBIXML.parse(open(args.blastXMLInput))
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
15
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
16 with open(args.blastHitsTSV, 'wb') as out:
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
17 for blast_record in blast_records:
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
18 qlen = blast_record.query_length
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
19 qid = blast_record.query
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
20 # hits[qid] = []
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
21 for alignment in blast_record.alignments:
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
22 sid = alignment.title.split()[1]
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
23 tid = sid.split('|')[0]
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
24 for hsp in alignment.hsps:
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
25 if hsp.expect >= args.evalue:
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
26 continue
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
27 qcov = (hsp.query_end - hsp.query_start + 1.0) / qlen
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
28 if qcov < args.min_query_coverage:
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
29 continue
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
30 identity = hsp.identities / float(hsp.align_length)
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
31 if identity < args.min_identity:
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
32 continue
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
33
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
34 hit = (tid, sid, hsp.expect, qcov, qlen, hsp.align_length, identity)
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
35 out.write('\t'.join([qid] + map(str, hit)) + '\n')
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
36 pass
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
37
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
38
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
39 def main(argv):
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
40
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
41 descr = ''
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
42 parser = argparse.ArgumentParser(description=descr)
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
43 parser.add_argument('--evalue', type=float, default=1e-10)
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
44 parser.add_argument('--min-identity', type=float, default=0.75)
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
45 parser.add_argument('--min-query-coverage', type=float, default=0.75)
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
46 parser.add_argument('blastXMLInput', type=str)
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
47 parser.add_argument('blastHitsTSV', type=str)
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
48
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
49 try:
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
50 args = parser.parse_args()
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
51 except:
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
52 sys.exit(1)
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
53
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
54 if not os.path.exists(args.blastXMLInput):
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
55 sys.stderr.write('Input file (%s) is missing.\n' % args.blastXMLInput)
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
56 sys.exit(1)
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
57
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
58 doStuff(args)
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
59
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
60 pass
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
61
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
62
a512d17b12c9 Uploaded
cschu
parents:
diff changeset
63 if __name__ == '__main__': main(sys.argv[1:])