Mercurial > repos > cschu > cs_test_me
diff synteny_parse.py @ 0:a512d17b12c9 draft
Uploaded
author | cschu |
---|---|
date | Wed, 01 Apr 2015 05:34:17 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/synteny_parse.py Wed Apr 01 05:34:17 2015 -0400 @@ -0,0 +1,63 @@ +#!/usr/bin/env python + +import os +import sys +import argparse + +#from Bio import Blast + +from Bio.Blast import NCBIXML +# from Blast import NCBIXML + + +def doStuff(args): + blast_records = NCBIXML.parse(open(args.blastXMLInput)) + + with open(args.blastHitsTSV, 'wb') as out: + for blast_record in blast_records: + qlen = blast_record.query_length + qid = blast_record.query + # hits[qid] = [] + for alignment in blast_record.alignments: + sid = alignment.title.split()[1] + tid = sid.split('|')[0] + for hsp in alignment.hsps: + if hsp.expect >= args.evalue: + continue + qcov = (hsp.query_end - hsp.query_start + 1.0) / qlen + if qcov < args.min_query_coverage: + continue + identity = hsp.identities / float(hsp.align_length) + if identity < args.min_identity: + continue + + hit = (tid, sid, hsp.expect, qcov, qlen, hsp.align_length, identity) + out.write('\t'.join([qid] + map(str, hit)) + '\n') + pass + + +def main(argv): + + descr = '' + parser = argparse.ArgumentParser(description=descr) + parser.add_argument('--evalue', type=float, default=1e-10) + parser.add_argument('--min-identity', type=float, default=0.75) + parser.add_argument('--min-query-coverage', type=float, default=0.75) + parser.add_argument('blastXMLInput', type=str) + parser.add_argument('blastHitsTSV', type=str) + + try: + args = parser.parse_args() + except: + sys.exit(1) + + if not os.path.exists(args.blastXMLInput): + sys.stderr.write('Input file (%s) is missing.\n' % args.blastXMLInput) + sys.exit(1) + + doStuff(args) + + pass + + +if __name__ == '__main__': main(sys.argv[1:])