Mercurial > repos > cschu > cs_test_me
view synteny_parse.py @ 2:ea73705ac0f6 draft default tip
Uploaded
author | cschu |
---|---|
date | Wed, 01 Apr 2015 05:36:10 -0400 |
parents | a512d17b12c9 |
children |
line wrap: on
line source
#!/usr/bin/env python import os import sys import argparse #from Bio import Blast from Bio.Blast import NCBIXML # from Blast import NCBIXML def doStuff(args): blast_records = NCBIXML.parse(open(args.blastXMLInput)) with open(args.blastHitsTSV, 'wb') as out: for blast_record in blast_records: qlen = blast_record.query_length qid = blast_record.query # hits[qid] = [] for alignment in blast_record.alignments: sid = alignment.title.split()[1] tid = sid.split('|')[0] for hsp in alignment.hsps: if hsp.expect >= args.evalue: continue qcov = (hsp.query_end - hsp.query_start + 1.0) / qlen if qcov < args.min_query_coverage: continue identity = hsp.identities / float(hsp.align_length) if identity < args.min_identity: continue hit = (tid, sid, hsp.expect, qcov, qlen, hsp.align_length, identity) out.write('\t'.join([qid] + map(str, hit)) + '\n') pass def main(argv): descr = '' parser = argparse.ArgumentParser(description=descr) parser.add_argument('--evalue', type=float, default=1e-10) parser.add_argument('--min-identity', type=float, default=0.75) parser.add_argument('--min-query-coverage', type=float, default=0.75) parser.add_argument('blastXMLInput', type=str) parser.add_argument('blastHitsTSV', type=str) try: args = parser.parse_args() except: sys.exit(1) if not os.path.exists(args.blastXMLInput): sys.stderr.write('Input file (%s) is missing.\n' % args.blastXMLInput) sys.exit(1) doStuff(args) pass if __name__ == '__main__': main(sys.argv[1:])