Mercurial > repos > cschu > cs_test_me
changeset 0:a512d17b12c9 draft
Uploaded
author | cschu |
---|---|
date | Wed, 01 Apr 2015 05:34:17 -0400 |
parents | |
children | 4fdaf732231a |
files | synteny_parse.py synteny_parse.xml tool_dependencies.xml |
diffstat | 3 files changed, 99 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
"""Filter the HSPs of a BLAST XML report and write the survivors as TSV.

File: synteny_parse.py

One tab-separated line is written per HSP that passes all three cutoffs
(e-value, query coverage, identity).  Columns, in order:

    query id, target id, subject id, e-value, query coverage,
    query length, alignment length, identity
"""

import os
import sys
import argparse


def _subject_ids(title):
    """Return ``(tid, sid)`` taken from an alignment title.

    ``sid`` is the second whitespace-separated token of *title*; ``tid`` is
    the part of ``sid`` in front of the first ``|``.  Raises ``IndexError``
    when *title* has fewer than two tokens.
    """
    sid = title.split()[1]
    return sid.split('|')[0], sid


def _passing_rows(blast_records, args):
    """Yield the output fields (a list of str) of every HSP that passes.

    An HSP is dropped when its e-value is not strictly below
    ``args.evalue``, or when its query coverage or identity falls below
    ``args.min_query_coverage`` / ``args.min_identity``.
    """
    for blast_record in blast_records:
        qlen = blast_record.query_length
        qid = blast_record.query
        for alignment in blast_record.alignments:
            tid, sid = _subject_ids(alignment.title)
            for hsp in alignment.hsps:
                if hsp.expect >= args.evalue:
                    continue
                # "+ 1.0": the span is inclusive of both end points, and the
                # float literal forces true division on Python 2 as well.
                qcov = (hsp.query_end - hsp.query_start + 1.0) / qlen
                if qcov < args.min_query_coverage:
                    continue
                identity = hsp.identities / float(hsp.align_length)
                if identity < args.min_identity:
                    continue

                hit = (tid, sid, hsp.expect, qcov, qlen,
                       hsp.align_length, identity)
                # A list comprehension instead of map(): on Python 3 map()
                # returns an iterator that cannot be added to a list.
                yield [qid] + [str(value) for value in hit]


def doStuff(args):
    """Parse ``args.blastXMLInput`` and write passing hits to ``args.blastHitsTSV``.

    *args* must provide ``blastXMLInput``, ``blastHitsTSV``, ``evalue``,
    ``min_query_coverage`` and ``min_identity`` (see ``main``).
    """
    # Imported here rather than at module level so that argument parsing
    # and --help keep working on a machine without Biopython.
    from Bio.Blast import NCBIXML

    # Both handles are closed by the with-statement, even on error.  The
    # output is opened in text mode because str objects are written to it.
    with open(args.blastXMLInput) as xml_in, \
            open(args.blastHitsTSV, 'w') as out:
        for fields in _passing_rows(NCBIXML.parse(xml_in), args):
            out.write('\t'.join(fields) + '\n')


def main(argv):
    """Command-line entry point.

    *argv* is the argument list without the program name.  Exits with
    status 1 on a usage error or when the input file does not exist, and
    with status 0 after ``--help``.
    """
    descr = ''
    parser = argparse.ArgumentParser(description=descr)
    parser.add_argument('--evalue', type=float, default=1e-10)
    parser.add_argument('--min-identity', type=float, default=0.75)
    parser.add_argument('--min-query-coverage', type=float, default=0.75)
    parser.add_argument('blastXMLInput', type=str)
    parser.add_argument('blastHitsTSV', type=str)

    try:
        args = parser.parse_args(argv)
    except SystemExit as exc:
        # argparse exits with 0 after --help and non-zero on a usage error.
        # Keep status 1 for errors, but do not report --help as a failure.
        sys.exit(1 if exc.code else 0)

    if not os.path.exists(args.blastXMLInput):
        sys.stderr.write('Input file (%s) is missing.\n' % args.blastXMLInput)
        sys.exit(1)

    doStuff(args)


if __name__ == '__main__':
    main(sys.argv[1:])
<!-- synteny_parse.xml: Galaxy tool wrapper for synteny_parse.py -->
<tool id="synteny_parse" name="synteny_parse">
  <description>Parse synteny information from BlastXML.</description>
  <requirements>
    <!-- <requirement type="package" version="2.7.4">python</requirement> -->
    <!-- The version must match the biopython package declared in
         tool_dependencies.xml (1.65), otherwise the requirement cannot be
         resolved to that installed package. -->
    <requirement type="package" version="1.65">biopython</requirement>
    <requirement type="python-module">Bio</requirement>
    <requirement type="python-module">Bio.Blast</requirement>
  </requirements>
  <!-- Dataset paths are quoted so they reach the script as single arguments. -->
  <command interpreter="python">synteny_parse.py
    --evalue="${minE.value}"
    --min-identity="${minID.value}"
    --min-query-coverage="${minQCOV.value}"
    "$blastXML_in" "$out"
  </command>
  <inputs>
    <param name="blastXML_in" type="data" format="xml" label="BLAST xml output" />
    <param name="minE" type="float" value="1e-10" label="e-value cutoff" />
    <param name="minID" type="float" value="0.75" label="identity cutoff" />
    <param name="minQCOV" type="float" value="0.75" label="query-coverage cutoff" />
  </inputs>
  <outputs>
    <data format="tabular" name="out" label="Filtered Blast results ${on_string}" />
  </outputs>

  <help>
**What it does**

Filters the HSPs of a BLAST XML report. An HSP is kept when its e-value is
below the e-value cutoff and both its query coverage and its identity are at
least the respective cutoffs.

**Output**

One tab-separated line per retained HSP with the columns: query id,
target id, subject id, e-value, query coverage, query length,
alignment length, identity.
  </help>
</tool>
<?xml version="1.0"?>
<!-- tool_dependencies.xml: declares the biopython 1.65 package and the
     Tool Shed repository revision that provides it. -->
<tool_dependency>
  <package name="biopython" version="1.65">
    <repository
        changeset_revision="dc595937617c"
        name="package_biopython_1_65"
        owner="biopython"
        toolshed="https://toolshed.g2.bx.psu.edu" />
  </package>
</tool_dependency>