# HG changeset patch
# User cschu
# Date 1427880970 14400
# Node ID ea73705ac0f6c67f51c95206d4b31974ce1255bb
# Parent 4fdaf732231a0b6acb262af4fce0a6a0e665ec98
Uploaded
diff -r 4fdaf732231a -r ea73705ac0f6 synteny_parse.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/synteny_parse.py Wed Apr 01 05:36:10 2015 -0400
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import argparse
+
+#from Bio import Blast
+
+from Bio.Blast import NCBIXML
+# from Blast import NCBIXML
+
+
+def doStuff(args):
+ blast_records = NCBIXML.parse(open(args.blastXMLInput))
+
+ with open(args.blastHitsTSV, 'wb') as out:
+ for blast_record in blast_records:
+ qlen = blast_record.query_length
+ qid = blast_record.query
+ # hits[qid] = []
+ for alignment in blast_record.alignments:
+ sid = alignment.title.split()[1]
+ tid = sid.split('|')[0]
+ for hsp in alignment.hsps:
+ if hsp.expect >= args.evalue:
+ continue
+ qcov = (hsp.query_end - hsp.query_start + 1.0) / qlen
+ if qcov < args.min_query_coverage:
+ continue
+ identity = hsp.identities / float(hsp.align_length)
+ if identity < args.min_identity:
+ continue
+
+ hit = (tid, sid, hsp.expect, qcov, qlen, hsp.align_length, identity)
+ out.write('\t'.join([qid] + map(str, hit)) + '\n')
+ pass
+
+
+def main(argv):
+
+ descr = ''
+ parser = argparse.ArgumentParser(description=descr)
+ parser.add_argument('--evalue', type=float, default=1e-10)
+ parser.add_argument('--min-identity', type=float, default=0.75)
+ parser.add_argument('--min-query-coverage', type=float, default=0.75)
+ parser.add_argument('blastXMLInput', type=str)
+ parser.add_argument('blastHitsTSV', type=str)
+
+ try:
+ args = parser.parse_args()
+ except:
+ sys.exit(1)
+
+ if not os.path.exists(args.blastXMLInput):
+ sys.stderr.write('Input file (%s) is missing.\n' % args.blastXMLInput)
+ sys.exit(1)
+
+ doStuff(args)
+
+ pass
+
+
+if __name__ == '__main__': main(sys.argv[1:])
diff -r 4fdaf732231a -r ea73705ac0f6 synteny_parse.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/synteny_parse.xml Wed Apr 01 05:36:10 2015 -0400
@@ -0,0 +1,30 @@
+
+ Parse synteny information from BlastXML.
+
+
+ biopython
+ Bio
+ Bio.Blast
+
+ synteny_parse.py
+ --evalue="${minE.value}"
+ --min-identity="${minID.value}"
+ --min-query-coverage="${minQCOV.value}"
+ $blastXML_in $out
+
+
+
+
+
+
+ amino acid sequences" />
+
+
+
+
+
+
+ This tool does stuff.
+
+
+
diff -r 4fdaf732231a -r ea73705ac0f6 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Wed Apr 01 05:36:10 2015 -0400
@@ -0,0 +1,6 @@
+
+
+
+
+
+