changeset 0:a512d17b12c9 draft

Uploaded
author cschu
date Wed, 01 Apr 2015 05:34:17 -0400
parents
children 4fdaf732231a
files synteny_parse.py synteny_parse.xml tool_dependencies.xml
diffstat 3 files changed, 99 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/synteny_parse.py	Wed Apr 01 05:34:17 2015 -0400
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import argparse
+
+#from Bio import Blast
+
+from Bio.Blast import NCBIXML
+# from Blast import NCBIXML
+
+
+def doStuff(args):
+    blast_records = NCBIXML.parse(open(args.blastXMLInput))
+    
+    with open(args.blastHitsTSV, 'wb') as out:
+        for blast_record in blast_records:
+            qlen = blast_record.query_length        
+            qid = blast_record.query
+            # hits[qid] = []
+            for alignment in blast_record.alignments:            
+                sid = alignment.title.split()[1]
+                tid = sid.split('|')[0]
+                for hsp in alignment.hsps:
+                    if hsp.expect >= args.evalue:
+                        continue
+                    qcov = (hsp.query_end - hsp.query_start + 1.0) / qlen
+                    if qcov < args.min_query_coverage:
+                        continue
+                    identity = hsp.identities / float(hsp.align_length)
+                    if identity < args.min_identity:
+                        continue
+               
+                    hit = (tid, sid, hsp.expect, qcov, qlen, hsp.align_length, identity)
+                    out.write('\t'.join([qid] + map(str, hit)) + '\n')  
+    pass
+
+
+def main(argv):
+    
+    descr = ''
+    parser = argparse.ArgumentParser(description=descr)        
+    parser.add_argument('--evalue', type=float, default=1e-10)
+    parser.add_argument('--min-identity', type=float, default=0.75)
+    parser.add_argument('--min-query-coverage', type=float, default=0.75)
+    parser.add_argument('blastXMLInput', type=str)
+    parser.add_argument('blastHitsTSV', type=str)
+
+    try:
+        args = parser.parse_args()
+    except:
+        sys.exit(1)
+
+    if not os.path.exists(args.blastXMLInput):
+        sys.stderr.write('Input file (%s) is missing.\n' % args.blastXMLInput)
+        sys.exit(1)
+
+    doStuff(args)
+      
+    pass
+
+
+if __name__ == '__main__': main(sys.argv[1:])
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/synteny_parse.xml	Wed Apr 01 05:34:17 2015 -0400
@@ -0,0 +1,30 @@
+<tool id="synteny_parse" name="synteny_parse">
+	<description>Parse synteny information from BlastXML.</description>
+	<requirements>
+	  <!-- <requirement type="package" version="2.7.4">python</requirement>	  -->
+	  <!-- Version must match the biopython package declared in tool_dependencies.xml. -->
+	  <requirement type="package" version="1.65">biopython</requirement>
+          <requirement type="python-module">Bio</requirement>
+          <requirement type="python-module">Bio.Blast</requirement>
+        </requirements>
+	<!-- Runs synteny_parse.py with the three cutoffs, then the input and output paths. -->
+	<command interpreter="python">synteny_parse.py
+		--evalue="${minE.value}"
+		--min-identity="${minID.value}"
+		--min-query-coverage="${minQCOV.value}"
+		$blastXML_in $out
+		</command>
+	<inputs>
+		<param name="blastXML_in" type="data" format="xml" label="BLAST xml output" />
+		<param name="minE" type="float" value="1e-10" label="e-value cutoff" />
+		<param name="minID" type="float" value="0.75" label="identity cutoff" />
+		<param name="minQCOV" type="float" value="0.75" label="query-coverage cutoff" />
+	</inputs>
+	<outputs>
+		<data format="tabular" name="out" label="Filtered Blast results ${on_string}" />
+	</outputs>
+
+	<help>
+		This tool does stuff.
+	</help>
+</tool>
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Wed Apr 01 05:34:17 2015 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<!-- Declares the biopython 1.65 package dependency, provided by the
+     package_biopython_1_65 repository (owner: biopython) on the tool shed. -->
+<tool_dependency>
+    <package name="biopython" version="1.65">
+        <repository changeset_revision="dc595937617c" name="package_biopython_1_65" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>