comparison tools/ncbi_blast_plus/blast2go.py @ 1:0f159cf346c8

Migrated tool version 0.0.2 from old tool shed archive to new tool shed repository
author peterjc
date Tue, 07 Jun 2011 16:29:28 -0400
parents cd52c931b325
children a5594772282c
comparison
equal deleted inserted replaced
0:cd52c931b325 1:0f159cf346c8
4 This script takes exactly three command line arguments: 4 This script takes exactly three command line arguments:
5 * Input BLAST XML filename 5 * Input BLAST XML filename
6 * Blast2GO properties filename (settings file) 6 * Blast2GO properties filename (settings file)
7 * Output tabular filename 7 * Output tabular filename
8 8
9 Sadly b2g4pipe v2.3.5 cannot cope with current style large BLAST XML
10 files (e.g. from BLAST 2.2.25+), so we have to reformat these to
11 avoid it crashing with a Java heap space OutOfMemoryError.
12
13 As part of this reformatting, we check for BLASTP or BLASTX output
14 (otherwise raise an error), and print the query count.
15
9 It then calls the Java command line tool, and moves the output file to 16 It then calls the Java command line tool, and moves the output file to
10 the location Galaxy is expecting. 17 the location Galaxy is expecting, and removes the tempory XML file.
11 """ 18 """
12 import sys 19 import sys
13 import os 20 import os
14 import subprocess 21 import subprocess
15 22
25 if len(sys.argv) != 4: 32 if len(sys.argv) != 4:
26 stop_err("Require three arguments: XML filename, properties filename, output tabular filename") 33 stop_err("Require three arguments: XML filename, properties filename, output tabular filename")
27 34
28 xml_file, prop_file, tabular_file = sys.argv[1:] 35 xml_file, prop_file, tabular_file = sys.argv[1:]
29 36
37 #We should have write access here:
38 tmp_xml_file = tabular_file + ".tmp.xml"
39
30 if not os.path.isfile(xml_file): 40 if not os.path.isfile(xml_file):
31 stop_err("Input BLAST XML file not found: %s" % xml_file) 41 stop_err("Input BLAST XML file not found: %s" % xml_file)
32 42
33 if not os.path.isfile(prop_file): 43 if not os.path.isfile(prop_file):
34 stop_err("Blast2GO configuration file not found: %s" % prop_file) 44 stop_err("Blast2GO configuration file not found: %s" % prop_file)
45
46 def prepare_xml(original_xml, mangled_xml):
47 """Reformat BLAST XML to suit Blast2GO.
48
49 Blast2GO can't cope with 1000s of <Iteration> tags within a
50 single <BlastResult> tag, so instead split this into one
51 full XML record per interation (i.e. per query). This gives
52 a concatenated XML file mimicing old versions of BLAST.
53
54 This also checks for BLASTP or BLASTX output, and outputs
55 the number of queries. Galaxy will show this as "info".
56 """
57 in_handle = open(original_xml)
58 footer = " </BlastOutput_iterations>\n</BlastOutput>\n"
59 header = ""
60 while True:
61 line = in_handle.readline()
62 if not line:
63 #No hits?
64 stop_err("Problem with XML file?")
65 if line.strip() == "<Iteration>":
66 break
67 header += line
68
69 if "<BlastOutput_program>blastx</BlastOutput_program>" in header:
70 print "BLASTX output identified"
71 elif "<BlastOutput_program>blastp</BlastOutput_program>" in header:
72 print "BLASTP output identified"
73 else:
74 in_handle.close()
75 stop_err("Expect BLASTP or BLASTX output")
76
77 out_handle = open(mangled_xml, "w")
78 out_handle.write(header)
79 out_handle.write(line)
80 count = 1
81 while True:
82 line = in_handle.readline()
83 if not line:
84 break
85 elif line.strip() == "<Iteration>":
86 #Insert footer/header
87 out_handle.write(footer)
88 out_handle.write(header)
89 count += 1
90 out_handle.write(line)
91
92 out_handle.close()
93 in_handle.close()
94 print "Input has %i queries" % count
95
35 96
36 def run(cmd): 97 def run(cmd):
37 #Avoid using shell=True when we call subprocess to ensure if the Python 98 #Avoid using shell=True when we call subprocess to ensure if the Python
38 #script is killed, so too is the child process. 99 #script is killed, so too is the child process.
39 try: 100 try:
42 stop_err("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) 103 stop_err("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
43 #Use .communicate as can get deadlocks with .wait(), 104 #Use .communicate as can get deadlocks with .wait(),
44 stdout, stderr = child.communicate() 105 stdout, stderr = child.communicate()
45 return_code = child.returncode 106 return_code = child.returncode
46 if return_code: 107 if return_code:
108 cmd_str = " ".join(cmd)
47 if stderr and stdout: 109 if stderr and stdout:
48 stop_err("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, err, stdout, stderr)) 110 stop_err("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, cmd_str, stdout, stderr))
49 else: 111 else:
50 stop_err("Return code %i from command:\n%s\n%s" % (return_code, err, stderr)) 112 stop_err("Return code %i from command:\n%s\n%s" % (return_code, cmd_str, stderr))
51 #For early diagnostics, 113 #For early diagnostics,
52 else: 114 else:
53 print stdout 115 print stdout
54 print stderr 116 print stderr
55 117
56 if not os.path.isfile(blast2go_jar): 118 if not os.path.isfile(blast2go_jar):
57 stop_err("Blast2GO JAR file not found: %s" % blast2go_jar) 119 stop_err("Blast2GO JAR file not found: %s" % blast2go_jar)
58 120
59 #We will have write access whereever the output should be, 121 prepare_xml(xml_file, tmp_xml_file)
122 #print "XML file prepared for Blast2GO"
123
124 #We will have write access wherever the output should be,
60 #so we'll ask Blast2GO to use that as the stem for its output 125 #so we'll ask Blast2GO to use that as the stem for its output
61 #(it will append .annot to the filename) 126 #(it will append .annot to the filename)
62 cmd = ["java", "-jar", blast2go_jar, 127 cmd = ["java", "-jar", blast2go_jar,
63 "-in", xml_file, 128 "-in", tmp_xml_file,
64 "-prop", prop_file, 129 "-prop", prop_file,
65 "-out", tabular_file, 130 "-out", tabular_file, #Used as base name for output files
66 "-a"] 131 "-a", # Generate *.annot tabular file
132 #"-img", # Generate images, feature not in v2.3.5
133 ]
134 #print " ".join(cmd)
67 run(cmd) 135 run(cmd)
136
137 #Remove the temp XML file
138 os.remove(tmp_xml_file)
68 139
69 out_file = tabular_file + ".annot" 140 out_file = tabular_file + ".annot"
70 if not os.path.isfile(out_file): 141 if not os.path.isfile(out_file):
71 stop_err("ERROR - No output annotation file from Blast2GO") 142 stop_err("ERROR - No output annotation file from Blast2GO")
72 143