comparison fubar-galaxytoolfactory-cfcf6c9df5b7/rgToolFactory.py @ 1:87613ace5113 draft

Uploaded
author fubar
date Sat, 11 Aug 2012 02:41:28 -0400
parents
children
comparison
equal deleted inserted replaced
0:2686fd3d0112 1:87613ace5113
1 # rgToolFactory.py
2 # see https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
3 #
4 # copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
5 #
6 # all rights reserved
7 # Licensed under the LGPL
8 # suggestions for improvement and bug fixes welcome at https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
9
10 # August 11 2012
11 # changed to use shell=False and cl as a sequence
12
13 # This is a Galaxy tool factory for simple scripts in python, R or whatever ails ye.
14 # It also serves as the wrapper for the new tool.
15 #
16 # you paste and run your script
17 # Only works for simple scripts that read one input from the history.
18 # Optionally can write one new history dataset,
19 # and optionally collect any number of outputs into links on an autogenerated HTML page.
20
21 # DO NOT install on a public or important site - please.
22
23 # installed generated tools are fine if the script is safe.
24 # They just run normally and their user cannot do anything unusually insecure
25 # but please, practice safe toolshed.
26 # Read the fucking code before you install any tool
27 # especially this one
28
29 # After you get the script working on some test data, you can
30 # optionally generate a toolshed compatible gzip file
31 # containing your script safely wrapped as an ordinary Galaxy script in your local toolshed for
32 # safe and largely automated installation in a production Galaxy.
33
34 # If you opt for an HTML output, you get all the script outputs arranged
35 # as a single Html history item - all output files are linked, thumbnails for all the pdfs.
36 # Ugly but really inexpensive.
37 #
38 # Patches appreciated please.
39 #
40 #
41 # long route to June 2012 product
42 # Behold the awesome power of Galaxy and the toolshed with the tool factory binds to bind them
43 # derived from an integrated script model
44 # called rgBaseScriptWrapper.py
45 # Note to the unwary:
46 # This tool allows arbitrary scripting on your Galaxy as the Galaxy user
47 # There is nothing stopping a malicious user doing whatever they choose
48 # Extremely dangerous!!
49 # Totally insecure. So, trusted users only
50 #
51 # preferred model is a developer using their throw away workstation instance - ie a private site.
52 # no real risk. The universe_wsgi.ini admin_users string is checked - only admin users are permitted to run this tool.
53 #
54
55 import sys
56 import shutil
57 import subprocess
58 import os
59 import time
60 import tempfile
61 import optparse
62 import tarfile
63 import re
64 import shutil
65 import math
66
# Base name this script was invoked under; interpolated into the "generator"
# meta tag of the HTML report.
progname = os.path.basename(sys.argv[0])
# Version label for this wrapper.
myversion = 'V000.2 June 2012'
# Module-level switches; both are off by default.
verbose = False
debug = False
# Project home page, quoted in the help text of every generated tool.
toolFactoryURL = 'https://bitbucket.org/fubar/galaxytoolfactory'
72
def timenow():
    """Return the current local time formatted as 'DD/MM/YYYY HH:MM:SS'."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    right_now = time.localtime()
    return time.strftime(stamp_format, right_now)
77
78
79 class ScriptRunner:
80 """class is a wrapper for an arbitrary script
81 """
82
83 def __init__(self,opts=None):
84 """
85 cleanup inputs, setup some outputs
86
87 """
88 if opts.output_dir: # simplify for the tool tarball
89 os.chdir(opts.output_dir)
90 self.thumbformat = 'jpg'
91 self.opts = opts
92 self.toolname = re.sub('[^a-zA-Z0-9_]+', '', opts.tool_name) # a sanitizer now does this but..
93 self.toolid = self.toolname
94 s = open(self.opts.script_path,'r').readlines()
95 self.script = ''.join(s)
96 self.indentedScript = ''.join([' %s' % x for x in s]) # for restructured text in help
97 self.myname = sys.argv[0] # get our name because we write ourselves out as a tool later
98 self.pyfile = self.myname # crude but efficient - the cruft won't hurt much
99 self.xmlfile = '%s.xml' % self.toolname
100 self.sfile = '%s.%s' % (self.toolname,opts.interpreter)
101 if opts.output_dir: # may not want these complexities
102 self.tlog = os.path.join(opts.output_dir,"%s_runner.log" % self.toolname)
103 artifactpath = os.path.join(opts.output_dir,'%s_run.script' % self.toolname)
104 artifact = open(artifactpath,'w')
105 artifact.write(self.script)
106 artifact.write('\n')
107 artifact.close()
108 if opts.make_Tool: # need this code and the user script for the tarball
109 localscript = open(self.sfile,'w')
110 localscript.write(self.script)
111 localscript.close()
112 self.cl = []
113 self.html = []
114 a = self.cl.append
115 a(opts.interpreter)
116 a('-') # use stdin
117 a(opts.input_tab)
118 a(opts.output_tab)
119 self.outFormats = 'tabular' # TODO make this an option at tool generation time
120 self.inputFormats = 'tabular' # TODO make this an option at tool generation time
121 self.test1Input = '%s_test1_input.xls' % self.toolname
122 self.test1Output = '%s_test1_output.xls' % self.toolname
123 self.test1HTML = '%s_test1_output.html' % self.toolname
124
125 def makeXML(self):
126 """
127 Create a Galaxy xml tool wrapper for the new script as a string to write out
128 fixme - use templating or something less fugly than this example of what we produce
129
130 <tool id="reverse" name="reverse" version="0.01">
131 <description>a tabular file</description>
132 <command interpreter="python">
133 reverse.py --script_path "$runMe" --interpreter "python"
134 --tool_name "reverse" --input_tab "$input1" --output_tab "$tab_file"
135 </command>
136 <inputs>
137 <param name="input1" type="data" format="tabular" label="Select a suitable input file from your history"/><param name="job_name" type="text" label="Supply a name for the outputs to remind you what they contain" value="reverse"/>
138
139 </inputs>
140 <outputs>
141 <data format="tabular" name="tab_file" label="${job_name}"/>
142
143 </outputs>
144 <help>
145
146 **What it Does**
147
148 Reverse the columns in a tabular file
149
150 </help>
151 <configfiles>
152 <configfile name="runMe">
153
154 # reverse order of columns in a tabular file
155 import sys
156 inp = sys.argv[1]
157 outp = sys.argv[2]
158 i = open(inp,'r')
159 o = open(outp,'w')
160 for row in i:
161 rs = row.rstrip().split('\t')
162 rs.reverse()
163 o.write('\t'.join(rs))
164 o.write('\n')
165 i.close()
166 o.close()
167
168
169 </configfile>
170 </configfiles>
171 </tool>
172
173 """
174 newXML="""<tool id="%(toolid)s" name="%(toolname)s" version="%(tool_version)s">
175 %(tooldesc)s
176 %(command)s
177 <inputs>
178 %(inputs)s
179 </inputs>
180 <outputs>
181 %(outputs)s
182 </outputs>
183 <configfiles>
184 <configfile name="runMe">
185 %(script)s
186 </configfile>
187 </configfiles>
188 %(tooltests)s
189 <help>
190 %(help)s
191 </help>
192 </tool>""" # needs a dict with toolname, toolid, interpreter, scriptname, command, inputs as a multi line string ready to write, outputs ditto, help ditto
193
194 newCommand="""<command interpreter="python">
195 %(toolname)s.py --script_path "$runMe" --interpreter "%(interpreter)s"
196 --tool_name "%(toolname)s" %(command_inputs)s %(command_outputs)s
197 </command>""" # may NOT be an input or htmlout
198 tooltestsTabOnly = """<tests><test>
199 <param name="input1" value="%(test1Input)s" ftype="tabular"/>
200 <param name="job_name" value="test1"/>
201 <param name="runMe" value="$runMe"/>
202 <output name="tab_file" file="%(test1Output)s" ftype="tabular"/>
203 </test></tests>"""
204 tooltestsHTMLOnly = """<tests><test>
205 <param name="input1" value="%(test1Input)s" ftype="tabular"/>
206 <param name="job_name" value="test1"/>
207 <param name="runMe" value="$runMe"/>
208 <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="5"/>
209 </test></tests>"""
210 tooltestsBoth = """<tests><test>
211 <param name="input1" value="%(test1Input)s" ftype="tabular"/>
212 <param name="job_name" value="test1"/>
213 <param name="runMe" value="$runMe"/>
214 <output name="tab_file" file="%(test1Output)s" ftype="tabular" />
215 <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="10"/>
216 </test></tests>"""
217 xdict = {}
218 xdict['tool_version'] = self.opts.tool_version
219 xdict['test1Input'] = self.test1Input
220 xdict['test1HTML'] = self.test1HTML
221 xdict['test1Output'] = self.test1Output
222 if self.opts.make_HTML and self.opts.output_tab <> 'None':
223 xdict['tooltests'] = tooltestsBoth % xdict
224 elif self.opts.make_HTML:
225 xdict['tooltests'] = tooltestsHTMLOnly % xdict
226 else:
227 xdict['tooltests'] = tooltestsTabOnly % xdict
228 xdict['script'] = self.script # configfile is least painful way to embed script to avoid external dependencies
229 if self.opts.help_text:
230 xdict['help'] = open(self.opts.help_text,'r').read()
231 else:
232 xdict['help'] = 'Please ask the tool author for help as none was supplied at tool generation'
233 coda = ['**Script**','Pressing execute will run the following code over your input file and generate some outputs in your history::']
234 coda.append(self.indentedScript)
235 coda.append('**Attribution** This Galaxy tool was created by %s at %s\nusing the Galaxy Tool Factory.' % (self.opts.user_email,timenow()))
236 coda.append('See %s for details of that project' % (toolFactoryURL))
237 xdict['help'] = '%s\n%s' % (xdict['help'],'\n'.join(coda))
238 if self.opts.tool_desc:
239 xdict['tooldesc'] = '<description>%s</description>' % self.opts.tool_desc
240 else:
241 xdict['tooldesc'] = ''
242 xdict['command_outputs'] = ''
243 xdict['outputs'] = ''
244 if self.opts.input_tab <> 'None':
245 xdict['command_inputs'] = '--input_tab "$input1" ' # the space may matter a lot if we append something
246 xdict['inputs'] = '<param name="input1" type="data" format="%s" label="Select a suitable input file from your history"/> \n' % self.inputFormats
247 else:
248 xdict['command_inputs'] = '' # assume no input - eg a random data generator
249 xdict['inputs'] = ''
250 xdict['inputs'] += '<param name="job_name" type="text" label="Supply a name for the outputs to remind you what they contain" value="%s"/> \n' % self.toolname
251 xdict['toolname'] = self.toolname
252 xdict['toolid'] = self.toolid
253 xdict['interpreter'] = self.opts.interpreter
254 xdict['scriptname'] = self.sfile
255 if self.opts.make_HTML:
256 xdict['command_outputs'] += ' --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes" '
257 xdict['outputs'] += ' <data format="html" name="html_file" label="${job_name}.html"/>\n'
258 if self.opts.output_tab <> 'None':
259 xdict['command_outputs'] += ' --output_tab "$tab_file"'
260 xdict['outputs'] += ' <data format="%s" name="tab_file" label="${job_name}"/>\n' % self.outFormats
261 xdict['command'] = newCommand % xdict
262 xmls = newXML % xdict
263 xf = open(self.xmlfile,'w')
264 xf.write(xmls)
265 xf.write('\n')
266 xf.close()
267 # ready for the tarball
268
269
270 def makeTooltar(self):
271 """
272 a tool is a gz tarball with eg
273 /toolname/tool.xml /toolname/tool.py /toolname/test-data/test1_in.foo ...
274 """
275 retval = self.run()
276 if retval:
277 print >> sys.stderr,'## Run failed. Cannot build yet. Please fix and retry'
278 sys.exit(1)
279 self.makeXML()
280 tdir = self.toolname
281 os.mkdir(tdir)
282 if self.opts.input_tab <> 'None': # no reproducible test otherwise? TODO: maybe..
283 testdir = os.path.join(tdir,'test-data')
284 os.mkdir(testdir) # make tests directory
285 shutil.copyfile(self.opts.input_tab,os.path.join(testdir,self.test1Input))
286 if self.opts.output_tab <> 'None':
287 shutil.copyfile(self.opts.output_tab,os.path.join(testdir,self.test1Output))
288 if self.opts.make_HTML:
289 shutil.copyfile(self.opts.output_html,os.path.join(testdir,self.test1HTML))
290 if self.opts.output_dir:
291 shutil.copyfile(self.tlog,os.path.join(testdir,'test1_out.log'))
292 op = '%s.py' % self.toolname # new name
293 outpiname = os.path.join(tdir,op) # path for the tool tarball
294 pyin = os.path.basename(self.pyfile) # our name - we rewrite ourselves (TM)
295 notes = ['# %s - a self annotated version of %s generated by running %s\n' % (op,pyin,pyin),]
296 notes.append('# to make a new Galaxy tool called %s\n' % self.toolname)
297 notes.append('# User %s at %s\n' % (self.opts.user_email,timenow()))
298 pi = open(self.pyfile,'r').readlines() # our code becomes new tool wrapper (!) - first Galaxy worm
299 notes += pi
300 outpi = open(outpiname,'w')
301 outpi.write(''.join(notes))
302 outpi.write('\n')
303 outpi.close()
304 shutil.copyfile(self.sfile,os.path.join(tdir,self.sfile))
305 shutil.copyfile(self.xmlfile,os.path.join(tdir,self.xmlfile))
306 tarpath = "%s.gz" % self.toolname
307 tar = tarfile.open(tarpath, "w:gz")
308 tar.add(tdir,arcname=self.toolname)
309 tar.close()
310 shutil.copyfile(tarpath,self.opts.new_tool)
311 shutil.rmtree(tdir)
312 ## TODO: replace with optional direct upload to local toolshed?
313 return retval
314
315 def compressPDF(self,inpdf=None,thumbformat='png'):
316 """need absolute path to pdf
317 """
318 assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf,self.myName)
319 hf,hlog = tempfile.mkstemp(suffix="%s.log" % self.toolname)
320 sto = open(hlog,'w')
321 outpdf = '%s_compressed' % inpdf
322 cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dBATCH", "-sOutputFile=%s" % outpdf,inpdf]
323 x = subprocess.Popen(cl,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
324 retval1 = x.wait()
325 if retval1 == 0:
326 os.unlink(inpdf)
327 shutil.move(outpdf,inpdf)
328 outpng = '%s.%s' % (os.path.splitext(inpdf)[0],thumbformat)
329 cl2 = ['convert', inpdf, outpng]
330 x = subprocess.Popen(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
331 retval2 = x.wait()
332 sto.close()
333 retval = retval1 or retval2
334 return retval
335
336
337 def getfSize(self,fpath,outpath):
338 """
339 format a nice file size string
340 """
341 size = ''
342 fp = os.path.join(outpath,fpath)
343 if os.path.isfile(fp):
344 size = '0 B'
345 n = float(os.path.getsize(fp))
346 if n > 2**20:
347 size = '%1.1f MB' % (n/2**20)
348 elif n > 2**10:
349 size = '%1.1f KB)' % (n/2**10)
350 elif n > 0:
351 size = '%d B' % (int(n))
352 return size
353
354 def makeHtml(self):
355 """ Create an HTML file content to list all the artefacts found in the output_dir
356 """
357
358 galhtmlprefix = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
359 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
360 <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
361 <meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
362 <title></title>
363 <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
364 </head>
365 <body>
366 <div class="toolFormBody">
367 """
368 galhtmlattr = """<hr/><div class="infomessage">This tool (%s) was generated by the <a href="https://bitbucket.org/fubar/galaxytoolfactory/overview">Galaxy Tool Factory</a></div><br/>"""
369 galhtmlpostfix = """</div></body></html>\n"""
370
371 flist = os.listdir(self.opts.output_dir)
372 flist = [x for x in flist if x <> 'Rplots.pdf']
373 flist.sort()
374 html = []
375 html.append(galhtmlprefix % progname)
376 html.append('<div class="infomessage">Galaxy Tool "%s" run at %s</div><br/>' % (self.toolname,timenow()))
377 fhtml = []
378 if len(flist) > 0:
379 pdflist = []
380 npdf = len([x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf'])
381 nacross = 1
382 if npdf > 0:
383 nacross = int(round(math.log(npdf,2)))
384 nacross = max(1,nacross)
385 width = min(400,int(1200/nacross))
386 for rownum,fname in enumerate(flist):
387 dname,e = os.path.splitext(fname)
388 sfsize = self.getfSize(fname,self.opts.output_dir)
389 if e.lower() == '.pdf' : # compress and make a thumbnail
390 thumb = '%s.%s' % (dname,self.thumbformat)
391 pdff = os.path.join(self.opts.output_dir,fname)
392 retval = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat)
393 if retval == 0:
394 pdflist.append((fname,thumb))
395 if (rownum+1) % 2 == 0:
396 fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize))
397 else:
398 fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize))
399 ntogo = nacross # counter for table row padding with empty cells
400 if len(pdflist) > 0:
401 html.append('<div><table class="simple" cellpadding="2" cellspacing="2">\n<tr>')
402 for i,paths in enumerate(pdflist):
403 fname,thumb = paths
404 s= """<td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="5" width="%d"
405 alt="Image called %s"/></a></td>\n""" % (fname,thumb,fname,width,fname)
406 if ((i+1) % nacross == 0):
407 s += '</tr>\n'
408 ntogo = 0
409 if i < (npdf - 1): # more to come
410 s += '<tr>'
411 ntogo = nacross
412 else:
413 ntogo -= 1
414 html.append(s)
415 if html[-1].strip().endswith('</tr>'):
416 html.append('</table></div>\n')
417 else:
418 if ntogo > 0: # pad
419 html.append('<td>&nbsp;</td>'*ntogo)
420 html.append('</tr></table></div>\n')
421 if len(fhtml) > 0:
422 fhtml.insert(0,'<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n')
423 fhtml.append('</table></div><br/>')
424 html += fhtml # add all non-pdf files to the end of the display
425 else:
426 html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter)
427 rlog = open(self.tlog,'r').readlines()
428 rlog = [x for x in rlog if x.strip() > '']
429 if len(rlog) > 1:
430 html.append('<div class="toolFormTitle">%s log</div><pre>\n' % self.opts.interpreter)
431 html += rlog
432 html.append('</pre>\n')
433 html.append(galhtmlattr % (self.toolname))
434 html.append(galhtmlpostfix)
435 htmlf = file(self.opts.output_html,'w')
436 htmlf.write('\n'.join(html))
437 htmlf.write('\n')
438 htmlf.close()
439 self.html = html
440
441
442 def run(self):
443 """
444 scripts must be small enough not to fill the pipe!
445 """
446 if self.opts.output_dir:
447 sto = open(self.tlog,'w')
448 sto.write('## FastQC generated command line = %s\n' % ' '.join(self.cl))
449 sto.flush()
450 p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=sto,stdin=subprocess.PIPE,cwd=self.opts.output_dir)
451 else:
452 p = subprocess.Popen(self.cl,shell=False,stdin=subprocess.PIPE)
453 p.stdin.write(self.script)
454 p.stdin.close()
455 retval = p.wait()
456 if self.opts.output_dir:
457 sto.close()
458 if self.opts.make_HTML:
459 self.makeHtml()
460 return retval
461
462
def main():
    """Parse the command line, then run the script or build a tool from it.

    Exits through the option parser's error handling (usage message on
    stderr, non-zero status) when the caller is flagged as a bad user or a
    required option is missing.  Exits with the script's return code when
    the run fails so the job runner sees the failure.
    """
    u = """
This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
<command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
</command>
"""
    op = optparse.OptionParser(usage=u)
    option_defaults = (
        ('--script_path', None),
        ('--tool_name', None),
        ('--interpreter', None),
        ('--output_dir', None),
        ('--output_html', None),
        ('--input_tab', "None"),
        ('--output_tab', "None"),
        ('--user_email', 'Unknown'),
        ('--bad_user', None),
        ('--make_Tool', None),
        ('--make_HTML', None),
        ('--help_text', None),
        ('--tool_desc', None),
        ('--new_tool', None),
        ('--tool_version', None),
    )
    for flag, default in option_defaults:
        op.add_option(flag, default=default)
    opts, args = op.parse_args()
    # explicit checks rather than assert so they survive python -O
    if opts.bad_user:
        op.error('UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user, opts.bad_user))
    if not opts.tool_name:
        op.error('## Tool Factory expects a tool name - eg --tool_name=DESeq')
    if not opts.interpreter:
        op.error('## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript')
    # test for a missing path first: os.path.isfile(None) raises TypeError
    if not opts.script_path or not os.path.isfile(opts.script_path):
        op.error('## Tool Factory wrapper expects a script path - eg --script_path=foo.R')
    if opts.output_dir:
        try:
            os.makedirs(opts.output_dir)
        except OSError:
            # an existing directory is fine; anything else is a real failure
            if not os.path.isdir(opts.output_dir):
                raise
    r = ScriptRunner(opts)
    if opts.make_Tool:
        retcode = r.makeTooltar()
    else:
        retcode = r.run()
    if retcode:
        sys.exit(retcode)  # indicate failure to job runner


if __name__ == "__main__":
    main()
507
508