comparison fubar-galaxytoolfactory-2e68c2a22b43/rgToolFactory.py @ 2:b55b59435fb1 draft

Now with bash working I think. Special case but working..
author fubar
date Mon, 13 Aug 2012 06:27:26 -0400
parents
children
comparison
equal deleted inserted replaced
1:87613ace5113 2:b55b59435fb1
1 # rgToolFactory.py
2 # see https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
3 #
4 # copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
5 #
6 # all rights reserved
7 # Licensed under the LGPL
8 # suggestions for improvement and bug fixes welcome at https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
9
10 # August 11 2012
11 # changed to use shell=False and cl as a sequence
12
13 # This is a Galaxy tool factory for simple scripts in python, R or whatever ails ye.
14 # It also serves as the wrapper for the new tool.
15 #
16 # you paste and run your script
17 # Only works for simple scripts that read one input from the history.
18 # Optionally can write one new history dataset,
19 # and optionally collect any number of outputs into links on an autogenerated HTML page.
20
21 # DO NOT install on a public or important site - please.
22
23 # installed generated tools are fine if the script is safe.
24 # They just run normally and their user cannot do anything unusually insecure
25 # but please, practice safe toolshed.
26 # Read the code carefully before you install any tool
27 # especially this one
28
29 # After you get the script working on some test data, you can
30 # optionally generate a toolshed compatible gzip file
31 # containing your script safely wrapped as an ordinary Galaxy script in your local toolshed for
32 # safe and largely automated installation in a production Galaxy.
33
34 # If you opt for an HTML output, you get all the script outputs arranged
35 # as a single Html history item - all output files are linked, thumbnails for all the pdfs.
36 # Ugly but really inexpensive.
37 #
38 # Patches appreciated please.
39 #
40 #
41 # long route to June 2012 product
42 # Behold the awesome power of Galaxy and the toolshed, with the tool factory to bind them
43 # derived from an integrated script model
44 # called rgBaseScriptWrapper.py
45 # Note to the unwary:
46 # This tool allows arbitrary scripting on your Galaxy as the Galaxy user
47 # There is nothing stopping a malicious user doing whatever they choose
48 # Extremely dangerous!!
49 # Totally insecure. So, trusted users only
50 #
51 # preferred model is a developer using their throw away workstation instance - ie a private site.
52 # no real risk. The universe_wsgi.ini admin_users string is checked - only admin users are permitted to run this tool.
53 #
54
55 #
56 # copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
57 #
58 # all rights reserved
59 # Licensed under the LGPL if you want to improve it, feel free https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
60
61 import sys
62 import shutil
63 import subprocess
64 import os
65 import time
66 import tempfile
67 import optparse
68 import tarfile
69 import re
70 import shutil
71 import math
72
# Base name of this script as invoked; it is interpolated into the
# "generator" meta tag of the HTML report written by ScriptRunner.makeHtml.
progname = os.path.basename(sys.argv[0])
myversion = 'V000.2 June 2012'
# Module-level switches; both are off by default.
verbose = False
debug = False
# Project home page, quoted in the help text of every generated tool.
toolFactoryURL = 'https://bitbucket.org/fubar/galaxytoolfactory'
78
def timenow():
    """Return the current local time as a 'DD/MM/YYYY HH:MM:SS' string."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    return time.strftime(stamp_format, time.localtime())
83
84
class ScriptRunner:
    """Run a user supplied script and optionally package it as a Galaxy tool.

    The constructor reads the script named by ``opts.script_path``, writes a
    temporary copy and builds the command line in ``self.cl``.  ``run``
    executes that command, ``makeHtml`` renders an index page of everything
    found in ``opts.output_dir``, ``makeXML`` writes a Galaxy tool wrapper and
    ``makeTooltar`` bundles wrapper, script and test data into a gzipped tar
    archive.

    ``opts`` is the options object produced by ``optparse`` in ``main``.
    The string ``'None'`` (not the ``None`` object) in ``opts.input_tab`` or
    ``opts.output_tab`` means "no such dataset".
    """

    def __init__(self, opts=None, treatbashSpecial=True):
        """Clean up the inputs and set up the command line and output names.

        opts             -- parsed command line options (required despite the
                            ``None`` default, which is kept for compatibility)
        treatbashSpecial -- when true, ``bash`` and ``sh`` are given the
                            script as a file argument instead of on stdin
        """
        self.treatbashSpecial = treatbashSpecial
        if opts.output_dir:  # simplify for the tool tarball
            os.chdir(opts.output_dir)
        self.thumbformat = 'jpg'
        self.opts = opts
        # a sanitizer now does this but..
        self.toolname = re.sub('[^a-zA-Z0-9_]+', '', opts.tool_name)
        self.toolid = self.toolname
        # get our name because we write ourselves out as a tool later
        self.myname = sys.argv[0]
        self.pyfile = self.myname
        self.xmlfile = '%s.xml' % self.toolname
        with open(self.opts.script_path, 'r') as source:
            # rstrip removes pesky dos line endings if needed
            s = [x.rstrip() for x in source]
        self.script = '\n'.join(s)
        # mkstemp hands back an open descriptor; wrap it so it gets closed
        fhandle, self.sfile = tempfile.mkstemp(prefix=self.toolname, suffix=opts.interpreter)
        with os.fdopen(fhandle, 'w') as tscript:
            tscript.write(self.script)
        # one indented line per script line, for the literal block in the help
        self.indentedScript = '\n'.join([' %s' % x for x in s])
        if opts.output_dir:  # may not want these complexities
            self.tlog = os.path.join(opts.output_dir, "%s_runner.log" % self.toolname)
            art = '%s.%s' % (self.toolname, opts.interpreter)
            artpath = os.path.join(self.opts.output_dir, art)  # need full path
            with open(artpath, 'w') as artifact:
                artifact.write(self.script)
        self.html = []
        self.cl = [opts.interpreter]
        if self.treatbashSpecial and opts.interpreter in ['bash', 'sh']:
            self.cl.append(self.sfile)
        else:
            self.cl.append('-')  # script arrives on stdin
        self.cl.append(opts.input_tab)
        self.cl.append(opts.output_tab)
        self.outFormats = 'tabular'  # TODO make this an option at tool generation time
        self.inputFormats = 'tabular'  # TODO make this an option at tool generation time
        self.test1Input = '%s_test1_input.xls' % self.toolname
        self.test1Output = '%s_test1_output.xls' % self.toolname
        self.test1HTML = '%s_test1_output.html' % self.toolname

    def makeXML(self):
        """Write the Galaxy XML tool wrapper for the script to ``self.xmlfile``.

        The wrapper embeds the script in a ``configfile`` element named
        ``runMe`` and calls ``<toolname>.py`` with ``--script_path "$runMe"``,
        so the generated tool has no external script dependency.  Inputs,
        outputs and the functional test section depend on ``opts.input_tab``,
        ``opts.output_tab`` and ``opts.make_HTML``.

        NOTE(review): the script, description and help text are inserted
        verbatim - nothing is XML-escaped here.
        fixme - use templating or something less fugly than this.
        """
        newXML = """<tool id="%(toolid)s" name="%(toolname)s" version="%(tool_version)s">
%(tooldesc)s
%(command)s
<inputs>
%(inputs)s
</inputs>
<outputs>
%(outputs)s
</outputs>
<configfiles>
<configfile name="runMe">
%(script)s
</configfile>
</configfiles>
%(tooltests)s
<help>
%(help)s
</help>
</tool>"""
        # may NOT be an input or htmlout
        newCommand = """<command interpreter="python">
%(toolname)s.py --script_path "$runMe" --interpreter "%(interpreter)s"
--tool_name "%(toolname)s" %(command_inputs)s %(command_outputs)s
</command>"""
        wantTab = self.opts.output_tab != 'None'
        wantInput = self.opts.input_tab != 'None'

        # Functional test section: the params are always the same, the
        # expected outputs depend on what the tool produces.
        taboutput = '<output name="tab_file" file="%s" ftype="tabular"/>' % self.test1Output
        htmloutput = '<output name="html_file" file="%s" ftype="html" lines_diff="%%d"/>' % self.test1HTML
        if self.opts.make_HTML and wantTab:
            testoutputs = [taboutput, htmloutput % 10]
        elif self.opts.make_HTML:
            testoutputs = [htmloutput % 5]
        else:
            testoutputs = [taboutput]
        outer, inner = 'tests', 'test'
        testlines = ['<%s><%s>' % (outer, inner),
                     '<param name="input1" value="%s" ftype="tabular"/>' % self.test1Input,
                     '<param name="job_name" value="test1"/>',
                     '<param name="runMe" value="$runMe"/>']
        testlines += testoutputs
        testlines.append('</%s></%s>' % (inner, outer))

        xdict = {}
        xdict['tool_version'] = self.opts.tool_version
        xdict['tooltests'] = '\n'.join(testlines)
        # configfile is least painful way to embed script to avoid external dependencies
        xdict['script'] = self.script
        if self.opts.help_text:
            with open(self.opts.help_text, 'r') as helpf:
                helptext = helpf.read()
        else:
            helptext = 'Please ask the tool author for help as none was supplied at tool generation'
        coda = ['**Script**',
                'Pressing execute will run the following code over your input file and generate some outputs in your history::',
                self.indentedScript,
                '**Attribution** This Galaxy tool was created by %s at %s\nusing the Galaxy Tool Factory.' % (self.opts.user_email, timenow()),
                'See %s for details of that project' % (toolFactoryURL)]
        xdict['help'] = '%s\n%s' % (helptext, '\n'.join(coda))
        if self.opts.tool_desc:
            xdict['tooldesc'] = '<description>%s</description>' % self.opts.tool_desc
        else:
            xdict['tooldesc'] = ''
        if wantInput:
            # the space may matter a lot if we append something
            xdict['command_inputs'] = '--input_tab "$input1" '
            xdict['inputs'] = '<param name="input1" type="data" format="%s" label="Select a suitable input file from your history"/> \n' % self.inputFormats
        else:
            xdict['command_inputs'] = ''  # assume no input - eg a random data generator
            xdict['inputs'] = ''
        xdict['inputs'] += '<param name="job_name" type="text" label="Supply a name for the outputs to remind you what they contain" value="%s"/> \n' % self.toolname
        xdict['toolname'] = self.toolname
        xdict['toolid'] = self.toolid
        xdict['interpreter'] = self.opts.interpreter
        xdict['scriptname'] = self.sfile
        xdict['command_outputs'] = ''
        xdict['outputs'] = ''
        if self.opts.make_HTML:
            xdict['command_outputs'] += ' --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes" '
            xdict['outputs'] += ' <data format="html" name="html_file" label="${job_name}.html"/>\n'
        if wantTab:
            xdict['command_outputs'] += ' --output_tab "$tab_file"'
            xdict['outputs'] += ' <data format="%s" name="tab_file" label="${job_name}"/>\n' % self.outFormats
        xdict['command'] = newCommand % xdict
        with open(self.xmlfile, 'w') as xf:
            xf.write(newXML % xdict)
            xf.write('\n')
        # ready for the tarball

    def makeTooltar(self):
        """Run the script, then bundle it as a gzipped tar archive.

        A tool is a gz tarball with eg
        /toolname/tool.xml /toolname/tool.py /toolname/test-data/test1_in.foo ...

        The archive is copied to ``opts.new_tool``.  If the script run fails
        the process exits with status 1.  Returns the (zero) exit status of
        the script run.
        """
        retval = self.run()
        if retval:
            sys.stderr.write('## Run failed. Cannot build yet. Please fix and retry\n')
            sys.exit(1)
        self.makeXML()
        tdir = self.toolname
        os.mkdir(tdir)
        if self.opts.input_tab != 'None':  # no reproducible test otherwise? TODO: maybe..
            testdir = os.path.join(tdir, 'test-data')
            os.mkdir(testdir)  # make tests directory
            shutil.copyfile(self.opts.input_tab, os.path.join(testdir, self.test1Input))
            if self.opts.output_tab != 'None':
                shutil.copyfile(self.opts.output_tab, os.path.join(testdir, self.test1Output))
            if self.opts.make_HTML:
                shutil.copyfile(self.opts.output_html, os.path.join(testdir, self.test1HTML))
            if self.opts.output_dir:
                shutil.copyfile(self.tlog, os.path.join(testdir, 'test1_out.log'))
        op = '%s.py' % self.toolname  # new name
        outpiname = os.path.join(tdir, op)  # path for the tool tarball
        pyin = os.path.basename(self.pyfile)  # our name - we rewrite ourselves (TM)
        notes = ['# %s - a self annotated version of %s generated by running %s\n' % (op, pyin, pyin),
                 '# to make a new Galaxy tool called %s\n' % self.toolname,
                 '# User %s at %s\n' % (self.opts.user_email, timenow())]
        with open(self.pyfile, 'r') as pi:
            notes += pi.readlines()  # our code becomes new tool wrapper (!)
        with open(outpiname, 'w') as outpi:
            outpi.write(''.join(notes))
            outpi.write('\n')
        # The temporary script has an absolute path and has already been
        # removed by run(), so write the script text into the bundle directly,
        # using the same <toolname>.<interpreter> name as the output_dir copy.
        scriptname = '%s.%s' % (self.toolname, self.opts.interpreter)
        with open(os.path.join(tdir, scriptname), 'w') as scriptcopy:
            scriptcopy.write(self.script)
        shutil.copyfile(self.xmlfile, os.path.join(tdir, self.xmlfile))
        tarpath = "%s.gz" % self.toolname
        tar = tarfile.open(tarpath, "w:gz")
        try:
            tar.add(tdir, arcname=self.toolname)
        finally:
            tar.close()
        shutil.copyfile(tarpath, self.opts.new_tool)
        shutil.rmtree(tdir)
        ## TODO: replace with optional direct upload to local toolshed?
        return retval

    def compressPDF(self, inpdf=None, thumbformat='png'):
        """Compress a PDF in place with ``gs`` and make a thumbnail with ``convert``.

        inpdf       -- absolute path to an existing PDF
        thumbformat -- image extension for the thumbnail written next to it

        Returns 0 when both external commands succeed, otherwise the first
        non-zero exit status.  Output of both commands goes to a log file in
        the temporary directory, which is left in place.
        """
        assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf, self.myname)
        hf, hlog = tempfile.mkstemp(suffix="%s.log" % self.toolname)
        sto = os.fdopen(hf, 'w')  # reuse the descriptor mkstemp opened
        try:
            outpdf = '%s_compressed' % inpdf
            cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dBATCH", "-sOutputFile=%s" % outpdf, inpdf]
            retval1 = subprocess.call(cl, stdout=sto, stderr=sto, cwd=self.opts.output_dir)
            if retval1 == 0:
                os.unlink(inpdf)
                shutil.move(outpdf, inpdf)
            outpng = '%s.%s' % (os.path.splitext(inpdf)[0], thumbformat)
            cl2 = ['convert', inpdf, outpng]
            retval2 = subprocess.call(cl2, stdout=sto, stderr=sto, cwd=self.opts.output_dir)
        finally:
            sto.close()
        return retval1 or retval2

    def getfSize(self, fpath, outpath):
        """Return a human readable size for file ``fpath`` inside ``outpath``.

        Gives '' when the path is not a regular file, '0 B' for an empty
        file, otherwise bytes, KB or MB with one decimal place.
        """
        size = ''
        fp = os.path.join(outpath, fpath)
        if os.path.isfile(fp):
            size = '0 B'
            n = float(os.path.getsize(fp))
            if n > 2**20:
                size = '%1.1f MB' % (n / 2**20)
            elif n > 2**10:
                size = '%1.1f KB' % (n / 2**10)
            elif n > 0:
                size = '%d B' % (int(n))
        return size

    def makeHtml(self):
        """Write an HTML page listing all the artifacts found in the output_dir.

        PDFs are compressed and shown as a grid of clickable thumbnails, every
        file is listed with its size, and the run log is appended.  The page
        is written to ``opts.output_html`` and its lines kept in ``self.html``.
        """
        galhtmlprefix = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
<title></title>
<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
</head>
<body>
<div class="toolFormBody">
"""
        galhtmlattr = """<hr/><div class="infomessage">This tool (%s) was generated by the <a href="https://bitbucket.org/fubar/galaxytoolfactory/overview">Galaxy Tool Factory</a></div><br/>"""
        galhtmlpostfix = """</div></body></html>\n"""
        thumbcell = """<td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="5" width="%d"
alt="Image called %s"/></a></td>\n"""

        flist = [x for x in os.listdir(self.opts.output_dir) if x != 'Rplots.pdf']
        flist.sort()
        html = []
        html.append(galhtmlprefix % progname)
        html.append('<div class="infomessage">Galaxy Tool "%s" run at %s</div><br/>' % (self.toolname, timenow()))
        fhtml = []
        if len(flist) > 0:
            pdflist = []
            npdf = len([x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf'])
            # thumbnails per row grows with the log of the number of PDFs
            nacross = 1
            if npdf > 0:
                nacross = max(1, int(round(math.log(npdf, 2))))
            width = min(400, int(1200 / nacross))
            for rownum, fname in enumerate(flist):
                dname, e = os.path.splitext(fname)
                # size is taken before any compression below
                sfsize = self.getfSize(fname, self.opts.output_dir)
                if e.lower() == '.pdf':  # compress and make a thumbnail
                    thumb = '%s.%s' % (dname, self.thumbformat)
                    pdff = os.path.join(self.opts.output_dir, fname)
                    retval = self.compressPDF(inpdf=pdff, thumbformat=self.thumbformat)
                    if retval == 0:
                        pdflist.append((fname, thumb))
                if (rownum + 1) % 2 == 0:
                    fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
                else:
                    fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
            ntogo = nacross  # counter for table row padding with empty cells
            if len(pdflist) > 0:
                html.append('<div><table class="simple" cellpadding="2" cellspacing="2">\n<tr>')
                lastthumb = len(pdflist) - 1
                for i, (fname, thumb) in enumerate(pdflist):
                    s = thumbcell % (fname, thumb, fname, width, fname)
                    if (i + 1) % nacross == 0:
                        s += '</tr>\n'
                        ntogo = 0
                        # only open a new row when another thumbnail follows
                        if i < lastthumb:
                            s += '<tr>'
                            ntogo = nacross
                    else:
                        ntogo -= 1
                    html.append(s)
                if html[-1].strip().endswith('</tr>'):
                    html.append('</table></div>\n')
                else:
                    if ntogo > 0:  # pad
                        html.append('<td>&nbsp;</td>' * ntogo)
                    html.append('</tr></table></div>\n')
            if len(fhtml) > 0:
                fhtml.insert(0, '<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n')
                fhtml.append('</table></div><br/>')
                html += fhtml  # the file listing follows the thumbnails
        else:
            html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter)
        with open(self.tlog, 'r') as logf:
            rlog = [x for x in logf if x.strip()]
        if len(rlog) > 1:
            html.append('<div class="toolFormTitle">%s log</div><pre>\n' % self.opts.interpreter)
            html += rlog
            html.append('</pre>\n')
        html.append(galhtmlattr % (self.toolname))
        html.append(galhtmlpostfix)
        with open(self.opts.output_html, 'w') as htmlf:
            htmlf.write('\n'.join(html))
            htmlf.write('\n')
        self.html = html

    def _runCommand(self, scriptText=None):
        """Execute ``self.cl`` and return its exit status.

        When ``scriptText`` is given it is piped to the command's stdin.  If
        ``opts.output_dir`` is set the command runs there and its stdout and
        stderr go to the run log ``self.tlog``.
        """
        stdin = None
        if scriptText is not None:
            stdin = subprocess.PIPE
        # universal_newlines makes the stdin pipe accept text on Python 3
        if self.opts.output_dir:
            sto = open(self.tlog, 'w')
            try:
                sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
                sto.flush()
                p = subprocess.Popen(self.cl, shell=False, stdout=sto, stderr=sto, stdin=stdin,
                                     cwd=self.opts.output_dir, universal_newlines=True)
                p.communicate(scriptText)
            finally:
                sto.close()
        else:
            p = subprocess.Popen(self.cl, shell=False, stdin=stdin, universal_newlines=True)
            p.communicate(scriptText)
        return p.returncode

    def run(self):
        """Run the script and return the interpreter's exit status.

        ``bash`` and ``sh`` are delegated to ``runBash`` (when
        ``treatbashSpecial`` is set); every other interpreter receives the
        script on stdin.  The HTML report is written when ``opts.make_HTML``
        is set, and the temporary script file is removed afterwards.
        """
        if self.treatbashSpecial and self.opts.interpreter in ['bash', 'sh']:
            retval = self.runBash()
        else:
            retval = self._runCommand(scriptText=self.script)
            if self.opts.make_HTML:
                self.makeHtml()
        try:
            os.unlink(self.sfile)
        except OSError:
            pass  # best effort cleanup - the temporary script is already gone
        return retval

    def runBash(self):
        """Run the script with bash/sh and return the exit status.

        cannot use - for bash so the command line names self.sfile instead.
        """
        retval = self._runCommand()
        if self.opts.make_HTML:
            self.makeHtml()
        return retval
494
495
def main():
    """Parse the command line, then run the script or build a tool from it.

    Exits with an explanatory message on stderr when the caller is flagged as
    unauthorised (``--bad_user``) or a required option is missing.  These are
    explicit checks rather than ``assert`` statements so that they still apply
    when Python runs with ``-O``.  Exits with the script's own status when the
    run fails, so the job runner sees the failure.
    """
    u = """
This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
<command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
</command>
"""
    op = optparse.OptionParser(usage=u)
    a = op.add_option
    a('--script_path', default=None)
    a('--tool_name', default=None)
    a('--interpreter', default=None)
    a('--output_dir', default=None)
    a('--output_html', default=None)
    a('--input_tab', default="None")
    a('--output_tab', default="None")
    a('--user_email', default='Unknown')
    a('--bad_user', default=None)
    a('--make_Tool', default=None)
    a('--make_HTML', default=None)
    a('--help_text', default=None)
    a('--tool_desc', default=None)
    a('--new_tool', default=None)
    a('--tool_version', default=None)
    opts, args = op.parse_args()
    if opts.bad_user:
        sys.exit('UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user, opts.bad_user))
    if not opts.tool_name:
        sys.exit('## Tool Factory expects a tool name - eg --tool_name=DESeq')
    if not opts.interpreter:
        sys.exit('## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript')
    if not (opts.script_path and os.path.isfile(opts.script_path)):
        sys.exit('## Tool Factory wrapper expects a script path - eg --script_path=foo.R')
    if opts.output_dir:
        try:
            os.makedirs(opts.output_dir)
        except OSError:
            # an existing directory is fine; anything else is a real failure
            if not os.path.isdir(opts.output_dir):
                raise
    r = ScriptRunner(opts)
    if opts.make_Tool:
        retcode = r.makeTooltar()
    else:
        retcode = r.run()
    if retcode:
        sys.exit(retcode)  # indicate failure to job runner
536
537
# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
540
541