comparison fubar-galaxytoolfactory-25646561839c/rgToolFactory.py @ 0:2686fd3d0112 draft

Uploaded from test toolshed
author fubar
date Sun, 08 Jul 2012 04:58:23 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2686fd3d0112
1 # rgToolFactory.py
2 # see https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
3 #
4 # copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
5 #
6 # all rights reserved
7 # Licensed under the LGPL
8 # suggestions for improvement and bug fixes welcome at https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
9
10 # This is a Galaxy tool factory for simple scripts in python, R or whatever ails ye.
11 # It also serves as the wrapper for the new tool.
12 #
13 # you paste and run your script
14 # Only works for simple scripts that read one input from the history.
15 # Optionally can write one new history dataset,
16 # and optionally collect any number of outputs into links on an autogenerated HTML page.
17
18 # DO NOT install on a public or important site - please.
19
20 # installed generated tools are fine if the script is safe.
21 # They just run normally and their user cannot do anything unusually insecure
22 # but please, practice safe toolshed.
23 # Read the fucking code before you install any tool
24 # especially this one
25
26 # After you get the script working on some test data, you can
27 # optionally generate a toolshed compatible gzip file
28 # containing your script safely wrapped as an ordinary Galaxy script in your local toolshed for
29 # safe and largely automated installation in a production Galaxy.
30
31 # If you opt for an HTML output, you get all the script outputs arranged
32 # as a single Html history item - all output files are linked, thumbnails for all the pdfs.
33 # Ugly but really inexpensive.
34 #
35 # Patches appreciated please.
36 #
37 #
38 # long route to June 2012 product
39 # Behold the awesome power of Galaxy and the toolshed with the tool factory binds to bind them
40 # derived from an integrated script model
41 # called rgBaseScriptWrapper.py
42 # Note to the unwary:
43 # This tool allows arbitrary scripting on your Galaxy as the Galaxy user
44 # There is nothing stopping a malicious user doing whatever they choose
45 # Extremely dangerous!!
46 # Totally insecure. So, trusted users only
47 #
48 # preferred model is a developer using their throw away workstation instance - ie a private site.
49 # no real risk. The universe_wsgi.ini admin_users string is checked - only admin users are permitted to run this tool.
50 #
51
import math
import optparse
import os
import re
import shutil
import subprocess
import sys
import tarfile
import tempfile
import time
from xml.sax.saxutils import escape
63
# Module-level settings shared by the wrapper.
# progname is the file name of the running script, without its directory.
progname = os.path.basename(sys.argv[0])
myversion = 'V000.2 June 2012'
verbose = False
debug = False
toolFactoryURL = 'https://bitbucket.org/fubar/galaxytoolfactory'
69
def timenow():
    """Return the current local time as a 'DD/MM/YYYY HH:MM:SS' string."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    return time.strftime(stamp_format, time.localtime())
74
75
class ScriptRunner:
    """Run an arbitrary user script and optionally package it as a Galaxy tool.

    The script text is fed to the chosen interpreter on stdin, with the input
    and output dataset paths as its two positional arguments.  Depending on the
    options the run can also produce an HTML index of everything written to the
    output directory, and a gzipped tarball (tool XML, a copy of this wrapper,
    the script and test data) ready for a toolshed.

    NOTE: this executes whatever script it is given with the privileges of the
    calling process - it is meant for trusted users only.
    """

    def __init__(self, opts=None):
        """Read the script and prepare names, paths and the command line.

        opts is the optparse values object built by main(); it is required
        despite the None default.  The attributes read here are output_dir,
        tool_name, script_path, interpreter, make_Tool, input_tab and
        output_tab.

        Side effects: changes the working directory to opts.output_dir when
        one is given, writes a copy of the script there, and (when make_Tool
        is set) writes the script to the working directory for the tarball.
        """
        if opts.output_dir:  # simplify for the tool tarball
            os.chdir(opts.output_dir)
        self.thumbformat = 'jpg'
        self.opts = opts
        # a sanitizer now does this but..
        self.toolname = re.sub('[^a-zA-Z0-9_]+', '', opts.tool_name)
        self.toolid = self.toolname
        with open(self.opts.script_path, 'r') as scriptf:
            scriptlines = scriptf.readlines()
        self.script = ''.join(scriptlines)
        # indented copy for a restructured text literal block in the help
        self.indentedScript = ''.join([' %s' % x for x in scriptlines])
        self.myname = sys.argv[0]  # get our name because we write ourselves out as a tool later
        self.pyfile = self.myname  # crude but efficient - the cruft won't hurt much
        self.xmlfile = '%s.xml' % self.toolname
        self.sfile = '%s.%s' % (self.toolname, opts.interpreter)
        # always defined so later code can test it; only set with an output_dir
        self.tlog = None
        if opts.output_dir:  # may not want these complexities
            self.tlog = os.path.join(opts.output_dir, "%s_runner.log" % self.toolname)
            artifactpath = os.path.join(opts.output_dir, '%s_run.script' % self.toolname)
            with open(artifactpath, 'w') as artifact:
                artifact.write(self.script)
                artifact.write('\n')
        if opts.make_Tool:  # need this code and the user script for the tarball
            with open(self.sfile, 'w') as localscript:
                localscript.write(self.script)
        # interpreter reads the script from stdin ('-'), then the two paths
        self.cl = [opts.interpreter, '-', opts.input_tab, opts.output_tab]
        self.html = []
        self.outFormats = 'tabular'  # TODO make this an option at tool generation time
        self.inputFormats = 'tabular'  # TODO make this an option at tool generation time
        self.test1Input = '%s_test1_input.xls' % self.toolname
        self.test1Output = '%s_test1_output.xls' % self.toolname
        self.test1HTML = '%s_test1_output.html' % self.toolname

    def _wrapElement(self, tag, body):
        """Return body enclosed in an opening and closing XML element named tag."""
        return '<%s>%s</%s>' % (tag, body, tag)

    def _makeToolTests(self):
        """Return the functional-test section of the tool XML as a string.

        The parameters are always the same; the expected outputs depend on
        whether the tool writes an HTML report, a tabular file, or both.
        """
        wantTab = self.opts.output_tab != 'None'
        lines = [
            '<param name="input1" value="%s" ftype="tabular"/>' % self.test1Input,
            '<param name="job_name" value="test1"/>',
            '<param name="runMe" value="$runMe"/>',
        ]
        if self.opts.make_HTML and wantTab:
            lines.append('<output name="tab_file" file="%s" ftype="tabular" />' % self.test1Output)
            lines.append('<output name="html_file" file="%s" ftype="html" lines_diff="10"/>' % self.test1HTML)
        elif self.opts.make_HTML:
            lines.append('<output name="html_file" file="%s" ftype="html" lines_diff="5"/>' % self.test1HTML)
        else:
            lines.append('<output name="tab_file" file="%s" ftype="tabular"/>' % self.test1Output)
        single = self._wrapElement('test', '\n%s\n' % '\n'.join(lines))
        return self._wrapElement('tests', single)

    def makeXML(self):
        """Write the Galaxy tool wrapper XML for the script to self.xmlfile.

        The generated document has a tool element holding the description,
        a command that re-invokes <toolname>.py with --script_path "$runMe",
        the inputs and outputs, a configfile named runMe carrying the script
        itself, a functional test and the help text.

        The script is XML-escaped before it is embedded (both in the
        configfile and in the help), so scripts containing '<' or '&' still
        give a well-formed document; an XML parser returns the original text.
        fixme - use templating or something less fugly than string building.
        """
        newXML = '\n'.join([
            '<tool id="%(toolid)s" name="%(toolname)s" version="%(tool_version)s">',
            '%(tooldesc)s',
            '%(command)s',
            '<inputs>',
            '%(inputs)s',
            '</inputs>',
            '<outputs>',
            '%(outputs)s',
            '</outputs>',
            '<configfiles>',
            '<configfile name="runMe">',
            '%(script)s',
            '</configfile>',
            '</configfiles>',
            '%(tooltests)s',
            '<help>',
            '%(help)s',
            '</help>',
            '</tool>',
        ])
        # may NOT be an input or htmlout
        newCommand = '\n'.join([
            '<command interpreter="python">',
            '%(toolname)s.py --script_path "$runMe" --interpreter "%(interpreter)s"',
            '--tool_name "%(toolname)s" %(command_inputs)s %(command_outputs)s',
            '</command>',
        ])
        wantTab = self.opts.output_tab != 'None'
        xdict = {}
        xdict['tool_version'] = self.opts.tool_version
        xdict['test1Input'] = self.test1Input
        xdict['test1HTML'] = self.test1HTML
        xdict['test1Output'] = self.test1Output
        xdict['tooltests'] = self._makeToolTests()
        # configfile is least painful way to embed script to avoid external
        # dependencies; escape it so markup characters cannot break the XML
        xdict['script'] = escape(self.script)
        if self.opts.help_text:
            with open(self.opts.help_text, 'r') as helpf:
                xdict['help'] = helpf.read()
        else:
            xdict['help'] = 'Please ask the tool author for help as none was supplied at tool generation'
        coda = ['**Script**', 'Pressing execute will run the following code over your input file and generate some outputs in your history::']
        coda.append(escape(self.indentedScript))
        coda.append('**Attribution** This Galaxy tool was created by %s at %s\nusing the Galaxy Tool Factory.' % (self.opts.user_email, timenow()))
        coda.append('See %s for details of that project' % (toolFactoryURL))
        xdict['help'] = '%s\n%s' % (xdict['help'], '\n'.join(coda))
        if self.opts.tool_desc:
            xdict['tooldesc'] = '<description>%s</description>' % self.opts.tool_desc
        else:
            xdict['tooldesc'] = ''
        xdict['command_outputs'] = ''
        xdict['outputs'] = ''
        if self.opts.input_tab != 'None':
            xdict['command_inputs'] = '--input_tab "$input1" '  # the space may matter a lot if we append something
            xdict['inputs'] = '<param name="input1" type="data" format="%s" label="Select a suitable input file from your history"/> \n' % self.inputFormats
        else:
            xdict['command_inputs'] = ''  # assume no input - eg a random data generator
            xdict['inputs'] = ''
        xdict['inputs'] += '<param name="job_name" type="text" label="Supply a name for the outputs to remind you what they contain" value="%s"/> \n' % self.toolname
        xdict['toolname'] = self.toolname
        xdict['toolid'] = self.toolid
        xdict['interpreter'] = self.opts.interpreter
        xdict['scriptname'] = self.sfile
        if self.opts.make_HTML:
            xdict['command_outputs'] += ' --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes" '
            xdict['outputs'] += ' <data format="html" name="html_file" label="${job_name}.html"/>\n'
        if wantTab:
            xdict['command_outputs'] += ' --output_tab "$tab_file"'
            xdict['outputs'] += ' <data format="%s" name="tab_file" label="${job_name}"/>\n' % self.outFormats
        xdict['command'] = newCommand % xdict
        xmls = newXML % xdict
        with open(self.xmlfile, 'w') as xf:
            xf.write(xmls)
            xf.write('\n')
        # ready for the tarball

    def makeTooltar(self):
        """Run the script, then build the toolshed tarball at opts.new_tool.

        A tool is a gz tarball with eg
        /toolname/tool.xml /toolname/tool.py /toolname/test-data/test1_in.foo ...

        Exits the process with status 1 if the script run fails.  Returns the
        script's return code otherwise.
        """
        retval = self.run()
        if retval:
            sys.stderr.write('## Run failed. Cannot build yet. Please fix and retry\n')
            sys.exit(1)
        self.makeXML()
        tdir = self.toolname
        os.mkdir(tdir)
        if self.opts.input_tab != 'None':  # no reproducible test otherwise? TODO: maybe..
            testdir = os.path.join(tdir, 'test-data')
            os.mkdir(testdir)  # make tests directory
            shutil.copyfile(self.opts.input_tab, os.path.join(testdir, self.test1Input))
            if self.opts.output_tab != 'None':
                shutil.copyfile(self.opts.output_tab, os.path.join(testdir, self.test1Output))
            if self.opts.make_HTML:
                shutil.copyfile(self.opts.output_html, os.path.join(testdir, self.test1HTML))
            if self.opts.output_dir:
                shutil.copyfile(self.tlog, os.path.join(testdir, 'test1_out.log'))
        op = '%s.py' % self.toolname  # new name
        outpiname = os.path.join(tdir, op)  # path for the tool tarball
        pyin = os.path.basename(self.pyfile)  # our name - we rewrite ourselves (TM)
        notes = ['# %s - a self annotated version of %s generated by running %s\n' % (op, pyin, pyin), ]
        notes.append('# to make a new Galaxy tool called %s\n' % self.toolname)
        notes.append('# User %s at %s\n' % (self.opts.user_email, timenow()))
        # our own source becomes the new tool's wrapper, prefixed by the notes
        with open(self.pyfile, 'r') as selff:
            notes += selff.readlines()
        with open(outpiname, 'w') as outpi:
            outpi.write(''.join(notes))
            outpi.write('\n')
        shutil.copyfile(self.sfile, os.path.join(tdir, self.sfile))
        shutil.copyfile(self.xmlfile, os.path.join(tdir, self.xmlfile))
        tarpath = "%s.gz" % self.toolname
        tar = tarfile.open(tarpath, "w:gz")
        try:
            tar.add(tdir, arcname=self.toolname)
        finally:
            tar.close()
        shutil.copyfile(tarpath, self.opts.new_tool)
        shutil.rmtree(tdir)
        ## TODO: replace with optional direct upload to local toolshed?
        return retval

    def compressPDF(self, inpdf=None, thumbformat='png'):
        """Recompress a PDF in place with gs and make a thumbnail with convert.

        inpdf must be an absolute path to an existing pdf.  The thumbnail is
        written next to it with the extension thumbformat.  Returns 0 when
        both external commands succeed, otherwise the first non-zero return
        code.  Raises AssertionError when inpdf is not an existing file.
        """
        if not inpdf or not os.path.isfile(inpdf):
            # explicit raise so the check survives python -O
            raise AssertionError("## Input %s supplied to %s compressPDF not found" % (inpdf, self.myname))
        logfd, logpath = tempfile.mkstemp(suffix="%s.log" % self.toolname)
        # wrap the descriptor mkstemp already opened instead of leaking it
        sto = os.fdopen(logfd, 'w')
        try:
            outpdf = '%s_compressed' % inpdf
            cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dBATCH", "-sOutputFile=%s" % outpdf, inpdf]
            retval1 = subprocess.call(cl, stdout=sto, stderr=sto, cwd=self.opts.output_dir)
            if retval1 == 0:
                os.unlink(inpdf)
                shutil.move(outpdf, inpdf)
            outpng = '%s.%s' % (os.path.splitext(inpdf)[0], thumbformat)
            cl2 = ['convert', inpdf, outpng]
            retval2 = subprocess.call(cl2, stdout=sto, stderr=sto, cwd=self.opts.output_dir)
        finally:
            sto.close()
        retval = retval1 or retval2
        if not retval:
            # nothing went wrong so the command log is of no further use;
            # it is kept on failure to help diagnosis
            os.unlink(logpath)
        return retval

    def getfSize(self, fpath, outpath):
        """Return a human readable size string for the file outpath/fpath.

        Gives '' when the path is not a regular file, '0 B' for an empty
        file, and otherwise the size in B, KB or MB.
        """
        size = ''
        fp = os.path.join(outpath, fpath)
        if os.path.isfile(fp):
            size = '0 B'
            n = float(os.path.getsize(fp))
            if n > 2**20:
                size = '%1.1f MB' % (n / 2**20)
            elif n > 2**10:
                size = '%1.1f KB' % (n / 2**10)
            elif n > 0:
                size = '%d B' % (int(n))
        return size

    def makeHtml(self):
        """Write an HTML page listing all the artefacts found in the output_dir.

        PDFs are recompressed and shown as a grid of clickable thumbnails,
        then every file is listed with its size, followed by the run log.
        The page is written to opts.output_html and its lines are kept in
        self.html.  Requires opts.output_dir to be set.
        """
        galhtmlprefix = '\n'.join([
            '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">',
            '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">',
            '<head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />',
            '<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />',
            '<title></title>',
            '<link rel="stylesheet" href="/static/style/base.css" type="text/css" />',
            '</head>',
            '<body>',
            '<div class="toolFormBody">',
            '',
        ])
        galhtmlattr = """<hr/><div class="infomessage">This tool (%s) was generated by the <a href="https://bitbucket.org/fubar/galaxytoolfactory/overview">Galaxy Tool Factory</a></div><br/>"""
        galhtmlpostfix = """</div></body></html>\n"""

        flist = [x for x in os.listdir(self.opts.output_dir) if x != 'Rplots.pdf']
        flist.sort()
        html = []
        html.append(galhtmlprefix % progname)
        html.append('<div class="infomessage">Galaxy Tool "%s" run at %s</div><br/>' % (self.toolname, timenow()))
        fhtml = []
        if len(flist) > 0:
            pdflist = []
            npdf = len([x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf'])
            # thumbnails per row grows with log2 of the number of pdfs
            nacross = 1
            if npdf > 0:
                nacross = int(round(math.log(npdf, 2)))
                nacross = max(1, nacross)
            width = min(400, 1200 // nacross)
            for rownum, fname in enumerate(flist):
                dname, e = os.path.splitext(fname)
                sfsize = self.getfSize(fname, self.opts.output_dir)
                if e.lower() == '.pdf':  # compress and make a thumbnail
                    thumb = '%s.%s' % (dname, self.thumbformat)
                    pdff = os.path.join(self.opts.output_dir, fname)
                    retval = self.compressPDF(inpdf=pdff, thumbformat=self.thumbformat)
                    if retval == 0:
                        pdflist.append((fname, thumb))
                if (rownum + 1) % 2 == 0:
                    fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
                else:
                    fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
            ntogo = nacross  # counter for table row padding with empty cells
            if len(pdflist) > 0:
                html.append('<div><table class="simple" cellpadding="2" cellspacing="2">\n<tr>')
                # count only the pdfs that really got a thumbnail, so a failed
                # conversion cannot leave an extra empty row at the end
                nthumbs = len(pdflist)
                for i, paths in enumerate(pdflist):
                    fname, thumb = paths
                    s = """<td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="5" width="%d"
alt="Image called %s"/></a></td>\n""" % (fname, thumb, fname, width, fname)
                    if ((i + 1) % nacross == 0):
                        s += '</tr>\n'
                        ntogo = 0
                        if i < (nthumbs - 1):  # more to come
                            s += '<tr>'
                            ntogo = nacross
                    else:
                        ntogo -= 1
                    html.append(s)
                if html[-1].strip().endswith('</tr>'):
                    html.append('</table></div>\n')
                else:
                    if ntogo > 0:  # pad
                        html.append('<td>&nbsp;</td>' * ntogo)
                    html.append('</tr></table></div>\n')
            if len(fhtml) > 0:
                fhtml.insert(0, '<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n')
                fhtml.append('</table></div><br/>')
                html += fhtml  # add all non-pdf files to the end of the display
        else:
            html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter)
        with open(self.tlog, 'r') as logf:
            rlog = [x for x in logf.readlines() if x.strip()]
        if len(rlog) > 1:
            html.append('<div class="toolFormTitle">%s log</div><pre>\n' % self.opts.interpreter)
            html += rlog
            html.append('</pre>\n')
        html.append(galhtmlattr % (self.toolname))
        html.append(galhtmlpostfix)
        with open(self.opts.output_html, 'w') as htmlf:
            htmlf.write('\n'.join(html))
            htmlf.write('\n')
        self.html = html

    def run(self):
        """Run the script through the interpreter and return its exit code.

        The script text is written to the interpreter's stdin.  With an
        output_dir the process runs there and its stdout and stderr go to
        self.tlog; the HTML report is built afterwards when make_HTML is set.
        """
        # NOTE(review): the command is joined into a shell string, so dataset
        # paths containing spaces or shell metacharacters will be mis-parsed.
        # Left as is in case the interpreter option carries extra arguments.
        cmd = ' '.join(self.cl)
        sto = None
        try:
            if self.opts.output_dir:
                sto = open(self.tlog, 'w')
                p = subprocess.Popen(cmd, shell=True, stdout=sto, stderr=sto, stdin=subprocess.PIPE, cwd=self.opts.output_dir)
            else:
                p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE)
            p.stdin.write(self.script)
            p.stdin.close()
            retval = p.wait()
        finally:
            # close the log even if the process could not be started
            if sto is not None:
                sto.close()
        if self.opts.make_HTML:
            self.makeHtml()
        return retval
455
456
def main(argv=None):
    """Parse the command line, validate it and run or package the script.

    argv is the list of arguments to parse; None (the default) means use
    sys.argv[1:].  Invalid or unauthorised invocations end the process with
    an explanatory message on stderr and exit status 1.  A non-zero return
    code from the script is passed on as the exit status so the job runner
    sees the failure.
    """
    u = """
This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
<command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
</command>
"""
    op = optparse.OptionParser(usage=u)
    a = op.add_option
    a('--script_path', default=None)
    a('--tool_name', default=None)
    a('--interpreter', default=None)
    a('--output_dir', default=None)
    a('--output_html', default=None)
    a('--input_tab', default="None")
    a('--output_tab', default="None")
    a('--user_email', default='Unknown')
    a('--bad_user', default=None)
    a('--make_Tool', default=None)
    a('--make_HTML', default=None)
    a('--help_text', default=None)
    a('--tool_desc', default=None)
    a('--new_tool', default=None)
    a('--tool_version', default=None)
    opts, args = op.parse_args(argv)
    # explicit checks rather than assert: these must still run under python -O,
    # in particular the authorisation check
    if opts.bad_user:
        sys.exit('UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user, opts.bad_user))
    if not opts.tool_name:
        sys.exit('## Tool Factory expects a tool name - eg --tool_name=DESeq')
    if not opts.interpreter:
        sys.exit('## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript')
    if not (opts.script_path and os.path.isfile(opts.script_path)):
        sys.exit('## Tool Factory wrapper expects a script path - eg --script_path=foo.R')
    if opts.output_dir:
        try:
            os.makedirs(opts.output_dir)
        except OSError:
            # an already existing directory is fine; anything else is not
            if not os.path.isdir(opts.output_dir):
                raise
    r = ScriptRunner(opts)
    if opts.make_Tool:
        retcode = r.makeTooltar()
    else:
        retcode = r.run()
    if retcode:
        sys.exit(retcode)  # indicate failure to job runner
497
498
# Run the wrapper only when this file is executed as a script.
if __name__ == '__main__':
    main()
501
502