comparison scriptrunner.py @ 0:21d312776891 draft

planemo upload for repository https://github.com/mvdbeek/docker_scriptrunner/ commit 30f8264cdd67d40dec8acde6407f32152e6a29c1-dirty
author mvdbeek
date Sat, 09 Jul 2016 16:57:13 -0400
parents
children 495946ffc2d6
comparison
equal deleted inserted replaced
-1:000000000000 0:21d312776891
1 # DockerToolFactory.py
2 # see https://github.com/mvdbeek/scriptrunner
3
4 import sys
5 import shutil
6 import subprocess
7 import os
8 import time
9 import tempfile
10 import argparse
11 import getpass
12 import tarfile
13 import re
14 import shutil
15 import math
16 import fileinput
17 from os.path import abspath
18
19
# Name of the running script (last path component of argv[0]); used in the
# generated HTML report header.
progname = os.path.basename(sys.argv[0])
# Module-wide switches; both are off by default.
verbose = False
debug = False
23
def timenow():
    """Return the current local time formatted as 'dd/mm/YYYY HH:MM:SS'."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    return time.strftime(stamp_format, time.localtime())
28
# Characters replaced by html_escape(). '&', '>' and '<' become HTML
# entities; '$' is backslash-escaped.
html_escape_table = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    "$": "\\$",
}


def html_escape(text):
    """Return *text* with every character found in html_escape_table replaced.

    Characters that are not keys of the table are copied unchanged, so an
    empty string yields an empty string.
    """
    return "".join(html_escape_table.get(c, c) for c in text)
39
def cmd_exists(cmd):
    """Return True when the shell builtin `type` can resolve *cmd*.

    The lookup runs through the shell; its output is captured and discarded,
    only the exit status is inspected.
    """
    status = subprocess.call(
        "type " + cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    return status == 0
43
def construct_bind(host_path, container_path=False, binds=None, ro=True):
    """Build or extend the binds dictionary used to mount files in a container.

    host_path may be a single path or a list of paths. Each host path maps to
    ``{'bind': <path in container>, 'ro': ro}``. When container_path is not
    given, the host path is reused inside the container. For a list, an
    explicit container_path applies to the first entry only; the remaining
    entries are bound to their own host path.

    Returns the (possibly newly created) binds dictionary.
    """
    # TODO remove container_path if it is always going to be the same as host_path
    if not binds:
        binds = {}
    if not isinstance(host_path, list):
        binds[host_path] = {'bind': container_path or host_path, 'ro': ro}
        return binds
    target = container_path
    for source in host_path:
        binds[source] = {'bind': target or source, 'ro': ro}
        target = False  # only the first entry may use the explicit target
    return binds
62
def switch_to_docker(opts):
    """Re-run this script, with the same arguments, inside a docker container.

    The script, the output directory, the inputs and the outputs named in
    *opts* are bind-mounted at identical paths inside the container. The
    container is started with the current command line plus
    ``--dockerized 1 --user_id <uid> --group_id <gid>``, its logs are printed
    once it has finished, and it is removed afterwards.

    Returns whatever ``docker_client.wait`` reports for the container, so the
    caller can propagate a failure (previously the status was discarded).
    """
    import docker  # need local import, as container does not have docker-py
    user_id = os.getuid()
    group_id = os.getgid()
    docker_client = docker.Client()
    toolfactory_path = abspath(sys.argv[0])
    output_dir = abspath(opts.output_dir)

    binds = construct_bind(host_path=opts.script_path, ro=False)
    binds = construct_bind(binds=binds, host_path=output_dir, ro=False)
    # 'None' is the placeholder argparse default; it is not a path to mount.
    if opts.input_tab and opts.input_tab != 'None':
        binds = construct_bind(binds=binds, host_path=opts.input_tab, ro=True)
    if not opts.output_tab == 'None':
        binds = construct_bind(binds=binds, host_path=opts.output_tab, ro=False)
    if opts.make_HTML:
        binds = construct_bind(binds=binds, host_path=opts.output_html, ro=False)
    binds = construct_bind(binds=binds, host_path=toolfactory_path)
    volumes = list(binds)

    # Inject the absolute path of the working dir. The i > 0 guard stops
    # argv[-1] (the last argument) from being consulted for argv[0].
    sys.argv = [
        output_dir if i > 0 and sys.argv[i - 1] == '--output_dir' else arg
        for i, arg in enumerate(sys.argv)
    ]
    cmd = ['python', '-u'] + sys.argv + [
        '--dockerized', '1',
        '--user_id', str(user_id),
        '--group_id', str(group_id),
    ]

    # RepoTags can be None for untagged images, hence the `or []`.
    image_exists = any(
        opts.docker_image in (image.get('RepoTags') or [])
        for image in docker_client.images()
    )
    if not image_exists:
        docker_client.pull(opts.docker_image)

    container_id = docker_client.create_container(
        image=opts.docker_image,
        volumes=volumes,
        command=cmd,
    )['Id']
    try:
        docker_client.start(container=container_id, binds=binds)
        exit_status = docker_client.wait(container=container_id)
        logs = docker_client.logs(container=container_id)
        if isinstance(logs, bytes) and not isinstance(logs, str):
            logs = logs.decode('utf-8', 'replace')  # Python 3 client returns bytes
        elif not isinstance(logs, str):
            logs = "".join(logs)
        print(logs)
    finally:
        # Never leave the container behind, even when start/wait/logs fail.
        docker_client.remove_container(container_id)
    return exit_status
94
class ScriptRunner:
    """Wrapper that runs an arbitrary script with an interpreter.

    The script named by ``opts.script_path`` is copied to a temporary file
    and to ``opts.output_dir``, an interpreter command line is assembled in
    ``self.cl``, and ``run()`` executes it while logging to files in
    ``opts.output_dir``. Optionally an HTML report of all produced files is
    written to ``opts.output_html``.
    """

    def __init__(self, opts=None, treatbashSpecial=True, image_tag='base'):
        """Clean up inputs, set up some outputs and build the command line.

        opts: parsed command line options (argparse namespace).
        treatbashSpecial: when true, bash/sh scripts are passed to the
            interpreter as a file instead of being piped to stdin.
        image_tag: stored on the instance; not used by this class itself.

        Side effects: changes the working directory to ``opts.output_dir``
        and creates the temporary script file ``self.sfile`` (the caller is
        responsible for removing it).
        """
        self.opts = opts
        self.scriptname = 'script'
        self.useIM = cmd_exists('convert')
        self.useGS = cmd_exists('gs')
        self.temp_warned = False  # we want only one warning if $TMP not set
        self.treatbashSpecial = treatbashSpecial
        self.image_tag = image_tag
        os.chdir(abspath(opts.output_dir))
        self.thumbformat = 'png'
        with open(self.opts.script_path, 'r') as source:
            # rstrip removes pesky dos line endings if needed
            lines = [x.rstrip() for x in source.readlines()]
        self.script = '\n'.join(lines)
        fhandle, self.sfile = tempfile.mkstemp(prefix='script', suffix=".%s" % (opts.interpreter))
        # fdopen takes ownership of the descriptor so it is not leaked;
        # self.sfile is the script source for Popen in the bash/sh case
        with os.fdopen(fhandle, 'w') as tscript:
            tscript.write(self.script)
        self.indentedScript = '\n'.join([' %s' % html_escape(x) for x in lines])  # for restructured text in help
        self.escapedScript = '\n'.join([html_escape(x) for x in lines])
        self.elog = os.path.join(self.opts.output_dir, "%s_error.log" % self.scriptname)
        if opts.output_dir:  # may not want these complexities
            self.tlog = os.path.join(self.opts.output_dir, "%s_runner.log" % self.scriptname)
            art = '%s.%s' % (self.scriptname, opts.interpreter)
            artpath = os.path.join(self.opts.output_dir, art)  # need full path
            with open(artpath, 'w') as artifact:  # keep a copy of the script with the outputs
                artifact.write(self.script)
        self.cl = []
        self.html = []
        a = self.cl.append
        a(opts.interpreter)
        if self.treatbashSpecial and opts.interpreter in ['bash', 'sh']:
            a(self.sfile)
        else:
            a('-')  # stdin
        # 'None' is the placeholder used when no inputs were supplied; a bare
        # string must not be iterated character by character.
        if opts.input_tab in (None, 'None'):
            input_paths = []
        elif isinstance(opts.input_tab, str):
            input_paths = [opts.input_tab]
        else:
            input_paths = opts.input_tab
        for input_path in input_paths:
            a(input_path)
        if opts.output_tab == 'None':  # If tool generates only HTML, set output name to toolname
            a(str(self.scriptname) + '.out')
        a(opts.output_tab)
        for param in opts.additional_parameters:
            # "name,value": split on the first comma only so that the value
            # itself may contain commas
            name, value = param.split(',', 1)
            a('--' + name)
            a(value)
        self.outFormats = opts.output_format
        self.inputFormats = list(opts.input_formats)
        self.test1Input = '%s_test1_input.xls' % self.scriptname
        self.test1Output = '%s_test1_output.xls' % self.scriptname
        self.test1HTML = '%s_test1_output.html' % self.scriptname

    def _run_logged(self, cl, sto, env):
        """Run *cl* in output_dir with stdout/stderr sent to the open file *sto*.

        Returns the exit status, or 1 when the program cannot be started
        (for example because it is not installed).
        """
        try:
            proc = subprocess.Popen(cl, stdout=sto, stderr=sto, cwd=self.opts.output_dir, env=env)
        except OSError as e:
            sto.write('## ERROR - cannot run %s: %s\n' % (cl[0], e))
            return 1
        return proc.wait()

    def compressPDF(self, inpdf=None, thumbformat='png'):
        """Compress a PDF in place with gs and make a thumbnail with convert.

        need absolute path to pdf
        note that GS gets confoozled if no $TMP or $TEMP
        so we set it

        Returns 0 when both steps succeed, otherwise the first non-zero
        status. Raises AssertionError when *inpdf* is not an existing file.
        """
        if not (inpdf and os.path.isfile(inpdf)):
            raise AssertionError(
                "## Input %s supplied to %s compressPDF not found" % (inpdf, self.scriptname))
        hlog = os.path.join(self.opts.output_dir, "compress_%s.txt" % os.path.basename(inpdf))
        our_env = os.environ.copy()
        with open(hlog, 'a') as sto:
            our_tmp = our_env.get('TMP', None)
            if not our_tmp:
                our_tmp = our_env.get('TEMP', None)
            if not (our_tmp and os.path.exists(our_tmp)):
                newtmp = os.path.join(self.opts.output_dir, 'tmp')
                try:
                    os.mkdir(newtmp)
                except OSError:
                    sto.write('## WARNING - cannot make %s - it may exist or permissions need fixing\n' % newtmp)
                our_env['TEMP'] = newtmp
                if not self.temp_warned:
                    sto.write('## WARNING - no $TMP or $TEMP!!! Please fix - using %s temporarily\n' % newtmp)
                    self.temp_warned = True
            outpdf = '%s_compressed' % inpdf
            cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dUseCIEColor", "-dBATCH",
                  "-dPDFSETTINGS=/printer", "-sOutputFile=%s" % outpdf, inpdf]
            retval1 = self._run_logged(cl, sto, our_env)
        if retval1 == 0:
            # replace the original with the compressed copy; the log is only
            # kept when something went wrong
            os.unlink(inpdf)
            shutil.move(outpdf, inpdf)
            os.unlink(hlog)
        hlog = os.path.join(self.opts.output_dir, "thumbnail_%s.txt" % os.path.basename(inpdf))
        outpng = '%s.%s' % (os.path.splitext(inpdf)[0], thumbformat)
        with open(hlog, 'w') as sto:
            retval2 = self._run_logged(['convert', inpdf, outpng], sto, our_env)
        if retval2 == 0:
            os.unlink(hlog)
        return retval1 or retval2

    def getfSize(self, fpath, outpath):
        """
        format a nice file size string

        Returns '' when outpath/fpath is not a regular file, otherwise the
        size in B, KB or MB.
        """
        size = ''
        fp = os.path.join(outpath, fpath)
        if os.path.isfile(fp):
            size = '0 B'
            n = float(os.path.getsize(fp))
            if n > 2**20:
                size = '%1.1f MB' % (n / 2**20)
            elif n > 2**10:
                size = '%1.1f KB' % (n / 2**10)
            elif n > 0:
                size = '%d B' % (int(n))
        return size

    def makeHtml(self):
        """ Create an HTML file content to list all the artifacts found in the output_dir

        Every PDF is compressed and thumbnailed, every ``*.log`` file starts
        a report section (the runner log comes last), and all files are
        listed with their sizes. The page is written to
        ``opts.output_html`` and its lines are kept in ``self.html``.
        """

        galhtmlprefix = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
<title></title>
<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
</head>
<body>
<div class="toolFormBody">
"""
        galhtmlpostfix = """</div></body></html>\n"""

        flist = sorted(x for x in os.listdir(self.opts.output_dir) if x != 'Rplots.pdf')
        html = []
        html.append(galhtmlprefix % progname)
        html.append('<div class="infomessage">Galaxy Tool "%s" run at %s</div><br/>' % (self.scriptname, timenow()))
        fhtml = []
        if len(flist) > 0:
            # log file names determine sections; relative names resolve
            # against the working directory set in __init__
            logfiles = sorted(x for x in flist if x.lower().endswith('.log'))
            logfiles = [x for x in logfiles if abspath(x) != abspath(self.tlog)]
            logfiles.append(abspath(self.tlog))  # make it the last one
            pdflist = []
            for rownum, fname in enumerate(flist):
                dname, e = os.path.splitext(fname)
                sfsize = self.getfSize(fname, self.opts.output_dir)
                if e.lower() == '.pdf':  # compress and make a thumbnail
                    thumb = '%s.%s' % (dname, self.thumbformat)
                    pdff = os.path.join(self.opts.output_dir, fname)
                    retval = self.compressPDF(inpdf=pdff, thumbformat=self.thumbformat)
                    if retval == 0:
                        pdflist.append((fname, thumb))
                    else:
                        pdflist.append((fname, fname))  # no thumbnail: show the pdf itself
                if (rownum + 1) % 2 == 0:
                    fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
                else:
                    fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
            for logfname in logfiles:  # expect at least tlog - if more
                if abspath(logfname) == abspath(self.tlog):  # handled later
                    sectionname = 'All tool run'
                    if (len(logfiles) > 1):
                        sectionname = 'Other'
                    ourpdfs = pdflist
                else:
                    realname = os.path.basename(logfname)
                    sectionname = os.path.splitext(realname)[0].split('_')[0]  # break in case _ added to log
                    ourpdfs = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] == sectionname]
                    pdflist = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] != sectionname]  # remove
                nacross = 1
                npdf = len(ourpdfs)

                if npdf > 0:
                    # lay the thumbnails out on a roughly square grid
                    nacross = math.sqrt(npdf)
                    if int(nacross)**2 != npdf:
                        nacross += 1
                    nacross = int(nacross)
                    width = min(400, 1200 // nacross)
                    html.append('<div class="toolFormTitle">%s images and outputs</div>' % sectionname)
                    html.append('(Click on a thumbnail image to download the corresponding original PDF image)<br/>')
                    ntogo = nacross  # counter for table row padding with empty cells
                    html.append('<div><table class="simple" cellpadding="2" cellspacing="2">\n<tr>')
                    for i, paths in enumerate(ourpdfs):
                        fname, thumb = paths
                        s = """<td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="5" width="%d"
alt="Image called %s"/></a></td>\n""" % (fname, thumb, fname, width, fname)
                        if ((i + 1) % nacross == 0):
                            s += '</tr>\n'
                            ntogo = 0
                            if i < (npdf - 1):  # more to come
                                s += '<tr>'
                                ntogo = nacross
                        else:
                            ntogo -= 1
                        html.append(s)
                    if html[-1].strip().endswith('</tr>'):
                        html.append('</table></div>\n')
                    else:
                        if ntogo > 0:  # pad
                            html.append('<td>&nbsp;</td>' * ntogo)
                        html.append('</tr></table></div>\n')
                with open(logfname, 'r') as logfile:
                    logtext = [x for x in logfile.readlines() if x.strip()]
                html.append('<div class="toolFormTitle">%s log output</div>' % sectionname)
                # a single non-blank line is still reported as empty
                if len(logtext) > 1:
                    html.append('\n<pre>\n')
                    html += logtext
                    html.append('\n</pre>\n')
                else:
                    html.append('%s is empty<br/>' % logfname)
        if len(fhtml) > 0:
            fhtml.insert(0, '<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n')
            fhtml.append('</table></div><br/>')
            html.append('<div class="toolFormTitle">All output files available for downloading</div>\n')
            html += fhtml  # add all non-pdf files to the end of the display
        else:
            html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter)
        html.append(galhtmlpostfix)
        with open(self.opts.output_html, 'w') as htmlf:
            htmlf.write('\n'.join(html))
            htmlf.write('\n')
        self.html = html

    def run(self):
        """
        Run the script and return the interpreter's exit status.

        bash/sh scripts are delegated to runBash(); any other interpreter
        receives the script on stdin. With an output_dir, stdout goes to the
        runner log and stderr to the error log; the error log is echoed to
        stderr when the run fails. The HTML report is written when
        ``opts.make_HTML`` is set.
        """
        if self.treatbashSpecial and self.opts.interpreter in ['bash', 'sh']:
            return self.runBash()
        sto = ste = None
        try:
            # universal_newlines makes the stdin pipe accept text on Python 3
            if self.opts.output_dir:
                ste = open(self.elog, 'w')
                sto = open(self.tlog, 'w')
                sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
                sto.flush()
                p = subprocess.Popen(self.cl, shell=False, stdout=sto, stderr=ste,
                                     stdin=subprocess.PIPE, cwd=self.opts.output_dir,
                                     universal_newlines=True)
            else:
                p = subprocess.Popen(self.cl, shell=False, stdin=subprocess.PIPE,
                                     universal_newlines=True)
            # communicate() feeds the script, closes stdin and tolerates an
            # interpreter that exits before reading all of it
            p.communicate(self.script)
            retval = p.returncode
        finally:
            for handle in (sto, ste):
                if handle is not None:
                    handle.close()
        if self.opts.output_dir:
            with open(self.elog, 'r') as errlog:
                err = errlog.read()
            if retval != 0 and err:  # problem
                sys.stderr.write(err)
        if self.opts.make_HTML:
            self.makeHtml()
        return retval

    def runBash(self):
        """
        cannot use - for bash so use self.sfile

        Runs ``self.cl`` (which names the temporary script file), sending
        both stdout and stderr to the runner log when there is an
        output_dir, and returns the exit status.
        """
        if self.opts.output_dir:
            with open(self.tlog, 'w') as sto:
                sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
                sto.flush()
                p = subprocess.Popen(self.cl, shell=False, stdout=sto, stderr=sto, cwd=self.opts.output_dir)
                retval = p.wait()
        else:
            p = subprocess.Popen(self.cl, shell=False)
            retval = p.wait()
        if self.opts.make_HTML:
            self.makeHtml()
        return retval
374
375
def change_user_id(new_uid, new_gid):
    """
    Give the 'galaxy' user inside the container the uid and gid with which
    the script was originally called, to avoid issues with wrong user ids.

    The home directory is switched to /var/home/galaxy while the ids are
    changed and set back to /home/galaxy afterwards. The exit status of the
    individual commands is not checked.
    """
    commands = (
        ["/usr/sbin/usermod", "-d", "/var/home/galaxy", "galaxy"],
        ["/usr/sbin/usermod", "-u", new_uid, "galaxy"],
        ["/usr/sbin/groupmod", "-g", new_gid, "galaxy"],
        ["/usr/sbin/usermod", "-d", "/home/galaxy", "galaxy"],
    )
    for command in commands:
        subprocess.call(command)
386
387
def main():
    """Command line entry point.

    Outside a container (``--dockerized 0``, the default) the call is handed
    to switch_to_docker(), which re-invokes this script inside the docker
    image. Inside the container the process drops to the requested uid/gid
    and runs the script through ScriptRunner.

    Exits with a non-zero status when the user is not authorised, when the
    script path is not a file, or when the script itself fails.
    """
    usage = """
This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
<command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
</command>
"""
    op = argparse.ArgumentParser(description=usage)
    a = op.add_argument
    a('--docker_image', default=None)
    a('--script_path', default=None)
    a('--tool_name', default=None)
    a('--interpreter', default=None)
    a('--output_dir', default='./')
    a('--output_html', default=None)
    # An empty list (not the string 'None') means "no inputs"; a string would
    # be iterated character by character further down.
    a('--input_tab', default=[], nargs='*')
    a('--output_tab', default='None')
    a('--user_email', default='Unknown')
    a('--bad_user', default=None)
    a('--make_HTML', default=None)
    a('--new_tool', default=None)
    # type=int so that an explicit "--dockerized 0" compares equal to 0
    a('--dockerized', default=0, type=int)
    a('--group_id', default=None)
    a('--user_id', default=None)
    a('--output_format', default='tabular')
    a('--input_format', dest='input_formats', action='append', default=[])
    a('--additional_parameters', dest='additional_parameters', action='append', default=[])
    opts = op.parse_args()
    # Explicit checks instead of assert: these must still run under python -O.
    if opts.bad_user:
        sys.exit('UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user, opts.bad_user))
    if not (opts.script_path and os.path.isfile(opts.script_path)):
        sys.exit('## Tool Factory wrapper expects a script path - eg --script_path=foo.R')
    if opts.output_dir and not os.path.isdir(opts.output_dir):
        os.makedirs(opts.output_dir)
    if opts.dockerized == 0:
        retcode = switch_to_docker(opts)
        if retcode:
            sys.exit(retcode)  # indicate failure to job runner
        return
    change_user_id(opts.user_id, opts.group_id)
    os.setgid(int(opts.group_id))
    os.setuid(int(opts.user_id))
    r = ScriptRunner(opts)
    try:
        retcode = r.run()
    finally:
        os.unlink(r.sfile)  # temporary script copy made by ScriptRunner
    if retcode:
        sys.exit(retcode)  # indicate failure to job runner
433
434
435 if __name__ == "__main__":
436 main()