Mercurial > repos > mvdbeek > docker_scriptrunner
comparison scriptrunner.py @ 0:21d312776891 draft
planemo upload for repository https://github.com/mvdbeek/docker_scriptrunner/ commit 30f8264cdd67d40dec8acde6407f32152e6a29c1-dirty
author | mvdbeek |
---|---|
date | Sat, 09 Jul 2016 16:57:13 -0400 |
parents | |
children | 495946ffc2d6 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:21d312776891 |
---|---|
1 # DockerToolFactory.py | |
2 # see https://github.com/mvdbeek/scriptrunner | |
3 | |
4 import sys | |
5 import shutil | |
6 import subprocess | |
7 import os | |
8 import time | |
9 import tempfile | |
10 import argparse | |
11 import getpass | |
12 import tarfile | |
13 import re | |
14 import shutil | |
15 import math | |
16 import fileinput | |
17 from os.path import abspath | |
18 | |
19 | |
# Basename of the invoked script; interpolated into the generated HTML header.
progname = os.path.basename(sys.argv[0])
# Module-level switches; both are off by default.
verbose = False
debug = False
23 | |
def timenow():
    """Return the current local time as a ``DD/MM/YYYY HH:MM:SS`` string."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    local_now = time.localtime(time.time())
    return time.strftime(stamp_format, local_now)
28 | |
# Characters that must not appear verbatim in generated HTML/tool help text.
# The entity values had been decoded to the bare characters (making the escape
# a no-op); they are restored here. "$" is backslash-escaped rather than
# turned into an entity.
html_escape_table = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    "$": "\\$",
}


def html_escape(text):
    """Return ``text`` with ``&``, ``<``, ``>`` replaced by HTML entities.

    ``$`` is additionally prefixed with a backslash. Every other character is
    passed through unchanged. ``text`` may be any iterable of single
    characters (normally a string); an empty string yields an empty string.
    """
    return "".join(html_escape_table.get(c, c) for c in text)
39 | |
def cmd_exists(cmd):
    """Return True if the shell can resolve ``cmd`` via the ``type`` builtin.

    ``cmd`` is interpolated into a shell command line unquoted, so it must be
    a trusted, plain command name (the callers in this file pass literals).
    """
    # Output is discarded through os.devnull instead of PIPE: subprocess.call
    # never reads from pipes, so PIPE can block the child if it writes enough.
    with open(os.devnull, 'w') as devnull:
        status = subprocess.call("type " + cmd, shell=True,
                                 stdout=devnull, stderr=devnull)
    return status == 0
43 | |
def construct_bind(host_path, container_path=False, binds=None, ro=True):
    """Build or extend a docker-py ``binds`` mapping.

    Each host path becomes a key whose value is
    ``{'bind': <path inside container>, 'ro': <read-only flag>}``.

    host_path: a single path or a list of paths.
    container_path: mount point inside the container; when falsy the host
        path itself is used. For a list of host paths it is applied to the
        first entry only, every later entry is mounted at its own host path.
    binds: existing mapping to extend; a falsy value starts a fresh dict.
    ro: read-only flag stored with every entry added by this call.

    Returns the (possibly newly created) mapping.
    """
    # TODO remove container_path if it's always going to be the same as host_path
    mounts = binds if binds else {}
    if not isinstance(host_path, list):
        target = container_path if container_path else host_path
        mounts[host_path] = {'bind': target, 'ro': ro}
        return mounts
    for source in host_path:
        target = container_path if container_path else source
        mounts[source] = {'bind': target, 'ro': ro}
        # an explicit container_path is consumed by the first entry only
        container_path = False
    return mounts
62 | |
def switch_to_docker(opts):
    """Re-run this script inside a docker container and relay its output.

    The script, the output directory, the input/output files and this file
    itself are bind-mounted at identical paths inside the container. The
    current command line is replayed there with ``--dockerized 1`` plus the
    caller's uid/gid appended, and ``--output_dir`` rewritten to an absolute
    path. ``sys.argv`` is modified in place as a side effect.

    opts: parsed command-line namespace (see ``main``).

    Returns the container's exit status as reported by ``wait`` so callers
    can propagate failures; callers that ignore the return value behave as
    before.
    """
    import docker  # need local import, as container does not have docker-py
    user_id = os.getuid()
    group_id = os.getgid()
    docker_client = docker.Client()
    toolfactory_path = abspath(sys.argv[0])
    output_dir = abspath(opts.output_dir)

    binds = construct_bind(host_path=opts.script_path, ro=False)
    binds = construct_bind(binds=binds, host_path=output_dir, ro=False)
    # The argparse default for --input_tab is the string 'None'; do not try
    # to mount that as if it were a path.
    if opts.input_tab and opts.input_tab != 'None':
        binds = construct_bind(binds=binds, host_path=opts.input_tab, ro=True)
    if not opts.output_tab == 'None':
        binds = construct_bind(binds=binds, host_path=opts.output_tab, ro=False)
    if opts.make_HTML:
        binds = construct_bind(binds=binds, host_path=opts.output_html, ro=False)
    binds = construct_bind(binds=binds, host_path=toolfactory_path)
    volumes = list(binds.keys())

    # Inject the absolute path of the working dir. ``i > 0`` keeps index 0
    # from wrapping around to the last argument.
    sys.argv = [output_dir if i > 0 and sys.argv[i - 1] == '--output_dir' else arg
                for i, arg in enumerate(sys.argv)]
    cmd = ['python', '-u'] + sys.argv + ['--dockerized', '1',
                                         '--user_id', str(user_id),
                                         '--group_id', str(group_id)]

    # 'RepoTags' can be None for untagged images, hence the ``or []``.
    image_exists = any(opts.docker_image in (image.get('RepoTags') or [])
                       for image in docker_client.images())
    if not image_exists:
        docker_client.pull(opts.docker_image)

    container = docker_client.create_container(
        image=opts.docker_image,
        volumes=volumes,
        command=cmd,
    )
    container_id = container['Id']
    try:
        docker_client.start(container=container_id, binds=binds)
        status = docker_client.wait(container=container_id)
        logs = docker_client.logs(container=container_id)
        if not isinstance(logs, str):
            # bytes under Python 3; already ``str`` under Python 2
            logs = logs.decode('utf-8', 'replace')
        sys.stdout.write(logs + '\n')
    finally:
        # always clean up the container, even if start/wait/logs failed
        docker_client.remove_container(container_id)
    # NOTE(review): docker-py 1.x ``Client.wait`` returns an int; newer
    # clients return a dict - handled defensively, confirm against the
    # installed version.
    if isinstance(status, dict):
        status = status.get('StatusCode', 0)
    return status
94 | |
class ScriptRunner:
    """Wrapper that runs an arbitrary script through an interpreter.

    The script text is copied to a temporary file (``self.sfile``) and, when
    an output directory is configured, to ``script.<interpreter>`` inside it.
    ``run`` executes the interpreter and can build an HTML report listing
    every file found in the output directory.
    """

    def __init__(self, opts=None, treatbashSpecial=True, image_tag='base'):
        """Clean up inputs, set up some outputs and build the command line.

        opts: parsed command-line namespace (see ``main``).
        treatbashSpecial: when true and the interpreter is ``bash``/``sh``,
            the script is passed as a file instead of being fed on stdin.
        image_tag: stored on the instance; not otherwise used here.

        Side effects: changes the working directory to ``opts.output_dir``
        and creates the temporary script file ``self.sfile``.
        """
        self.opts = opts
        self.scriptname = 'script'
        self.useIM = cmd_exists('convert')
        self.useGS = cmd_exists('gs')
        self.temp_warned = False  # we want only one warning if $TMP not set
        self.treatbashSpecial = treatbashSpecial
        self.image_tag = image_tag
        os.chdir(abspath(opts.output_dir))
        self.thumbformat = 'png'
        with open(self.opts.script_path, 'r') as source:
            s = [x.rstrip() for x in source]  # remove pesky dos line endings if needed
        self.script = '\n'.join(s)
        fhandle, self.sfile = tempfile.mkstemp(prefix='script', suffix=".%s" % (opts.interpreter))
        # write through the descriptor mkstemp returned so it is not leaked
        with os.fdopen(fhandle, 'w') as tscript:  # self.sfile is the script source for Popen
            tscript.write(self.script)
        self.indentedScript = '\n'.join([' %s' % html_escape(x) for x in s])  # for restructured text in help
        self.escapedScript = '\n'.join([html_escape(x) for x in s])
        self.elog = os.path.join(self.opts.output_dir, "%s_error.log" % self.scriptname)
        if opts.output_dir:  # may not want these complexities
            self.tlog = os.path.join(self.opts.output_dir, "%s_runner.log" % self.scriptname)
            art = '%s.%s' % (self.scriptname, opts.interpreter)
            artpath = os.path.join(self.opts.output_dir, art)  # need full path
            with open(artpath, 'w') as artifact:
                artifact.write(self.script)
        self.cl = []
        self.html = []
        a = self.cl.append
        a(opts.interpreter)
        if self.treatbashSpecial and opts.interpreter in ['bash', 'sh']:
            a(self.sfile)
        else:
            a('-')  # stdin
        # The argparse default is the *string* 'None'; iterating it would add
        # its individual characters as arguments.
        input_paths = [] if opts.input_tab == 'None' else opts.input_tab
        for input_path in input_paths:
            a(input_path)
        if opts.output_tab == 'None':  # If tool generates only HTML, set output name to toolname
            a(str(self.scriptname) + '.out')
        a(opts.output_tab)
        for param in opts.additional_parameters:
            # split on the first comma only so values may themselves contain commas
            param, value = param.split(',', 1)
            a('--' + param)
            a(value)
        self.outFormats = opts.output_format
        self.inputFormats = list(opts.input_formats)
        self.test1Input = '%s_test1_input.xls' % self.scriptname
        self.test1Output = '%s_test1_output.xls' % self.scriptname
        self.test1HTML = '%s_test1_output.html' % self.scriptname

    def _run_logged(self, cl, sto, env):
        """Run ``cl`` with stdout/stderr sent to the open file ``sto``.

        Returns the exit status, or 1 when the executable cannot be started
        (the reason is written to ``sto``).
        """
        sto.flush()  # keep our own messages ahead of the child's output
        try:
            child = subprocess.Popen(cl, stdout=sto, stderr=sto,
                                     cwd=self.opts.output_dir, env=env)
        except OSError as exc:
            sto.write('## ERROR - cannot run %s: %s\n' % (cl[0], exc))
            return 1
        return child.wait()

    def compressPDF(self, inpdf=None, thumbformat='png'):
        """Compress a PDF in place with ghostscript and make a thumbnail.

        Need absolute path to pdf. Note that GS gets confused if neither
        $TMP nor $TEMP is set, so a ``tmp`` directory inside the output
        directory is used as $TEMP in that case.

        inpdf: absolute path of the PDF to process.
        thumbformat: file extension for the thumbnail written by ``convert``.

        Returns 0 when both ghostscript and convert succeeded, otherwise the
        first non-zero exit status. Per-step log files are removed when the
        step succeeded.

        Raises AssertionError if ``inpdf`` is not an existing file.
        """
        assert inpdf and os.path.isfile(inpdf), \
            "## Input %s supplied to %s compressPDF not found" % (inpdf, type(self).__name__)
        outdir = self.opts.output_dir
        hlog = os.path.join(outdir, "compress_%s.txt" % os.path.basename(inpdf))
        our_env = os.environ.copy()
        our_tmp = our_env.get('TMP', None) or our_env.get('TEMP', None)
        with open(hlog, 'a') as sto:
            if not (our_tmp and os.path.exists(our_tmp)):
                newtmp = os.path.join(outdir, 'tmp')
                try:
                    os.mkdir(newtmp)
                except OSError:
                    sto.write('## WARNING - cannot make %s - it may exist or permissions need fixing\n' % newtmp)
                our_env['TEMP'] = newtmp
                if not self.temp_warned:
                    sto.write('## WARNING - no $TMP or $TEMP!!! Please fix - using %s temporarily\n' % newtmp)
                    self.temp_warned = True
            outpdf = '%s_compressed' % inpdf
            cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dUseCIEColor", "-dBATCH",
                  "-dPDFSETTINGS=/printer", "-sOutputFile=%s" % outpdf, inpdf]
            retval1 = self._run_logged(cl, sto, our_env)
        if retval1 == 0:
            os.unlink(inpdf)
            shutil.move(outpdf, inpdf)
            os.unlink(hlog)
        hlog = os.path.join(outdir, "thumbnail_%s.txt" % os.path.basename(inpdf))
        outpng = '%s.%s' % (os.path.splitext(inpdf)[0], thumbformat)
        with open(hlog, 'w') as sto:
            retval2 = self._run_logged(['convert', inpdf, outpng], sto, our_env)
        if retval2 == 0:
            os.unlink(hlog)
        return retval1 or retval2

    def getfSize(self, fpath, outpath):
        """Format a nice file size string for ``fpath`` inside ``outpath``.

        Returns '' when the file does not exist, otherwise the size in
        B, KB or MB ('0 B' for an empty file).
        """
        size = ''
        fp = os.path.join(outpath, fpath)
        if os.path.isfile(fp):
            size = '0 B'
            n = float(os.path.getsize(fp))
            if n > 2**20:
                size = '%1.1f MB' % (n / 2**20)
            elif n > 2**10:
                size = '%1.1f KB' % (n / 2**10)
            elif n > 0:
                size = '%d B' % (int(n))
        return size

    def _thumbnail_section(self, sectionname, ourpdfs):
        """Return HTML fragments showing ``ourpdfs`` as a near-square table.

        ourpdfs: non-empty list of ``(pdf name, thumbnail name)`` pairs.
        """
        npdf = len(ourpdfs)
        nacross = math.sqrt(npdf)
        if int(nacross)**2 != npdf:
            nacross += 1  # round up so every image fits
        nacross = int(nacross)
        width = min(400, 1200 // nacross)
        out = []
        out.append('<div class="toolFormTitle">%s images and outputs</div>' % sectionname)
        out.append('(Click on a thumbnail image to download the corresponding original PDF image)<br/>')
        ntogo = nacross  # counter for table row padding with empty cells
        out.append('<div><table class="simple" cellpadding="2" cellspacing="2">\n<tr>')
        cell = ('<td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="5" width="%d"\n'
                '   alt="Image called %s"/></a></td>\n')
        for i, (fname, thumb) in enumerate(ourpdfs):
            s = cell % (fname, thumb, fname, width, fname)
            if (i + 1) % nacross == 0:
                s += '</tr>\n'
                ntogo = 0
                if i < (npdf - 1):  # more to come
                    s += '<tr>'
                    ntogo = nacross
            else:
                ntogo -= 1
            out.append(s)
        if out[-1].strip().endswith('</tr>'):
            out.append('</table></div>\n')
        else:
            if ntogo > 0:  # pad the last row with empty cells
                out.append('<td>&nbsp;</td>' * ntogo)
            out.append('</tr></table></div>\n')
        return out

    def makeHtml(self):
        """Create an HTML file listing all the artifacts found in the output_dir.

        Every ``*.log`` file defines a section (the runner log is always the
        last one); PDFs are compressed, thumbnailed and shown in the section
        whose name matches their prefix before the first ``_``. The page is
        written to ``opts.output_html`` and its fragments kept in
        ``self.html``.
        """
        galhtmlprefix = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
<title></title>
<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
</head>
<body>
<div class="toolFormBody">
"""
        galhtmlpostfix = """</div></body></html>\n"""

        flist = sorted(x for x in os.listdir(self.opts.output_dir) if x != 'Rplots.pdf')
        html = []
        html.append(galhtmlprefix % progname)
        html.append('<div class="infomessage">Galaxy Tool "%s" run at %s</div><br/>' % (self.scriptname, timenow()))
        fhtml = []
        if len(flist) > 0:
            # log file names determine sections
            logfiles = sorted(x for x in flist if x.lower().endswith('.log'))
            logfiles = [x for x in logfiles if abspath(x) != abspath(self.tlog)]
            logfiles.append(abspath(self.tlog))  # make it the last one
            pdflist = []
            for rownum, fname in enumerate(flist):
                dname, e = os.path.splitext(fname)
                sfsize = self.getfSize(fname, self.opts.output_dir)
                if e.lower() == '.pdf':  # compress and make a thumbnail
                    thumb = '%s.%s' % (dname, self.thumbformat)
                    pdff = os.path.join(self.opts.output_dir, fname)
                    retval = self.compressPDF(inpdf=pdff, thumbformat=self.thumbformat)
                    if retval == 0:
                        pdflist.append((fname, thumb))
                    else:
                        # no thumbnail available: link the PDF to itself
                        pdflist.append((fname, fname))
                if (rownum + 1) % 2 == 0:
                    fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
                else:
                    fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
            for logfname in logfiles:  # expect at least tlog - if more
                if abspath(logfname) == abspath(self.tlog):  # runner log takes whatever PDFs are left
                    sectionname = 'All tool run'
                    if (len(logfiles) > 1):
                        sectionname = 'Other'
                    ourpdfs = pdflist
                else:
                    realname = os.path.basename(logfname)
                    sectionname = os.path.splitext(realname)[0].split('_')[0]  # break in case _ added to log
                    ourpdfs = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] == sectionname]
                    pdflist = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] != sectionname]  # remove
                if len(ourpdfs) > 0:
                    html += self._thumbnail_section(sectionname, ourpdfs)
                with open(logfname, 'r') as logf:
                    logtext = [x for x in logf if x.strip()]
                html.append('<div class="toolFormTitle">%s log output</div>' % sectionname)
                if len(logtext) > 1:
                    html.append('\n<pre>\n')
                    html += logtext
                    html.append('\n</pre>\n')
                else:
                    html.append('%s is empty<br/>' % logfname)
        if len(fhtml) > 0:
            fhtml.insert(0, '<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n')
            fhtml.append('</table></div><br/>')
            html.append('<div class="toolFormTitle">All output files available for downloading</div>\n')
            html += fhtml  # add all non-pdf files to the end of the display
        else:
            html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter)
        html.append(galhtmlpostfix)
        with open(self.opts.output_html, 'w') as htmlf:
            htmlf.write('\n'.join(html))
            htmlf.write('\n')
        self.html = html

    def run(self):
        """Run the script and return the interpreter's exit status.

        For ``bash``/``sh`` (when ``treatbashSpecial`` is set) this delegates
        to ``runBash``. Otherwise the script text is written to the
        interpreter's stdin. With an output directory, stdout goes to the
        runner log and stderr to the error log; on failure the error log is
        echoed to this process's stderr. The HTML report is built when
        ``opts.make_HTML`` is set.
        """
        if self.treatbashSpecial and self.opts.interpreter in ['bash', 'sh']:
            # runBash builds the HTML report itself
            return self.runBash()
        if self.opts.output_dir:
            with open(self.elog, 'w') as ste, open(self.tlog, 'w') as sto:
                sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
                sto.flush()
                # universal_newlines makes stdin a text stream so the script
                # string can be written on Python 3 as well
                p = subprocess.Popen(self.cl, shell=False, stdout=sto, stderr=ste,
                                     stdin=subprocess.PIPE, cwd=self.opts.output_dir,
                                     universal_newlines=True)
                p.stdin.write(self.script)
                p.stdin.close()
                retval = p.wait()
            with open(self.elog, 'r') as errlog:
                err = errlog.readlines()
            if retval != 0 and err:  # problem
                # emit the text itself rather than the repr of a list of lines
                sys.stderr.write(''.join(err))
        else:
            p = subprocess.Popen(self.cl, shell=False, stdin=subprocess.PIPE,
                                 universal_newlines=True)
            p.stdin.write(self.script)
            p.stdin.close()
            retval = p.wait()
        if self.opts.make_HTML:
            self.makeHtml()
        return retval

    def runBash(self):
        """Run the script file with bash/sh and return the exit status.

        Cannot use ``-`` (stdin) for bash, so ``self.sfile`` is already part
        of the command line. With an output directory both stdout and stderr
        go to the runner log. The HTML report is built when
        ``opts.make_HTML`` is set.
        """
        if self.opts.output_dir:
            with open(self.tlog, 'w') as sto:
                sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
                sto.flush()
                p = subprocess.Popen(self.cl, shell=False, stdout=sto, stderr=sto,
                                     cwd=self.opts.output_dir)
                retval = p.wait()
        else:
            p = subprocess.Popen(self.cl, shell=False)
            retval = p.wait()
        if self.opts.make_HTML:
            self.makeHtml()
        return retval
374 | |
375 | |
def change_user_id(new_uid, new_gid):
    """
    To avoid issues with wrong user ids, change the uid/gid of the 'galaxy'
    user in the container to the ids the script was originally called with.

    The home directory is pointed at /var/home/galaxy before the ids are
    changed and set back to /home/galaxy afterwards. Exit statuses of the
    individual commands are not checked.
    """
    usermod = "/usr/sbin/usermod"
    groupmod = "/usr/sbin/groupmod"
    steps = (
        [usermod, "-d", "/var/home/galaxy", "galaxy"],
        [usermod, "-u", new_uid, "galaxy"],
        [groupmod, "-g", new_gid, "galaxy"],
        [usermod, "-d", "/home/galaxy", "galaxy"],
    )
    for step in steps:
        subprocess.call(step)
386 | |
387 | |
def main():
    """Parse the command line and run the script, via docker on first entry.

    This is a Galaxy wrapper. It expects to be called by a special purpose
    tool.xml as:
    <command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
    </command>

    Without ``--dockerized`` the command is re-launched inside a container
    (``switch_to_docker``). Inside the container (``--dockerized 1``) the
    process switches to the given uid/gid and runs the script. A non-zero
    result is propagated through ``sys.exit`` in both modes.
    """
    op = argparse.ArgumentParser()
    a = op.add_argument
    a('--docker_image', default=None)
    a('--script_path', default=None)
    a('--tool_name', default=None)
    a('--interpreter', default=None)
    a('--output_dir', default='./')
    a('--output_html', default=None)
    # default is an empty list (not the string 'None') so "no inputs" is not
    # iterated character by character or bind-mounted as a path
    a('--input_tab', default=[], nargs='*')
    a('--output_tab', default='None')
    a('--user_email', default='Unknown')
    a('--bad_user', default=None)
    a('--make_HTML', default=None)
    a('--new_tool', default=None)
    # parsed as int so the comparison with 0 below is also right for "--dockerized 0"
    a('--dockerized', default=0, type=int)
    a('--group_id', default=None)
    a('--user_id', default=None)
    a('--output_format', default='tabular')
    a('--input_format', dest='input_formats', action='append', default=[])
    a('--additional_parameters', dest='additional_parameters', action='append', default=[])
    opts = op.parse_args()
    assert not opts.bad_user, 'UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user, opts.bad_user)
    assert opts.script_path and os.path.isfile(opts.script_path), '## Tool Factory wrapper expects a script path - eg --script_path=foo.R'
    if opts.output_dir:
        try:
            os.makedirs(opts.output_dir)
        except OSError:
            # best effort: usually the directory already exists; a real
            # failure surfaces when the directory is used
            pass
    if opts.dockerized == 0:
        retcode = switch_to_docker(opts)
        if retcode:
            sys.exit(retcode)  # indicate container failure to job runner
        return
    if opts.user_id is None or opts.group_id is None:
        op.error('--user_id and --group_id are required together with --dockerized')
    change_user_id(opts.user_id, opts.group_id)
    # drop the group first: after setuid the process may no longer be allowed to
    os.setgid(int(opts.group_id))
    os.setuid(int(opts.user_id))
    r = ScriptRunner(opts)
    try:
        retcode = r.run()
    finally:
        os.unlink(r.sfile)  # remove the temporary script even if run() raised
    if retcode:
        sys.exit(retcode)  # indicate failure to job runner
434 | |
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()