Mercurial > repos > mvdbeek > docker_scriptrunner
diff scriptrunner.py @ 0:21d312776891 draft
planemo upload for repository https://github.com/mvdbeek/docker_scriptrunner/ commit 30f8264cdd67d40dec8acde6407f32152e6a29c1-dirty
author | mvdbeek |
---|---|
date | Sat, 09 Jul 2016 16:57:13 -0400 |
parents | |
children | 495946ffc2d6 |
line wrap: on
line diff
# DockerToolFactory.py
# see https://github.com/mvdbeek/scriptrunner
"""Galaxy wrapper that runs an arbitrary script inside a docker container.

When invoked without ``--dockerized`` the script re-launches itself inside
the requested docker image (see ``switch_to_docker``).  Inside the container
it switches to the calling user's uid/gid and executes the user script through
``ScriptRunner``, optionally producing an HTML summary of the output
directory.

The code is written to run unchanged on Python 2 and Python 3.
"""

import sys
import shutil
import subprocess
import os
import time
import tempfile
import argparse
import getpass
import tarfile
import re
import math
import fileinput
from os.path import abspath


progname = os.path.split(sys.argv[0])[1]
verbose = False
debug = False


def timenow():
    """Return the current local time as a 'dd/mm/YYYY HH:MM:SS' string."""
    return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))


# Characters replaced by html_escape().  "$" is backslash-escaped rather than
# turned into an entity.
html_escape_table = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    "$": "\\$",
}


def html_escape(text):
    """Return *text* with the characters in html_escape_table replaced."""
    return "".join(html_escape_table.get(c, c) for c in text)


def cmd_exists(cmd):
    """Return True if the shell can resolve *cmd* (checked with ``type``)."""
    # ``type`` is a shell builtin, hence shell=True; output is discarded.
    with open(os.devnull, 'w') as devnull:
        return subprocess.call("type " + cmd, shell=True,
                               stdout=devnull, stderr=devnull) == 0


def construct_bind(host_path, container_path=False, binds=None, ro=True):
    """Build or extend a docker-py ``binds`` dictionary.

    host_path      -- a single path, or a list of paths, on the host.
    container_path -- mount point inside the container; defaults to the host
                      path.  For a list of host paths an explicit
                      container_path is only applied to the first entry, every
                      other entry is mounted at its own host path.
    binds          -- existing dictionary to extend; a new one is created when
                      it is None or empty.
    ro             -- mount read-only when True.

    Returns the binds dictionary
    ``{host_path: {'bind': container_path, 'ro': ro}}``.
    """
    # TODO remove container_path if it's always going to be the same as host_path
    if not binds:
        binds = {}
    if isinstance(host_path, list):
        for index, path in enumerate(host_path):
            if index == 0 and container_path:
                target = container_path
            else:
                target = path
            binds[path] = {'bind': target, 'ro': ro}
        return binds
    if not container_path:
        container_path = host_path
    binds[host_path] = {'bind': container_path, 'ro': ro}
    return binds


def switch_to_docker(opts):
    """Re-run this script with the same arguments inside a docker container.

    Script, output directory, input/output files and this wrapper itself are
    bind-mounted at identical paths inside the container.  The container's
    log is printed to stdout and the container is removed afterwards.

    Returns the container's exit status so the caller can propagate failures.
    """
    import docker  # local import: the container itself does not have docker-py
    user_id = os.getuid()
    group_id = os.getgid()
    docker_client = docker.Client()
    toolfactory_path = abspath(sys.argv[0])
    output_dir = abspath(opts.output_dir)

    binds = construct_bind(host_path=opts.script_path, ro=False)
    binds = construct_bind(binds=binds, host_path=output_dir, ro=False)
    if opts.input_tab:
        binds = construct_bind(binds=binds, host_path=opts.input_tab, ro=True)
    if not opts.output_tab == 'None':
        binds = construct_bind(binds=binds, host_path=opts.output_tab, ro=False)
    if opts.make_HTML:
        binds = construct_bind(binds=binds, host_path=opts.output_html, ro=False)
    binds = construct_bind(binds=binds, host_path=toolfactory_path)
    volumes = list(binds)

    # Inject the absolute path of the working dir.  ``i > 0`` stops the
    # look-behind from wrapping around to the last argument for argv[0].
    sys.argv = [output_dir if (i > 0 and sys.argv[i - 1] == '--output_dir') else arg
                for i, arg in enumerate(sys.argv)]
    cmd = (['python', '-u'] + sys.argv +
           ['--dockerized', '1', "--user_id", str(user_id), "--group_id", str(group_id)])

    # RepoTags can be None for untagged images.
    image_exists = any(opts.docker_image in (image.get('RepoTags') or [])
                       for image in docker_client.images())
    if not image_exists:
        docker_client.pull(opts.docker_image)

    container = docker_client.create_container(
        image=opts.docker_image,
        volumes=volumes,
        command=cmd
    )
    container_id = container['Id']
    try:
        docker_client.start(container=container_id, binds=binds)
        status = docker_client.wait(container=container_id)
        logs = docker_client.logs(container=container_id)
        if not isinstance(logs, str):  # bytes on Python 3
            logs = logs.decode('utf-8', 'replace')
        print(logs)
    finally:
        # Always clean up, even if waiting for or reading from the container failed.
        docker_client.remove_container(container_id)
    if isinstance(status, dict):  # newer docker clients return a dict
        status = status.get('StatusCode', 0)
    return status


class ScriptRunner:
    """Wrapper that executes an arbitrary script and collects its outputs."""

    def __init__(self, opts=None, treatbashSpecial=True, image_tag='base'):
        """Clean up inputs, set up outputs and build the command line.

        Changes the working directory to opts.output_dir, writes a copy of
        the script to a temporary file (self.sfile) and, when an output
        directory is configured, to ``script.<interpreter>`` inside it.
        """
        self.opts = opts
        self.scriptname = 'script'
        self.useIM = cmd_exists('convert')
        self.useGS = cmd_exists('gs')
        self.temp_warned = False  # we want only one warning if $TMP not set
        self.treatbashSpecial = treatbashSpecial
        self.image_tag = image_tag
        os.chdir(abspath(opts.output_dir))
        self.thumbformat = 'png'
        with open(self.opts.script_path, 'r') as script_handle:
            # rstrip removes pesky dos line endings if needed
            s = [x.rstrip() for x in script_handle.readlines()]
        self.script = '\n'.join(s)
        fhandle, self.sfile = tempfile.mkstemp(prefix='script', suffix=".%s" % (opts.interpreter))
        # Write through the descriptor returned by mkstemp so it gets closed.
        with os.fdopen(fhandle, 'w') as tscript:  # self.sfile is the script source for Popen
            tscript.write(self.script)
        self.indentedScript = '\n'.join([' %s' % html_escape(x) for x in s])  # for restructured text in help
        self.escapedScript = '\n'.join([html_escape(x) for x in s])
        self.elog = os.path.join(self.opts.output_dir, "%s_error.log" % self.scriptname)
        if opts.output_dir:  # may not want these complexities
            self.tlog = os.path.join(self.opts.output_dir, "%s_runner.log" % self.scriptname)
            art = '%s.%s' % (self.scriptname, opts.interpreter)
            artpath = os.path.join(self.opts.output_dir, art)  # need full path
            with open(artpath, 'w') as artifact:  # keep a copy of the script with the outputs
                artifact.write(self.script)
        self.cl = []
        self.html = []
        a = self.cl.append
        a(opts.interpreter)
        if self.treatbashSpecial and opts.interpreter in ['bash', 'sh']:
            a(self.sfile)
        else:
            a('-')  # stdin
        for input_path in opts.input_tab:
            a(input_path)
        if opts.output_tab == 'None':  # If tool generates only HTML, set output name to toolname
            a(str(self.scriptname) + '.out')
        a(opts.output_tab)
        for param in opts.additional_parameters:
            # Split on the first comma only so values may contain commas.
            name, value = param.split(',', 1)
            a('--' + name)
            a(value)
        self.outFormats = opts.output_format
        self.inputFormats = list(opts.input_formats)
        self.test1Input = '%s_test1_input.xls' % self.scriptname
        self.test1Output = '%s_test1_output.xls' % self.scriptname
        self.test1HTML = '%s_test1_output.html' % self.scriptname

    def compressPDF(self, inpdf=None, thumbformat='png'):
        """Compress *inpdf* in place with gs and render a thumbnail with convert.

        Needs an absolute path to the pdf.  GS gets confused if neither $TMP
        nor $TEMP points to an existing directory, so $TEMP is set for the
        child processes in that case.

        Returns 0 when both steps succeed, otherwise the first non-zero exit
        status.  Raises AssertionError when *inpdf* is not an existing file.
        """
        if not (inpdf and os.path.isfile(inpdf)):
            raise AssertionError(
                "## Input %s supplied to %s compressPDF not found" % (inpdf, progname))
        outdir = self.opts.output_dir
        our_env = os.environ.copy()
        our_tmp = our_env.get('TMP', None) or our_env.get('TEMP', None)
        outpdf = '%s_compressed' % inpdf

        hlog = os.path.join(outdir, "compress_%s.txt" % os.path.basename(inpdf))
        with open(hlog, 'a') as sto:
            if not (our_tmp and os.path.exists(our_tmp)):
                newtmp = os.path.join(outdir, 'tmp')
                try:
                    os.mkdir(newtmp)
                except OSError:
                    sto.write('## WARNING - cannot make %s - it may exist or permissions need fixing\n' % newtmp)
                our_env['TEMP'] = newtmp
                if not self.temp_warned:
                    sto.write('## WARNING - no $TMP or $TEMP!!! Please fix - using %s temporarily\n' % newtmp)
                    self.temp_warned = True
            cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dUseCIEColor", "-dBATCH",
                  "-dPDFSETTINGS=/printer", "-sOutputFile=%s" % outpdf, inpdf]
            sto.flush()  # keep our warnings ahead of the child's output
            retval1 = subprocess.Popen(cl, stdout=sto, stderr=sto, cwd=outdir, env=our_env).wait()
        if retval1 == 0:
            os.unlink(inpdf)
            shutil.move(outpdf, inpdf)
            os.unlink(hlog)  # the log is only kept on failure

        hlog = os.path.join(outdir, "thumbnail_%s.txt" % os.path.basename(inpdf))
        outpng = '%s.%s' % (os.path.splitext(inpdf)[0], thumbformat)
        cl2 = ['convert', inpdf, outpng]
        with open(hlog, 'w') as sto:
            retval2 = subprocess.Popen(cl2, stdout=sto, stderr=sto, cwd=outdir, env=our_env).wait()
        if retval2 == 0:
            os.unlink(hlog)
        return retval1 or retval2

    def getfSize(self, fpath, outpath):
        """Return a human readable size string for outpath/fpath.

        Returns '' when the path is not a regular file and '0 B' for an
        empty file.
        """
        size = ''
        fp = os.path.join(outpath, fpath)
        if os.path.isfile(fp):
            size = '0 B'
            n = float(os.path.getsize(fp))
            if n > 2**20:
                size = '%1.1f MB' % (n / 2**20)
            elif n > 2**10:
                size = '%1.1f KB' % (n / 2**10)
            elif n > 0:
                size = '%d B' % (int(n))
        return size

    def makeHtml(self):
        """Write an HTML page listing all artifacts found in the output_dir.

        PDFs are compressed and thumbnailed; every ``*.log`` file starts a
        section that shows the PDFs sharing its name prefix (text before the
        first '_') followed by the log text.  The runner log is always the
        last section and collects the remaining PDFs.  The page is written
        to opts.output_html and the list of fragments kept in self.html.
        """
        galhtmlprefix = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
<title></title>
<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
</head>
<body>
<div class="toolFormBody">
"""
        galhtmlpostfix = """</div></body></html>\n"""

        outdir = self.opts.output_dir
        flist = sorted(x for x in os.listdir(outdir) if x != 'Rplots.pdf')
        html = []
        html.append(galhtmlprefix % progname)
        html.append('<div class="infomessage">Galaxy Tool "%s" run at %s</div><br/>' % (self.scriptname, timenow()))
        fhtml = []
        if len(flist) > 0:
            tlog_path = abspath(self.tlog)
            # log file names determine sections; the runner log goes last
            logfiles = sorted(x for x in flist if x.lower().endswith('.log'))
            logfiles = [x for x in logfiles if abspath(x) != tlog_path]
            logfiles.append(tlog_path)
            pdflist = []
            for rownum, fname in enumerate(flist):
                dname, ext = os.path.splitext(fname)
                sfsize = self.getfSize(fname, outdir)
                if ext.lower() == '.pdf':  # compress and make a thumbnail
                    thumb = '%s.%s' % (dname, self.thumbformat)
                    pdff = os.path.join(outdir, fname)
                    retval = self.compressPDF(inpdf=pdff, thumbformat=self.thumbformat)
                    if retval == 0:
                        pdflist.append((fname, thumb))
                    else:
                        pdflist.append((fname, fname))  # no thumbnail - link to the pdf itself
                if (rownum + 1) % 2 == 0:
                    fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
                else:
                    fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
            for logfname in logfiles:  # expect at least tlog - if more
                if abspath(logfname) == tlog_path:  # handled last, takes all remaining pdfs
                    sectionname = 'All tool run'
                    if len(logfiles) > 1:
                        sectionname = 'Other'
                    ourpdfs = pdflist
                else:
                    realname = os.path.basename(logfname)
                    sectionname = os.path.splitext(realname)[0].split('_')[0]  # break in case _ added to log
                    ourpdfs = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] == sectionname]
                    # remove the pdfs claimed by this section
                    pdflist = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] != sectionname]
                npdf = len(ourpdfs)
                if npdf > 0:
                    # lay the thumbnails out in a roughly square grid
                    nacross = math.sqrt(npdf)
                    if int(nacross)**2 != npdf:
                        nacross += 1
                    nacross = int(nacross)
                    width = min(400, 1200 // nacross)
                    html.append('<div class="toolFormTitle">%s images and outputs</div>' % sectionname)
                    html.append('(Click on a thumbnail image to download the corresponding original PDF image)<br/>')
                    ntogo = nacross  # counter for table row padding with empty cells
                    html.append('<div><table class="simple" cellpadding="2" cellspacing="2">\n<tr>')
                    for i, (pdfname, thumb) in enumerate(ourpdfs):
                        cell = """<td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="5" width="%d"
 alt="Image called %s"/></a></td>\n""" % (pdfname, thumb, pdfname, width, pdfname)
                        if (i + 1) % nacross == 0:
                            cell += '</tr>\n'
                            ntogo = 0
                            if i < (npdf - 1):  # more to come
                                cell += '<tr>'
                                ntogo = nacross
                        else:
                            ntogo -= 1
                        html.append(cell)
                    if html[-1].strip().endswith('</tr>'):
                        html.append('</table></div>\n')
                    else:
                        if ntogo > 0:  # pad
                            html.append('<td>&nbsp;</td>' * ntogo)
                        html.append('</tr></table></div>\n')
                with open(logfname, 'r') as loghandle:
                    logtext = [x for x in loghandle.readlines() if x.strip()]
                html.append('<div class="toolFormTitle">%s log output</div>' % sectionname)
                if len(logtext) > 1:
                    html.append('\n<pre>\n')
                    html += logtext
                    html.append('\n</pre>\n')
                else:
                    html.append('%s is empty<br/>' % logfname)
            if len(fhtml) > 0:
                fhtml.insert(0, '<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n')
                fhtml.append('</table></div><br/>')
                html.append('<div class="toolFormTitle">All output files available for downloading</div>\n')
                html += fhtml  # add all non-pdf files to the end of the display
        else:
            html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter)
        html.append(galhtmlpostfix)
        with open(self.opts.output_html, 'w') as htmlf:
            htmlf.write('\n'.join(html))
            htmlf.write('\n')
        self.html = html

    def run(self):
        """Execute the script and return the interpreter's exit status.

        bash/sh scripts are delegated to runBash(); every other interpreter
        receives the script on stdin, so scripts must be small enough not to
        fill the pipe!  stdout goes to the runner log and stderr to the
        error log when an output directory is configured.
        """
        if self.treatbashSpecial and self.opts.interpreter in ['bash', 'sh']:
            return self.runBash()
        use_logs = bool(self.opts.output_dir)
        if use_logs:
            ste = open(self.elog, 'w')
            sto = open(self.tlog, 'w')
        try:
            if use_logs:
                sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
                sto.flush()
                p = subprocess.Popen(self.cl, shell=False, stdout=sto, stderr=ste,
                                     stdin=subprocess.PIPE, cwd=self.opts.output_dir,
                                     universal_newlines=True)
            else:
                p = subprocess.Popen(self.cl, shell=False, stdin=subprocess.PIPE,
                                     universal_newlines=True)
            # universal_newlines gives a text-mode stdin on Python 3
            p.stdin.write(self.script)
            p.stdin.close()
            retval = p.wait()
        finally:
            if use_logs:
                sto.close()
                ste.close()
        if use_logs:
            with open(self.elog, 'r') as errhandle:
                err = errhandle.readlines()
            if retval != 0 and err:  # problem - surface the script's stderr
                sys.stderr.write(''.join(err))
        if self.opts.make_HTML:
            self.makeHtml()
        return retval

    def runBash(self):
        """Run a bash/sh script from self.sfile (bash cannot use '-' for stdin).

        stdout and stderr both go to the runner log when an output directory
        is configured.  Returns the interpreter's exit status.
        """
        if self.opts.output_dir:
            with open(self.tlog, 'w') as sto:
                sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
                sto.flush()
                p = subprocess.Popen(self.cl, shell=False, stdout=sto, stderr=sto,
                                     cwd=self.opts.output_dir)
                retval = p.wait()
        else:
            retval = subprocess.Popen(self.cl, shell=False).wait()
        if self.opts.make_HTML:
            self.makeHtml()
        return retval


def change_user_id(new_uid, new_gid):
    """Give the container's 'galaxy' user the uid/gid of the calling user.

    To avoid issues with wrong user ids, we change the user id of the
    'galaxy' user in the container to the user id with which the script has
    been called initially.  The home directory is pointed elsewhere while
    the ids are changed and set back to /home/galaxy afterwards.
    """
    commands = [
        ["/usr/sbin/usermod", "-d", "/var/home/galaxy", "galaxy"],
        ["/usr/sbin/usermod", "-u", str(new_uid), "galaxy"],
        ["/usr/sbin/groupmod", "-g", str(new_gid), "galaxy"],
        ["/usr/sbin/usermod", "-d", "/home/galaxy", "galaxy"],
    ]
    for cmd in commands:
        subprocess.call(cmd)


def main():
    """Parse the command line and run the script, in docker if needed.

    This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
    <command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
    </command>
    """
    op = argparse.ArgumentParser()
    a = op.add_argument
    a('--docker_image', default=None)
    a('--script_path', default=None)
    a('--tool_name', default=None)
    a('--interpreter', default=None)
    a('--output_dir', default='./')
    a('--output_html', default=None)
    # An empty list (not the string 'None') so that iterating yields no inputs.
    a('--input_tab', default=[], nargs='*')
    a('--output_tab', default='None')
    a('--user_email', default='Unknown')
    a('--bad_user', default=None)
    a('--make_HTML', default=None)
    a('--new_tool', default=None)
    # Parsed as int so that the default and command-line values compare alike.
    a('--dockerized', default=0, type=int)
    a('--group_id', default=None)
    a('--user_id', default=None)
    a('--output_format', default='tabular')
    a('--input_format', dest='input_formats', action='append', default=[])
    a('--additional_parameters', dest='additional_parameters', action='append', default=[])
    opts = op.parse_args()
    assert not opts.bad_user, 'UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user, opts.bad_user)
    assert opts.script_path and os.path.isfile(opts.script_path), '## Tool Factory wrapper expects a script path - eg --script_path=foo.R'
    if opts.output_dir:
        try:
            os.makedirs(opts.output_dir)
        except OSError:
            pass  # best effort - the directory usually exists already
    if not opts.dockerized:
        status = switch_to_docker(opts)
        if status:
            sys.exit(status)  # indicate failure to job runner
        return
    change_user_id(opts.user_id, opts.group_id)
    os.setgid(int(opts.group_id))
    os.setuid(int(opts.user_id))
    r = ScriptRunner(opts)
    try:
        retcode = r.run()
    finally:
        os.unlink(r.sfile)  # always remove the temporary script copy
    if retcode:
        sys.exit(retcode)  # indicate failure to job runner


if __name__ == "__main__":
    main()