Mercurial > repos > mvdbeek > docker_scriptrunner
view scriptrunner.py @ 2:495946ffc2d6 draft default tip
planemo upload for repository https://github.com/mvdbeek/docker_scriptrunner/ commit dded837d19aeb3f06b84e5076282cedeeaf713fa
author | mvdbeek |
---|---|
date | Sun, 22 Jul 2018 13:38:01 -0400 |
parents | 21d312776891 |
children |
line wrap: on
line source
# DockerToolFactory.py
# see https://github.com/mvdbeek/scriptrunner
#
# Runs a user supplied script with a chosen interpreter. When started without
# ``--dockerized`` the script re-launches itself inside a docker container
# (see switch_to_docker); inside the container it drops privileges to the
# calling user's uid/gid and executes the script through ScriptRunner.
from __future__ import print_function

import argparse
import math
import os
import shutil
import subprocess
import sys
import tempfile
import time
from os.path import abspath

progname = os.path.split(sys.argv[0])[1]
verbose = False
debug = False

# Characters replaced by html_escape(). '$' is backslash-escaped rather than
# turned into an entity.
html_escape_table = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    "$": "\\$",
}


def timenow():
    """Return the current local time formatted as 'dd/mm/YYYY HH:MM:SS'."""
    return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))


def html_escape(text):
    """Return text with every character found in html_escape_table replaced."""
    return "".join(html_escape_table.get(c, c) for c in text)


def cmd_exists(cmd):
    """Return True if the shell builtin ``type`` can resolve cmd.

    NOTE: cmd is interpolated into a shell command line unquoted, so only
    pass trusted command names.
    """
    return subprocess.call("type " + cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0


def construct_bind(host_path, container_path=False, binds=None, ro=True):
    """Build or extend a binds dictionary used to mount files with docker-py.

    host_path -- a single path, or a list of paths, on the host.
    container_path -- mount point inside the container; defaults to the host
        path. For a list of host paths it only applies to the first element,
        all following elements are mounted at their own host path.
    binds -- existing dictionary to extend; a new one is created if empty or
        None.
    ro -- stored as the 'ro' flag of every entry that is added.

    Returns the dictionary mapping host path -> {'bind': ..., 'ro': ...}.
    """
    # TODO remove container_path if it's always going to be the same as host_path
    if not binds:
        binds = {}
    if isinstance(host_path, list):
        for path in host_path:
            binds[path] = {'bind': container_path or path, 'ro': ro}
            container_path = False  # an explicit container path is used once only
        return binds
    binds[host_path] = {'bind': container_path or host_path, 'ro': ro}
    return binds


def _image_available(images, image_name):
    """Return True if image_name is among the RepoTags of any listed image."""
    # Untagged images report RepoTags as None, hence the `or []`.
    return any(image_name in (image.get('RepoTags') or []) for image in images)


def switch_to_docker(opts):
    """Re-run this script inside a container of opts.docker_image.

    Script, output directory, input/output files and this file itself are
    bind-mounted at identical paths, the current command line is replayed with
    ``--dockerized 1 --user_id <uid> --group_id <gid>`` appended, and the
    container logs are copied to stderr.

    Side effect: sys.argv is rewritten so that the value following
    ``--output_dir`` is an absolute path.

    Returns the exit status reported by the container.
    """
    import docker  # need local import, as container does not have docker-py
    user_id = os.getuid()
    group_id = os.getgid()
    docker_client = docker.APIClient()
    toolfactory_path = abspath(sys.argv[0])
    output_dir = abspath(opts.output_dir)

    binds = construct_bind(host_path=opts.script_path, ro=False)
    binds = construct_bind(binds=binds, host_path=output_dir, ro=False)
    if len(opts.input_file) > 0:
        binds = construct_bind(binds=binds, host_path=opts.input_file, ro=True)
    if not opts.output_file == 'None':
        binds = construct_bind(binds=binds, host_path=opts.output_file, ro=False)
    if opts.make_HTML:
        binds = construct_bind(binds=binds, host_path=opts.output_html, ro=False)
    binds = construct_bind(binds=binds, host_path=toolfactory_path)
    volumes = list(binds.keys())

    # inject absolute path of working_dir; index 0 (the script itself) has no
    # predecessor and is never replaced
    sys.argv = [output_dir if i > 0 and sys.argv[i - 1] == '--output_dir' else arg
                for i, arg in enumerate(sys.argv)]
    cmd = ['python', '-u'] + sys.argv + ['--dockerized', '1', "--user_id", str(user_id), "--group_id", str(group_id)]

    if not _image_available(docker_client.images(), opts.docker_image):
        docker_client.pull(opts.docker_image)
    container = docker_client.create_container(
        image=opts.docker_image,
        volumes=volumes,
        command=cmd,
        host_config=docker_client.create_host_config(binds=binds))
    container_id = container[u'Id']
    try:
        docker_client.start(container=container_id)
        exit_code = docker_client.wait(container=container_id)['StatusCode']
        logs = docker_client.logs(container=container_id)
        if not isinstance(logs, str):
            # python 3: logs are bytes and would otherwise be printed as b'...'
            logs = logs.decode('utf-8', 'replace')
        print(logs, end="", file=sys.stderr)
    finally:
        # do not leave the container behind when start/wait/logs fails
        docker_client.remove_container(container_id)
    return exit_code


class ScriptRunner(object):
    """Wrapper that runs an arbitrary script with a chosen interpreter."""

    def __init__(self, opts=None, treatbashSpecial=True, image_tag='base'):
        """Read the script, write working copies of it and build the command line.

        opts -- parsed command line options (see main()).
        treatbashSpecial -- if true, bash/sh get the script as a file argument
            instead of on stdin.
        image_tag -- stored on the instance; not used by this class.

        Side effects: changes the working directory to opts.output_dir, writes
        the script to a temporary file (self.sfile, removal is up to the
        caller) and, if opts.output_dir is set, to
        '<output_dir>/script.<interpreter>'.
        """
        self.opts = opts
        self.scriptname = 'script'
        self.treatbashSpecial = treatbashSpecial
        self.image_tag = image_tag
        os.chdir(abspath(opts.output_dir))
        self.thumbformat = 'png'
        with open(self.opts.script_path, 'r') as script_file:
            lines = [x.rstrip() for x in script_file]  # remove pesky dos line endings if needed
        self.script = '\n'.join(lines)
        fhandle, self.sfile = tempfile.mkstemp(prefix='script', suffix=".%s" % (opts.interpreter))
        # wrap the descriptor returned by mkstemp so that it gets closed
        with os.fdopen(fhandle, 'w') as tscript:  # self.sfile is the script source for bash/sh
            tscript.write(self.script)
        self.indentedScript = '\n'.join([' %s' % html_escape(x) for x in lines])  # for restructured text in help
        self.escapedScript = '\n'.join([html_escape(x) for x in lines])
        self.elog = os.path.join(self.opts.output_dir, "%s_error.log" % self.scriptname)
        if opts.output_dir:  # may not want these complexities
            self.tlog = os.path.join(self.opts.output_dir, "%s_runner.log" % self.scriptname)
            art = '%s.%s' % (self.scriptname, opts.interpreter)
            artpath = os.path.join(self.opts.output_dir, art)  # need full path
            with open(artpath, 'w') as artifact:  # keep a copy of the script next to the outputs
                artifact.write(self.script)
        self.cl = []
        self.html = []
        self.cl.append(opts.interpreter)
        if self.treatbashSpecial and opts.interpreter in ['bash', 'sh']:
            self.cl.append(self.sfile)
        else:
            self.cl.append('-')  # stdin
        for input_path in opts.input_file:
            self.cl.append(input_path)
        if opts.output_file == 'None':  # If tool generates only HTML, set output name to toolname
            self.cl.append(str(self.scriptname) + '.out')
        # NOTE(review): the literal 'None' is still appended below in the
        # HTML-only case; left as is because scripts may rely on argument
        # positions - confirm before changing.
        self.cl.append(opts.output_file)
        for param in opts.additional_parameters:
            # 'name,value' - split on the first comma only, so that values may
            # contain commas themselves
            name, value = param.split(',', 1)
            self.cl.append('--' + name)
            self.cl.append(value)
        self.outFormats = opts.output_format
        self.inputFormats = list(opts.input_formats)
        self.test1Input = '%s_test1_input.xls' % self.scriptname
        self.test1Output = '%s_test1_output.xls' % self.scriptname
        self.test1HTML = '%s_test1_output.html' % self.scriptname

    def compressPDF(self, inpdf=None, thumbformat='png'):
        """Compress a PDF in place with ghostscript and render a thumbnail.

        inpdf -- absolute path to the PDF.
        thumbformat -- file extension handed to ImageMagick ``convert``.

        The per-step log files ('compress_<pdf>.txt', 'thumbnail_<pdf>.txt'
        in the output directory) are removed when the step succeeds.

        Returns 0 if both steps succeeded, otherwise the first non-zero exit
        status.
        """
        assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf, self.scriptname)
        our_env = os.environ.copy()
        if not our_env.get('TMP', None):
            our_env['TMP'] = tempfile.gettempdir()

        hlog = os.path.join(self.opts.output_dir, "compress_%s.txt" % os.path.basename(inpdf))
        outpdf = '%s_compressed' % inpdf
        cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dUseCIEColor", "-dBATCH",
              "-dPDFSETTINGS=/printer", "-sOutputFile=%s" % outpdf, inpdf]
        with open(hlog, 'a') as sto:
            retval1 = subprocess.call(cl, stdout=sto, stderr=sto, cwd=self.opts.output_dir, env=our_env)
        if retval1 == 0:
            os.unlink(inpdf)
            shutil.move(outpdf, inpdf)
            os.unlink(hlog)

        hlog = os.path.join(self.opts.output_dir, "thumbnail_%s.txt" % os.path.basename(inpdf))
        outpng = '%s.%s' % (os.path.splitext(inpdf)[0], thumbformat)
        cl2 = ['convert', inpdf, outpng]
        with open(hlog, 'w') as sto:
            retval2 = subprocess.call(cl2, stdout=sto, stderr=sto, cwd=self.opts.output_dir, env=our_env)
        if retval2 == 0:
            os.unlink(hlog)
        return retval1 or retval2

    def getfSize(self, fpath, outpath):
        """Return a human readable size ('0 B', '12 B', '1.5 KB', '3.2 MB').

        fpath is joined onto outpath; an empty string is returned if the
        result is not an existing file.
        """
        size = ''
        fp = os.path.join(outpath, fpath)
        if os.path.isfile(fp):
            size = '0 B'
            n = float(os.path.getsize(fp))
            if n > 2**20:
                size = '%1.1f MB' % (n / 2**20)
            elif n > 2**10:
                size = '%1.1f KB' % (n / 2**10)
            elif n > 0:
                size = '%d B' % (int(n))
        return size

    def makeHtml(self):
        """Write opts.output_html listing all artifacts found in the output_dir.

        Every '*.log' file opens a section (the runner log always comes last);
        PDFs are compressed, thumbnailed and shown in the section whose name
        matches the part of their file name before the first '_', remaining
        PDFs go into the runner log section. A table of all files and their
        sizes is appended. The generated lines are kept in self.html.
        """
        galhtmlprefix = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
<title></title>
<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
</head>
<body>
<div class="toolFormBody">
"""
        galhtmlpostfix = """</div></body></html>\n"""
        flist = sorted(x for x in os.listdir(self.opts.output_dir) if x != 'Rplots.pdf')
        html = []
        html.append(galhtmlprefix % progname)
        html.append('<div class="infomessage">Galaxy Tool "%s" run at %s</div><br/>' % (self.scriptname, timenow()))
        fhtml = []
        if len(flist) > 0:
            # log file names determine sections; names are relative, which
            # works because __init__ changed into the output directory
            logfiles = sorted(x for x in flist if x.lower().endswith('.log'))
            logfiles = [x for x in logfiles if abspath(x) != abspath(self.tlog)]
            logfiles.append(abspath(self.tlog))  # make it the last one
            pdflist = []
            for rownum, fname in enumerate(flist):
                dname, e = os.path.splitext(fname)
                sfsize = self.getfSize(fname, self.opts.output_dir)
                if e.lower() == '.pdf':  # compress and make a thumbnail
                    thumb = '%s.%s' % (dname, self.thumbformat)
                    pdff = os.path.join(self.opts.output_dir, fname)
                    retval = self.compressPDF(inpdf=pdff, thumbformat=self.thumbformat)
                    if retval == 0:
                        pdflist.append((fname, thumb))
                    else:
                        pdflist.append((fname, fname))  # no thumbnail, show the PDF itself
                if (rownum + 1) % 2 == 0:
                    fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
                else:
                    fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
            for logfname in logfiles:  # expect at least tlog - if more
                if abspath(logfname) == abspath(self.tlog):  # gets whatever PDFs are left
                    sectionname = 'All tool run'
                    if (len(logfiles) > 1):
                        sectionname = 'Other'
                    ourpdfs = pdflist
                else:
                    realname = os.path.basename(logfname)
                    sectionname = os.path.splitext(realname)[0].split('_')[0]  # break in case _ added to log
                    ourpdfs = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] == sectionname]
                    pdflist = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] != sectionname]  # remove
                npdf = len(ourpdfs)
                if npdf > 0:
                    # lay the thumbnails out on a (roughly) square grid
                    nacross = math.sqrt(npdf)
                    if int(nacross)**2 != npdf:
                        nacross += 1
                    nacross = int(nacross)
                    width = min(400, 1200 // nacross)
                    html.append('<div class="toolFormTitle">%s images and outputs</div>' % sectionname)
                    html.append('(Click on a thumbnail image to download the corresponding original PDF image)<br/>')
                    ntogo = nacross  # counter for table row padding with empty cells
                    html.append('<div><table class="simple" cellpadding="2" cellspacing="2">\n<tr>')
                    for i, paths in enumerate(ourpdfs):
                        fname, thumb = paths
                        s = """<td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="5" width="%d" alt="Image called %s"/></a></td>\n""" % (fname, thumb, fname, width, fname)
                        if ((i + 1) % nacross == 0):
                            s += '</tr>\n'
                            ntogo = 0
                            if i < (npdf - 1):  # more to come
                                s += '<tr>'
                                ntogo = nacross
                        else:
                            ntogo -= 1
                        html.append(s)
                    if html[-1].strip().endswith('</tr>'):
                        html.append('</table></div>\n')
                    else:
                        if ntogo > 0:  # pad
                            html.append('<td> </td>' * ntogo)
                        html.append('</tr></table></div>\n')
                with open(logfname, 'r') as logfile:
                    logtext = [x for x in logfile if x.strip()]
                html.append('<div class="toolFormTitle">%s log output</div>' % sectionname)
                if len(logtext) > 1:
                    html.append('\n<pre>\n')
                    html += logtext
                    html.append('\n</pre>\n')
                else:
                    html.append('%s is empty<br/>' % logfname)
        if len(fhtml) > 0:
            fhtml.insert(0, '<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n')
            fhtml.append('</table></div><br/>')
            html.append('<div class="toolFormTitle">All output files available for downloading</div>\n')
            html += fhtml  # add all non-pdf files to the end of the display
        else:
            html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter)
        html.append(galhtmlpostfix)
        with open(self.opts.output_html, 'w') as htmlf:
            htmlf.write('\n'.join(html))
            htmlf.write('\n')
        self.html = html

    def run(self):
        """Execute the script and return the interpreter's exit status.

        bash/sh are delegated to runBash() (unless treatbashSpecial is off);
        every other interpreter receives the script on stdin. If
        opts.output_dir is set, stdout goes to self.tlog and stderr to
        self.elog, and on failure the error log is echoed to stderr. The HTML
        report is written afterwards if opts.make_HTML is set.
        """
        if self.treatbashSpecial and self.opts.interpreter in ['bash', 'sh']:
            return self.runBash()
        script = self.script
        if not isinstance(script, bytes):
            # the stdin pipe is binary on python 3
            script = script.encode('utf-8')
        if self.opts.output_dir:
            with open(self.elog, 'w') as ste, open(self.tlog, 'w') as sto:
                sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
                sto.flush()  # the header has to precede the child's output
                p = subprocess.Popen(self.cl, shell=False, stdout=sto, stderr=ste,
                                     stdin=subprocess.PIPE, cwd=self.opts.output_dir)
                p.communicate(script)
            retval = p.returncode
            with open(self.elog, 'r') as errfile:
                err = errfile.read()
            if retval != 0 and err:  # problem
                print(err, end="", file=sys.stderr)
        else:
            p = subprocess.Popen(self.cl, shell=False, stdin=subprocess.PIPE)
            p.communicate(script)
            retval = p.returncode
        if self.opts.make_HTML:
            self.makeHtml()
        return retval

    def runBash(self):
        """Execute the script from self.sfile, as '-' cannot be used for bash.

        If opts.output_dir is set, stdout and stderr both go to self.tlog.
        The HTML report is written afterwards if opts.make_HTML is set.
        Returns the interpreter's exit status.
        """
        if self.opts.output_dir:
            with open(self.tlog, 'w') as sto:
                sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
                sto.flush()  # the header has to precede the child's output
                retval = subprocess.call(self.cl, shell=False, stdout=sto, stderr=sto, cwd=self.opts.output_dir)
        else:
            retval = subprocess.call(self.cl, shell=False)
        if self.opts.make_HTML:
            self.makeHtml()
        return retval


def change_user_id(new_uid, new_gid):
    """
    To avoid issues with wrong user ids, we change the user id of the 'galaxy' user in the container
    to the user id with which the script has been called initially.

    The home directory is switched to /var/home/galaxy first and back to
    /home/galaxy last. Exit codes of usermod/groupmod are not checked.
    """
    commands = [
        ["/usr/sbin/usermod", "-d", "/var/home/galaxy", "galaxy"],
        ["/usr/sbin/usermod", "-u", str(new_uid), "galaxy"],
        ["/usr/sbin/groupmod", "-g", str(new_gid), "galaxy"],
        ["/usr/sbin/usermod", "-d", "/home/galaxy", "galaxy"],
    ]
    for cmd in commands:
        subprocess.call(cmd)


def main():
    """Parse the command line and run the script, in docker if not there yet.

    Without ``--dockerized`` the process hands over to switch_to_docker() and
    exits with the container's status. Inside the container it switches to the
    given user/group id, runs the script and exits non-zero if the script
    failed.
    """
    op = argparse.ArgumentParser()
    a = op.add_argument
    a('--docker_image', default=None)
    a('--script_path', default=None)
    a('--tool_name', default=None)
    a('--interpreter', default=None)
    a('--output_dir', default='./')
    a('--output_html', default=None)
    # default is an empty list: a string default would be iterated character
    # by character and bind-mounted as if it were a path
    a('--input_file', default=[], nargs='*')
    a('--output_file', default='None')
    a('--user_email', default='Unknown')
    a('--bad_user', default=None)
    a('--make_HTML', default=None)
    a('--new_tool', default=None)
    a('--dockerized', default=0)
    a('--group_id', default=None)
    a('--user_id', default=None)
    a('--output_format', default='tabular')
    a('--input_format', dest='input_formats', action='append', default=[])
    a('--additional_parameters', dest='additional_parameters', action='append', default=[])
    opts = op.parse_args()
    assert not opts.bad_user, 'UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user, opts.bad_user)
    assert opts.script_path and os.path.isfile(opts.script_path), '## Tool Factory wrapper expects a script path - eg --script_path=foo.R'
    if opts.output_dir:
        try:
            os.makedirs(opts.output_dir)
        except OSError:
            # an already existing directory is fine, anything else is not
            if not os.path.isdir(opts.output_dir):
                raise
    # the default is the int 0, a value given on the command line is a string
    if str(opts.dockerized) == '0':
        retcode = switch_to_docker(opts)
        sys.exit(retcode)
    change_user_id(opts.user_id, opts.group_id)
    os.setgid(int(opts.group_id))
    os.setuid(int(opts.user_id))
    r = ScriptRunner(opts)
    try:
        retcode = r.run()
    finally:
        os.unlink(r.sfile)  # temporary script copy created by ScriptRunner
    if retcode:
        sys.exit(retcode)  # indicate failure to job runner


if __name__ == "__main__":
    main()