diff scriptrunner.py @ 0:21d312776891 draft

planemo upload for repository https://github.com/mvdbeek/docker_scriptrunner/ commit 30f8264cdd67d40dec8acde6407f32152e6a29c1-dirty
author mvdbeek
date Sat, 09 Jul 2016 16:57:13 -0400
parents
children 495946ffc2d6
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scriptrunner.py	Sat Jul 09 16:57:13 2016 -0400
@@ -0,0 +1,436 @@
+# DockerToolFactory.py
+# see https://github.com/mvdbeek/scriptrunner
+
+import sys 
+import shutil 
+import subprocess 
+import os 
+import time 
+import tempfile 
+import argparse
+import getpass
+import tarfile
+import re
+import shutil
+import math
+import fileinput
+from os.path import abspath 
+
+
+# Base name of the script as it was invoked; makeHtml() interpolates it into
+# the "generator" meta tag of the HTML report.
+progname = os.path.split(sys.argv[0])[1] 
+# Module-level flags; nothing in the visible code reads them.
+verbose = False 
+debug = False
+
+def timenow():
+    """return current time as a string
+    """
+    return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
+
+html_escape_table = {
+     "&": "&",
+     ">": ">",
+     "<": "&lt;",
+     "$": "\$"
+     }
+
+def html_escape(text):
+     """Produce entities within text."""
+     return "".join(html_escape_table.get(c,c) for c in text)
+
+def cmd_exists(cmd):
+     return subprocess.call("type " + cmd, shell=True, 
+           stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0
+
+def construct_bind(host_path, container_path=False, binds=None, ro=True):
+    #TODO remove container_path if it's alwyas going to be the same as host_path
+    '''build or extend binds dictionary with container path. binds is used
+    to mount all files using the docker-py client.'''
+    if not binds:
+        binds={}
+    if isinstance(host_path, list):
+        for k,v in enumerate(host_path):
+            if not container_path:
+                container_path=host_path[k]
+            binds[host_path[k]]={'bind':container_path, 'ro':ro}
+            container_path=False #could be more elegant
+        return binds
+    else:
+        if not container_path:
+            container_path=host_path
+        binds[host_path]={'bind':container_path, 'ro':ro}
+        return binds
+
+def switch_to_docker(opts):
+    import docker #need local import, as container does not have docker-py
+    user_id = os.getuid()
+    group_id = os.getgid()
+    docker_client=docker.Client()
+    toolfactory_path=abspath(sys.argv[0])
+    binds=construct_bind(host_path=opts.script_path, ro=False)
+    binds=construct_bind(binds=binds, host_path=abspath(opts.output_dir), ro=False)
+    if len(opts.input_tab)>0:
+        binds=construct_bind(binds=binds, host_path=opts.input_tab, ro=True)
+    if not opts.output_tab == 'None':
+        binds=construct_bind(binds=binds, host_path=opts.output_tab, ro=False)
+    if opts.make_HTML:
+        binds=construct_bind(binds=binds, host_path=opts.output_html, ro=False)
+    binds=construct_bind(binds=binds, host_path=toolfactory_path)
+    volumes=binds.keys()
+    sys.argv=[abspath(opts.output_dir) if sys.argv[i-1]=='--output_dir' else arg for i,arg in enumerate(sys.argv)] ##inject absolute path of working_dir
+    cmd=['python', '-u']+sys.argv+['--dockerized', '1', "--user_id", str(user_id), "--group_id", str(group_id)]
+    image_exists = [ True for image in docker_client.images() if opts.docker_image in image['RepoTags'] ]
+    if not image_exists:
+        docker_client.pull(opts.docker_image)
+    container=docker_client.create_container(
+        image=opts.docker_image,
+        volumes=volumes,
+        command=cmd
+        )
+    docker_client.start(container=container[u'Id'], binds=binds)
+    docker_client.wait(container=container[u'Id'])
+    logs=docker_client.logs(container=container[u'Id'])
+    print "".join([log for log in logs])
+    docker_client.remove_container(container[u'Id'])
+
+class ScriptRunner:
+    """class is a wrapper for an arbitrary script
+    """
+
+    def __init__(self,opts=None,treatbashSpecial=True, image_tag='base'):
+        """
+        cleanup inputs, setup some outputs
+        
+        """
+        self.opts = opts
+        self.scriptname = 'script'
+        self.useIM = cmd_exists('convert')
+        self.useGS = cmd_exists('gs')
+        self.temp_warned = False # we want only one warning if $TMP not set
+        self.treatbashSpecial = treatbashSpecial
+        self.image_tag = image_tag
+        os.chdir(abspath(opts.output_dir))
+        self.thumbformat = 'png'
+        s = open(self.opts.script_path,'r').readlines()
+        s = [x.rstrip() for x in s] # remove pesky dos line endings if needed
+        self.script = '\n'.join(s)
+        fhandle,self.sfile = tempfile.mkstemp(prefix='script',suffix=".%s" % (opts.interpreter))
+        tscript = open(self.sfile,'w') # use self.sfile as script source for Popen
+        tscript.write(self.script)
+        tscript.close()
+        self.indentedScript = '\n'.join([' %s' % html_escape(x) for x in s]) # for restructured text in help
+        self.escapedScript = '\n'.join([html_escape(x) for x in s])
+        self.elog = os.path.join(self.opts.output_dir,"%s_error.log" % self.scriptname)
+        if opts.output_dir: # may not want these complexities
+            self.tlog = os.path.join(self.opts.output_dir,"%s_runner.log" % self.scriptname)
+            art = '%s.%s' % (self.scriptname,opts.interpreter)
+            artpath = os.path.join(self.opts.output_dir,art) # need full path
+            artifact = open(artpath,'w') # use self.sfile as script source for Popen
+            artifact.write(self.script)
+            artifact.close()
+        self.cl = []
+        self.html = []
+        a = self.cl.append
+        a(opts.interpreter)
+        if self.treatbashSpecial and opts.interpreter in ['bash','sh']:
+            a(self.sfile)
+        else:
+            a('-') # stdin
+	for input in opts.input_tab:
+	  a(input) 
+        if opts.output_tab == 'None': #If tool generates only HTML, set output name to toolname
+            a(str(self.scriptname)+'.out')
+        a(opts.output_tab)
+	for param in opts.additional_parameters:
+          param, value=param.split(',')
+          a('--'+param)
+          a(value)
+        self.outFormats = opts.output_format
+        self.inputFormats = [formats for formats in opts.input_formats]
+        self.test1Input = '%s_test1_input.xls' % self.scriptname
+        self.test1Output = '%s_test1_output.xls' % self.scriptname
+        self.test1HTML = '%s_test1_output.html' % self.scriptname
+
+
+    def compressPDF(self,inpdf=None,thumbformat='png'):
+        """need absolute path to pdf
+           note that GS gets confoozled if no $TMP or $TEMP
+           so we set it
+        """
+        assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf,self.myName)
+        hlog = os.path.join(self.opts.output_dir,"compress_%s.txt" % os.path.basename(inpdf))
+        sto = open(hlog,'a')
+        our_env = os.environ.copy()
+        our_tmp = our_env.get('TMP',None)
+        if not our_tmp:
+            our_tmp = our_env.get('TEMP',None)
+        if not (our_tmp and os.path.exists(our_tmp)):
+            newtmp = os.path.join(self.opts.output_dir,'tmp')
+            try:
+                os.mkdir(newtmp)
+            except:
+                sto.write('## WARNING - cannot make %s - it may exist or permissions need fixing\n' % newtmp)
+            our_env['TEMP'] = newtmp
+            if not self.temp_warned:
+               sto.write('## WARNING - no $TMP or $TEMP!!! Please fix - using %s temporarily\n' % newtmp)
+               self.temp_warned = True          
+        outpdf = '%s_compressed' % inpdf
+        cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dUseCIEColor", "-dBATCH","-dPDFSETTINGS=/printer", "-sOutputFile=%s" % outpdf,inpdf]
+        x = subprocess.Popen(cl,stdout=sto,stderr=sto,cwd=self.opts.output_dir,env=our_env)
+        retval1 = x.wait()
+        sto.close()
+        if retval1 == 0:
+            os.unlink(inpdf)
+            shutil.move(outpdf,inpdf)
+            os.unlink(hlog)
+        hlog = os.path.join(self.opts.output_dir,"thumbnail_%s.txt" % os.path.basename(inpdf))
+        sto = open(hlog,'w')
+        outpng = '%s.%s' % (os.path.splitext(inpdf)[0],thumbformat)
+        cl2 = ['convert', inpdf, outpng]
+        x = subprocess.Popen(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir,env=our_env)
+        retval2 = x.wait()
+        sto.close()
+        if retval2 == 0:
+             os.unlink(hlog)
+        retval = retval1 or retval2
+        return retval
+
+
+    def getfSize(self,fpath,outpath):
+        """
+        format a nice file size string
+        """
+        size = ''
+        fp = os.path.join(outpath,fpath)
+        if os.path.isfile(fp):
+            size = '0 B'
+            n = float(os.path.getsize(fp))
+            if n > 2**20:
+                size = '%1.1f MB' % (n/2**20)
+            elif n > 2**10:
+                size = '%1.1f KB' % (n/2**10)
+            elif n > 0:
+                size = '%d B' % (int(n))
+        return size
+
+    def makeHtml(self):
+        """ Create an HTML file content to list all the artifacts found in the output_dir
+        """
+
+        galhtmlprefix = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 
+        <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 
+        <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 
+        <meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> 
+        <title></title> 
+        <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> 
+        </head> 
+        <body> 
+        <div class="toolFormBody"> 
+        """ 
+        galhtmlattr = """<hr/><div class="infomessage">This tool (%s) was generated by the <a href="https://bitbucket.org/fubar/galaxytoolfactory/overview">Galaxy Tool Factory</a></div><br/>""" 
+        galhtmlpostfix = """</div></body></html>\n"""
+
+        flist = os.listdir(self.opts.output_dir)
+        flist = [x for x in flist if x <> 'Rplots.pdf']
+        flist.sort()
+        html = []
+        html.append(galhtmlprefix % progname)
+        html.append('<div class="infomessage">Galaxy Tool "%s" run at %s</div><br/>' % (self.scriptname,timenow()))
+        fhtml = []
+        if len(flist) > 0:
+            logfiles = [x for x in flist if x.lower().endswith('.log')] # log file names determine sections
+            logfiles.sort()
+            logfiles = [x for x in logfiles if abspath(x) <> abspath(self.tlog)]
+            logfiles.append(abspath(self.tlog)) # make it the last one
+            pdflist = []
+            npdf = len([x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf'])
+            for rownum,fname in enumerate(flist):
+                dname,e = os.path.splitext(fname)
+                sfsize = self.getfSize(fname,self.opts.output_dir)
+                if e.lower() == '.pdf' : # compress and make a thumbnail
+                    thumb = '%s.%s' % (dname,self.thumbformat)
+                    pdff = os.path.join(self.opts.output_dir,fname)
+                    retval = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat)
+                    if retval == 0:
+                        pdflist.append((fname,thumb))
+                    else:
+                        pdflist.append((fname,fname))
+                if (rownum+1) % 2 == 0:
+                    fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize))
+                else:
+                    fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize))
+            for logfname in logfiles: # expect at least tlog - if more
+                if abspath(logfname) == abspath(self.tlog): # handled later
+                    sectionname = 'All tool run'
+                    if (len(logfiles) > 1):
+                        sectionname = 'Other'
+                    ourpdfs = pdflist
+                else:
+                    realname = os.path.basename(logfname)
+                    sectionname = os.path.splitext(realname)[0].split('_')[0] # break in case _ added to log
+                    ourpdfs = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] == sectionname]
+                    pdflist = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] <> sectionname] # remove
+                nacross = 1
+                npdf = len(ourpdfs)
+
+                if npdf > 0:
+                    nacross = math.sqrt(npdf) ## int(round(math.log(npdf,2)))
+                    if int(nacross)**2 != npdf:
+                        nacross += 1
+                    nacross = int(nacross)
+                    width = min(400,int(1200/nacross))
+                    html.append('<div class="toolFormTitle">%s images and outputs</div>' % sectionname)
+                    html.append('(Click on a thumbnail image to download the corresponding original PDF image)<br/>')
+                    ntogo = nacross # counter for table row padding with empty cells
+                    html.append('<div><table class="simple" cellpadding="2" cellspacing="2">\n<tr>')
+                    for i,paths in enumerate(ourpdfs): 
+                        fname,thumb = paths
+                        s= """<td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="5" width="%d" 
+                           alt="Image called %s"/></a></td>\n""" % (fname,thumb,fname,width,fname)
+                        if ((i+1) % nacross == 0):
+                            s += '</tr>\n'
+                            ntogo = 0
+                            if i < (npdf - 1): # more to come
+                               s += '<tr>'
+                               ntogo = nacross
+                        else:
+                            ntogo -= 1
+                        html.append(s)
+                    if html[-1].strip().endswith('</tr>'):
+                        html.append('</table></div>\n')
+                    else:
+                        if ntogo > 0: # pad
+                           html.append('<td>&nbsp;</td>'*ntogo)
+                        html.append('</tr></table></div>\n')
+                logt = open(logfname,'r').readlines()
+                logtext = [x for x in logt if x.strip() > '']
+                html.append('<div class="toolFormTitle">%s log output</div>' % sectionname)
+                if len(logtext) > 1:
+                    html.append('\n<pre>\n')
+                    html += logtext
+                    html.append('\n</pre>\n')
+                else:
+                    html.append('%s is empty<br/>' % logfname)
+        if len(fhtml) > 0:
+           fhtml.insert(0,'<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n')
+           fhtml.append('</table></div><br/>')
+           html.append('<div class="toolFormTitle">All output files available for downloading</div>\n')
+           html += fhtml # add all non-pdf files to the end of the display
+        else:
+            html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter)
+        html.append(galhtmlpostfix)
+        htmlf = file(self.opts.output_html,'w')
+        htmlf.write('\n'.join(html))
+        htmlf.write('\n')
+        htmlf.close()
+        self.html = html
+
+
+    def run(self):
+        """
+        scripts must be small enough not to fill the pipe!
+        """
+        if self.treatbashSpecial and self.opts.interpreter in ['bash','sh']:
+          retval = self.runBash()
+        else:
+            if self.opts.output_dir:
+                ste = open(self.elog,'w')
+                sto = open(self.tlog,'w')
+                sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
+                sto.flush()
+                p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=ste,stdin=subprocess.PIPE,cwd=self.opts.output_dir)
+            else:
+                p = subprocess.Popen(self.cl,shell=False,stdin=subprocess.PIPE)
+            p.stdin.write(self.script)
+            p.stdin.close()
+            retval = p.wait()
+            if self.opts.output_dir:
+                sto.close()
+                ste.close()
+                err = open(self.elog,'r').readlines()
+                if retval <> 0 and err: # problem
+                    print >> sys.stderr,err #same problem, need to capture docker stdin/stdout
+            if self.opts.make_HTML:
+                self.makeHtml()
+        return retval
+
+    def runBash(self):
+        """
+        cannot use - for bash so use self.sfile
+        """
+        if self.opts.output_dir:
+            s = '## Toolfactory generated command line = %s\n' % ' '.join(self.cl)
+            sto = open(self.tlog,'w')
+            sto.write(s)
+            sto.flush()
+            p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
+        else:
+            p = subprocess.Popen(self.cl,shell=False)            
+        retval = p.wait()
+        if self.opts.output_dir:
+            sto.close()
+        if self.opts.make_HTML:
+            self.makeHtml()
+        return retval
+  
+
+def change_user_id(new_uid, new_gid):
+    """
+    To avoid issues with wrong user ids, we change the user id of the 'galaxy' user in the container
+    to the user id with which the script has been called initially.
+    """
+    cmd1 = ["/usr/sbin/usermod", "-d", "/var/home/galaxy", "galaxy"]
+    cmd2 = ["/usr/sbin/usermod", "-u", new_uid, "galaxy"]
+    cmd3 = ["/usr/sbin/groupmod", "-g", new_gid, "galaxy"]
+    cmd4 = ["/usr/sbin/usermod", "-d", "/home/galaxy", "galaxy"]
+    [subprocess.call(cmd) for cmd in [cmd1, cmd2, cmd3, cmd4]]
+
+
+def main():
+    u = """
+    This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
+    <command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
+    </command>
+    """
+    op = argparse.ArgumentParser()
+    a = op.add_argument
+    a('--docker_image',default=None)
+    a('--script_path',default=None)
+    a('--tool_name',default=None)
+    a('--interpreter',default=None)
+    a('--output_dir',default='./')
+    a('--output_html',default=None)
+    a('--input_tab',default='None', nargs='*')
+    a('--output_tab',default='None')
+    a('--user_email',default='Unknown')
+    a('--bad_user',default=None)
+    a('--make_HTML',default=None)
+    a('--new_tool',default=None)
+    a('--dockerized',default=0)
+    a('--group_id',default=None)
+    a('--user_id',default=None)
+    a('--output_format', default='tabular')
+    a('--input_format', dest='input_formats', action='append', default=[])
+    a('--additional_parameters', dest='additional_parameters', action='append', default=[])
+    opts = op.parse_args()
+    assert not opts.bad_user,'UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user,opts.bad_user)
+    assert os.path.isfile(opts.script_path),'## Tool Factory wrapper expects a script path - eg --script_path=foo.R'
+    if opts.output_dir:
+        try:
+            os.makedirs(opts.output_dir)
+        except:
+            pass
+    if opts.dockerized==0:
+      switch_to_docker(opts)
+      return
+    change_user_id(opts.user_id, opts.group_id)
+    os.setgid(int(opts.group_id))
+    os.setuid(int(opts.user_id))
+    r = ScriptRunner(opts)
+    retcode = r.run()
+    os.unlink(r.sfile)
+    if retcode:
+        sys.exit(retcode) # indicate failure to job runner
+
+
+# Run the wrapper only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()