Mercurial > repos > fubar > tool_factory_2
view rgToolFactory2.py @ 25:9fe74bd23af2 draft
Uploaded
author | fubar |
---|---|
date | Mon, 02 Mar 2015 05:18:21 -0500 |
parents | |
children | db35d39e1de9 |
line wrap: on
line source
# rgToolFactoryMultIn.py
# see https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
#
# copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
#
# all rights reserved
# Licensed under the LGPL
# suggestions for improvement and bug fixes welcome at https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
#
# January 2015
# unified all setups by passing the script on the cl rather than via a PIPE - no need for treat_bash_special so removed
#
# in the process of building a complex tool
# added ability to choose one of the current toolshed package_r or package_perl or package_python dependencies and source that package
# add that package to tool_dependencies
# Note that once the generated tool is loaded, it will have that package's env.sh loaded automagically so there is no
# --envshpath in the parameters for the generated tool and it uses the system one which will be first on the adjusted path.
#
# sept 2014 added additional params from
# https://bitbucket.org/mvdbeek/dockertoolfactory/src/d4863bcf7b521532c7e8c61b6333840ba5393f73/DockerToolFactory.py?at=default
# passing them is complex
# and they are restricted to NOT contain commas or double quotes to ensure that they can be safely passed together on
# the toolfactory command line as a comma delimited double quoted string for parsing and passing to the script
# see examples on this tool form
#
# august 2014
# Allows arbitrary number of input files
# NOTE positional parameters are now passed to script
# and output (may be "None") is *before* arbitrary number of inputs
#
# march 2014
# had to remove dependencies because cross toolshed dependencies are not possible - can't pre-specify a toolshed url for graphicsmagick and ghostscript
# grrrrr - night before a demo
# added dependencies to a tool_dependencies.xml if html page generated so generated tool is properly portable
#
# added ghostscript and graphicsmagick as dependencies
# fixed a weird problem where gs was trying to use the new_files_path from universe (database/tmp) as ./database/tmp
# errors ensued
#
# august 2013
# found a problem with GS if $TMP or $TEMP missing - now inject /tmp and warn
#
# july 2013
# added ability to combine images and individual log files into html output
# just make sure there's a log file foo.log and it will be output
# together with all images named like "foo_*.pdf"
# otherwise old format for html
#
# January 2013
# problem pointed out by Carlos Borroto
# added escaping for <>$ - thought I did that ages ago...
#
# August 11 2012
# changed to use shell=False and cl as a sequence
#
# This is a Galaxy tool factory for simple scripts in python, R or whatever ails ye.
# It also serves as the wrapper for the new tool.
#
# you paste and run your script
# Only works for simple scripts that read one input from the history.
# Optionally can write one new history dataset,
# and optionally collect any number of outputs into links on an autogenerated HTML page.
#
# DO NOT install on a public or important site - please.
#
# installed generated tools are fine if the script is safe.
# They just run normally and their user cannot do anything unusually insecure
# but please, practice safe toolshed.
# Read the fucking code before you install any tool
# especially this one
#
# After you get the script working on some test data, you can
# optionally generate a toolshed compatible gzip file
# containing your script safely wrapped as an ordinary Galaxy script in your local toolshed for
# safe and largely automated installation in a production Galaxy.
#
# If you opt for an HTML output, you get all the script outputs arranged
# as a single Html history item - all output files are linked, thumbnails for all the pdfs.
# Ugly but really inexpensive.
#
# Patches appreciated please.
# # # long route to June 2012 product # Behold the awesome power of Galaxy and the toolshed with the tool factory to bind them # derived from an integrated script model # called rgBaseScriptWrapper.py # Note to the unwary: # This tool allows arbitrary scripting on your Galaxy as the Galaxy user # There is nothing stopping a malicious user doing whatever they choose # Extremely dangerous!! # Totally insecure. So, trusted users only # # preferred model is a developer using their throw away workstation instance - ie a private site. # no real risk. The universe_wsgi.ini admin_users string is checked - only admin users are permitted to run this tool. # import sys import shutil import subprocess import os import time import tempfile import optparse import tarfile import re import shutil import math progname = os.path.split(sys.argv[0])[1] myversion = 'V001.1 March 2014' verbose = False debug = False toolFactoryURL = 'https://bitbucket.org/fubar/galaxytoolfactory' # if we do html we need these dependencies specified in a tool_dependencies.xml file and referred to in the generated # tool xml def timenow(): """return current time as a string """ return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time())) def quote_non_numeric(s): """return a prequoted string for non-numerics useful for perl and Rscript parameter passing? 
""" try: res = float(s) return s except ValueError: return '"%s"' % s html_escape_table = { "&": "&", ">": ">", "<": "<", "$": "\$" } def html_escape(text): """Produce entities within text.""" return "".join(html_escape_table.get(c,c) for c in text) def html_unescape(text): """Revert entities within text.""" t = text.replace('&','&').replace('>','>').replace('<','<').replace('\$','$') return t def cmd_exists(cmd): return subprocess.call("type " + cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0 def parse_citations(citations_text): """ """ citations = [c for c in citations_text.split("**ENTRY**") if c.strip()] citation_tuples = [] for citation in citations: if citation.startswith("doi"): citation_tuples.append( ("doi", citation[len("doi"):].strip() ) ) else: citation_tuples.append( ("bibtex", citation[len("bibtex"):].strip() ) ) return citation_tuples def shell_source(script): """need a way to source a Galaxy tool interpreter env.sh to point at the right dependency package This based on the idea in http://pythonwise.blogspot.fr/2010/04/sourcing-shell-script.html Note that we have to finesse any wierdly quoted newlines in automagic exports using nulls (env -0) as newlines""" pipe = subprocess.Popen("env -i ; . %s ; env -0" % script, stdout=subprocess.PIPE, shell=True) output = pipe.communicate()[0] outl = output.split('\0') outl = [x for x in outl if len(x.split("=")) == 2] newenv = dict((line.split("=", 1) for line in outl)) os.environ.update(newenv) class ScriptRunner: """class is a wrapper for an arbitrary script note funky templating. this should all be done proper. Problem is, this kludge developed quite naturally and seems to work ok with little overhead... 
""" def __init__(self,opts=None): """ cleanup inputs, setup some outputs """ self.toolhtmldepinterpskel = """<?xml version="1.0"?> <tool_dependency> <package name="ghostscript" version="9.10"> <repository name="package_ghostscript_9_10" owner="devteam" prior_installation_required="True" /> </package> <package name="graphicsmagick" version="1.3.18"> <repository name="package_graphicsmagick_1_3" owner="iuc" prior_installation_required="True" /> </package> <package name="%(interpreter_name)s" version="%(interpreter_version)s"> <repository name="%(interpreter_pack)s" owner="%(interpreter_owner)s" prior_installation_required="True" /> </package> <readme> %(readme)s This file was autogenerated by the Galaxy Tool Factory 2 </readme> </tool_dependency> """ self.toolhtmldepskel = """<?xml version="1.0"?> <tool_dependency> <package name="ghostscript" version="9.10"> <repository name="package_ghostscript_9_10" owner="devteam" prior_installation_required="True" /> </package> <package name="graphicsmagick" version="1.3.18"> <repository name="package_graphicsmagick_1_3" owner="iuc" prior_installation_required="True" /> </package> <readme> %(readme)s This file was autogenerated by the Galaxy Tool Factory 2 </readme> </tool_dependency> """ self.emptytoolhtmldepskel = """<?xml version="1.0"?> <tool_dependency> <readme> %(readme)s This file was autogenerated by the Galaxy Tool Factory 2 </readme> </tool_dependency> """ self.protorequirements = """<requirements> <requirement type="package" version="9.10">ghostscript</requirement> <requirement type="package" version="1.3.18">graphicsmagick</requirement> </requirements>""" self.protorequirements_interpreter = """<requirements> <requirement type="package" version="9.10">ghostscript</requirement> <requirement type="package" version="1.3.18">graphicsmagick</requirement> <requirement type="package" version="%(interpreter_version)s">%(interpreter_name)s</requirement> </requirements>""" self.newCommand=""" %(toolname)s.py --script_path 
"$runMe" --interpreter "%(interpreter)s" --tool_name "%(toolname)s" %(command_inputs)s %(command_outputs)s """ self.tooltestsTabOnly = """ <test> %(test1Inputs)s <param name="job_name" value="test1"/> <param name="runMe" value="$runMe"/> <output name="output1="%(test1Output)s" ftype="tabular"/> %(additionalParams)s </test> """ self.tooltestsHTMLOnly = """ <test> %(test1Inputs)s <param name="job_name" value="test1"/> <param name="runMe" value="$runMe"/> %(additionalParams)s <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="5"/> </test> """ self.tooltestsBoth = """ <test> %(test1Inputs)s <param name="job_name" value="test1"/> <param name="runMe" value="$runMe"/> %(additionalParams)s <output name="output1" file="%(test1Output)s" ftype="tabular" /> <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="10"/> </test> """ self.newXML="""<tool id="%(toolid)s" name="%(toolname)s" version="%(tool_version)s"> %(tooldesc)s %(requirements)s <command interpreter="python"> %(command)s </command> <inputs> %(inputs)s %(additionalInputs)s </inputs> <outputs> %(outputs)s </outputs> <configfiles> <configfile name="runMe"> %(script)s </configfile> </configfiles> <tests> %(tooltests)s </tests> <help> %(help)s This tool was autogenerated from a user provided script using the Galaxy Tool Factory 2 https://toolshed.g2.bx.psu.edu/view/fubar/tool_factory_2 </help> <citations> %(citations)s <citation type="doi">10.1093/bioinformatics/bts573</citation> </citations> </tool>""" self.useGM = cmd_exists('gm') self.useIM = cmd_exists('convert') self.useGS = cmd_exists('gs') self.temp_warned = False # we want only one warning if $TMP not set if opts.output_dir: # simplify for the tool tarball os.chdir(opts.output_dir) self.thumbformat = 'png' self.opts = opts self.toolname = re.sub('[^a-zA-Z0-9_]+', '', opts.tool_name) # a sanitizer now does this but.. 
self.toolid = self.toolname self.myname = sys.argv[0] # get our name because we write ourselves out as a tool later self.pyfile = self.myname # crude but efficient - the cruft won't hurt much self.xmlfile = '%s.xml' % self.toolname rx = open(self.opts.script_path,'r').readlines() rx = [x.rstrip() for x in rx] # remove pesky dos line endings if needed self.script = '\n'.join(rx) fhandle,self.sfile = tempfile.mkstemp(prefix=self.toolname,suffix=".%s" % (opts.interpreter)) tscript = open(self.sfile,'w') # use self.sfile as script source for Popen tscript.write(self.script) tscript.close() self.indentedScript = " %s" % '\n'.join([' %s' % html_escape(x) for x in rx]) # for restructured text in help self.escapedScript = "%s" % '\n'.join([' %s' % html_escape(x) for x in rx]) self.elog = os.path.join(self.opts.output_dir,"%s_error.log" % self.toolname) if opts.output_dir: # may not want these complexities self.tlog = os.path.join(self.opts.output_dir,"%s_runner.log" % self.toolname) art = '%s.%s' % (self.toolname,opts.interpreter) artpath = os.path.join(self.opts.output_dir,art) # need full path artifact = open(artpath,'w') # use self.sfile as script source for Popen artifact.write(self.script) artifact.close() self.cl = [] self.html = [] self.test1Inputs = [] # now a list a = self.cl.append a(opts.interpreter) a(self.sfile) # if multiple inputs - positional or need to distinguish them with cl params if opts.input_tab: tests = [] for i,intab in enumerate(opts.input_tab): # if multiple, make tests if intab.find(',') <> -1: (gpath,uname) = intab.split(',') else: gpath = uname = intab tests.append(os.path.basename(gpath)) self.test1Inputs = '<param name="input_tab" value="%s" />' % (','.join(tests)) else: self.test1Inputs = '' # we always pass path,name pairs in using python optparse append # but the command line has to be different self.infile_paths = '' self.infile_names = '' if self.opts.input_tab: self.infile_paths = ','.join([x.split(',')[0].strip() for x in 
self.opts.input_tab]) self.infile_names = ','.join([x.split(',')[1].strip() for x in self.opts.input_tab]) if self.opts.interpreter == 'python': # yes, this is how additional parameters are always passed in python - to the TF itself and to # scripts to avoid having unknown parameter names (yes, they can be parsed but...) on the command line if self.opts.input_tab: a('--inpaths=%s' % (self.infile_paths)) a('--innames=%s' % (self.infile_names)) if self.opts.output_tab: a('--outpath=%s' % self.opts.output_tab) for p in opts.additional_parameters: p = p.replace('"','') psplit = p.split(',') param = html_unescape(psplit[0]) value = html_unescape(psplit[1]) a('%s="%s"' % (param,value)) if (self.opts.interpreter == 'Rscript'): # pass params on command line as expressions which the script evaluates - see sample if self.opts.input_tab: a('INPATHS="%s"' % self.infile_paths) a('INNAMES="%s"' % self.infile_names) if self.opts.output_tab: a('OUTPATH="%s"' % self.opts.output_tab) for p in opts.additional_parameters: p = p.replace('"','') psplit = p.split(',') param = html_unescape(psplit[0]) value = html_unescape(psplit[1]) a('%s=%s' % (param,quote_non_numeric(value))) if (self.opts.interpreter == 'perl'): # pass positional params on command line - perl script needs to discombobulate the path/name lists if self.opts.input_tab: a('%s' % self.infile_paths) a('%s' % self.infile_names) if self.opts.output_tab: a('%s' % self.opts.output_tab) for p in opts.additional_parameters: # followed by any additional name=value parameter pairs p = p.replace('"','') psplit = p.split(',') param = html_unescape(psplit[0]) value = html_unescape(psplit[1]) a('%s=%s' % (param,quote_non_numeric(value))) if self.opts.interpreter == 'sh' or self.opts.interpreter == 'bash': # more is better - now move all params into environment AND drop on to command line. 
self.cl.insert(0,'env') if self.opts.input_tab: self.cl.insert(1,'INPATHS=%s' % (self.infile_paths)) self.cl.insert(2,'INNAMES=%s' % (self.infile_names)) if self.opts.output_tab: self.cl.insert(3,'OUTPATH=%s' % (self.opts.output_tab)) a('OUTPATH=%s' % (self.opts.output_tab)) # sets those environment variables for the script # additional params appear in CL - yes, it's confusing for i,p in enumerate(opts.additional_parameters): psplit = p.split(',') param = html_unescape(psplit[0]) value = html_unescape(psplit[1]) a('%s=%s' % (param,quote_non_numeric(value))) self.cl.insert(4+i,'%s=%s' % (param,quote_non_numeric(value))) self.interpreter_owner = 'SYSTEM' self.interpreter_pack = 'SYSTEM' self.interpreter_name = 'SYSTEM' self.interpreter_version = 'SYSTEM' self.interpreter_revision = 'SYSTEM' if opts.envshpath <> 'system': # need to parse out details for our tool_dependency try: # fragile - depends on common naming convention as at jan 2015 = package_[interp]_v0_v1_v2... = version v0.v1.v2.. 
is in play # this ONLY happens at tool generation by an admin - the generated tool always uses the default of system so path is from local env.sh packdetails = opts.envshpath.split(os.path.sep)[-4:-1] # eg ['fubar', 'package_r_3_1_1', '63cdb9b2234c'] self.interpreter_owner = packdetails[0] self.interpreter_pack = packdetails[1] self.interpreter_name = packdetails[1].split('_')[1].upper() self.interpreter_revision = packdetails[2] self.interpreter_version = '.'.join(packdetails[1].split('_')[2:]) except: pass self.outFormats = opts.output_format self.inputFormats = opts.input_formats self.test1Output = '%s_test1_output.xls' % self.toolname self.test1HTML = '%s_test1_output.html' % self.toolname def makeXML(self): """ Create a Galaxy xml tool wrapper for the new script as a string to write out fixme - use templating or something less fugly than this example of what we produce <tool id="reverse" name="reverse" version="0.01"> <description>a tabular file</description> <command interpreter="python"> reverse.py --script_path "$runMe" --interpreter "python" --tool_name "reverse" --input_tab "$input1" --output_tab "$output1" </command> <inputs> <param name="input1" type="data" format="tabular" label="Select one or more input files from your history"/> <param name="job_name" type="text" label="Supply a name for the outputs to remind you what they contain" value="reverse"/> </inputs> <outputs> <data format="tabular" name="output1q" label="${job_name}"/> </outputs> <help> **What it Does** Reverse the columns in a tabular file </help> <configfiles> <configfile name="runMe"> # reverse order of columns in a tabular file import sys inp = sys.argv[1] outp = sys.argv[2] i = open(inp,'r') o = open(outp,'w') for row in i: rs = row.rstrip().split('\t') rs.reverse() o.write('\t'.join(rs)) o.write('\n') i.close() o.close() </configfile> </configfiles> </tool> """ # these templates need a dict with the right keys to match the parameters - outputs, help, code... 
xdict = {} xdict['additionalParams'] = '' xdict['additionalInputs'] = '' if self.opts.additional_parameters: if self.opts.edit_additional_parameters: # add to new tool form with default value set to original value xdict['additionalInputs'] = '\n'.join(['<param name="%s" value="%s" label="%s" help="%s" type="%s"/>' % \ (x.split(',')[0],html_escape(x.split(',')[1]),html_escape(x.split(',')[2]),html_escape(x.split(',')[3]), x.split(',')[4]) for x in self.opts.additional_parameters]) xdict['additionalParams'] = '\n'.join(['<param name="%s" value="%s" />' % (x.split(',')[0],html_escape(x.split(',')[1])) for x in self.opts.additional_parameters]) xdict['interpreter_owner'] = self.interpreter_owner xdict['interpreter_version'] = self.interpreter_version xdict['interpreter_pack'] = self.interpreter_pack xdict['interpreter_name'] = self.interpreter_name xdict['requirements'] = '' if self.opts.include_dependencies == "yes": if self.opts.envshpath <> 'system': xdict['requirements'] = self.protorequirements_interpreter % xdict else: xdict['requirements'] = self.protorequirements xdict['tool_version'] = self.opts.tool_version xdict['test1HTML'] = self.test1HTML xdict['test1Output'] = self.test1Output xdict['test1Inputs'] = self.test1Inputs if self.opts.make_HTML and self.opts.output_tab: xdict['tooltests'] = self.tooltestsBoth % xdict elif self.opts.make_HTML: xdict['tooltests'] = self.tooltestsHTMLOnly % xdict else: xdict['tooltests'] = self.tooltestsTabOnly % xdict xdict['script'] = self.escapedScript # configfile is least painful way to embed script to avoid external dependencies # but requires escaping of <, > and $ to avoid Mako parsing if self.opts.help_text: helptext = open(self.opts.help_text,'r').readlines() helptext = [html_escape(x) for x in helptext] # must html escape here too - thanks to Marius van den Beek xdict['help'] = ''.join([x for x in helptext]) else: xdict['help'] = 'Please ask the tool author (%s) for help as none was supplied at tool generation\n' % 
(self.opts.user_email) coda = ['**Script**','Pressing execute will run the following code over your input file and generate some outputs in your history::'] coda.append('\n') coda.append(self.indentedScript) coda.append('\n**Attribution**\nThis Galaxy tool was created by %s at %s\nusing the Galaxy Tool Factory.\n' % (self.opts.user_email,timenow())) coda.append('See %s for details of that project' % (toolFactoryURL)) coda.append('Please cite: Creating re-usable tools from scripts: The Galaxy Tool Factory. Ross Lazarus; Antony Kaspi; Mark Ziemann; The Galaxy Team. ') coda.append('Bioinformatics 2012; doi: 10.1093/bioinformatics/bts573\n') xdict['help'] = '%s\n%s' % (xdict['help'],'\n'.join(coda)) if self.opts.tool_desc: xdict['tooldesc'] = '<description>%s</description>' % self.opts.tool_desc else: xdict['tooldesc'] = '' xdict['command_outputs'] = '' xdict['outputs'] = '' if self.opts.input_tab: cins = ['\n',] cins.append('--input_formats %s' % self.opts.input_formats) cins.append('#for intab in $input1:') cins.append('--input_tab "${intab},${intab.name}"') cins.append('#end for\n') xdict['command_inputs'] = '\n'.join(cins) xdict['inputs'] = '''<param name="input_tab" multiple="true" type="data" format="%s" label="Select one or more %s input files from your history" help="Multiple inputs may be selected assuming the script can deal with them..."/> \n''' % (self.inputFormats,self.inputFormats) else: xdict['command_inputs'] = '' # assume no input - eg a random data generator xdict['inputs'] = '' if (len(self.opts.additional_parameters) > 0): cins = ['\n',] for params in self.opts.additional_parameters: psplit = params.split(',') # name,value... 
psplit[3] = html_escape(psplit[3]) if self.opts.edit_additional_parameters: psplit[1] = '$%s' % psplit[0] # replace with form value else: psplit[1] = html_escape(psplit[1]) # leave prespecified value cins.append('--additional_parameters """%s"""' % ','.join(psplit)) xdict['command_inputs'] = '%s\n%s' % (xdict['command_inputs'],'\n'.join(cins)) xdict['inputs'] += '<param name="job_name" type="text" size="60" label="Supply a name for the outputs to remind you what they contain" value="%s"/> \n' % self.toolname xdict['toolname'] = self.toolname xdict['toolid'] = self.toolid xdict['interpreter'] = self.opts.interpreter xdict['scriptname'] = self.sfile if self.opts.make_HTML: xdict['command_outputs'] += ' --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"' xdict['outputs'] += ' <data format="html" name="html_file" label="${job_name}.html"/>\n' else: xdict['command_outputs'] += ' --output_dir "./"' if self.opts.output_tab: xdict['command_outputs'] += ' --output_tab "$output1"' xdict['outputs'] += ' <data format="%s" name="output1" label="${job_name}"/>\n' % self.outFormats xdict['command'] = self.newCommand % xdict if self.opts.citations: citationstext = open(self.opts.citations,'r').read() citation_tuples = parse_citations(citationstext) citations_xml = "" for citation_type, citation_content in citation_tuples: citation_xml = """<citation type="%s">%s</citation>""" % (citation_type, html_escape(citation_content)) citations_xml += citation_xml xdict['citations'] = citations_xml else: xdict['citations'] = "" xmls = self.newXML % xdict xf = open(self.xmlfile,'w') xf.write(xmls) xf.write('\n') xf.close() # ready for the tarball def makeTooltar(self): """ a tool is a gz tarball with eg /toolname/tool.xml /toolname/tool.py /toolname/test-data/test1_in.foo ... """ retval = self.run() if retval: print >> sys.stderr,'## Run failed. Cannot build yet. 
Please fix and retry' sys.exit(1) tdir = self.toolname os.mkdir(tdir) self.makeXML() if self.opts.help_text: hlp = open(self.opts.help_text,'r').read() else: hlp = 'Please ask the tool author for help as none was supplied at tool generation\n' readme_dict = {'readme':hlp,'interpreter':self.opts.interpreter,'interpreter_version':self.interpreter_version,'interpreter_name':self.interpreter_name, 'interpreter_owner':self.interpreter_owner,'interpreter_pack':self.interpreter_pack} if self.opts.include_dependencies == "yes": if self.opts.envshpath == 'system': tooldepcontent = self.toolhtmldepskel % readme_dict else: tooldepcontent = self.toolhtmldepinterpskel % readme_dict else: tooldepcontent = self.emptytoolhtmldepskel % readme_dict depf = open(os.path.join(tdir,'tool_dependencies.xml'),'w') depf.write(tooldepcontent) depf.write('\n') depf.close() testdir = os.path.join(tdir,'test-data') os.mkdir(testdir) # make tests directory for i,intab in enumerate(self.opts.input_tab): si = self.opts.input_tab[i] if si.find(',') <> -1: s = si.split(',')[0] si = s dest = os.path.join(testdir,os.path.basename(si)) if si <> dest: shutil.copyfile(si,dest) if self.opts.output_tab: shutil.copyfile(self.opts.output_tab,os.path.join(testdir,self.test1Output)) if self.opts.make_HTML: shutil.copyfile(self.opts.output_html,os.path.join(testdir,self.test1HTML)) if self.opts.output_dir: shutil.copyfile(self.tlog,os.path.join(testdir,'test1_out.log')) outpif = '%s.py' % self.toolname # new name outpiname = os.path.join(tdir,outpif) # path for the tool tarball pyin = os.path.basename(self.pyfile) # our name - we rewrite ourselves (TM) notes = ['# %s - a self annotated version of %s generated by running %s\n' % (outpiname,pyin,pyin),] notes.append('# to make a new Galaxy tool called %s\n' % self.toolname) notes.append('# User %s at %s\n' % (self.opts.user_email,timenow())) pi = open(self.pyfile,'r').readlines() # our code becomes new tool wrapper (!) 
- first Galaxy worm notes += pi outpi = open(outpiname,'w') outpi.write(''.join(notes)) outpi.write('\n') outpi.close() stname = os.path.join(tdir,self.sfile) if not os.path.exists(stname): shutil.copyfile(self.sfile, stname) xtname = os.path.join(tdir,self.xmlfile) if not os.path.exists(xtname): shutil.copyfile(self.xmlfile,xtname) tarpath = "%s.tar.gz" % self.toolname tar = tarfile.open(tarpath, "w:gz") tar.add(tdir,arcname='%s' % self.toolname) tar.close() shutil.copyfile(tarpath,self.opts.new_tool) shutil.rmtree(tdir) ## TODO: replace with optional direct upload to local toolshed? return retval def compressPDF(self,inpdf=None,thumbformat='png'): """need absolute path to pdf note that GS gets confoozled if no $TMP or $TEMP so we set it """ assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf,self.myName) hlog = os.path.join(self.opts.output_dir,"compress_%s.txt" % os.path.basename(inpdf)) sto = open(hlog,'a') our_env = os.environ.copy() our_tmp = our_env.get('TMP',None) if not our_tmp: our_tmp = our_env.get('TEMP',None) if not (our_tmp and os.path.exists(our_tmp)): newtmp = os.path.join(self.opts.output_dir,'tmp') try: os.mkdir(newtmp) except: sto.write('## WARNING - cannot make %s - it may exist or permissions need fixing\n' % newtmp) our_env['TEMP'] = newtmp if not self.temp_warned: sto.write('## WARNING - no $TMP or $TEMP!!! 
Please fix - using %s temporarily\n' % newtmp) self.temp_warned = True outpdf = '%s_compressed' % inpdf cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dUseCIEColor", "-dBATCH","-dPDFSETTINGS=/printer", "-sOutputFile=%s" % outpdf,inpdf] x = subprocess.Popen(cl,stdout=sto,stderr=sto,cwd=self.opts.output_dir,env=our_env) retval1 = x.wait() sto.close() if retval1 == 0: os.unlink(inpdf) shutil.move(outpdf,inpdf) os.unlink(hlog) hlog = os.path.join(self.opts.output_dir,"thumbnail_%s.txt" % os.path.basename(inpdf)) sto = open(hlog,'w') outpng = '%s.%s' % (os.path.splitext(inpdf)[0],thumbformat) if self.useGM: cl2 = ['gm', 'convert', inpdf, outpng] else: # assume imagemagick cl2 = ['convert', inpdf, outpng] x = subprocess.Popen(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir,env=our_env) retval2 = x.wait() sto.close() if retval2 == 0: os.unlink(hlog) retval = retval1 or retval2 return retval def getfSize(self,fpath,outpath): """ format a nice file size string """ size = '' fp = os.path.join(outpath,fpath) if os.path.isfile(fp): size = '0 B' n = float(os.path.getsize(fp)) if n > 2**20: size = '%1.1f MB' % (n/2**20) elif n > 2**10: size = '%1.1f KB' % (n/2**10) elif n > 0: size = '%d B' % (int(n)) return size def makeHtml(self): """ Create an HTML file content to list all the artifacts found in the output_dir """ galhtmlprefix = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> <meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> <title></title> <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> </head> <body> <div class="toolFormBody"> """ galhtmlattr = """<hr/><div class="infomessage">This tool (%s) was generated by the <a href="https://bitbucket.org/fubar/galaxytoolfactory/overview">Galaxy Tool 
Factory</a></div><br/>""" galhtmlpostfix = """</div></body></html>\n""" flist = os.listdir(self.opts.output_dir) flist = [x for x in flist if x <> 'Rplots.pdf'] flist.sort() html = [] html.append(galhtmlprefix % progname) html.append('<div class="infomessage">Galaxy Tool "%s" run at %s</div><br/>' % (self.toolname,timenow())) fhtml = [] if len(flist) > 0: logfiles = [x for x in flist if x.lower().endswith('.log')] # log file names determine sections logfiles.sort() logfiles = [x for x in logfiles if os.path.abspath(x) <> os.path.abspath(self.tlog)] logfiles.append(os.path.abspath(self.tlog)) # make it the last one pdflist = [] npdf = len([x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf']) for rownum,fname in enumerate(flist): dname,e = os.path.splitext(fname) sfsize = self.getfSize(fname,self.opts.output_dir) if e.lower() == '.pdf' : # compress and make a thumbnail thumb = '%s.%s' % (dname,self.thumbformat) pdff = os.path.join(self.opts.output_dir,fname) retval = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat) if retval == 0: pdflist.append((fname,thumb)) else: pdflist.append((fname,fname)) if (rownum+1) % 2 == 0: fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize)) else: fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize)) for logfname in logfiles: # expect at least tlog - if more if os.path.abspath(logfname) == os.path.abspath(self.tlog): # handled later sectionname = 'All tool run' if (len(logfiles) > 1): sectionname = 'Other' ourpdfs = pdflist else: realname = os.path.basename(logfname) sectionname = os.path.splitext(realname)[0].split('_')[0] # break in case _ added to log ourpdfs = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] == sectionname] pdflist = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] <> sectionname] # remove nacross = 1 npdf = len(ourpdfs) if npdf > 0: nacross = math.sqrt(npdf) ## int(round(math.log(npdf,2))) 
if int(nacross)**2 != npdf: nacross += 1 nacross = int(nacross) width = min(400,int(1200/nacross)) html.append('<div class="toolFormTitle">%s images and outputs</div>' % sectionname) html.append('(Click on a thumbnail image to download the corresponding original PDF image)<br/>') ntogo = nacross # counter for table row padding with empty cells html.append('<div><table class="simple" cellpadding="2" cellspacing="2">\n<tr>') for i,paths in enumerate(ourpdfs): fname,thumb = paths s= """<td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="5" width="%d" alt="Image called %s"/></a></td>\n""" % (fname,thumb,fname,width,fname) if ((i+1) % nacross == 0): s += '</tr>\n' ntogo = 0 if i < (npdf - 1): # more to come s += '<tr>' ntogo = nacross else: ntogo -= 1 html.append(s) if html[-1].strip().endswith('</tr>'): html.append('</table></div>\n') else: if ntogo > 0: # pad html.append('<td> </td>'*ntogo) html.append('</tr></table></div>\n') logt = open(logfname,'r').readlines() logtext = [x for x in logt if x.strip() > ''] html.append('<div class="toolFormTitle">%s log output</div>' % sectionname) if len(logtext) > 1: html.append('\n<pre>\n') html += logtext html.append('\n</pre>\n') else: html.append('%s is empty<br/>' % logfname) if len(fhtml) > 0: fhtml.insert(0,'<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n') fhtml.append('</table></div><br/>') html.append('<div class="toolFormTitle">All output files available for downloading</div>\n') html += fhtml # add all non-pdf files to the end of the display else: html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter) html.append(galhtmlpostfix) htmlf = file(self.opts.output_html,'w') htmlf.write('\n'.join(html)) htmlf.write('\n') htmlf.close() self.html = html def run(self): """ Some devteam tools have this defensive stderr read so I'm 
keeping with the faith Feel free to update. """ if self.opts.envshpath <> 'system': shell_source(self.opts.envshpath) # this only happens at tool generation - the generated tool relies on the dependencies all being set up # at toolshed installation by sourcing local env.sh if self.opts.output_dir: ste = open(self.elog,'wb') sto = open(self.tlog,'wb') s = ' '.join(self.cl) sto.write('## Executing Toolfactory generated command line = %s\n' % s) sto.flush() p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=ste,cwd=self.opts.output_dir) retval = p.wait() sto.close() ste.close() tmp_stderr = open( self.elog, 'rb' ) err = '' buffsize = 1048576 try: while True: err += tmp_stderr.read( buffsize ) if not err or len( err ) % buffsize != 0: break except OverflowError: pass tmp_stderr.close() else: p = subprocess.Popen(self.cl,shell=False) retval = p.wait() if self.opts.output_dir: if retval <> 0 and err: # problem print >> sys.stderr,err if self.opts.make_HTML: self.makeHtml() return retval def main(): u = """ This is a Galaxy wrapper. 
It expects to be called by a special purpose tool.xml as: <command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript" </command> """ op = optparse.OptionParser() a = op.add_option a('--script_path',default=None) a('--tool_name',default=None) a('--interpreter',default=None) a('--output_dir',default='./') a('--output_html',default=None) a('--input_tab',default=[], action="append") # these are "galaxypath,metadataname" pairs a("--input_formats",default="tabular") a('--output_tab',default=None) a('--output_format',default='tabular') a('--user_email',default='Unknown') a('--bad_user',default=None) a('--make_Tool',default=None) a('--make_HTML',default=None) a('--help_text',default=None) a('--tool_desc',default=None) a('--new_tool',default=None) a('--tool_version',default=None) a('--include_dependencies',default=None) a('--citations',default=None) a('--additional_parameters', dest='additional_parameters', action='append', default=[]) a('--edit_additional_parameters', action="store_true", default=False) a('--envshpath',default="system") opts, args = op.parse_args() assert not opts.bad_user,'UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user,opts.bad_user) assert opts.tool_name,'## Tool Factory expects a tool name - eg --tool_name=DESeq' assert opts.interpreter,'## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript' assert os.path.isfile(opts.script_path),'## Tool Factory wrapper expects a script path - eg --script_path=foo.R' if opts.output_dir: try: os.makedirs(opts.output_dir) except: pass opts.input_tab = [x.replace('"','').replace("'",'') for x in opts.input_tab] for i,x in enumerate(opts.additional_parameters): # remove quotes we need to deal with spaces in CL params opts.additional_parameters[i] = opts.additional_parameters[i].replace('"','') r = ScriptRunner(opts) if opts.make_Tool: retcode = 
r.makeTooltar() else: retcode = r.run() os.unlink(r.sfile) if retcode: sys.exit(retcode) # indicate failure to job runner if __name__ == "__main__": main()