Mercurial > repos > fubar > tool_factory_2
diff toolfactory/rgToolFactory2.py @ 119:8ea1133b9d9a draft
Uploaded
author | fubar |
---|---|
date | Tue, 05 Jan 2021 00:34:48 +0000 |
parents | e43c43396a70 |
children |
line wrap: on
line diff
--- a/toolfactory/rgToolFactory2.py Fri Dec 11 04:23:48 2020 +0000 +++ b/toolfactory/rgToolFactory2.py Tue Jan 05 00:34:48 2021 +0000 @@ -13,18 +13,9 @@ # 1. Fix the toolfactory so it works - done for simplest case # 2. Fix planemo so the toolfactory function works # 3. Rewrite bits using galaxyxml functions where that makes sense - done -# -# uses planemo in a biodocker sort of image as a requirement -# otherwise planemo seems to leak dependencies back into the -# calling venv. Hilarity ensues. - - import argparse import copy -import datetime -import grp -import json import logging import os import re @@ -35,12 +26,9 @@ import tempfile import time - from bioblend import ConnectionError from bioblend import toolshed -import docker - import galaxyxml.tool as gxt import galaxyxml.tool.parameters as gxtp @@ -54,8 +42,9 @@ toolFactoryURL = "https://github.com/fubar2/toolfactory" ourdelim = "~~~" -# --input_files="$intab.input_files~~~$intab.input_CL~~~$intab.input_formats\ -#~~~$intab.input_label~~~$intab.input_help" +# --input_files="$intab.input_files~~~$intab.input_CL~~~ +# $intab.input_formats# ~~~$intab.input_label +# ~~~$intab.input_help" IPATHPOS = 0 ICLPOS = 1 IFMTPOS = 2 @@ -63,7 +52,8 @@ IHELPOS = 4 IOCLPOS = 5 -# --output_files "$otab.history_name~~~$otab.history_format~~~$otab.history_CL~~~$otab.history_test" +# --output_files "$otab.history_name~~~$otab.history_format~~~ +# $otab.history_CL~~~$otab.history_test" ONAMEPOS = 0 OFMTPOS = 1 OCLPOS = 2 @@ -72,7 +62,8 @@ # --additional_parameters="$i.param_name~~~$i.param_value~~~ -# $i.param_label~~~$i.param_help~~~$i.param_type~~~$i.CL~~~i$.param_CLoverride" +# $i.param_label~~~$i.param_help~~~$i.param_type +# ~~~$i.CL~~~i$.param_CLoverride" ANAMEPOS = 0 AVALPOS = 1 ALABPOS = 2 @@ -106,13 +97,21 @@ return '"%s"' % s -html_escape_table = {"&": "&", ">": ">", "<": "<", "$": r"\$","#":"#", "$":"$"} -cheetah_escape_table = {"$": "\$","#":"\#"} +html_escape_table = { + "&": "&", + ">": ">", + "<": "<", + "#": "#", + "$": "$", +} +cheetah_escape_table = {"$": "\\$", "#": "\\#"} + def html_escape(text): """Produce entities within text.""" return "".join([html_escape_table.get(c, c) for c in text]) + def cheetah_escape(text): """Produce entities within text.""" return "".join([cheetah_escape_table.get(c, c) for c in text]) @@ -124,8 +123,8 @@ t = t.replace(">", ">") t = t.replace("<", "<") t = t.replace("\\$", "$") - t = t.replace("$","$") - t = t.replace("#","#") + t = t.replace("$", "$") + t = t.replace("#", "#") return t @@ -137,7 +136,9 @@ if citation.startswith("doi"): citation_tuples.append(("doi", citation[len("doi") :].strip())) else: - citation_tuples.append(("bibtex", citation[len("bibtex") :].strip())) + citation_tuples.append( + ("bibtex", citation[len("bibtex") :].strip()) + ) return citation_tuples @@ -168,7 +169,9 @@ self.executeme = self.args.sysexe else: if self.args.packages: - self.executeme = self.args.packages.split(",")[0].split(":")[0] + self.executeme = ( + self.args.packages.split(",")[0].split(":")[0].strip() + ) else: self.executeme = None aCL = self.cl.append @@ -226,8 +229,12 @@ else: aCL(self.executeme) aXCL(self.executeme) - self.elog = os.path.join(self.repdir, "%s_error_log.txt" % self.tool_name) - self.tlog = os.path.join(self.repdir, "%s_runner_log.txt" % self.tool_name) + self.elog = os.path.join( + self.repdir, "%s_error_log.txt" % self.tool_name + ) + self.tlog = os.path.join( + self.repdir, "%s_runner_log.txt" % self.tool_name + ) if self.args.parampass == "0": self.clsimple() @@ -235,15 +242,15 @@ clsuffix = [] xclsuffix = [] for i, p in enumerate(self.infiles): - if p[IOCLPOS] == "STDIN": + if p[IOCLPOS].upper() == "STDIN": appendme = [ - p[IOCLPOS], + p[ICLPOS], p[ICLPOS], p[IPATHPOS], "< %s" % p[IPATHPOS], ] xappendme = [ - p[IOCLPOS], + p[ICLPOS], p[ICLPOS], p[IPATHPOS], "< $%s" % p[ICLPOS], @@ -258,10 +265,14 @@ self.lastclredirect = [">", p[ONAMEPOS]] self.lastxclredirect = [">", "$%s" % p[OCLPOS]] else: - clsuffix.append([p[ONAMEPOS], p[ONAMEPOS], p[ONAMEPOS], ""]) - xclsuffix.append([p[ONAMEPOS], p[ONAMEPOS], "$%s" % p[ONAMEPOS], ""]) + clsuffix.append([p[OCLPOS], p[ONAMEPOS], p[ONAMEPOS], ""]) + xclsuffix.append( + [p[OCLPOS], p[ONAMEPOS], "$%s" % p[ONAMEPOS], ""] + ) for p in self.addpar: - clsuffix.append([p[AOCLPOS], p[ACLPOS], p[AVALPOS], p[AOVERPOS]]) + clsuffix.append( + [p[AOCLPOS], p[ACLPOS], p[AVALPOS], p[AOVERPOS]] + ) xclsuffix.append( [p[AOCLPOS], p[ACLPOS], '"$%s"' % p[ANAMEPOS], p[AOVERPOS]] ) @@ -290,52 +301,58 @@ self.spacedScript = [f" {x}" for x in rx if x.strip() > ""] art = "%s.%s" % (self.tool_name, self.executeme) artifact = open(art, "wb") - artifact.write(bytes('\n'.join(self.escapedScript),'utf8')) + artifact.write(bytes("\n".join(self.escapedScript), "utf8")) artifact.close() def cleanuppar(self): """ positional parameters are complicated by their numeric ordinal""" - for i, p in enumerate(self.infiles): - infp = copy.copy(p) - if self.args.parampass == "positional": - assert infp[ - ICLPOS - ].isdigit(), "Positional parameters must be ordinal integers - got %s for %s" % ( - infp[ICLPOS], - infp[ILABPOS], + if self.args.parampass == "positional": + for i, p in enumerate(self.infiles): + assert ( + p[ICLPOS].isdigit() or p[ICLPOS].strip().upper() == "STDIN" + ), "Positional parameters must be ordinal integers - got %s for %s" % ( + p[ICLPOS], + p[ILABPOS], ) - icl = infp[ICLPOS] - infp.append(icl) - if infp[ICLPOS].isdigit() or self.args.parampass == "0": - scl = "input%d" % (i + 1) - infp[ICLPOS] = scl - self.infiles[i] = infp - for i, p in enumerate( - self.outfiles - ): - if self.args.parampass == "positional" and p[OCLPOS].upper() != "STDOUT": - assert p[ - OCLPOS - ].isdigit(), "Positional parameters must be ordinal integers - got %s for %s" % ( + for i, p in enumerate(self.outfiles): + assert ( + p[OCLPOS].isdigit() + or p[OCLPOS].strip().upper() == "STDOUT" + ), "Positional parameters must be ordinal integers - got %s for %s" % ( p[OCLPOS], p[ONAMEPOS], ) - p.append(p[OCLPOS]) # keep copy - if p[OOCLPOS].isdigit() or p[OOCLPOS].upper() == "STDOUT": - scl = p[ONAMEPOS] - p[OCLPOS] = scl - self.outfiles[i] = p - for i, p in enumerate(self.addpar): - if self.args.parampass == "positional": + for i, p in enumerate(self.addpar): assert p[ ACLPOS ].isdigit(), "Positional parameters must be ordinal integers - got %s for %s" % ( p[ACLPOS], p[ANAMEPOS], ) + for i, p in enumerate(self.infiles): + infp = copy.copy(p) + icl = infp[ICLPOS] + infp.append(icl) + if ( + infp[ICLPOS].isdigit() + or self.args.parampass == "0" + or infp[ICLPOS].strip().upper() == "STDOUT" + ): + scl = "input%d" % (i + 1) + infp[ICLPOS] = scl + self.infiles[i] = infp + for i, p in enumerate(self.outfiles): + p.append(p[OCLPOS]) # keep copy + if ( + p[OOCLPOS].isdigit() and self.args.parampass != "positional" + ) or p[OOCLPOS].strip().upper() == "STDOUT": + scl = p[ONAMEPOS] + p[OCLPOS] = scl + self.outfiles[i] = p + for i, p in enumerate(self.addpar): p.append(p[ACLPOS]) if p[ACLPOS].isdigit(): - scl = "input%s" % p[ACLPOS] + scl = "param%s" % p[ACLPOS] p[ACLPOS] = scl self.addpar[i] = p @@ -370,7 +387,6 @@ aXCL(self.lastxclredirect[0]) aXCL(self.lastxclredirect[1]) - def clargparse(self): """argparse style""" aCL = self.cl.append @@ -396,7 +412,6 @@ aCL(k) aCL(v) - def getNdash(self, newname): if self.is_positional: ndash = 0 @@ -408,11 +423,17 @@ def doXMLparam(self): """flake8 made me do this...""" - for p in self.outfiles: # --output_files "$otab.history_name~~~$otab.history_format~~~$otab.history_CL~~~$otab.history_test" + for ( + p + ) in ( + self.outfiles + ): # --output_files "$otab.history_name~~~$otab.history_format~~~$otab.history_CL~~~$otab.history_test" newname, newfmt, newcl, test, oldcl = p test = test.strip() ndash = self.getNdash(newcl) - aparm = gxtp.OutputData(name=newname, format=newfmt, num_dashes=ndash, label=newcl) + aparm = gxtp.OutputData( + name=newname, format=newfmt, num_dashes=ndash, label=newcl + ) aparm.positional = self.is_positional if self.is_positional: if oldcl.upper() == "STDOUT": @@ -430,30 +451,30 @@ if test.split(":")[1].isdigit: ld = int(test.split(":")[1]) tp = gxtp.TestOutput( - name=newcl, - value="%s_sample" % newcl, - format=newfmt, - compare= c, - lines_diff=ld, - ) + name=newname, + value="%s_sample" % newname, + format=newfmt, + compare=c, + lines_diff=ld, + ) elif test.startswith("sim_size"): c = "sim_size" tn = test.split(":")[1].strip() - if tn > '': - if '.' in tn: + if tn > "": + if "." in tn: delta = None - delta_frac = min(1.0,float(tn)) + delta_frac = min(1.0, float(tn)) else: delta = int(tn) delta_frac = None tp = gxtp.TestOutput( - name=newcl, - value="%s_sample" % newcl, - format=newfmt, - compare= c, - delta = delta, - delta_frac = delta_frac - ) + name=newname, + value="%s_sample" % newname, + format=newfmt, + compare=c, + delta=delta, + delta_frac=delta_frac, + ) self.testparam.append(tp) for p in self.infiles: newname = p[ICLPOS] @@ -477,7 +498,16 @@ tparm = gxtp.TestParam(name=newname, value="%s_sample" % newname) self.testparam.append(tparm) for p in self.addpar: - newname, newval, newlabel, newhelp, newtype, newcl, override, oldcl = p + ( + newname, + newval, + newlabel, + newhelp, + newtype, + newcl, + override, + oldcl, + ) = p if not len(newlabel) > 0: newlabel = newname ndash = self.getNdash(newname) @@ -563,7 +593,9 @@ Hmmm. How to get the command line into correct order... """ if self.command_override: - self.newtool.command_override = self.command_override # config file + self.newtool.command_override = ( + self.command_override + ) # config file else: self.newtool.command_override = self.xmlcl if self.args.help_text: @@ -571,14 +603,14 @@ safertext = "\n".join([cheetah_escape(x) for x in helptext]) if self.args.script_path: scr = [x for x in self.spacedScript if x.strip() > ""] - scr.insert(0,'\n------\n\n\nScript::\n') + scr.insert(0, "\n------\n\n\nScript::\n") if len(scr) > 300: scr = ( scr[:100] + [" >300 lines - stuff deleted", " ......"] + scr[-100:] ) - scr.append('\n') + scr.append("\n") safertext = safertext + "\n".join(scr) self.newtool.help = safertext else: @@ -591,9 +623,9 @@ requirements = gxtp.Requirements() if self.args.packages: for d in self.args.packages.split(","): - ver = '' - d = d.replace('==',':') - d = d.replace('=',':') + ver = "" + d = d.replace("==", ":") + d = d.replace("=", ":") if ":" in d: packg, ver = d.split(":") else: @@ -610,7 +642,11 @@ self.newtool.inputs = self.tinputs if self.args.script_path: configfiles = gxtp.Configfiles() - configfiles.append(gxtp.Configfile(name="runme", text="\n".join(self.escapedScript))) + configfiles.append( + gxtp.Configfile( + name="runme", text="\n".join(self.escapedScript) + ) + ) self.newtool.configfiles = configfiles tests = gxtp.Tests() test_a = gxtp.Test() @@ -627,7 +663,9 @@ "Cite: Creating re-usable tools from scripts doi:10.1093/bioinformatics/bts573" ) exml0 = self.newtool.export() - exml = exml0.replace(FAKEEXE, "") # temporary work around until PR accepted + exml = exml0.replace( + FAKEEXE, "" + ) # temporary work around until PR accepted if ( self.test_override ): # cannot do this inside galaxyxml as it expects lxml objects for tests @@ -635,7 +673,7 @@ part2 = exml.split("</tests>")[1] fixed = "%s\n%s\n%s" % (part1, self.test_override, part2) exml = fixed - #exml = exml.replace('range="1:"', 'range="1000:"') + # exml = exml.replace('range="1:"', 'range="1000:"') xf = open("%s.xml" % self.tool_name, "w") xf.write(exml) xf.write("\n") @@ -657,14 +695,17 @@ else: ste = open(self.elog, "w") if self.lastclredirect: - sto = open(self.lastclredirect[1], "wb") # is name of an output file + sto = open( + self.lastclredirect[1], "wb" + ) # is name of an output file else: if os.path.exists(self.tlog): sto = open(self.tlog, "a") else: sto = open(self.tlog, "w") sto.write( - "## Executing Toolfactory generated command line = %s\n" % scl + "## Executing Toolfactory generated command line = %s\n" + % scl ) sto.flush() subp = subprocess.run( @@ -685,7 +726,9 @@ subp = subprocess.run( self.cl, env=self.ourenv, shell=False, stdout=sto, stdin=sti ) - sto.write("## Executing Toolfactory generated command line = %s\n" % scl) + sto.write( + "## Executing Toolfactory generated command line = %s\n" % scl + ) retval = subp.returncode sto.close() sti.close() @@ -698,112 +741,6 @@ logging.debug("run done") return retval - def copy_to_container(self, src, dest, container): - """Recreate the src directory tree at dest - full path included""" - idir = os.getcwd() - workdir = os.path.dirname(src) - os.chdir(workdir) - _, tfname = tempfile.mkstemp(suffix=".tar") - tar = tarfile.open(tfname, mode="w") - srcb = os.path.basename(src) - tar.add(srcb) - tar.close() - data = open(tfname, "rb").read() - container.put_archive(dest, data) - os.unlink(tfname) - os.chdir(idir) - - def copy_from_container(self, src, dest, container): - """recreate the src directory tree at dest using docker sdk""" - os.makedirs(dest, exist_ok=True) - _, tfname = tempfile.mkstemp(suffix=".tar") - tf = open(tfname, "wb") - bits, stat = container.get_archive(src) - for chunk in bits: - tf.write(chunk) - tf.close() - tar = tarfile.open(tfname, "r") - tar.extractall(dest) - tar.close() - os.unlink(tfname) - - def planemo_biodocker_test(self): - """planemo currently leaks dependencies if used in the same container and gets unhappy after a - first successful run. https://github.com/galaxyproject/planemo/issues/1078#issuecomment-731476930 - - Docker biocontainer has planemo with caches filled to save repeated downloads - - - """ - - def prun(container, tout, cl, user="biodocker"): - rlog = container.exec_run(cl, user=user) - slogl = str(rlog).split("\\n") - slog = "\n".join(slogl) - tout.write(f"## got rlog {slog} from {cl}\n") - - if os.path.exists(self.tlog): - tout = open(self.tlog, "a") - else: - tout = open(self.tlog, "w") - planemoimage = "quay.io/fubar2/planemo-biocontainer" - xreal = "%s.xml" % self.tool_name - repname = f"{self.tool_name}_planemo_test_report.html" - ptestrep_path = os.path.join(self.repdir, repname) - tool_name = self.tool_name - client = docker.from_env() - tvol = client.volumes.create() - tvolname = tvol.name - destdir = "/toolfactory/ptest" - imrep = os.path.join(destdir, repname) - # need to keep the container running so keep it open with sleep - # will stop and destroy it when we are done - container = client.containers.run( - planemoimage, - "sleep 120m", - detach=True, - user="biodocker", - volumes={f"{tvolname}": {"bind": "/toolfactory", "mode": "rw"}}, - ) - cl = f"mkdir -p {destdir}" - prun(container, tout, cl, user="root") - # that's how hard it is to get root on a biodocker container :( - cl = f"rm -rf {destdir}/*" - prun(container, tout, cl, user="root") - ptestpath = os.path.join(destdir, "tfout", xreal) - self.copy_to_container(self.tooloutdir, destdir, container) - cl = "chown -R biodocker /toolfactory" - prun(container, tout, cl, user="root") - rlog = container.exec_run(f"ls -la {destdir}") - ptestcl = f"planemo test --update_test_data --no_cleanup --test_data {destdir}/tfout/test-data --galaxy_root /home/biodocker/galaxy-central {ptestpath}" - try: - rlog = container.exec_run(ptestcl) - # fails because test outputs missing but updates the test-data directory - except: - e = sys.exc_info()[0] - tout.write(f"#### error: {e} from {ptestcl}\n") - cl = f"planemo test --test_output {imrep} --no_cleanup --test_data {destdir}/tfout/test-data --galaxy_root /home/biodocker/galaxy-central {ptestpath}" - try: - prun(container, tout, cl) - except: - e = sys.exc_info()[0] - tout.write(f"#### error: {e} from {ptestcl}\n") - testouts = tempfile.mkdtemp(suffix=None, prefix="tftemp", dir=".") - self.copy_from_container(destdir, testouts, container) - src = os.path.join(testouts, "ptest") - if os.path.isdir(src): - shutil.copytree(src, ".", dirs_exist_ok=True) - src = repname - if os.path.isfile(repname): - shutil.copyfile(src, ptestrep_path) - else: - tout.write(f"No output from run to shutil.copytree in {src}\n") - tout.close() - container.stop() - container.remove() - tvol.remove() - shutil.rmtree(testouts) # leave for debugging - def shedLoad(self): """ use bioblend to create new repository @@ -816,7 +753,9 @@ sto = open(self.tlog, "w") ts = toolshed.ToolShedInstance( - url=self.args.toolshed_url, key=self.args.toolshed_api_key, verify=False + url=self.args.toolshed_url, + key=self.args.toolshed_api_key, + verify=False, ) repos = ts.repositories.get_repositories() rnames = [x.get("name", "?") for x in repos] @@ -840,7 +779,9 @@ category_ids=catID, ) tid = res.get("id", None) - sto.write(f"#create_repository {self.args.tool_name} tid={tid} res={res}\n") + sto.write( + f"#create_repository {self.args.tool_name} tid={tid} res={res}\n" + ) else: i = rnames.index(self.tool_name) tid = rids[i] @@ -882,16 +823,20 @@ ] tout.write("running\n%s\n" % " ".join(cll)) subp = subprocess.run( - cll, env=self.ourenv, cwd=self.ourcwd, shell=False, stderr=tout, stdout=tout + cll, + env=self.ourenv, + cwd=self.ourcwd, + shell=False, + stderr=tout, + stdout=tout, ) tout.write( - "installed %s - got retcode %d\n" % (self.tool_name, subp.returncode) + "installed %s - got retcode %d\n" + % (self.tool_name, subp.returncode) ) tout.close() return subp.returncode - - def writeShedyml(self): """for planemo""" yuser = self.args.user_email.split("@")[0] @@ -950,7 +895,11 @@ % (tdest, self.testdir) ) tf = tarfile.open(self.newtarpath, "w:gz") - tf.add(name=self.tooloutdir, arcname=self.tool_name, filter=exclude_function) + tf.add( + name=self.tooloutdir, + arcname=self.tool_name, + filter=exclude_function, + ) tf.close() shutil.copyfile(self.newtarpath, self.args.new_tool) @@ -990,7 +939,8 @@ def main(): """ - This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml + This is a Galaxy wrapper. + It expects to be called by a special purpose tool.xml """ parser = argparse.ArgumentParser() @@ -1020,35 +970,48 @@ a("--new_tool", default="new_tool") a("--galaxy_url", default="http://localhost:8080") a("--toolshed_url", default="http://localhost:9009") - # make sure this is identical to tool_sheds_conf.xml localhost != 127.0.0.1 so validation fails + # make sure this is identical to tool_sheds_conf.xml + # localhost != 127.0.0.1 so validation fails a("--toolshed_api_key", default="fakekey") a("--galaxy_api_key", default="fakekey") a("--galaxy_root", default="/galaxy-central") a("--galaxy_venv", default="/galaxy_venv") args = parser.parse_args() assert not args.bad_user, ( - 'UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to "admin_users" in the galaxy.yml Galaxy configuration file' + 'UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy \ +admin adds %s to "admin_users" in the galaxy.yml Galaxy configuration file' % (args.bad_user, args.bad_user) ) - assert args.tool_name, "## Tool Factory expects a tool name - eg --tool_name=DESeq" + assert ( + args.tool_name + ), "## Tool Factory expects a tool name - eg --tool_name=DESeq" assert ( args.sysexe or args.packages - ), "## Tool Factory wrapper expects an interpreter or an executable package" - args.input_files = [x.replace('"', "").replace("'", "") for x in args.input_files] + ), "## Tool Factory wrapper expects an interpreter \ +or an executable package in --sysexe or --packages" + args.input_files = [ + x.replace('"', "").replace("'", "") for x in args.input_files + ] # remove quotes we need to deal with spaces in CL params for i, x in enumerate(args.additional_parameters): - args.additional_parameters[i] = args.additional_parameters[i].replace('"', "") + args.additional_parameters[i] = args.additional_parameters[i].replace( + '"', "" + ) r = ScriptRunner(args) r.writeShedyml() r.makeTool() if args.make_Tool == "generate": - retcode = r.run() # for testing toolfactory itself + retcode = r.run() r.moveRunOutputs() r.makeToolTar() else: - r.planemo_biodocker_test() # test to make outputs and then test + retcode = r.planemo_test(genoutputs=True) # this fails :( - see PR r.moveRunOutputs() r.makeToolTar() + retcode = r.planemo_test(genoutputs=False) + r.moveRunOutputs() + r.makeToolTar() + print(f"second planemo_test returned {retcode}") if args.make_Tool == "gentestinstall": r.shedLoad() r.eph_galaxy_load()