comparison toolfactory/ToolFactory.py @ 4:2a46da701dde draft

Uploaded
author fubar
date Mon, 26 Apr 2021 05:25:26 +0000
parents
children e2c8c2fa192d
comparison
equal deleted inserted replaced
3:c4f192ec521c 4:2a46da701dde
1
2 # see https://github.com/fubar2/toolfactory
3 #
4 # copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
5 #
6 # all rights reserved
7 # Licensed under the LGPL
8 # suggestions for improvement and bug fixes welcome at
9 # https://github.com/fubar2/toolfactory
10 #
11 # April 2021: Refactored into two tools - generate and test/install
12 # as part of GTN tutorial development and biocontainer adoption
13 # The tester runs planemo on a non-tested archive, creates the test outputs
14 # and returns a new proper tool with test.
15 # The tester was generated from the ToolFactory_tester.py script
16
17
18 import argparse
19 import copy
20 import json
21 import logging
22 import os
23 import re
24 import shlex
25 import shutil
26 import subprocess
27 import sys
28 import tarfile
29 import tempfile
30 import time
31
32 from bioblend import ConnectionError
33 from bioblend import galaxy
34 from bioblend import toolshed
35
36 import galaxyxml.tool as gxt
37 import galaxyxml.tool.parameters as gxtp
38
39 import lxml.etree as ET
40
41 import yaml
42
43 myversion = "V2.3 April 2021"
44 verbose = True
45 debug = True
46 toolFactoryURL = "https://github.com/fubar2/toolfactory"
47 FAKEEXE = "~~~REMOVE~~~ME~~~"
48 # need this until a PR/version bump to fix galaxyxml prepending the exe even
49 # with override.
50
51
def timenow():
    """Return the current local time as a ``dd/mm/YYYY HH:MM:SS`` string."""
    stamp_format = "%d/%m/%Y %H:%M:%S"
    local_now = time.localtime(time.time())
    return time.strftime(stamp_format, local_now)
55
# Characters that get a backslash put in front of them by cheetah_escape().
cheetah_escape_table = {"$": "\\$", "#": "\\#"}


def cheetah_escape(text):
    """Return text with each character listed in cheetah_escape_table replaced by its escaped form."""
    escaped_chars = map(lambda char: cheetah_escape_table.get(char, char), text)
    return "".join(escaped_chars)
61
def parse_citations(citations_text):
    """Split a citation blob into a list of ``(kind, text)`` tuples.

    Entries are separated by the literal marker ``**ENTRY**``. Blank entries
    are dropped and surrounding whitespace is ignored. An entry starting with
    ``doi`` yields ``("doi", rest)``; every other entry yields
    ``("bibtex", rest)`` where a leading ``bibtex`` tag, if present, is
    removed.
    """
    citation_tuples = []
    for raw_entry in citations_text.split("**ENTRY**"):
        # Strip first so an entry preceded by a newline or spaces is still
        # recognised by its prefix.
        entry = raw_entry.strip()
        if not entry:
            continue
        if entry.startswith("doi"):
            citation_tuples.append(("doi", entry[len("doi"):].strip()))
        elif entry.startswith("bibtex"):
            citation_tuples.append(("bibtex", entry[len("bibtex"):].strip()))
        else:
            # No recognised tag: keep the whole text instead of blindly
            # discarding its first six characters.
            citation_tuples.append(("bibtex", entry))
    return citation_tuples
72
class ToolTester():
    """Uncompress a passed tool tar, run planemo and rebuild a new tarball with tests.

    Requires highly insecure docker settings - like write to tool_conf.xml
    and to tools! If in a container possibly not so courageous. Fine on your
    own laptop but a security red flag for most production instances.
    """

    def __init__(self, report_dir, in_tool_archive, new_tool_archive, include_tests, galaxy_root):
        """Unpack ``in_tool_archive``, test it with planemo and write ``new_tool_archive``.

        Args:
            report_dir: directory that receives the planemo report and copies of run outputs.
            in_tool_archive: path to the untested tool tarball.
            new_tool_archive: path the rebuilt tarball is copied to.
            include_tests: when true, the test-data directory is also copied into the report.
            galaxy_root: Galaxy root handed to planemo; also used to build
                the HOME and PIP_CACHE_DIR values for the planemo run.

        Raises:
            AssertionError: if the archive is not a tar file or holds no
                tool XML directly inside its common top-level directory.
        """
        self.new_tool_archive = new_tool_archive
        self.include_tests = include_tests
        self.galaxy_root = galaxy_root
        self.repdir = report_dir
        assert in_tool_archive and tarfile.is_tarfile(in_tool_archive), (
            "in_tool_archive must be the path of a tar archive"
        )
        # this is not going to go well with arbitrary names. TODO introspect tool xml!
        with tarfile.open(in_tool_archive, "r:*") as tff:
            flist = tff.getnames()
            ourdir = os.path.commonpath(flist)  # eg pyrevpos
            self.tool_name = ourdir
            # eg planemo_test/planemo_test.xml
            ourxmls = [
                x for x in flist
                if x.lower().endswith('.xml') and os.path.split(x)[0] == ourdir
            ]
            assert len(ourxmls) > 0, "no tool xml found in the archive's top directory"
            self.ourxmls = ourxmls
            # NOTE(security): extractall() trusts the member paths stored in
            # the archive - only feed this archives from a trusted source.
            tff.extractall()
        self.tooloutdir = ourdir
        self.testdir = os.path.join(self.tooloutdir, "test-data")
        # Create the directories before planemo runs so the report
        # directory exists when planemo is told to write its report there.
        for needed in (self.tooloutdir, self.testdir, self.repdir):
            os.makedirs(needed, exist_ok=True)
        self.update_tests(ourdir)
        self.moveRunOutputs()
        self.makeToolTar()

    def call_planemo(self, xmlpath, ourdir):
        """Run ``planemo test --update_test_data`` on one tool XML.

        ``ourdir`` is accepted for interface compatibility and is not used.
        Returns the ``subprocess.CompletedProcess`` with captured text output.
        """
        # Work on a copy so the HOME / PIP_CACHE_DIR overrides reach planemo
        # without changing this process's own environment.
        penv = os.environ.copy()
        penv['HOME'] = os.path.join(self.galaxy_root, 'planemo')
        penv["PIP_CACHE_DIR"] = os.path.join(self.galaxy_root, 'pipcache')
        tool_test_output = os.path.join(
            self.repdir, f"{self.tool_name}_planemo_test_report.html"
        )
        cll = [
            "planemo",
            "test",
            "--test_output",
            os.path.abspath(tool_test_output),
            "--galaxy_root",
            self.galaxy_root,
            "--update_test_data",
            os.path.abspath(xmlpath),
        ]
        print("Call planemo cl =", cll)
        return subprocess.run(
            cll,
            capture_output=True,
            encoding='utf8',
            env=penv,
            shell=False,
        )

    def makeToolTar(self):
        """Tar the tool directory (minus the planemo report) and copy it to ``new_tool_archive``."""
        excludeme = "_planemo_test_report.html"

        def exclude_function(tarinfo):
            filename = tarinfo.name
            return None if filename.endswith(excludeme) else tarinfo

        newtar = 'new_%s_toolshed.gz' % self.tool_name
        with tarfile.open(newtar, "w:gz") as ttf:
            ttf.add(name=self.tooloutdir,
                    arcname=self.tool_name,
                    filter=exclude_function)
        shutil.copyfile(newtar, self.new_tool_archive)

    def move_One(self, scandir):
        """Copy the regular files found in ``scandir`` into the report directory.

        Files ending in ``_sample`` are skipped. Files other than .html, .gz
        and .tgz are renamed to ``<stem>_<ext>.txt`` (or ``<stem>.txt`` when
        there is no extension).
        """
        with os.scandir(scandir) as outs:
            for entry in outs:
                if not entry.is_file() or entry.name.endswith('_sample'):
                    continue
                newname = entry.name
                if not entry.name.endswith(('.html', '.gz', '.tgz')):
                    fname, ext = os.path.splitext(entry.name)
                    if len(ext) > 1:
                        newname = f"{fname}_{ext[1:]}.txt"
                    else:
                        newname = f"{fname}.txt"
                dest = os.path.join(self.repdir, newname)
                # entry.path includes scandir, so the copy works for any
                # scanned directory and not just the current one.
                shutil.copyfile(entry.path, dest)

    def moveRunOutputs(self):
        """need to move planemo or run outputs into toolfactory collection"""
        self.move_One(self.tooloutdir)
        self.move_One('.')
        if self.include_tests:
            self.move_One(self.testdir)

    def update_tests(self, ourdir):
        """Run planemo on every tool XML and log its stdout/stderr to ``<tool_name>_run_report``."""
        # Open the log once so output from every XML is kept, and close it
        # even if a planemo call raises.
        with open("%s_run_report" % (self.tool_name), 'w') as logf:
            for xmlf in self.ourxmls:
                capture = self.call_planemo(xmlf, ourdir)
                logf.write("stdout:")
                logf.write(capture.stdout)
                logf.write("stderr:")
                logf.write(capture.stderr)
192
193
class ToolConfUpdater():
    """Update config/tool_conf.xml with a new tool unpacked in /tools.

    Requires highly insecure docker settings - like write to tool_conf.xml
    and to tools! If in a container possibly not so courageous. Fine on your
    own laptop but a security red flag for most production instances.
    """

    def __init__(self, args, tool_conf_path, new_tool_archive_path, new_tool_name, tool_dir):
        """Extract the archive into ``tool_dir`` and register its XML files in the tool conf.

        Args:
            args: parsed options; ``galaxy_url`` / ``galaxy_api_key`` are read by install_deps.
            tool_conf_path: path of the tool_conf.xml to update in place.
            new_tool_archive_path: tarball holding the new tool.
            new_tool_name: accepted for interface compatibility; not used here.
            tool_dir: directory the archive is extracted into.
        """
        self.args = args
        self.tool_conf_path = tool_conf_path
        self.our_name = 'ToolFactory'
        with tarfile.open(new_tool_archive_path, "r:*") as tff:
            flist = tff.getnames()
            ourdir = os.path.commonpath(flist)  # eg pyrevpos
            self.tool_id = ourdir  # they are the same for TF tools
            ourxml = [x for x in flist if x.lower().endswith('.xml')]
            # NOTE(security): extractall() trusts the member paths stored in
            # the archive - only use with archives from a trusted source.
            tff.extractall(tool_dir)
        self.update_toolconf(ourdir, ourxml)

    def install_deps(self):
        """Ask the Galaxy server to install the dependencies of ``self.tool_id``."""
        gi = galaxy.GalaxyInstance(url=self.args.galaxy_url, key=self.args.galaxy_api_key)
        x = gi.tools.install_dependencies(self.tool_id)
        print(f"Called install_dependencies on {self.tool_id} - got {x}")

    def update_toolconf(self, ourdir, ourxml):  # path is relative to tools
        """Add each entry of ``ourxml`` not already listed to our section and rewrite the conf.

        The section named ``self.our_name`` is created at the top of the
        file when it does not exist yet. ``ourdir`` is not used.
        """
        tree = ET.parse(self.tool_conf_path)
        root = tree.getroot()
        TFsection = None
        for e in root.findall('section'):
            # .get() tolerates sections that carry no name attribute.
            if e.get('name') == self.our_name:
                TFsection = e
        if TFsection is None:
            # Give the new section a name (and id) so the lookup above
            # finds it again on the next run.
            TFsection = ET.Element(
                'section', {'id': self.our_name, 'name': self.our_name}
            )
            root.insert(0, TFsection)  # at the top!
        conf_tools = [x.get('file') for x in TFsection.findall('tool')]
        for xml in ourxml:  # may be > 1
            if xml not in conf_tools:  # new
                ET.SubElement(TFsection, 'tool', {'file': xml})
        ET.indent(tree)
        tree.write(self.tool_conf_path, pretty_print=True)
        # Dependency installation is deliberately switched off by the
        # leading False; the rest of the condition is kept for when it is
        # re-enabled.
        if False and self.args.packages and self.args.packages > '':
            self.install_deps()
241
242 class ScriptRunner:
243 """Wrapper for an arbitrary script
244 uses galaxyxml
245
246 """
247
def __init__(self, args=None):  # noqa
    """
    prepare command line cl for running the tool here
    and prepare elements needed for galaxyxml tool generation

    args is the parsed option namespace. The JSON-encoded list options
    (collection, input_files, output_files, additional_parameters,
    selecttext_parameters) are decoded here; a malformed option is reported
    on stdout and treated as an empty list.

    Raises AssertionError when args.parampass is not "0", "positional" or
    "argparse".
    """

    def load_specs(raw_items, described_as):
        """Decode each non-blank JSON string in raw_items; report and return [] on failure."""
        try:
            return [
                json.loads(x) for x in (raw_items or []) if len(x.strip()) > 1
            ]
        except (AttributeError, TypeError, ValueError):
            print(f"{described_as} {raw_items} is malformed - should be a dictionary")
            return []

    self.ourcwd = os.getcwd()
    self.collections = load_specs(args.collection, "--collections parameter")
    self.infiles = load_specs(args.input_files, "--input_files parameter")
    self.outfiles = load_specs(args.output_files, "--output_files parameter")
    self.addpar = load_specs(args.additional_parameters, "--additional_parameters")
    self.selpar = load_specs(args.selecttext_parameters, "--selecttext_parameters")
    self.args = args
    assert args.parampass in [
        "0",
        "argparse",
        "positional",
    ], 'args.parampass must be "0","positional" or "argparse"'
    self.cleanuppar()
    self.lastclredirect = None
    self.lastxclredirect = None
    self.cl = []
    self.xmlcl = []
    self.is_positional = self.args.parampass == "positional"
    if self.args.sysexe:
        # split(' ') on a value without spaces gives a one-item list
        self.executeme = self.args.sysexe.split(' ')
    elif self.args.packages:
        # first package name, without any ":version" part
        self.executeme = [self.args.packages.split(",")[0].split(":")[0].strip(), ]
    else:
        self.executeme = None
    aCL = self.cl.append
    aXCL = self.xmlcl.append
    self.tool_name = re.sub("[^a-zA-Z0-9_]+", "", args.tool_name)
    self.tool_id = self.tool_name
    self.newtool = gxt.Tool(
        self.tool_name,
        self.tool_id,
        self.args.tool_version,
        self.args.tool_desc,
        FAKEEXE,
    )
    self.newtarpath = "%s_toolshed.gz" % self.tool_name
    self.tooloutdir = "./tfout"
    self.repdir = "./TF_run_report"
    self.testdir = os.path.join(self.tooloutdir, "test-data")
    for needed in (self.tooloutdir, self.testdir, self.repdir):
        os.makedirs(needed, exist_ok=True)
    self.tinputs = gxtp.Inputs()
    self.toutputs = gxtp.Outputs()
    self.testparam = []
    if self.args.script_path:
        self.prepScript()
    if self.args.command_override:
        with open(self.args.command_override, "r") as override_file:
            self.command_override = [x.rstrip() for x in override_file]
    else:
        self.command_override = None
    if self.args.test_override:
        with open(self.args.test_override, "r") as override_file:
            self.test_override = [x.rstrip() for x in override_file]
    else:
        self.test_override = None
    # executeme is None when neither sysexe nor packages was given; the
    # command line then simply starts without an executable.
    for ex in self.executeme or []:
        aCL(ex)
        aXCL(ex)
    if self.args.script_path:
        aCL(self.sfile)
        aXCL("$runme")

    if self.args.parampass == "0":
        self.clsimple()
    elif self.args.parampass == "positional":
        self.prepclpos()
        self.clpositional()
    else:
        self.prepargp()
        self.clargparse()
374
def clsimple(self):
    """no parameters or repeats - uses < and > for i/o

    Extends self.cl with the literal names and self.xmlcl with the
    $-prefixed names of the first input and first output, then appends any
    user supplied suffix to both.
    """
    if len(self.infiles) > 0:
        source_name = self.infiles[0]["infilename"]
        self.cl.extend(["<", source_name])
        self.xmlcl.extend(["<", "$%s" % source_name])
    if len(self.outfiles) > 0:
        sink_name = self.outfiles[0]["name"]
        self.cl.extend([">", sink_name])
        self.xmlcl.extend([">", "$%s" % sink_name])
    if self.args.cl_user_suffix:  # DIY CL end
        extra_tokens = shlex.split(self.args.cl_user_suffix)
        self.cl.extend(extra_tokens)
        self.xmlcl.extend(extra_tokens)
394
def prepargp(self):
    """Build self.clsuffix / self.xclsuffix as [flag, value, override] triples for argparse style.

    An output whose origCL is STDOUT is not added to the lists; it sets
    self.lastclredirect and self.lastxclredirect instead.
    """

    def repeat_block(name):
        # Cheetah loop emitted for a repeated parameter
        return f'#for $rep in $R_{name}:\n--{name} "$rep.{name}"\n#end for'

    real = []
    templated = []
    for spec in self.infiles:
        nam = spec["infilename"]
        if spec["origCL"].strip().upper() == "STDIN":
            real.append([nam, nam, "< %s" % nam])
            templated.append([nam, nam, "< $%s" % nam])
            continue
        override = repeat_block(nam) if spec["repeat"] == "1" else ""
        real.append([spec["CL"], spec["CL"], ""])
        templated.append([spec["CL"], "$%s" % spec["CL"], override])
    for spec in self.outfiles:
        out_name = spec["name"]
        if spec["origCL"].strip().upper() == "STDOUT":
            self.lastclredirect = [">", out_name]
            self.lastxclredirect = [">", "$%s" % out_name]
            continue
        real.append([out_name, out_name, ""])
        templated.append([out_name, "$%s" % out_name, ""])
    for spec in self.addpar:
        nam = spec["name"]
        if spec["repeat"] == "1":
            override = repeat_block(nam)
        else:
            override = spec["override"]
        real.append([spec["CL"], nam, override])
        templated.append([spec["CL"], '"$%s"' % nam, override])
    for spec in self.selpar:
        real.append([spec["CL"], spec["name"], spec["override"]])
        templated.append([spec["CL"], '"$%s"' % spec["name"], spec["override"]])
    self.xclsuffix = templated
    self.clsuffix = real
441
def prepclpos(self):
    """Build self.clsuffix / self.xclsuffix as [ordinal, value, override] triples in positional order.

    An input whose origCL is STDIN is given the ordinal "999". An output
    whose origCL is STDOUT is not added to the lists; it sets
    self.lastclredirect and self.lastxclredirect instead. Both lists are
    sorted by the numeric value of the ordinal.
    """

    def by_ordinal(entry):
        # Sort numerically so "10" follows "2"; anything that is not a
        # plain integer goes last. Ties fall back to comparing the triples.
        position = entry[0]
        rank = int(position) if position.isdigit() else float("inf")
        return (rank, entry)

    clsuffix = []
    xclsuffix = []
    for p in self.infiles:
        if p["origCL"].strip().upper() == "STDIN":
            # plain name for the real command line, $name for the template
            appendme = ["999", p["infilename"], "< %s" % p["infilename"]]
            xappendme = ["999", p["infilename"], "< $%s" % p["infilename"]]
        else:
            appendme = [p["CL"], p["infilename"], ""]
            xappendme = [p["CL"], "$%s" % p["infilename"], ""]
        clsuffix.append(appendme)
        xclsuffix.append(xappendme)
    for p in self.outfiles:
        if p["origCL"].strip().upper() == "STDOUT":
            self.lastclredirect = [">", p["name"]]
            self.lastxclredirect = [">", "$%s" % p["name"]]
        else:
            clsuffix.append([p["CL"], p["name"], ""])
            xclsuffix.append([p["CL"], "$%s" % p["name"], ""])
    for p in self.addpar:
        nam = p["name"]
        if p["repeat"] == "1":  # repeats make NO sense
            print(f'### warning. Repeats for {nam} ignored - not permitted in positional parameter command lines!')
        over = p["override"]
        clsuffix.append([p["CL"], nam, over])
        xclsuffix.append([p["CL"], '"$%s"' % nam, over])
    for p in self.selpar:
        clsuffix.append([p["CL"], p["name"], p["override"]])
        xclsuffix.append([p["CL"], '"$%s"' % p["name"], p["override"]])
    clsuffix.sort(key=by_ordinal)
    xclsuffix.sort(key=by_ordinal)
    self.xclsuffix = xclsuffix
    self.clsuffix = clsuffix
484
def prepScript(self):
    """Read the user script and stage the copies needed for running and for the tool XML.

    Sets self.script (right-stripped lines joined by newlines), self.sfile
    (a temporary file holding the script), self.spacedScript (non-blank
    lines prefixed with a space), self.escapedScript (the lines wrapped in
    #raw / #end raw) and writes ``<tool_name>.<executable>`` in the current
    directory.

    Raises AssertionError when the script has no non-blank line.
    """
    with open(self.args.script_path, "r") as script_in:
        rx = [x.rstrip() for x in script_in]
    assert any(x.strip() for x in rx), "Supplied script is empty. Cannot run"
    self.script = "\n".join(rx)
    fhandle, self.sfile = tempfile.mkstemp(
        prefix=self.tool_name, suffix="_%s" % (self.executeme[0])
    )
    # Write through the descriptor mkstemp returned so it gets closed
    # instead of leaking.
    with os.fdopen(fhandle, "w") as tscript:
        tscript.write(self.script)
    self.spacedScript = [f" {x}" for x in rx if x.strip()]
    self.escapedScript = ["#raw"] + rx + ["#end raw"]
    art = "%s.%s" % (self.tool_name, self.executeme[0])
    with open(art, "wb") as artifact:
        artifact.write(bytes(self.script, "utf8"))
505
def cleanuppar(self):
    """ positional parameters are complicated by their numeric ordinal

    In positional mode every CL value must be an integer (STDIN / STDOUT
    are also accepted for inputs / outputs). Each input is replaced by a
    copy carrying origCL and infilename; outputs and additional parameters
    get origCL added in place.
    """
    complaint = "Positional parameters must be ordinal integers - got %s for %s"
    mode = self.args.parampass
    if mode == "positional":
        for spec in self.infiles:
            assert (
                spec["CL"].isdigit() or spec["CL"].strip().upper() == "STDIN"
            ), complaint % (spec["CL"], spec["label"])
        for spec in self.outfiles:
            assert (
                spec["CL"].isdigit() or spec["CL"].strip().upper() == "STDOUT"
            ), complaint % (spec["CL"], spec["name"])
        for spec in self.addpar:
            assert spec["CL"].isdigit(), complaint % (spec["CL"], spec["name"])
    name_from_label = mode in ["positional", "0"]
    for index in range(len(self.infiles)):
        infp = copy.copy(self.infiles[index])
        infp["origCL"] = infp["CL"]
        if name_from_label:
            infp["infilename"] = infp["label"].replace(" ", "_")
        else:
            infp["infilename"] = infp["CL"]
        self.infiles[index] = infp
    for group in (self.outfiles, self.addpar):
        for spec in group:
            spec["origCL"] = spec["CL"]  # keep copy
544
def clpositional(self):
    """Append the prepared positional values to self.cl and self.xmlcl.

    The XML command line additionally gets the stdout redirect when one was
    recorded; any user supplied suffix is appended to both.
    """
    # inputs in order then params
    for _ordinal, value, _override in self.clsuffix:
        self.cl.append("%s" % value if " " in value else value)
    for _ordinal, value, _override in self.xclsuffix:
        self.xmlcl.append(value)
    if self.lastxclredirect:
        self.xmlcl.append(self.lastxclredirect[0])
        self.xmlcl.append(self.lastxclredirect[1])
    if self.args.cl_user_suffix:  # DIY CL end
        extra_tokens = shlex.split(self.args.cl_user_suffix)
        self.cl.extend(extra_tokens)
        self.xmlcl.extend(extra_tokens)
564
565
def clargparse(self):
    """argparse style

    Inputs then params in argparse named form. A non-empty override
    replaces the dashed flag; on the XML command line an override is
    emitted on its own, without the value.
    """

    def dashed(flag):
        # single character flags get one dash, longer ones two
        return "-%s" % flag if len(flag.strip()) == 1 else "--%s" % flag

    for flag, value, override in self.xclsuffix:
        if override > "":
            self.xmlcl.append(override)
            continue
        self.xmlcl.append(dashed(flag))
        self.xmlcl.append(value)
    for flag, value, override in self.clsuffix:
        self.cl.append(override if override > "" else dashed(flag))
        self.cl.append(value)
    if self.lastxclredirect:
        self.xmlcl.append(self.lastxclredirect[0])
        self.xmlcl.append(self.lastxclredirect[1])
    if self.args.cl_user_suffix:  # DIY CL end
        extra_tokens = shlex.split(self.args.cl_user_suffix)
        self.cl.extend(extra_tokens)
        self.xmlcl.extend(extra_tokens)
600
def getNdash(self, newname):
    """Return the dash count for newname: 0 when positional, 1 for names shorter than two characters, else 2."""
    if self.is_positional:
        return 0
    return 1 if len(newname) < 2 else 2
609
def doXMLparam(self):  # noqa
    """Add all needed elements to tool

    Fills self.toutputs, self.tinputs and self.testparam from
    self.outfiles, self.infiles, self.addpar, self.selpar and
    self.collections.

    Raises:
        ValueError: for an unrecognised additional or select parameter
            type, or a non-integer ordinal in positional mode.
    """

    def output_test(name, test):
        """Build the TestOutput for one output from its "kind:value" test setting."""
        sample = "%s_sample" % name
        detail = test.partition(":")[2].strip()
        if test.startswith("diff"):
            # number of differing lines tolerated; 0 unless a number is given
            ld = int(detail) if detail.isdigit() else 0
            return gxtp.TestOutput(
                name=name, value=sample, compare="diff", lines_diff=ld
            )
        if test.startswith("sim_size"):
            # a value with a "." is a fraction (capped at 1.0), otherwise an
            # absolute size; with no value both stay unset
            delta = None
            delta_frac = None
            if detail:
                if "." in detail:
                    delta_frac = min(1.0, float(detail))
                else:
                    delta = int(detail)
            return gxtp.TestOutput(
                name=name,
                value=sample,
                compare="sim_size",
                delta=delta,
                delta_frac=delta_frac,
            )
        return gxtp.TestOutput(name=name, value=sample, compare=test)

    def register_input(element, name, test_value, repeated, label):
        """Append an input element and its test parameter, wrapped in a repeat when asked."""
        if repeated:
            repe = gxtp.Repeat(name=f"R_{name}", title=f"Add as many {label} as needed")
            repe.append(element)
            self.tinputs.append(repe)
            tparm = gxtp.TestRepeat(name=f"R_{name}")
            tparm.append(gxtp.TestParam(name, value=test_value))
            self.testparam.append(tparm)
        else:
            self.tinputs.append(element)
            self.testparam.append(gxtp.TestParam(name, value=test_value))

    for p in self.outfiles:
        newname = p["name"]
        test = p["test"].strip()
        oldcl = p["origCL"]
        aparm = gxtp.OutputData(
            name=newname,
            format=p["format"],
            num_dashes=self.getNdash(p["CL"]),
            label=newname,
        )
        aparm.positional = self.is_positional
        if self.is_positional:
            if oldcl.strip().upper() == "STDOUT":
                aparm.positional = 9999999
                aparm.command_line_override = "> $%s" % newname
            else:
                aparm.positional = int(oldcl)
                aparm.command_line_override = "$%s" % newname
        self.toutputs.append(aparm)
        if test:
            self.testparam.append(output_test(newname, test))

    for p in self.infiles:
        newname = p["infilename"]
        alab = p["label"] if len(p["label"]) > 0 else p["CL"]
        aninput = gxtp.DataParam(
            newname,
            optional=False,
            label=alab,
            help=p["help"],
            format=p["format"],
            multiple=False,
            num_dashes=self.getNdash(newname),
        )
        aninput.positional = self.is_positional
        if self.is_positional:
            if p["origCL"].strip().upper() == "STDIN":
                aninput.positional = 9999998
                # stdin is fed with "<", as doNoXMLparam does
                aninput.command_line_override = "< $%s" % newname
            else:
                aninput.positional = int(p["origCL"])
                aninput.command_line_override = "$%s" % newname
        register_input(
            aninput,
            newname,
            "%s_sample" % newname,
            p.get("repeat", "0") == "1",
            alab,
        )

    param_classes = {
        "text": gxtp.TextParam,
        "integer": gxtp.IntegerParam,
        "float": gxtp.FloatParam,
        "boolean": gxtp.BooleanParam,
    }
    for p in self.addpar:
        newname = p["name"]
        newval = p["value"]
        newtype = p["type"]
        newlabel = p["label"] if len(p["label"]) > 0 else newname
        if newtype not in param_classes:
            raise ValueError(
                'Unrecognised parameter type "%s" for additional parameter %s in makeXML'
                % (newtype, newname)
            )
        aparm = param_classes[newtype](
            newname,
            label=newlabel,
            help=p["help"],
            value=newval,
            num_dashes=self.getNdash(newname),
        )
        aparm.positional = self.is_positional
        if self.is_positional:
            aparm.positional = int(p["origCL"])
        register_input(aparm, newname, newval, p["repeat"] == "1", newlabel)

    for p in self.selpar:
        newname = p["name"]
        newval = p["value"]
        newtype = p["type"]
        newlabel = p["label"] if len(p["label"]) > 0 else newname
        if newtype != "selecttext":
            raise ValueError(
                'Unrecognised parameter type "%s" for selecttext parameter %s in makeXML'
                % (newtype, newname)
            )
        newtext = p["texts"]
        aparm = gxtp.SelectParam(
            newname,
            label=newlabel,
            help=p["help"],
            num_dashes=self.getNdash(newname),
        )
        for i, optval in enumerate(newval):
            aparm.append(gxtp.SelectOption(value=optval, text=newtext[i]))
        aparm.positional = self.is_positional
        if self.is_positional:
            aparm.positional = int(p["CL"])
        self.tinputs.append(aparm)
        self.testparam.append(gxtp.TestParam(newname, value=newval))

    for p in self.collections:
        newname = p["name"]
        collect = gxtp.OutputCollection(newname, label=p["label"], type=p["kind"])
        disc = gxtp.DiscoverDatasets(
            pattern=p["discover"], directory=f"{newname}", visible="false"
        )
        collect.append(disc)
        self.toutputs.append(collect)
        try:
            tparm = gxtp.TestOutputCollection(newname)  # broken until PR merged.
            self.testparam.append(tparm)
        except Exception:
            # deliberate best effort: the tool is still generated without
            # a collection test
            print("#### WARNING: Galaxyxml version does not have the PR merged yet - tests for collections must be over-ridden until then!")
825
def doNoXMLparam(self):
    """filter style package - stdin to stdout

    Registers at most one input (read with "<") and the first output
    (written with ">") together with their test parameters.

    Raises AssertionError when more than one input file was supplied.
    """
    if len(self.infiles) > 0:
        spec = self.infiles[0]
        alab = spec["label"]
        if len(alab) == 0:
            alab = spec["infilename"]
        max1s = (
            "Maximum one input if parampass is 0 but multiple input files supplied - %s"
            % str(self.infiles)
        )
        assert len(self.infiles) == 1, max1s
        in_name = spec["infilename"]
        aninput = gxtp.DataParam(
            in_name,
            optional=False,
            label=alab,
            help=spec["help"],
            format=spec["format"],
            multiple=False,
            num_dashes=0,
        )
        aninput.command_line_override = "< $%s" % in_name
        aninput.positional = True
        self.tinputs.append(aninput)
        self.testparam.append(
            gxtp.TestParam(name=in_name, value="%s_sample" % in_name)
        )
    if len(self.outfiles) > 0:
        out_spec = self.outfiles[0]
        out_name = out_spec["name"]
        anout = gxtp.OutputData(out_name, format=out_spec["format"], num_dashes=0)
        anout.command_line_override = "> $%s" % out_name
        anout.positional = self.is_positional
        self.toutputs.append(anout)
        self.testparam.append(
            gxtp.TestOutput(name=out_name, value="%s_sample" % out_name)
        )
861
def makeXML(self):  # noqa
    """
    Create a Galaxy xml tool wrapper for the new script
    Uses galaxyxml
    Hmmm. How to get the command line into correct order...

    Writes ``<tool_name>.xml`` in the current directory. Exits with status
    2 when the packages string cannot be parsed.
    """
    if self.command_override:
        self.newtool.command_override = self.command_override  # config file
    else:
        self.newtool.command_override = self.xmlcl
    cite = gxtp.Citations()
    cite.append(gxtp.Citation(type="doi", value="10.1093/bioinformatics/bts573"))
    self.newtool.citations = cite
    safertext = ""
    if self.args.help_text:
        with open(self.args.help_text, "r") as helpfile:
            helptext = helpfile.readlines()
        # NOTE(review): each line still ends with its own newline, so this
        # join double-spaces the help text - kept as is, confirm intent.
        safertext = "\n".join([cheetah_escape(x) for x in helptext])
    if len(safertext.strip()) == 0:
        safertext = (
            "Ask the tool author (%s) to rebuild with help text please\n"
            % (self.args.user_email)
        )
    if self.args.script_path:
        if len(safertext) > 0:
            safertext = safertext + "\n\n------\n"  # transition allowed!
        scr = [x for x in self.spacedScript if x.strip() > ""]
        scr.insert(0, "\n\nScript::\n")
        if len(scr) > 300:
            scr = (
                scr[:100]
                + [" >300 lines - stuff deleted", " ......"]
                + scr[-100:]
            )
        scr.append("\n")
        safertext = safertext + "\n".join(scr)
    self.newtool.help = safertext
    self.newtool.version_command = f'echo "{self.args.tool_version}"'
    std = gxtp.Stdios()
    std.append(gxtp.Stdio())
    self.newtool.stdios = std
    requirements = gxtp.Requirements()
    if self.args.packages:
        for d in self.args.packages.split(","):
            # accept name==ver, name=ver and name:ver
            d = d.replace("==", ":").replace("=", ":")
            try:
                packg, ver = d.split(":") if ":" in d else (d, "")
            except ValueError:
                print('### malformed packages string supplied - cannot parse =', self.args.packages)
                sys.exit(2)
            requirements.append(
                gxtp.Requirement("package", packg.strip(), ver.strip())
            )
    self.newtool.requirements = requirements
    if self.args.parampass == "0":
        self.doNoXMLparam()
    else:
        self.doXMLparam()
    self.newtool.outputs = self.toutputs
    self.newtool.inputs = self.tinputs
    if self.args.script_path:
        configfiles = gxtp.Configfiles()
        configfiles.append(
            gxtp.Configfile(name="runme", text="\n".join(self.escapedScript))
        )
        self.newtool.configfiles = configfiles
    tests = gxtp.Tests()
    test_a = gxtp.Test()
    for tp in self.testparam:
        test_a.append(tp)
    tests.append(test_a)
    self.newtool.tests = tests
    self.newtool.add_comment(
        "Created by %s at %s using the Galaxy Tool Factory."
        % (self.args.user_email, timenow())
    )
    self.newtool.add_comment("Source in git at: %s" % (toolFactoryURL))
    exml0 = self.newtool.export()
    exml = exml0.replace(FAKEEXE, "")  # temporary work around until PR accepted
    if self.test_override:
        # cannot do this inside galaxyxml as it expects lxml objects for tests
        part1 = exml.split("<tests>")[0]
        part2 = exml.split("</tests>")[1]
        exml = "%s\n%s\n%s" % (part1, "\n".join(self.test_override), part2)
    with open("%s.xml" % self.tool_name, "w") as xf:
        xf.write(exml)
        xf.write("\n")
    # ready for the tarball
960
def run(self, report_fail=False):  # noqa
    """
    generate test outputs by running a command line
    won't work if command or test override in play - planemo is the
    easiest way to generate test outputs for that case so is
    automagically selected

    Runs self.cl, then copies outputs and inputs into the test-data and
    report directories, and copies the regular files of the current
    directory into the report directory.

    Args:
        report_fail: when true, print a warning for each expected output
            file that was not produced.

    Returns:
        The return code of the command.
    """
    scl = " ".join(self.cl)
    logname = f"{self.tool_name}_runner_log"
    if self.args.parampass != "0":
        if self.lastclredirect:
            logf = open(self.lastclredirect[1], "wb")  # is name of an output file
        else:
            logf = open(logname, 'w')
            logf.write("No dependencies so sending CL = '%s' to the fast direct runner instead of planemo to generate tests" % scl)
            # flush so this line lands before the command's own output
            logf.flush()
        with logf:
            subp = subprocess.run(
                self.cl, shell=False, stdout=logf, stderr=logf
            )
        retval = subp.returncode
    else:  # work around special case - stdin and write to stdout
        opened = []
        try:
            if len(self.infiles) > 0:
                sti = open(self.infiles[0]["name"], "rb")
                opened.append(sti)
            else:
                sti = sys.stdin
            if len(self.outfiles) > 0:
                sto = open(self.outfiles[0]["name"], "wb")
                opened.append(sto)
            else:
                sto = sys.stdout
            subp = subprocess.run(
                self.cl, shell=False, stdout=sto, stdin=sti
            )
        finally:
            # close only what was opened here - never sys.stdin / sys.stdout
            for handle in opened:
                handle.close()
        retval = subp.returncode
    for p in self.outfiles:
        oname = p["name"]
        tdest = os.path.join(self.testdir, "%s_sample" % oname)
        if not os.path.isfile(tdest):
            if os.path.isfile(oname):
                shutil.copyfile(oname, tdest)
                dest = os.path.join(self.repdir, "%s.sample.%s" % (oname, p['format']))
                shutil.copyfile(oname, dest)
            elif report_fail:
                print(
                    "###Tool may have failed - output file %s not found in testdir after planemo run %s."
                    % (oname, self.testdir)
                )
    for p in self.infiles:
        pth = p["name"]
        dest = os.path.join(self.testdir, "%s_sample" % p["infilename"])
        shutil.copyfile(pth, dest)
        dest = os.path.join(self.repdir, "%s_sample.%s" % (p["infilename"], p["format"]))
        shutil.copyfile(pth, dest)
    with os.scandir('.') as outs:
        for entry in outs:
            if not entry.is_file() or entry.name.endswith('_sample'):
                continue
            newname = entry.name
            if not entry.name.endswith(('.html', '.gz', '.tgz')):
                fname, ext = os.path.splitext(entry.name)
                if len(ext) > 1:
                    newname = f"{fname}_{ext[1:]}.txt"
                else:
                    newname = f"{fname}.txt"
            dest = os.path.join(self.repdir, newname)
            shutil.copyfile(entry.name, dest)
    return retval
1034
def writeShedyml(self):
    """for planemo

    Writes a ``.shed.yml`` into the tool output directory; the owner is the
    part of the user email before the "@".
    """
    yuser = self.args.user_email.split("@")[0]
    yfname = os.path.join(self.tooloutdir, ".shed.yml")
    odict = {
        "name": self.tool_name,
        "owner": yuser,
        "type": "unrestricted",
        "description": self.args.tool_desc,
        "synopsis": self.args.tool_desc,
        "category": "TF Generated Tools",
    }
    # the context manager closes the file even if the dump raises
    with open(yfname, "w") as yamlf:
        yaml.dump(odict, yamlf, allow_unicode=True)
1050
def makeTool(self):
    """Generate the tool XML and stage script, XML and input samples.

    Steps, in order:
      1. build the tool XML with ``doNoXMLparam`` or ``makeXML``;
      2. if a script was supplied, copy it into ``self.tooloutdir`` unless
         a file of that name is already there;
      3. copy ``<tool_name>.xml`` from the working directory into
         ``self.tooloutdir``;
      4. copy every input file into ``self.testdir`` and ``self.repdir``
         as a ``_sample`` file.
    """
    # NOTE(review): main() declares --parampass without a type, so argparse
    # yields a string; this integer comparison only succeeds if the value is
    # converted somewhere not visible here - confirm.
    build_xml = self.doNoXMLparam if self.args.parampass == 0 else self.makeXML
    build_xml()

    if self.args.script_path:
        script_target = os.path.join(self.tooloutdir, self.sfile)
        if not os.path.exists(script_target):
            shutil.copyfile(self.sfile, script_target)

    xml_name = "%s.xml" % self.tool_name
    shutil.copyfile(xml_name, os.path.join(self.tooloutdir, xml_name))

    for infile in self.infiles:
        source = infile["name"]
        # Copy used as planemo test data.
        test_copy = os.path.join(self.testdir, "%s_sample" % infile["infilename"])
        shutil.copyfile(source, test_copy)
        # Copy for the report directory, carrying the format as extension.
        report_copy = os.path.join(
            self.repdir,
            "%s_sample.%s" % (infile["infilename"], infile["format"]),
        )
        shutil.copyfile(source, report_copy)
1070
def makeToolTar(self, report_fail=False):
    """Move outputs into test-data and build the tool tarball.

    For every declared output, if ``<name>_sample`` is not already in
    ``self.testdir`` and ``<name>`` exists there, the output is copied to
    ``<testdir>/<name>_sample`` and ``<repdir>/<name>.sample``. The whole
    of ``self.tooloutdir`` is then archived (gzip) under the top-level
    name ``self.tool_name`` at ``self.newtarpath``, leaving out planemo
    test reports, and the archive is copied to ``self.args.new_tool``.

    Args:
        report_fail: when true, print a warning for each expected output
            that could not be found.
    """
    excludeme = "_planemo_test_report.html"

    def exclude_function(tarinfo):
        """tarfile filter: drop planemo test reports, keep everything else."""
        return None if tarinfo.name.endswith(excludeme) else tarinfo

    for p in self.outfiles:
        oname = p["name"]
        tdest = os.path.join(self.testdir, "%s_sample" % oname)
        src = os.path.join(self.testdir, oname)
        if os.path.isfile(tdest):
            # A sample already exists; never overwrite it.
            continue
        if os.path.isfile(src):
            shutil.copyfile(src, tdest)
            dest = os.path.join(self.repdir, "%s.sample" % (oname))
            shutil.copyfile(src, dest)
        elif report_fail:
            print(
                "###Tool may have failed - output file %s not found in testdir after planemo run %s."
                % (tdest, self.testdir)
            )
    # The context manager guarantees the archive is finalised and the
    # handle released even if adding a member raises.
    with tarfile.open(self.newtarpath, "w:gz") as tf:
        tf.add(
            name=self.tooloutdir,
            arcname=self.tool_name,
            filter=exclude_function,
        )
    shutil.copyfile(self.newtarpath, self.args.new_tool)
1102
def moveRunOutputs(self):
    """Copy planemo or run outputs into the toolfactory report collection.

    Every regular file directly inside ``self.tooloutdir`` is copied to
    ``self.repdir``: ``.html`` files keep their name, any other file has
    its dots replaced by underscores and ``.txt`` appended.

    When ``self.args.include_tests`` is set, regular files directly inside
    ``self.testdir`` are copied too, except planemo test reports and
    ``.tgz``/``.json`` files. ``.yml``/``.xml``/``.yaml`` files get the
    underscore + ``.txt`` renaming, names without a dot get ``.txt``
    appended, and everything else keeps its name.
    """
    with os.scandir(self.tooloutdir) as outs:
        for entry in outs:
            if not entry.is_file():
                continue
            # Default to the entry's own name so .html files are copied
            # under their real name; previously newname was left unbound
            # (or stale from the prior entry) for them.
            newname = entry.name
            if not entry.name.endswith('.html'):
                newname = f"{entry.name.replace('.','_')}.txt"
            dest = os.path.join(self.repdir, newname)
            src = os.path.join(self.tooloutdir, entry.name)
            shutil.copyfile(src, dest)
    if self.args.include_tests:
        with os.scandir(self.testdir) as outs:
            for entry in outs:
                if (not entry.is_file()) or entry.name.endswith(
                    "_planemo_test_report.html"
                ):
                    continue
                if "." in entry.name:
                    _, ext = os.path.splitext(entry.name)
                    if ext in [".tgz", ".json"]:
                        continue
                    if ext in [".yml", ".xml", ".yaml"]:
                        newname = f"{entry.name.replace('.','_')}.txt"
                    else:
                        newname = entry.name
                else:
                    newname = f"{entry.name}.txt"
                dest = os.path.join(self.repdir, newname)
                src = os.path.join(self.testdir, entry.name)
                shutil.copyfile(src, dest)
1135
1136
def main():
    """
    Command-line entry point for the ToolFactory Galaxy wrapper.

    It expects to be called by a special purpose tool.xml. The command line
    is parsed and validated, then a ScriptRunner writes the .shed.yml, the
    tool XML and the tool archive. With --run_test the tool is run directly
    (no packages, or only "bash") or handed to ToolTester; with --install
    a ToolConfUpdater is created for the new archive.

    Exits through ``sys.exit`` with a message (exit status 1) when the
    caller is flagged as unauthorised, the tool name is empty, or neither
    --sysexe nor --packages is supplied.
    """
    parser = argparse.ArgumentParser()
    a = parser.add_argument
    a("--script_path", default=None)
    a("--history_test", default=None)
    a("--cl_user_suffix", default=None)
    a("--sysexe", default=None)
    a("--packages", default=None)
    a("--tool_name", default="newtool")
    a("--tool_dir", default=None)
    a("--input_files", default=[], action="append")
    a("--output_files", default=[], action="append")
    a("--user_email", default="Unknown")
    a("--bad_user", default=None)
    a("--help_text", default=None)
    a("--tool_desc", default=None)
    a("--tool_version", default=None)
    a("--citations", default=None)
    a("--command_override", default=None)
    a("--test_override", default=None)
    a("--additional_parameters", action="append", default=[])
    a("--selecttext_parameters", action="append", default=[])
    a("--edit_additional_parameters", action="store_true", default=False)
    a("--parampass", default="positional")
    a("--tfout", default="./tfout")
    a("--new_tool", default="new_tool")
    a("--galaxy_root", default="/galaxy-central")
    a("--galaxy_venv", default="/galaxy_venv")
    a("--collection", action="append", default=[])
    a("--include_tests", default=False, action="store_true")
    a("--install", default=False, action="store_true")
    a("--run_test", default=False, action="store_true")
    a("--local_tools", default='tools')  # relative to galaxy_root
    # NOTE(review): this option is parsed but never read below; the install
    # step always uses <galaxy_root>/config/tool_conf.xml.
    a("--tool_conf_path", default='/galaxy_root/config/tool_conf.xml')
    a("--galaxy_url", default="http://localhost:8080")
    a("--toolshed_url", default="http://localhost:9009")
    # make sure this is identical to tool_sheds_conf.xml
    # localhost != 127.0.0.1 so validation fails
    a("--toolshed_api_key", default="fakekey")
    # NOTE(review): hard-coded default API key - confirm it is a throwaway
    # development credential and not a real secret.
    a("--galaxy_api_key", default="8993d65865e6d6d1773c2c34a1cc207d")
    args = parser.parse_args()
    # Explicit checks instead of ``assert``: assertions are removed when
    # Python runs with -O, which would silently disable the authorisation
    # gate. sys.exit(message) writes the message to stderr and exits with
    # status 1, as the uncaught AssertionError did. Messages are built by
    # implicit concatenation so no source indentation leaks into them.
    if args.bad_user:
        sys.exit(
            "UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy "
            'admin adds %s to "admin_users" in the galaxy.yml Galaxy '
            "configuration file" % (args.bad_user, args.bad_user)
        )
    if not args.tool_name:
        sys.exit("## Tool Factory expects a tool name - eg --tool_name=DESeq")
    if not (args.sysexe or args.packages):
        sys.exit(
            "## Tool Factory wrapper expects an interpreter "
            "or an executable package in --sysexe or --packages"
        )
    print('Hello from', os.getcwd())
    r = ScriptRunner(args)
    r.writeShedyml()
    r.makeTool()
    r.makeToolTar()
    if args.run_test:
        if not args.packages or args.packages.strip() == "bash":
            # No dependencies to resolve: run directly, then rebuild the
            # archive so it includes the freshly generated outputs.
            r.run()
            r.makeToolTar()
        else:
            # The instance is never used afterwards; presumably the
            # constructor performs the planemo test run - confirm.
            tt = ToolTester(
                report_dir=r.repdir,
                in_tool_archive=r.newtarpath,
                new_tool_archive=r.args.new_tool,
                galaxy_root=args.galaxy_root,
                include_tests=False,
            )
    if args.install:
        # Install errors are deliberately left to propagate (no
        # try/except), so a failed install is visible to the caller.
        # The instance is never used afterwards; presumably the
        # constructor performs the installation - confirm.
        tcu = ToolConfUpdater(
            args=args,
            tool_dir=os.path.join(args.galaxy_root, args.local_tools),
            new_tool_archive_path=r.newtarpath,
            tool_conf_path=os.path.join(args.galaxy_root, 'config', 'tool_conf.xml'),
            new_tool_name=r.tool_name,
        )
1211
# Run the wrapper only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
1214