Mercurial > repos > fubar > toolfactory2
comparison toolfactory/rgToolFactory2.py @ 3:290f552d7e05 draft default tip
Uploaded
author | fubar |
---|---|
date | Sat, 17 Apr 2021 22:58:34 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:9fd3d83e1bac | 3:290f552d7e05 |
---|---|
1 # replace with shebang for biocontainer | |
2 # see https://github.com/fubar2/toolfactory | |
3 # | |
4 # copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012 | |
5 # | |
6 # all rights reserved | |
7 # Licensed under the LGPL | |
8 # suggestions for improvement and bug fixes welcome at | |
9 # https://github.com/fubar2/toolfactory | |
10 # | |
11 # July 2020: BCC was fun and I feel like rip van winkle after 5 years. | |
12 # Decided to | |
13 # 1. Fix the toolfactory so it works - done for simplest case | |
14 # 2. Fix planemo so the toolfactory function works | |
15 # 3. Rewrite bits using galaxyxml functions where that makes sense - done | |
16 | |
17 import argparse | |
18 import copy | |
19 import json | |
20 import logging | |
21 import os | |
22 import re | |
23 import shlex | |
24 import shutil | |
25 import subprocess | |
26 import sys | |
27 import tarfile | |
28 import tempfile | |
29 import time | |
30 | |
31 from bioblend import ConnectionError | |
32 from bioblend import toolshed | |
33 | |
34 import galaxyxml.tool as gxt | |
35 import galaxyxml.tool.parameters as gxtp | |
36 | |
37 import lxml | |
38 | |
39 import yaml | |
40 | |
# Human-readable release label for this ToolFactory version.
myversion = "V2.2 February 2021"
# Module-level switches.
verbose = True
debug = True
# Project home page; makeXML writes it into a comment in each generated tool XML.
toolFactoryURL = "https://github.com/fubar2/toolfactory"
# NOTE(review): presumably only here so the lxml import counts as used - confirm.
foo = len(lxml.__version__)
# Placeholder executable name passed to galaxyxml; makeXML strips it from the
# exported XML again.
FAKEEXE = "~~~REMOVE~~~ME~~~"
# need this until a PR/version bump to fix galaxyxml prepending the exe even
# with override.
49 | |
50 | |
def timenow():
    """Return the current local time formatted as DD/MM/YYYY HH:MM:SS."""
    right_now = time.localtime(time.time())
    return time.strftime("%d/%m/%Y %H:%M:%S", right_now)
54 | |
55 | |
# Characters that Cheetah treats specially, mapped to their backslash-escaped form.
cheetah_escape_table = {"$": "\\$", "#": "\\#"}


def cheetah_escape(text):
    """Return text with every ``$`` and ``#`` prefixed by a backslash.

    All other characters are passed through unchanged.
    """
    pieces = []
    for char in text:
        pieces.append(cheetah_escape_table.get(char, char))
    return "".join(pieces)
62 | |
63 | |
def parse_citations(citations_text):
    """Split a citation blob into ``(kind, text)`` tuples.

    citations_text holds entries separated by the literal marker
    ``**ENTRY**``. Each entry is expected to start with ``doi`` or
    ``bibtex``; the prefix is removed and the remainder stripped.

    Returns a list of ``("doi", value)`` / ``("bibtex", value)`` tuples in
    input order. Blank entries are skipped.
    """
    citation_tuples = []
    for entry in citations_text.split("**ENTRY**"):
        # Strip first: an entry that follows a newline after the marker would
        # otherwise fail the prefix test and lose its first six characters.
        entry = entry.strip()
        if not entry:
            continue
        if entry.startswith("doi"):
            citation_tuples.append(("doi", entry[len("doi"):].strip()))
        elif entry.startswith("bibtex"):
            citation_tuples.append(("bibtex", entry[len("bibtex"):].strip()))
        else:
            # No recognised prefix - keep the whole text as bibtex instead of
            # blindly chopping len("bibtex") characters off the front.
            citation_tuples.append(("bibtex", entry))
    return citation_tuples
74 | |
75 | |
76 class ScriptRunner: | |
77 """Wrapper for an arbitrary script | |
78 uses galaxyxml | |
79 | |
80 """ | |
81 | |
def __init__(self, args=None):  # noqa
    """
    prepare command line cl for running the tool here
    and prepare elements needed for galaxyxml tool generation

    args is the parsed command line namespace. The JSON encoded list
    arguments (collection, input_files, output_files,
    additional_parameters, selecttext_parameters) are decoded first, then
    the two command lines are assembled: self.cl for running the tool
    here and self.xmlcl for the generated tool XML.

    Raises ValueError when one of the required JSON list arguments cannot
    be decoded, and AssertionError when args.parampass is not one of
    "0", "positional" or "argparse".
    """

    def load_json_list(values, complaint):
        """Decode each non-trivial string in values as JSON.

        Prints complaint and raises ValueError if decoding fails.
        """
        try:
            return [json.loads(x) for x in values if len(x.strip()) > 1]
        except Exception as exc:
            print(complaint)
            raise ValueError(complaint) from exc

    self.ourcwd = os.getcwd()
    self.collections = []
    if len(args.collection) > 0:
        try:
            self.collections = load_json_list(
                args.collection,
                f"--collections parameter {str(args.collection)} is malformed - should be a dictionary",
            )
        except ValueError:
            # Collections have an empty default, so the complaint is printed
            # and the run continues without them.
            pass
    # The remaining lists are always read by cleanuppar, so a malformed value
    # is fatal. Failing here reports the real cause rather than a later
    # AttributeError on a never-assigned attribute.
    self.infiles = load_json_list(
        args.input_files,
        f"--input_files parameter {str(args.input_files)} is malformed - should be a dictionary",
    )
    self.outfiles = load_json_list(
        args.output_files,
        f"--output_files parameter {args.output_files} is malformed - should be a dictionary",
    )
    self.addpar = load_json_list(
        args.additional_parameters,
        f"--additional_parameters {args.additional_parameters} is malformed - should be a dictionary",
    )
    self.selpar = load_json_list(
        args.selecttext_parameters,
        f"--selecttext_parameters {args.selecttext_parameters} is malformed - should be a dictionary",
    )
    self.args = args
    self.cleanuppar()
    self.lastclredirect = None
    self.lastxclredirect = None
    self.cl = []
    self.xmlcl = []
    self.is_positional = self.args.parampass == "positional"
    if self.args.sysexe:
        # split(" ") on a string without spaces yields a one element list
        self.executeme = self.args.sysexe.split(" ")
    elif self.args.packages:
        # first package name, without any ":version" suffix
        self.executeme = [self.args.packages.split(",")[0].split(":")[0].strip()]
    else:
        self.executeme = None
    aCL = self.cl.append
    aXCL = self.xmlcl.append
    assert args.parampass in [
        "0",
        "argparse",
        "positional",
    ], 'args.parampass must be "0","positional" or "argparse"'
    self.tool_name = re.sub("[^a-zA-Z0-9_]+", "", args.tool_name)
    self.tool_id = self.tool_name
    self.newtool = gxt.Tool(
        self.tool_name,
        self.tool_id,
        self.args.tool_version,
        self.args.tool_desc,
        FAKEEXE,
    )
    self.newtarpath = "%s_toolshed.gz" % self.tool_name
    self.tooloutdir = "./tfout"
    self.repdir = "./TF_run_report_tempdir"
    self.testdir = os.path.join(self.tooloutdir, "test-data")
    # testdir lives inside tooloutdir, so the order matters
    for needed in (self.tooloutdir, self.testdir, self.repdir):
        if not os.path.exists(needed):
            os.mkdir(needed)
    self.tinputs = gxtp.Inputs()
    self.toutputs = gxtp.Outputs()
    self.testparam = []
    if self.args.script_path:
        self.prepScript()
    if self.args.command_override:
        with open(self.args.command_override, "r") as handle:
            self.command_override = [x.rstrip() for x in handle.readlines()]
    else:
        self.command_override = None
    if self.args.test_override:
        with open(self.args.test_override, "r") as handle:
            self.test_override = [x.rstrip() for x in handle.readlines()]
    else:
        self.test_override = None
    # Both command lines start with the executable; a supplied script follows.
    for ex in self.executeme:
        aCL(ex)
        aXCL(ex)
    if self.args.script_path:
        aCL(self.sfile)
        aXCL("$runme")

    self.elog = os.path.join(self.repdir, "%s_error_log.txt" % self.tool_name)
    self.tlog = os.path.join(self.repdir, "%s_runner_log.txt" % self.tool_name)
    if self.args.parampass == "0":
        self.clsimple()
    elif self.args.parampass == "positional":
        self.prepclpos()
        self.clpositional()
    else:
        self.prepargp()
        self.clargparse()
    if self.args.cl_suffix:  # DIY CL end
        for c in shlex.split(self.args.cl_suffix):
            aCL(c)
            aXCL(c)
215 | |
def clsimple(self):
    """Filter style command line: first input via ``<``, first output via ``>``.

    Only the first entry of self.infiles and self.outfiles is used. The
    plain names go onto self.cl and the ``$`` prefixed names onto
    self.xmlcl.
    """
    if len(self.infiles) > 0:
        source_name = self.infiles[0]["infilename"]
        self.cl.extend(["<", source_name])
        self.xmlcl.extend(["<", "$%s" % source_name])
    if len(self.outfiles) > 0:
        sink_name = self.outfiles[0]["name"]
        self.cl.extend([">", sink_name])
        self.xmlcl.extend([">", "$%s" % sink_name])
230 | |
def prepargp(self):
    """Collect [flag, value, override] triples for an argparse style command line.

    Two parallel lists are built and stored on self: clsuffix for running
    the tool here and xclsuffix for the generated XML command, where values
    are ``$name`` references. An output whose original CL is STDOUT is not
    added; it is recorded in self.lastclredirect / self.lastxclredirect
    instead.
    """
    clsuffix = []
    xclsuffix = []
    for p in self.infiles:
        nam = p["infilename"]
        if p["origCL"].strip().upper() == "STDIN":
            appendme = [nam, nam, "< %s" % nam]
            xappendme = [nam, nam, "< $%s" % nam]
        else:
            over = ""
            # tolerate a missing "repeat" key, as doXMLparam does for inputs
            if p.get("repeat", "0") == "1":
                over = f'#for $rep in $R_{nam}:\n--{nam} "$rep.{nam}"\n#end for'
            appendme = [p["CL"], p["CL"], ""]
            xappendme = [p["CL"], "$%s" % p["CL"], over]
        clsuffix.append(appendme)
        xclsuffix.append(xappendme)
    for p in self.outfiles:
        if p["origCL"].strip().upper() == "STDOUT":
            self.lastclredirect = [">", p["name"]]
            self.lastxclredirect = [">", "$%s" % p["name"]]
        else:
            clsuffix.append([p["name"], p["name"], ""])
            xclsuffix.append([p["name"], "$%s" % p["name"], ""])
    for p in self.addpar:
        nam = p["name"]
        if p["repeat"] == "1":
            over = f'#for $rep in $R_{nam}:\n--{nam} "$rep.{nam}"\n#end for'
        else:
            over = p["override"]
        clsuffix.append([p["CL"], nam, over])
        # The XML command must reference the parameter value, not repeat its
        # literal name - same quoting as select parameters below.
        xclsuffix.append([p["CL"], '"$%s"' % nam, over])
    for p in self.selpar:
        clsuffix.append([p["CL"], p["name"], p["override"]])
        xclsuffix.append([p["CL"], '"$%s"' % p["name"], p["override"]])
    self.xclsuffix = xclsuffix
    self.clsuffix = clsuffix
277 | |
def prepclpos(self):
    """Collect [ordinal, value, override] triples for a positional command line.

    Builds self.clsuffix (local run) and self.xclsuffix (tool XML), both
    ordered by the numeric value of the ordinal. STDIN inputs are given
    the ordinal "999". A STDOUT output is not added; it is recorded in
    self.lastclredirect / self.lastxclredirect instead.
    """

    def ordinal_key(entry):
        """Order by numeric ordinal; non-numeric ordinals go last."""
        cl = str(entry[0]).strip()
        # A plain string sort would put "10" ahead of "2". The entry itself
        # is kept as tie-break so equal ordinals sort as before.
        return (int(cl) if cl.isdigit() else float("inf"), entry)

    clsuffix = []
    xclsuffix = []
    for p in self.infiles:
        if p["origCL"].strip().upper() == "STDIN":
            appendme = [
                "999",
                p["infilename"],
                "< $%s" % p["infilename"],
            ]
            xappendme = [
                "999",
                p["infilename"],
                "< $%s" % p["infilename"],
            ]
        else:
            appendme = [p["CL"], p["infilename"], ""]
            xappendme = [p["CL"], "$%s" % p["infilename"], ""]
        clsuffix.append(appendme)
        xclsuffix.append(xappendme)
    for p in self.outfiles:
        if p["origCL"].strip().upper() == "STDOUT":
            self.lastclredirect = [">", p["name"]]
            self.lastxclredirect = [">", "$%s" % p["name"]]
        else:
            clsuffix.append([p["CL"], p["name"], ""])
            xclsuffix.append([p["CL"], "$%s" % p["name"], ""])
    for p in self.addpar:
        nam = p["name"]
        if p["repeat"] == "1":  # repeats make NO sense
            print(f'### warning. Repeats for {nam} ignored - not permitted in positional parameter command lines!')
        over = p["override"]
        clsuffix.append([p["CL"], nam, over])
        xclsuffix.append([p["CL"], '"$%s"' % nam, over])
    for p in self.selpar:
        clsuffix.append([p["CL"], p["name"], p["override"]])
        xclsuffix.append([p["CL"], '"$%s"' % p["name"], p["override"]])
    clsuffix.sort(key=ordinal_key)
    xclsuffix.sort(key=ordinal_key)
    self.xclsuffix = xclsuffix
    self.clsuffix = clsuffix
320 | |
def prepScript(self):
    """Read the user supplied script and stage the copies needed later.

    Sets self.script (the text), self.sfile (a temporary copy that is put
    on the local command line), self.escapedScript (Cheetah escaped lines
    for the tool configfile) and self.spacedScript (non-blank lines with a
    leading space, used in the help text). Also writes the escaped script
    to "<tool_name>.<executable>" in the current directory.

    Raises AssertionError if the script contains only whitespace.
    """
    with open(self.args.script_path, "r") as source:
        rx = [x.rstrip() for x in source.readlines()]
    rxcheck = [x.strip() for x in rx if x.strip() > ""]
    assert len(rxcheck) > 0, "Supplied script is empty. Cannot run"
    self.script = "\n".join(rx)
    fhandle, self.sfile = tempfile.mkstemp(
        prefix=self.tool_name, suffix="_%s" % (self.executeme[0])
    )
    # mkstemp returns an open descriptor; wrap it so it is closed rather than
    # leaked while the path is reopened separately.
    with os.fdopen(fhandle, "w") as tscript:
        tscript.write(self.script)
    self.escapedScript = [cheetah_escape(x) for x in rx]
    self.spacedScript = [f" {x}" for x in rx if x.strip() > ""]
    art = "%s.%s" % (self.tool_name, self.executeme[0])
    with open(art, "wb") as artifact:
        artifact.write(bytes("\n".join(self.escapedScript), "utf8"))
339 | |
def cleanuppar(self):
    """Validate positional ordinals and record each parameter's original CL.

    In positional mode every input, output and additional parameter must
    have an all-digit CL (inputs may also use STDIN, outputs STDOUT),
    otherwise an AssertionError is raised. Afterwards each input is
    replaced by a shallow copy carrying "origCL" and "infilename", and
    each output and additional parameter gains "origCL".
    """
    mode = self.args.parampass
    complaint = "Positional parameters must be ordinal integers - got %s for %s"
    if mode == "positional":
        for spec in self.infiles:
            acceptable = spec["CL"].isdigit() or spec["CL"].strip().upper() == "STDIN"
            assert acceptable, complaint % (spec["CL"], spec["label"])
        for spec in self.outfiles:
            acceptable = spec["CL"].isdigit() or spec["CL"].strip().upper() == "STDOUT"
            assert acceptable, complaint % (spec["CL"], spec["name"])
        for spec in self.addpar:
            assert spec["CL"].isdigit(), complaint % (spec["CL"], spec["name"])
    # positional and "0" runs name the input after its label, argparse after its flag
    named_by_label = mode in ["positional", "0"]
    for position in range(len(self.infiles)):
        duplicate = copy.copy(self.infiles[position])
        duplicate["origCL"] = duplicate["CL"]
        if named_by_label:
            duplicate["infilename"] = duplicate["label"].replace(" ", "_")
        else:
            duplicate["infilename"] = duplicate["CL"]
        self.infiles[position] = duplicate
    for spec in self.outfiles:
        spec["origCL"] = spec["CL"]  # keep copy
    for spec in self.addpar:
        spec["origCL"] = spec["CL"]
378 | |
def clpositional(self):
    """Append the prepared positional values to both command lines.

    Values from self.clsuffix go onto self.cl and values from
    self.xclsuffix onto self.xmlcl, in their stored order. A pending
    stdout redirect is appended to self.xmlcl only.
    """
    # inputs in order then params
    for _ordinal, value, _override in self.clsuffix:
        # both outcomes append the value itself; the space test is kept as-is
        self.cl.append("%s" % value if " " in value else value)
    for _ordinal, value, _override in self.xclsuffix:
        self.xmlcl.append(value)
    if self.lastxclredirect:
        for part in (0, 1):
            self.xmlcl.append(self.lastxclredirect[part])
393 | |
def clargparse(self):
    """Append flag/value pairs in argparse form to both command lines.

    A one character flag gets a single dash, anything longer two dashes.
    For self.xmlcl a non-empty override replaces the flag and its value;
    for self.cl the override replaces only the flag. A pending stdout
    redirect is appended to self.xmlcl only.
    """

    def dashed(flag):
        """Prefix flag with "-" when it is one character long, else "--"."""
        if len(flag.strip()) == 1:
            return "-%s" % flag
        return "--%s" % flag

    # inputs then params in argparse named form
    for flag, value, override in self.xclsuffix:
        if override > "":
            self.xmlcl.append(override)
            continue
        self.xmlcl.append(dashed(flag))
        self.xmlcl.append(value)
    for flag, value, override in self.clsuffix:
        self.cl.append(override if override > "" else dashed(flag))
        self.cl.append(value)
    if self.lastxclredirect:
        for part in (0, 1):
            self.xmlcl.append(self.lastxclredirect[part])
423 | |
def getNdash(self, newname):
    """Return how many leading dashes the parameter newname takes.

    0 in positional mode, otherwise 1 for a name shorter than two
    characters and 2 for anything longer.
    """
    if self.is_positional:
        return 0
    return 1 if len(newname) < 2 else 2
432 | |
def doXMLparam(self):
    """Add all needed elements to tool

    Creates the galaxyxml input and output elements, plus the matching
    test elements, for every entry in self.outfiles, self.infiles,
    self.addpar, self.selpar and self.collections. Results are appended to
    self.toutputs, self.tinputs and self.testparam.

    An output's "test" value selects the comparison: "diff[:lines]",
    "sim_size[:delta]" (a value containing "." is a fraction capped at
    1.0), or any other text, which is passed through as the compare
    method. An empty value adds no test for that output.

    Raises ValueError for an unrecognised additional or select parameter
    type.
    """  # noqa
    for p in self.outfiles:
        newname = p["name"]
        newfmt = p["format"]
        newcl = p["CL"]
        test = p["test"].strip()
        oldcl = p["origCL"]
        ndash = self.getNdash(newcl)
        aparm = gxtp.OutputData(
            name=newname, format=newfmt, num_dashes=ndash, label=newname
        )
        aparm.positional = self.is_positional
        if self.is_positional:
            if oldcl.upper() == "STDOUT":
                aparm.positional = 9999999
                aparm.command_line_override = "> $%s" % newname
            else:
                aparm.positional = int(oldcl)
                aparm.command_line_override = "$%s" % newname
        self.toutputs.append(aparm)
        if test > "":
            # Optional argument after the first ":"; empty when there is none,
            # so a bare "diff" or "sim_size" no longer raises IndexError.
            test_parts = test.split(":")
            tn = test_parts[1].strip() if len(test_parts) > 1 else ""
            if test.startswith("diff"):
                # isdigit must be called; unparsable counts mean 0 lines
                ld = int(tn) if tn.isdigit() else 0
                tp = gxtp.TestOutput(
                    name=newname,
                    value="%s_sample" % newname,
                    compare="diff",
                    lines_diff=ld,
                )
            elif test.startswith("sim_size"):
                # both default to None so an empty argument cannot leave them unbound
                delta = None
                delta_frac = None
                if tn > "":
                    if "." in tn:
                        delta_frac = min(1.0, float(tn))
                    else:
                        delta = int(tn)
                tp = gxtp.TestOutput(
                    name=newname,
                    value="%s_sample" % newname,
                    compare="sim_size",
                    delta=delta,
                    delta_frac=delta_frac,
                )
            else:
                tp = gxtp.TestOutput(
                    name=newname,
                    value="%s_sample" % newname,
                    compare=test,
                )
            self.testparam.append(tp)
    for p in self.infiles:
        newname = p["infilename"]
        newfmt = p["format"]
        ndash = self.getNdash(newname)
        reps = p.get("repeat", "0") == "1"
        if not len(p["label"]) > 0:
            alab = p["CL"]
        else:
            alab = p["label"]
        aninput = gxtp.DataParam(
            newname,
            optional=False,
            label=alab,
            help=p["help"],
            format=newfmt,
            multiple=False,
            num_dashes=ndash,
        )
        aninput.positional = self.is_positional
        if self.is_positional:
            if p["origCL"].upper() == "STDIN":
                aninput.positional = 9999998
                # an input arriving on stdin is redirected with "<", not ">"
                aninput.command_line_override = "< $%s" % newname
            else:
                aninput.positional = int(p["origCL"])
                aninput.command_line_override = "$%s" % newname
        if reps:
            repe = gxtp.Repeat(name=f"R_{newname}", title=f"Add as many {alab} as needed")
            repe.append(aninput)
            self.tinputs.append(repe)
            tparm = gxtp.TestRepeat(name=f"R_{newname}")
            tparm2 = gxtp.TestParam(newname, value="%s_sample" % newname)
            tparm.append(tparm2)
            self.testparam.append(tparm)
        else:
            self.tinputs.append(aninput)
            tparm = gxtp.TestParam(newname, value="%s_sample" % newname)
            self.testparam.append(tparm)
    # the four simple parameter types are built with identical arguments
    simple_params = {
        "text": gxtp.TextParam,
        "integer": gxtp.IntegerParam,
        "float": gxtp.FloatParam,
        "boolean": gxtp.BooleanParam,
    }
    for p in self.addpar:
        newname = p["name"]
        newval = p["value"]
        newlabel = p["label"]
        newhelp = p["help"]
        newtype = p["type"]
        oldcl = p["origCL"]
        reps = p["repeat"] == "1"
        if not len(newlabel) > 0:
            newlabel = newname
        ndash = self.getNdash(newname)
        if newtype not in simple_params:
            raise ValueError(
                'Unrecognised parameter type "%s" for additional parameter %s in makeXML'
                % (newtype, newname)
            )
        aparm = simple_params[newtype](
            newname,
            label=newlabel,
            help=newhelp,
            value=newval,
            num_dashes=ndash,
        )
        aparm.positional = self.is_positional
        if self.is_positional:
            aparm.positional = int(oldcl)
        if reps:
            repe = gxtp.Repeat(name=f"R_{newname}", title=f"Add as many {newlabel} as needed")
            repe.append(aparm)
            self.tinputs.append(repe)
            tparm = gxtp.TestRepeat(name=f"R_{newname}")
            tparm2 = gxtp.TestParam(newname, value=newval)
            tparm.append(tparm2)
            self.testparam.append(tparm)
        else:
            self.tinputs.append(aparm)
            tparm = gxtp.TestParam(newname, value=newval)
            self.testparam.append(tparm)
    for p in self.selpar:
        newname = p["name"]
        newval = p["value"]
        newlabel = p["label"]
        newhelp = p["help"]
        newtype = p["type"]
        newcl = p["CL"]
        if not len(newlabel) > 0:
            newlabel = newname
        ndash = self.getNdash(newname)
        if newtype != "selecttext":
            raise ValueError(
                'Unrecognised parameter type "%s" for selecttext parameter %s in makeXML'
                % (newtype, newname)
            )
        newtext = p["texts"]
        aparm = gxtp.SelectParam(
            newname,
            label=newlabel,
            help=newhelp,
            num_dashes=ndash,
        )
        # option values and their display texts are parallel lists
        for i, optval in enumerate(newval):
            anopt = gxtp.SelectOption(
                value=optval,
                text=newtext[i],
            )
            aparm.append(anopt)
        aparm.positional = self.is_positional
        if self.is_positional:
            aparm.positional = int(newcl)
        self.tinputs.append(aparm)
        tparm = gxtp.TestParam(newname, value=newval)
        self.testparam.append(tparm)
    for p in self.collections:
        newkind = p["kind"]
        newname = p["name"]
        newlabel = p["label"]
        newdisc = p["discover"]
        collect = gxtp.OutputCollection(newname, label=newlabel, type=newkind)
        disc = gxtp.DiscoverDatasets(
            pattern=newdisc, directory=f"{newname}", visible="false"
        )
        collect.append(disc)
        self.toutputs.append(collect)
        try:
            tparm = gxtp.TestOutputCollection(newname)  # broken until PR merged.
            self.testparam.append(tparm)
        except Exception:
            print("#### WARNING: Galaxyxml version does not have the PR merged yet - tests for collections must be over-ridden until then!")
648 | |
def doNoXMLparam(self):
    """Declare the single stdin input and stdout output of a filter style tool.

    Used when parampass is "0". The first input, if any, is redirected
    with ``<`` and the first output, if any, with ``>``; a test element is
    added for each. Raises AssertionError when more than one input file
    was supplied.
    """
    if len(self.infiles) > 0:
        first_in = self.infiles[0]
        alab = first_in["label"]
        if len(alab) == 0:
            alab = first_in["infilename"]
        max1s = (
            "Maximum one input if parampass is 0 but multiple input files supplied - %s"
            % str(self.infiles)
        )
        assert len(self.infiles) == 1, max1s
        in_name = first_in["infilename"]
        aninput = gxtp.DataParam(
            in_name,
            optional=False,
            label=alab,
            help=first_in["help"],
            format=first_in["format"],
            multiple=False,
            num_dashes=0,
        )
        aninput.command_line_override = "< $%s" % in_name
        aninput.positional = True
        self.tinputs.append(aninput)
        self.testparam.append(
            gxtp.TestParam(name=in_name, value="%s_sample" % in_name)
        )
    if len(self.outfiles) > 0:
        first_out = self.outfiles[0]
        out_name = first_out["name"]
        anout = gxtp.OutputData(out_name, format=first_out["format"], num_dashes=0)
        anout.command_line_override = "> $%s" % out_name
        anout.positional = self.is_positional
        self.toutputs.append(anout)
        self.testparam.append(
            gxtp.TestOutput(name=out_name, value="%s_sample" % out_name)
        )
684 | |
def makeXML(self):  # noqa
    """
    Create a Galaxy xml tool wrapper for the new script
    Uses galaxyxml
    Hmmm. How to get the command line into correct order...

    Fills self.newtool with the command, citation, help text, version
    command, stdio, requirements, inputs, outputs, configfile and tests,
    then writes the exported XML to "<tool_name>.xml" in the current
    directory. If a test override was supplied, it replaces the generated
    <tests> section in the exported text.
    """
    if self.command_override:
        self.newtool.command_override = self.command_override  # config file
    else:
        self.newtool.command_override = self.xmlcl
    cite = gxtp.Citations()
    acite = gxtp.Citation(type="doi", value="10.1093/bioinformatics/bts573")
    cite.append(acite)
    self.newtool.citations = cite
    safertext = ""
    if self.args.help_text:
        with open(self.args.help_text, "r") as helpfile:
            helptext = helpfile.readlines()
        safertext = "\n".join([cheetah_escape(x) for x in helptext])
    if len(safertext.strip()) == 0:
        safertext = (
            "Ask the tool author (%s) to rebuild with help text please\n"
            % (self.args.user_email)
        )
    if self.args.script_path:
        if len(safertext) > 0:
            safertext = safertext + "\n\n------\n"  # transition allowed!
        scr = [x for x in self.spacedScript if x.strip() > ""]
        scr.insert(0, "\n\nScript::\n")
        if len(scr) > 300:
            # keep long scripts readable: first and last 100 lines only
            scr = (
                scr[:100]
                + [" >300 lines - stuff deleted", " ......"]
                + scr[-100:]
            )
        scr.append("\n")
        safertext = safertext + "\n".join(scr)
    self.newtool.help = safertext
    self.newtool.version_command = f'echo "{self.args.tool_version}"'
    std = gxtp.Stdios()
    std1 = gxtp.Stdio()
    std.append(std1)
    self.newtool.stdios = std
    requirements = gxtp.Requirements()
    if self.args.packages:
        for d in self.args.packages.split(","):
            # accept name==version, name=version and name:version
            d = d.replace("==", ":")
            d = d.replace("=", ":")
            # partition never fails to unpack, even with more than one ":"
            packg, _, ver = d.partition(":")
            requirements.append(
                gxtp.Requirement("package", packg.strip(), ver.strip())
            )
    self.newtool.requirements = requirements
    if self.args.parampass == "0":
        self.doNoXMLparam()
    else:
        self.doXMLparam()
    self.newtool.outputs = self.toutputs
    self.newtool.inputs = self.tinputs
    if self.args.script_path:
        configfiles = gxtp.Configfiles()
        configfiles.append(
            gxtp.Configfile(name="runme", text="\n".join(self.escapedScript))
        )
        self.newtool.configfiles = configfiles
    tests = gxtp.Tests()
    test_a = gxtp.Test()
    for tp in self.testparam:
        test_a.append(tp)
    tests.append(test_a)
    self.newtool.tests = tests
    self.newtool.add_comment(
        "Created by %s at %s using the Galaxy Tool Factory."
        % (self.args.user_email, timenow())
    )
    self.newtool.add_comment("Source in git at: %s" % (toolFactoryURL))
    exml0 = self.newtool.export()
    exml = exml0.replace(FAKEEXE, "")  # temporary work around until PR accepted
    if (
        self.test_override
    ):  # cannot do this inside galaxyxml as it expects lxml objects for tests
        part1 = exml.split("<tests>")[0]
        part2 = exml.split("</tests>")[1]
        fixed = "%s\n%s\n%s" % (part1, "\n".join(self.test_override), part2)
        exml = fixed
    # exml = exml.replace('range="1:"', 'range="1000:"')
    with open("%s.xml" % self.tool_name, "w") as xf:
        xf.write(exml)
        xf.write("\n")
    # ready for the tarball
779 | |
def run(self):
    """
    generate test outputs by running a command line
    won't work if command or test override in play - planemo is the
    easiest way to generate test outputs for that case so is
    automagically selected

    Runs self.cl without a shell and returns the process return code.
    Unless parampass is "0", stderr is appended to self.elog and stdout
    goes to the STDOUT output file if one was declared, otherwise it is
    appended to self.tlog after a line recording the command. When
    parampass is "0", the first input file is fed to stdin and stdout is
    written to the first output file; the command is recorded in
    self.tlog. Log files left empty are removed.
    """
    scl = " ".join(self.cl)
    logline = "## Executing Toolfactory generated command line = %s\n" % scl
    if self.args.parampass != "0":
        # mode "a" creates a missing file, so no exists() test is needed
        with open(self.elog, "a") as ste:
            if self.lastclredirect:
                sto = open(self.lastclredirect[1], "wb")  # is name of an output file
            else:
                sto = open(self.tlog, "a")
                sto.write(logline)
            with sto:
                # push buffered text out before the child writes to the same file
                sto.flush()
                subp = subprocess.run(
                    self.cl, shell=False, stdout=sto, stderr=ste
                )
        retval = subp.returncode
    else:  # work around special case - stdin and write to stdout
        # Record the command in the runner log. Writing it into the binary
        # output file would raise TypeError and pollute the tool's output.
        with open(self.tlog, "a") as tlog:
            tlog.write(logline)
        sti = open(self.infiles[0]["name"], "rb") if len(self.infiles) > 0 else sys.stdin
        try:
            sto = open(self.outfiles[0]["name"], "wb") if len(self.outfiles) > 0 else sys.stdout
            try:
                subp = subprocess.run(
                    self.cl, shell=False, stdout=sto, stdin=sti
                )
            finally:
                # never close the interpreter's own standard streams
                if sto is not sys.stdout:
                    sto.close()
        finally:
            if sti is not sys.stdin:
                sti.close()
        retval = subp.returncode
    if os.path.isfile(self.tlog) and os.stat(self.tlog).st_size == 0:
        os.unlink(self.tlog)
    if os.path.isfile(self.elog) and os.stat(self.elog).st_size == 0:
        os.unlink(self.elog)
    logging.debug("run done")
    return retval
835 | |
def shedLoad(self):
    """
    use bioblend to create new repository
    or update existing

    Looks the tool name up among the repositories of the toolshed at
    args.toolshed_url. A missing repository is created, placed in the
    "ToolFactory generated tools" category when that category exists.
    The tarball at self.newtarpath is then uploaded. Progress and
    connection failures are appended to self.tlog.
    """
    # mode "a" creates the log when missing; "with" closes it even on error
    with open(self.tlog, "a") as sto:
        ts = toolshed.ToolShedInstance(
            url=self.args.toolshed_url,
            key=self.args.toolshed_api_key,
            verify=False,
        )
        repos = ts.repositories.get_repositories()
        rnames = [x.get("name", "?") for x in repos]
        rids = [x.get("id", "?") for x in repos]
        tfcat = "ToolFactory generated tools"
        if self.tool_name not in rnames:
            tscat = ts.categories.get_categories()
            cnames = [x.get("name", "?").strip() for x in tscat]
            cids = [x.get("id", "?") for x in tscat]
            catID = None
            if tfcat.strip() in cnames:
                ci = cnames.index(tfcat)
                catID = cids[ci]
            res = ts.repositories.create_repository(
                name=self.args.tool_name,
                synopsis="Synopsis:%s" % self.args.tool_desc,
                description=self.args.tool_desc,
                type="unrestricted",
                remote_repository_url=self.args.toolshed_url,
                homepage_url=None,
                category_ids=catID,
            )
            tid = res.get("id", None)
            sto.write(f"#create_repository {self.args.tool_name} tid={tid} res={res}\n")
        else:
            i = rnames.index(self.tool_name)
            tid = rids[i]
        try:
            res = ts.repositories.update_repository(
                id=tid, tar_ball_path=self.newtarpath, commit_message=None
            )
            # report the repository id, not the builtin function ``id``
            sto.write(f"#update res id {tid} ={res}\n")
        except ConnectionError:
            sto.write(
                "####### Is the toolshed running and the API key correct? Bioblend shed upload failed\n"
            )
888 | |
def eph_galaxy_load(self):
    """
    use ephemeris to load the new tool from the local toolshed after planemo uploads it

    Runs ``shed-tools install`` for self.tool_name against args.galaxy_url
    and args.toolshed_url from the directory the runner started in. The
    command, its output and its return code are appended to self.tlog.
    Returns the return code of the command.
    """
    cll = [
        "shed-tools",
        "install",
        "-g",
        self.args.galaxy_url,
        "--latest",
        "-a",
        self.args.galaxy_api_key,
        "--name",
        self.tool_name,
        "--owner",
        "fubar",
        "--toolshed",
        self.args.toolshed_url,
        "--section_label",
        "ToolFactory",
    ]
    # mode "a" creates the log when missing; "with" closes it even on error
    with open(self.tlog, "a") as tout:
        tout.write("running\n%s\n" % " ".join(cll))
        # The child writes straight to the file descriptor, so flush our
        # buffered header first or it would land after the child's output.
        tout.flush()
        subp = subprocess.run(
            cll,
            cwd=self.ourcwd,
            shell=False,
            stderr=tout,
            stdout=tout,
        )
        tout.write(
            "installed %s - got retcode %d\n" % (self.tool_name, subp.returncode)
        )
    return subp.returncode
927 | |
def writeShedyml(self):
    """Write the .shed.yml repository metadata for planemo into tooloutdir.

    The owner is the part of args.user_email before the "@".
    """
    owner = self.args.user_email.split("@")[0]
    target = os.path.join(self.tooloutdir, ".shed.yml")
    with open(target, "w") as handle:
        shed_meta = dict(
            name=self.tool_name,
            owner=owner,
            type="unrestricted",
            description=self.args.tool_desc,
            synopsis=self.args.tool_desc,
            category="TF Generated Tools",
        )
        yaml.dump(shed_meta, handle, allow_unicode=True)
943 | |
def makeTool(self):
    """write xmls and input samples into place

    Generates the tool XML, copies it into self.tooloutdir, and copies
    every input file to "<infilename>_sample" in both self.testdir and
    self.repdir.
    """
    # makeXML chooses between doNoXMLparam and doXMLparam itself. The former
    # ``parampass == 0`` test compared a string with an int, so it never
    # matched; had it matched, no XML would have been written.
    self.makeXML()
    if self.args.script_path:
        # NOTE(review): prepScript sets self.sfile from tempfile.mkstemp, which
        # yields an absolute path. os.path.join then returns self.sfile
        # unchanged, so this copy is never made - confirm intent.
        stname = os.path.join(self.tooloutdir, self.sfile)
        if not os.path.exists(stname):
            shutil.copyfile(self.sfile, stname)
    xreal = "%s.xml" % self.tool_name
    xout = os.path.join(self.tooloutdir, xreal)
    shutil.copyfile(xreal, xout)
    for p in self.infiles:
        pth = p["name"]
        sample = "%s_sample" % p["infilename"]
        shutil.copyfile(pth, os.path.join(self.testdir, sample))
        shutil.copyfile(pth, os.path.join(self.repdir, sample))
963 | |
def makeToolTar(self, report_fail=False):
    """Copy tool outputs into place as samples and build the tool tarball.

    For every entry in ``self.outfiles`` that has no ``<name>_sample`` file
    in ``self.testdir`` yet, the output ``<name>`` found in ``self.testdir``
    is copied to ``<name>_sample`` there and to ``<name>.sample`` in
    ``self.repdir``. ``self.tooloutdir`` is then archived as a gzipped
    tarball rooted at ``self.tool_name`` (files ending in
    ``_planemo_test_report.html`` are left out) and the archive is copied to
    ``self.args.new_tool``.

    :param report_fail: when true, append a warning to ``self.tlog`` for
        each expected output that is missing from ``self.testdir``.
    """
    excludeme = "_planemo_test_report.html"

    def exclude_function(tarinfo):
        """tarfile filter - returning None drops the member from the archive."""
        filename = tarinfo.name
        return None if filename.endswith(excludeme) else tarinfo

    # Append mode creates the log when it does not exist yet, and the
    # context manager guarantees the handle is flushed and closed (it was
    # previously left open, so warnings could stay in the buffer).
    with open(self.tlog, "a") as tout:
        for p in self.outfiles:
            oname = p["name"]
            tdest = os.path.join(self.testdir, "%s_sample" % oname)
            src = os.path.join(self.testdir, oname)
            if os.path.isfile(tdest):
                # an existing sample is never overwritten
                continue
            if os.path.isfile(src):
                shutil.copyfile(src, tdest)
                dest = os.path.join(self.repdir, "%s.sample" % (oname))
                shutil.copyfile(src, dest)
            elif report_fail:
                tout.write(
                    "###Tool may have failed - output file %s not found in testdir after planemo run %s."
                    % (tdest, self.testdir)
                )
    # Close the archive before copying it so the gzip stream is complete.
    with tarfile.open(self.newtarpath, "w:gz") as tf:
        tf.add(
            name=self.tooloutdir,
            arcname=self.tool_name,
            filter=exclude_function,
        )
    shutil.copyfile(self.newtarpath, self.args.new_tool)
999 | |
def moveRunOutputs(self):
    """Copy planemo or run outputs into the toolfactory report collection.

    Regular files directly inside ``self.tooloutdir`` are copied into
    ``self.repdir``. When ``self.args.include_tests`` is set, the regular
    files in ``self.testdir`` are copied as well, except those ending in
    ``_planemo_test_report.html``. Sub-directories are ignored.

    Naming rules applied to every copied file:

    * ``.tgz`` and ``.json`` files are skipped;
    * ``.yml``, ``.yaml`` and ``.xml`` files have every dot replaced by an
      underscore and ``.txt`` appended;
    * names without any dot get ``.txt`` appended;
    * all other names are kept unchanged.
    """
    skipped_exts = (".tgz", ".json")
    renamed_exts = (".yml", ".xml", ".yaml")

    def report_name(fname):
        """Return the name to use in repdir, or None when the file is skipped."""
        if "." not in fname:
            return f"{fname}.txt"
        _, ext = os.path.splitext(fname)
        if ext in skipped_exts:
            return None
        if ext in renamed_exts:
            return f"{fname.replace('.', '_')}.txt"
        return fname

    def copy_files(srcdir, skip_suffix=None):
        """Copy the regular files found in srcdir into self.repdir."""
        with os.scandir(srcdir) as outs:
            for entry in outs:
                if not entry.is_file():
                    continue
                if skip_suffix and entry.name.endswith(skip_suffix):
                    continue
                newname = report_name(entry.name)
                if newname is None:
                    continue
                dest = os.path.join(self.repdir, newname)
                src = os.path.join(srcdir, entry.name)
                shutil.copyfile(src, dest)

    copy_files(self.tooloutdir)
    if self.args.include_tests:
        copy_files(self.testdir, skip_suffix="_planemo_test_report.html")
1039 | |
def planemo_test_once(self):
    """Run ``planemo test`` once on the generated tool XML.

    planemo is started in ``self.tooloutdir`` with ``--update_test_data``,
    using ``self.testdir`` as the test data directory. The HTML test report
    is written to ``<tool_name>_planemo_test_report.html`` in ``self.repdir``
    for display. Everything planemo prints on stdout and stderr is appended
    to the log file ``self.tlog``.

    :return: the exit status of the planemo process.
    """
    xreal = "%s.xml" % self.tool_name
    tool_test_path = os.path.join(
        self.repdir, f"{self.tool_name}_planemo_test_report.html"
    )
    cll = [
        "planemo",
        "test",
        "--galaxy_python_version",
        self.args.python_version,
        "--conda_auto_init",
        "--test_data",
        os.path.abspath(self.testdir),
        "--test_output",
        os.path.abspath(tool_test_path),
        "--galaxy_root",
        self.args.galaxy_root,
        "--update_test_data",
        # resolved against the current working directory of this process,
        # not against the cwd handed to planemo below
        os.path.abspath(xreal),
    ]
    # Append mode creates the log when it does not exist yet; the context
    # manager closes it even when planemo cannot be started.
    with open(self.tlog, "a") as tout:
        p = subprocess.run(
            cll,
            shell=False,
            cwd=self.tooloutdir,
            stderr=tout,
            stdout=tout,
        )
    return p.returncode
1078 | |
def set_planemo_galaxy_root(self, galaxyroot='/galaxy-central', config_path=".planemo.yml"):
    """Write a planemo global configuration file unless one already exists.

    The file is a planemo configuration template in which only
    ``galaxy_root`` is set; every tool shed credential stays commented out.

    :param galaxyroot: value written as the ``galaxy_root`` setting.
    :param config_path: path of the configuration file to create. An
        existing file at that path is left untouched.
    """
    # Original note: bug in planemo - bogus '--dev-wheels' passed to
    # run_tests.sh as at april 2021 - need a fiddled copy so it is ignored
    # until fixed.
    # NOTE(review): nothing in this method deals with '--dev-wheels'; it only
    # writes the configuration file - confirm whether the note still applies.
    config_template = (
        "## Planemo Global Configuration File.\n"
        "## Everything in this file is completely optional - these values can all be\n"
        "## configured via command line options for the corresponding commands.\n"
        "\n"
        "## Specify a default galaxy_root for test and server commands here.\n"
        "galaxy_root: %s\n"
        "## Username used with toolshed(s).\n"
        '#shed_username: "<TODO>"\n'
        "sheds:\n"
        "  # For each tool shed you wish to target, uncomment key or both email and\n"
        "  # password.\n"
        "  toolshed:\n"
        '    #key: "<TODO>"\n'
        '    #email: "<TODO>"\n'
        '    #password: "<TODO>"\n'
        "  testtoolshed:\n"
        '    #key: "<TODO>"\n'
        '    #email: "<TODO>"\n'
        '    #password: "<TODO>"\n'
        "  local:\n"
        '    #key: "<TODO>"\n'
        '    #email: "<TODO>"\n'
        '    #password: "<TODO>"\n'
    )
    try:
        # Mode "x" creates the file only when it does not exist, without the
        # gap between a separate existence check and the open call.
        with open(config_path, "x") as f:
            f.write(config_template % galaxyroot)
    except FileExistsError:
        # keep whatever configuration is already there
        pass
1108 | |
1109 | |
def main():
    """
    This is a Galaxy wrapper.
    It expects to be called by a special purpose tool.xml

    The command line is parsed and validated, then a ScriptRunner writes the
    .shed.yml and the tool files. With ``--make_Tool generate`` the tool is
    run directly; with any other value it is tested once with planemo. In
    both cases the outputs are collected and the tool tarball is built.
    ``--make_Tool gentestinstall`` additionally calls ``shedLoad`` and
    ``eph_galaxy_load`` on the runner.

    Exits with status 1 and a message on stderr when ``--bad_user`` is set,
    when the tool name is empty, or when neither ``--sysexe`` nor
    ``--packages`` is given.
    """
    parser = argparse.ArgumentParser()
    a = parser.add_argument
    a("--script_path", default=None)
    a("--history_test", default=None)
    a("--cl_suffix", default=None)
    a("--sysexe", default=None)
    a("--packages", default=None)
    a("--tool_name", default="newtool")
    a("--tool_dir", default=None)
    a("--input_files", default=[], action="append")
    a("--output_files", default=[], action="append")
    a("--user_email", default="Unknown")
    a("--bad_user", default=None)
    a("--make_Tool", default="runonly")
    a("--help_text", default=None)
    a("--tool_desc", default=None)
    a("--tool_version", default=None)
    a("--citations", default=None)
    a("--command_override", default=None)
    a("--test_override", default=None)
    a("--additional_parameters", action="append", default=[])
    a("--selecttext_parameters", action="append", default=[])
    a("--edit_additional_parameters", action="store_true", default=False)
    a("--parampass", default="positional")
    a("--tfout", default="./tfout")
    a("--new_tool", default="new_tool")
    a("--galaxy_url", default="http://localhost:8080")
    a("--toolshed_url", default="http://localhost:9009")
    # make sure this is identical to tool_sheds_conf.xml
    # localhost != 127.0.0.1 so validation fails
    a("--toolshed_api_key", default="fakekey")
    a("--galaxy_api_key", default="fakekey")
    a("--galaxy_root", default="/galaxy-central")
    a("--galaxy_venv", default="/galaxy_venv")
    a("--collection", action="append", default=[])
    a("--include_tests", default=False, action="store_true")
    a("--python_version", default="3.9")
    args = parser.parse_args()
    # Explicit checks instead of assert statements: asserts are removed when
    # Python runs with -O, which would silently disable the validation.
    # sys.exit(message) prints the message on stderr and exits with status 1.
    if args.bad_user:
        sys.exit(
            "UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy "
            'admin adds %s to "admin_users" in the galaxy.yml Galaxy configuration file'
            % (args.bad_user, args.bad_user)
        )
    if not args.tool_name:
        sys.exit("## Tool Factory expects a tool name - eg --tool_name=DESeq")
    if not (args.sysexe or args.packages):
        sys.exit(
            "## Tool Factory wrapper expects an interpreter "
            "or an executable package in --sysexe or --packages"
        )
    r = ScriptRunner(args)
    r.writeShedyml()
    r.makeTool()
    if args.make_Tool == "generate":
        r.run()
        r.moveRunOutputs()
        r.makeToolTar()
    else:
        r.planemo_test_once()
        r.moveRunOutputs()
        r.makeToolTar(report_fail=True)
        if args.make_Tool == "gentestinstall":
            r.shedLoad()
            r.eph_galaxy_load()
1179 | |
# Run the wrapper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()