Mercurial > repos > fubar > toolfactory2
comparison toolfactory/rgToolFactory2.py @ 0:fc50a3f507ab draft
Need a new repo - old tool_factory_2 is broken
author | fubar |
---|---|
date | Sat, 10 Apr 2021 02:16:35 +0000 |
parents | |
children | 48458b0369aa |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:fc50a3f507ab |
---|---|
1 # replace with shebang for biocontainer | |
2 # see https://github.com/fubar2/toolfactory | |
3 # | |
4 # copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012 | |
5 # | |
6 # all rights reserved | |
7 # Licensed under the LGPL | |
8 # suggestions for improvement and bug fixes welcome at | |
9 # https://github.com/fubar2/toolfactory | |
10 # | |
11 # July 2020: BCC was fun and I feel like rip van winkle after 5 years. | |
12 # Decided to | |
13 # 1. Fix the toolfactory so it works - done for simplest case | |
14 # 2. Fix planemo so the toolfactory function works | |
15 # 3. Rewrite bits using galaxyxml functions where that makes sense - done | |
16 | |
17 import argparse | |
18 import copy | |
19 import json | |
20 import logging | |
21 import os | |
22 import re | |
23 import shlex | |
24 import shutil | |
25 import subprocess | |
26 import sys | |
27 import tarfile | |
28 import tempfile | |
29 import time | |
30 | |
31 import galaxyxml.tool as gxt | |
32 import galaxyxml.tool.parameters as gxtp | |
33 import lxml | |
34 import yaml | |
35 from bioblend import ConnectionError | |
36 from bioblend import toolshed | |
37 | |
# Release label for this ToolFactory script (not referenced in the visible code).
38 myversion = "V2.2 February 2021" | |
# Module-level switches; nothing in the visible part of the file reads them.
39 verbose = True | |
40 debug = True | |
# Project URL; makeXML writes it into the generated tool XML as a comment.
41 toolFactoryURL = "https://github.com/fubar2/toolfactory" | |
# Length of the lxml version string - presumably only here so the lxml import
# counts as used; TODO confirm before removing.
42 foo = len(lxml.__version__) | |
# Placeholder executable name handed to gxt.Tool in ScriptRunner.__init__ and
# stripped from the exported XML again in makeXML.
43 FAKEEXE = "~~~REMOVE~~~ME~~~" | |
44 # need this until a PR/version bump to fix galaxyxml prepending the exe even | |
45 # with override. | |
46 | |
47 | |
def timenow():
    """Return the current local time as a ``dd/mm/YYYY HH:MM:SS`` string."""
    stamp_format = "%d/%m/%Y %H:%M:%S"
    return time.strftime(stamp_format, time.localtime())
51 | |
52 | |
# Characters that Cheetah treats specially, mapped to their backslash-escaped form.
cheetah_escape_table = {"$": "\\$", "#": "\\#"}


def cheetah_escape(text):
    """Backslash-escape every ``$`` and ``#`` in *text*.

    Characters without an entry in ``cheetah_escape_table`` are copied
    through unchanged.
    """
    lookup = cheetah_escape_table.get
    return "".join(lookup(char, char) for char in text)
59 | |
60 | |
def parse_citations(citations_text):
    """Split a ``**ENTRY**`` delimited string into ``(kind, text)`` tuples.

    Each non-blank entry becomes ``("doi", text)`` when it starts with
    ``doi`` and ``("bibtex", text)`` otherwise.  Surrounding whitespace is
    removed before the prefix is examined, so an entry that begins on a new
    line is still recognised, and the ``bibtex`` prefix is only removed when
    it is actually present.

    Returns a list of 2-tuples in the order the entries appear.
    """
    citation_tuples = []
    for raw_entry in citations_text.split("**ENTRY**"):
        entry = raw_entry.strip()
        if not entry:
            continue
        if entry.startswith("doi"):
            citation_tuples.append(("doi", entry[len("doi"):].strip()))
        else:
            # Anything that is not a doi is treated as bibtex; strip the
            # marker only if the entry really carries it.
            if entry.startswith("bibtex"):
                entry = entry[len("bibtex"):]
            citation_tuples.append(("bibtex", entry.strip()))
    return citation_tuples
71 | |
72 | |
73 class ScriptRunner: | |
74 """Wrapper for an arbitrary script | |
75 uses galaxyxml | |
76 | |
77 """ | |
78 | |
def __init__(self, args=None):
    """
    Prepare the command line ``cl`` for running the tool here and the
    elements needed for galaxyxml tool generation.

    ``args`` is the parsed command-line namespace.  The JSON encoded
    list parameters (collection, input_files, output_files,
    additional_parameters, selecttext_parameters) are decoded first; a
    parameter that cannot be decoded is reported on stdout and treated as
    empty so later steps always find the attribute.

    Creates the ``./tfout``, ``./tfout/test-data`` and
    ``./TF_run_report_tempdir`` directories if they are missing, and
    finishes by building ``self.cl`` (local run) and ``self.xmlcl``
    (generated tool command) according to ``args.parampass``.
    """

    def load_json_list(raw_values, flag_text):
        """Decode every non-trivial string of raw_values as JSON."""
        if not raw_values:
            return []
        try:
            return [json.loads(x) for x in raw_values if len(x.strip()) > 1]
        except (TypeError, ValueError, AttributeError):
            # json.JSONDecodeError is a ValueError subclass
            print(f"{flag_text} {raw_values} is malformed - should be a dictionary")
            return []

    def read_stripped_lines(path):
        """Return the lines of a text file with trailing whitespace removed."""
        with open(path, "r") as handle:
            return [x.rstrip() for x in handle]

    self.ourcwd = os.getcwd()
    self.collections = load_json_list(args.collection, "--collections parameter")
    self.infiles = load_json_list(args.input_files, "--input_files parameter")
    self.outfiles = load_json_list(args.output_files, "--output_files parameter")
    self.addpar = load_json_list(
        args.additional_parameters, "--additional_parameters"
    )
    self.selpar = load_json_list(
        args.selecttext_parameters, "--selecttext_parameters"
    )
    self.args = args
    self.cleanuppar()
    self.lastclredirect = None
    self.lastxclredirect = None
    self.cl = []
    self.xmlcl = []
    self.is_positional = self.args.parampass == "positional"
    if self.args.sysexe:
        # a plain name yields a one element list; "exe arg" is split on spaces
        self.executeme = self.args.sysexe.split(" ")
    elif self.args.packages:
        # fall back to the name of the first requested package
        self.executeme = [self.args.packages.split(",")[0].split(":")[0].strip()]
    else:
        self.executeme = None
    aCL = self.cl.append
    aXCL = self.xmlcl.append
    assert args.parampass in [
        "0",
        "argparse",
        "positional",
    ], 'args.parampass must be "0","positional" or "argparse"'
    self.tool_name = re.sub("[^a-zA-Z0-9_]+", "", args.tool_name)
    self.tool_id = self.tool_name
    self.newtool = gxt.Tool(
        self.tool_name,
        self.tool_id,
        self.args.tool_version,
        self.args.tool_desc,
        FAKEEXE,
    )
    self.newtarpath = "%s_toolshed.gz" % self.tool_name
    self.tooloutdir = "./tfout"
    self.repdir = "./TF_run_report_tempdir"
    self.testdir = os.path.join(self.tooloutdir, "test-data")
    for needed in (self.tooloutdir, self.testdir, self.repdir):
        if not os.path.exists(needed):
            os.mkdir(needed)
    self.tinputs = gxtp.Inputs()
    self.toutputs = gxtp.Outputs()
    self.testparam = []
    if self.args.script_path:
        self.prepScript()
    if self.args.command_override:
        self.command_override = read_stripped_lines(self.args.command_override)
    else:
        self.command_override = None
    if self.args.test_override:
        self.test_override = read_stripped_lines(self.args.test_override)
    else:
        self.test_override = None
    # Both command lines start with the executable words; with neither
    # sysexe nor packages there is nothing to prepend.
    for ex in self.executeme or []:
        aCL(ex)
        aXCL(ex)
    if self.args.script_path:
        aCL(self.sfile)
        aXCL("$runme")  # name of the configfile makeXML creates for the script

    self.elog = os.path.join(self.repdir, "%s_error_log.txt" % self.tool_name)
    self.tlog = os.path.join(self.repdir, "%s_runner_log.txt" % self.tool_name)
    if self.args.parampass == "0":
        self.clsimple()
    elif self.args.parampass == "positional":
        self.prepclpos()
        self.clpositional()
    else:
        self.prepargp()
        self.clargparse()
    if self.args.cl_suffix:  # DIY CL end
        for c in shlex.split(self.args.cl_suffix):
            aCL(c)
            aXCL(c)
212 | |
def clsimple(self):
    """no parameters or repeats - uses < and > for i/o

    Only the first input and the first output are considered; the XML
    command line refers to them as ``$name`` variables.
    """
    if len(self.infiles) > 0:
        source_name = self.infiles[0]["infilename"]
        self.cl.extend(["<", source_name])
        self.xmlcl.extend(["<", "$%s" % source_name])
    if len(self.outfiles) > 0:
        target_name = self.outfiles[0]["name"]
        self.cl.extend([">", target_name])
        self.xmlcl.extend([">", "$%s" % target_name])
227 | |
def prepargp(self):
    """Collect ``[flag, value, override]`` triples for argparse style tools.

    Fills ``self.clsuffix`` (local command line) and ``self.xclsuffix``
    (generated XML command line).  An output whose original CL is STDOUT
    is not added to the triples; it is recorded in ``self.lastclredirect``
    and ``self.lastxclredirect`` instead.
    """
    local_triples = []
    xml_triples = []
    for infile in self.infiles:
        if infile["origCL"].strip().upper() == "STDIN":
            fname = infile["infilename"]
            local_triples.append([fname, fname, "< %s" % fname])
            xml_triples.append([fname, fname, "< $%s" % fname])
        else:
            flag = infile["CL"]
            local_triples.append([flag, flag, ""])
            xml_triples.append([flag, "$%s" % flag, ""])
    for outfile in self.outfiles:
        oname = outfile["name"]
        if outfile["origCL"].strip().upper() == "STDOUT":
            self.lastclredirect = [">", oname]
            self.lastxclredirect = [">", "$%s" % oname]
        else:
            local_triples.append([oname, oname, ""])
            xml_triples.append([oname, "$%s" % oname, ""])
    for param in self.addpar:
        pname = param["name"]
        if param["repeat"] == "1":
            # repeated parameters are emitted through a Cheetah #for loop
            override = f" #for $rep in $R_{pname}:\n--{pname} $rep.{pname}\n#end for"
        else:
            override = param["override"]
        local_triples.append([param["CL"], pname, override])
        # NOTE(review): the XML value here is the bare name, whereas
        # prepclpos uses '"$name"' - confirm this is intended.
        xml_triples.append([param["CL"], pname, override])
    for choice in self.selpar:
        local_triples.append([choice["CL"], choice["name"], choice["override"]])
        xml_triples.append(
            [choice["CL"], '"$%s"' % choice["name"], choice["override"]]
        )
    self.xclsuffix = xml_triples
    self.clsuffix = local_triples
269 | |
def prepclpos(self):
    """Collect ``[ordinal, value, override]`` triples for positional tools.

    Fills ``self.clsuffix`` (local command line) and ``self.xclsuffix``
    (generated XML command line), both ordered by the numeric value of
    the ordinal so that position 10 follows position 2.  A STDIN input is
    given ordinal "999"; a STDOUT output is recorded in
    ``self.lastclredirect`` / ``self.lastxclredirect`` rather than in the
    triples.  Repeats are not meaningful for positional parameters and
    only produce a printed warning.
    """

    def ordinal_key(triple):
        """Sort numeric ordinals numerically; anything else goes last."""
        position = str(triple[0]).strip()
        if position.isdigit():
            return (0, int(position), "")
        return (1, 0, position)

    clsuffix = []
    xclsuffix = []
    for p in self.infiles:
        if p["origCL"].strip().upper() == "STDIN":
            clsuffix.append(["999", p["infilename"], "< $%s" % p["infilename"]])
            xclsuffix.append(["999", p["infilename"], "< $%s" % p["infilename"]])
        else:
            clsuffix.append([p["CL"], p["infilename"], ""])
            xclsuffix.append([p["CL"], "$%s" % p["infilename"], ""])
    for p in self.outfiles:
        if p["origCL"].strip().upper() == "STDOUT":
            self.lastclredirect = [">", p["name"]]
            self.lastxclredirect = [">", "$%s" % p["name"]]
        else:
            clsuffix.append([p["CL"], p["name"], ""])
            xclsuffix.append([p["CL"], "$%s" % p["name"], ""])
    for p in self.addpar:
        nam = p["name"]
        if p["repeat"] == "1":  # repeats make NO sense
            print(f'### warning. Repeats for {nam} ignored - not permitted in positional parameter command lines!')
        over = p["override"]
        clsuffix.append([p["CL"], nam, over])
        xclsuffix.append([p["CL"], '"$%s"' % nam, over])
    for p in self.selpar:
        clsuffix.append([p["CL"], p["name"], p["override"]])
        xclsuffix.append([p["CL"], '"$%s"' % p["name"], p["override"]])
    # A plain list sort compares the ordinals as strings ("10" < "2") and
    # falls through to comparing the overrides on ties; sort on the integer
    # ordinal only.  The sort is stable, so both lists stay aligned.
    clsuffix.sort(key=ordinal_key)
    xclsuffix.sort(key=ordinal_key)
    self.xclsuffix = xclsuffix
    self.clsuffix = clsuffix
312 | |
def prepScript(self):
    """Load the user supplied script and stage the copies the tool needs.

    Reads ``self.args.script_path`` and sets:

    * ``self.script`` - the script with trailing whitespace removed
      from every line,
    * ``self.sfile`` - path of a temporary file holding that text,
    * ``self.escapedScript`` - the lines passed through cheetah_escape,
    * ``self.spacedScript`` - the non-blank lines, each prefixed with
      a space.

    The escaped script is also written to ``<tool_name>.<executable>`` in
    the current directory.

    Raises AssertionError when the script has no non-blank line.
    """
    with open(self.args.script_path, "r") as script_in:
        rx = [x.rstrip() for x in script_in]
    assert any(x.strip() for x in rx), "Supplied script is empty. Cannot run"
    self.script = "\n".join(rx)
    fhandle, self.sfile = tempfile.mkstemp(
        prefix=self.tool_name, suffix="_%s" % (self.executeme[0])
    )
    # mkstemp hands back an already open descriptor; write through it so
    # it is closed instead of leaking while a second handle is opened.
    with os.fdopen(fhandle, "w") as tscript:
        tscript.write(self.script)
    self.escapedScript = [cheetah_escape(x) for x in rx]
    self.spacedScript = [f" {x}" for x in rx if x.strip()]
    art = "%s.%s" % (self.tool_name, self.executeme[0])
    with open(art, "wb") as artifact:
        artifact.write(bytes("\n".join(self.escapedScript), "utf8"))
331 | |
def cleanuppar(self):
    """positional parameters are complicated by their numeric ordinal

    In positional mode every CL must be a digit string (inputs may also
    be STDIN, outputs STDOUT); otherwise an AssertionError is raised.
    Afterwards each input is replaced by a shallow copy carrying
    ``origCL`` and ``infilename``, and every output and additional
    parameter gets an ``origCL`` entry.
    """
    complaint = "Positional parameters must be ordinal integers - got %s for %s"
    mode = self.args.parampass
    if mode == "positional":
        for infile in self.infiles:
            assert (
                infile["CL"].isdigit() or infile["CL"].strip().upper() == "STDIN"
            ), complaint % (infile["CL"], infile["label"])
        for outfile in self.outfiles:
            assert (
                outfile["CL"].isdigit() or outfile["CL"].strip().upper() == "STDOUT"
            ), complaint % (outfile["CL"], outfile["name"])
        for extra in self.addpar:
            assert extra["CL"].isdigit(), complaint % (extra["CL"], extra["name"])
    for position, original in enumerate(self.infiles):
        clone = copy.copy(original)
        clone["origCL"] = clone["CL"]
        if mode in ["positional", "0"]:
            # no usable flag name in these modes, so derive one from the label
            clone["infilename"] = clone["label"].replace(" ", "_")
        else:
            clone["infilename"] = clone["CL"]
        self.infiles[position] = clone
    for outfile in self.outfiles:
        outfile["origCL"] = outfile["CL"]  # keep copy
    for extra in self.addpar:
        extra["origCL"] = extra["CL"]
370 | |
def clpositional(self):
    """Append the prepared positional values to both command lines.

    Only the value of each triple is used; a pending STDOUT redirect is
    appended to the XML command line alone.
    """
    # inputs in order then params
    self.cl.extend([value for (_ordinal, value, _override) in self.clsuffix])
    self.xmlcl.extend([value for (_ordinal, value, _override) in self.xclsuffix])
    if self.lastxclredirect:
        redirect_op, redirect_target = self.lastxclredirect[0], self.lastxclredirect[1]
        self.xmlcl.append(redirect_op)
        self.xmlcl.append(redirect_target)
385 | |
def clargparse(self):
    """argparse style

    Appends the prepared triples to ``self.xmlcl`` and ``self.cl``.  A
    one character flag gets a single dash, anything longer gets two.  A
    non-empty override replaces the flag; on the XML command line the
    override stands alone (no value follows it), on the local command
    line the value is still appended.  A missing (``None``) or empty
    override means "no override".  A pending STDOUT redirect is appended
    to the XML command line last.
    """

    def dashed(flag):
        """Prefix a flag name with the conventional number of dashes."""
        if len(flag.strip()) == 1:
            return "-%s" % flag
        return "--%s" % flag

    aCL = self.cl.append
    aXCL = self.xmlcl.append
    # inputs then params in argparse named form
    for (k, v, koverride) in self.xclsuffix:
        if koverride:
            aXCL(koverride)
        else:
            aXCL(dashed(k))
            aXCL(v)
    for (k, v, koverride) in self.clsuffix:
        aCL(koverride if koverride else dashed(k))
        aCL(v)
    if self.lastxclredirect:
        aXCL(self.lastxclredirect[0])
        aXCL(self.lastxclredirect[1])
415 | |
def getNdash(self, newname):
    """Return how many dashes precede *newname* on the command line.

    Positional tools use none; otherwise names shorter than two
    characters get one dash and all others get two.
    """
    if self.is_positional:
        return 0
    return 1 if len(newname) < 2 else 2
424 | |
def doXMLparam(self):
    """Create the galaxyxml inputs, outputs and test elements.

    Walks the output files, input files, additional parameters, select
    parameters and collections in that order, appending parameter
    elements to ``self.tinputs`` / ``self.toutputs`` and matching test
    elements to ``self.testparam``.

    Raises ValueError for an additional or select parameter whose type
    is not recognised.
    """
    self._xml_output_files()
    self._xml_input_files()
    self._xml_additional_params()
    self._xml_select_params()
    self._xml_collections()


def _xml_output_test(self, newname, test):
    """Build the TestOutput element described by a non-empty test string.

    ``test`` is ``diff[:N]``, ``sim_size[:N]`` or any other compare
    keyword, which is passed through unchanged.
    """
    sample = "%s_sample" % newname
    fields = test.split(":")
    setting = fields[1].strip() if len(fields) > 1 else ""
    if test.startswith("diff"):
        # lines_diff stays 0 unless a usable count follows the colon
        ld = int(setting) if setting.isdigit() else 0
        return gxtp.TestOutput(
            name=newname, value=sample, compare="diff", lines_diff=ld
        )
    if test.startswith("sim_size"):
        # a value with a decimal point is a fraction (capped at 1.0),
        # anything else an absolute size; neither is set without a value
        delta = None
        delta_frac = None
        if setting:
            if "." in setting:
                delta_frac = min(1.0, float(setting))
            else:
                delta = int(setting)
        return gxtp.TestOutput(
            name=newname,
            value=sample,
            compare="sim_size",
            delta=delta,
            delta_frac=delta_frac,
        )
    return gxtp.TestOutput(name=newname, value=sample, compare=test)


def _xml_output_files(self):
    """Add an OutputData element, and a test when requested, per output."""
    for p in self.outfiles:
        newname = p["name"]
        oldcl = p["origCL"]
        test = p["test"].strip()
        aparm = gxtp.OutputData(
            name=newname,
            format=p["format"],
            num_dashes=self.getNdash(p["CL"]),
            label=newname,
        )
        aparm.positional = self.is_positional
        if self.is_positional:
            if oldcl.upper() == "STDOUT":
                aparm.positional = 9999999
                aparm.command_line_override = "> $%s" % newname
            else:
                aparm.positional = int(oldcl)
                aparm.command_line_override = "$%s" % newname
        self.toutputs.append(aparm)
        if test:
            self.testparam.append(self._xml_output_test(newname, test))


def _xml_input_files(self):
    """Add a DataParam element and its test parameter per input file."""
    for p in self.infiles:
        newname = p["infilename"]
        # NOTE(review): compares with the integer 1 while the additional
        # parameters compare "repeat" with the string "1" - confirm which
        # form the caller sends before relying on input repeats.
        reps = p.get("repeat", 0) == 1
        alab = p["label"] if len(p["label"]) > 0 else p["CL"]
        aninput = gxtp.DataParam(
            newname,
            optional=False,
            label=alab,
            help=p["help"],
            format=p["format"],
            multiple=False,
            num_dashes=self.getNdash(newname),
        )
        aninput.positional = self.is_positional
        if self.is_positional:
            if p["origCL"].upper() == "STDIN":
                aninput.positional = 9999998
                # an input is read from stdin, so redirect with "<"
                aninput.command_line_override = "< $%s" % newname
            else:
                aninput.positional = int(p["origCL"])
                aninput.command_line_override = "$%s" % newname
        sample = gxtp.TestParam(newname, value="%s_sample" % newname)
        if reps:
            repe = gxtp.Repeat(
                name=f"R_{newname}", title=f"Add as many {alab} as needed"
            )
            repe.append(aninput)
            self.tinputs.append(repe)
            tparm = gxtp.TestRepeat(name=f"R_{newname}")
            tparm.append(sample)
            self.testparam.append(tparm)
        else:
            self.tinputs.append(aninput)
            self.testparam.append(sample)


def _xml_additional_params(self):
    """Add a typed parameter element and test value per additional parameter."""
    param_classes = {
        "text": gxtp.TextParam,
        "integer": gxtp.IntegerParam,
        "float": gxtp.FloatParam,
        "boolean": gxtp.BooleanParam,
    }
    for p in self.addpar:
        newname = p["name"]
        newval = p["value"]
        newtype = p["type"]
        oldcl = p["origCL"]
        reps = p["repeat"] == "1"
        newlabel = p["label"] if len(p["label"]) > 0 else newname
        if newtype not in param_classes:
            raise ValueError(
                'Unrecognised parameter type "%s" for additional parameter %s in makeXML'
                % (newtype, newname)
            )
        aparm = param_classes[newtype](
            newname,
            label=newlabel,
            help=p["help"],
            value=newval,
            num_dashes=self.getNdash(newname),
        )
        aparm.positional = self.is_positional
        if self.is_positional:
            aparm.positional = int(oldcl)
        if reps:
            repe = gxtp.Repeat(
                name=f"R_{newname}", title=f"Add as many {newlabel} as needed"
            )
            repe.append(aparm)
            self.tinputs.append(repe)
            tparm = gxtp.TestRepeat(name=f"R_{newname}")
            tparm.append(gxtp.TestParam(newname, value=newval))
            self.testparam.append(tparm)
        else:
            self.tinputs.append(aparm)
            self.testparam.append(gxtp.TestParam(newname, value=newval))


def _xml_select_params(self):
    """Add a SelectParam with its options and a test value per select parameter."""
    for p in self.selpar:
        newname = p["name"]
        newval = p["value"]
        newtype = p["type"]
        newlabel = p["label"] if len(p["label"]) > 0 else newname
        if newtype != "selecttext":
            raise ValueError(
                'Unrecognised parameter type "%s" for selecttext parameter %s in makeXML'
                % (newtype, newname)
            )
        newtext = p["texts"]
        aparm = gxtp.SelectParam(
            newname,
            label=newlabel,
            help=p["help"],
            num_dashes=self.getNdash(newname),
        )
        # value i is paired with text i
        for i, optval in enumerate(newval):
            aparm.append(gxtp.SelectOption(value=optval, text=newtext[i]))
        aparm.positional = self.is_positional
        if self.is_positional:
            aparm.positional = int(p["CL"])
        self.tinputs.append(aparm)
        # NOTE(review): the test value is the whole option list, not one
        # chosen option - confirm this is what the generated test needs.
        self.testparam.append(gxtp.TestParam(newname, value=newval))


def _xml_collections(self):
    """Add an OutputCollection with a discover_datasets rule per collection."""
    for p in self.collections:
        newname = p["name"]
        collect = gxtp.OutputCollection(newname, label=p["label"], type=p["kind"])
        disc = gxtp.DiscoverDatasets(
            pattern=p["discover"], directory=f"{newname}", visible="false"
        )
        collect.append(disc)
        self.toutputs.append(collect)
        try:
            tparm = gxtp.TestOutputCollection(newname)  # broken until PR merged.
            self.testparam.append(tparm)
        except Exception:
            # deliberate best effort: keep building the tool without the test
            print("#### WARNING: Galaxyxml version does not have the PR merged yet - tests for collections must be over-ridden until then!")
640 | |
def doNoXMLparam(self):
    """filter style package - stdin to stdout

    Builds at most one input (redirected with ``<``) and one output
    (redirected with ``>``) plus their test elements.  Raises
    AssertionError when more than one input file was supplied.
    """
    if len(self.infiles) > 0:
        source = self.infiles[0]
        alab = source["label"] if len(source["label"]) != 0 else source["infilename"]
        max1s = (
            "Maximum one input if parampass is 0 but multiple input files supplied - %s"
            % str(self.infiles)
        )
        assert len(self.infiles) == 1, max1s
        in_name = source["infilename"]
        aninput = gxtp.DataParam(
            in_name,
            optional=False,
            label=alab,
            help=source["help"],
            format=source["format"],
            multiple=False,
            num_dashes=0,
        )
        aninput.command_line_override = "< $%s" % in_name
        aninput.positional = True
        self.tinputs.append(aninput)
        self.testparam.append(
            gxtp.TestParam(name=in_name, value="%s_sample" % in_name)
        )
    if len(self.outfiles) > 0:
        target = self.outfiles[0]
        out_name = target["name"]
        anout = gxtp.OutputData(out_name, format=target["format"], num_dashes=0)
        anout.command_line_override = "> $%s" % out_name
        anout.positional = self.is_positional
        self.toutputs.append(anout)
        self.testparam.append(
            gxtp.TestOutput(name=out_name, value="%s_sample" % out_name)
        )
676 | |
def makeXML(self):
    """
    Create a Galaxy xml tool wrapper for the new script and write it to
    ``<tool_name>.xml`` in the current directory.

    The command section comes from the command override file when one was
    given, otherwise from ``self.xmlcl``.  Help text is read from
    ``args.help_text`` (with a fallback message) and, for script based
    tools, followed by a listing of the script.  Requirements are parsed
    from the comma separated ``args.packages`` where each item is
    ``name``, ``name:version``, ``name=version`` or ``name==version``.
    When a test override was supplied it replaces everything from
    ``<tests>`` to ``</tests>`` in the exported XML.
    """
    if self.command_override:
        self.newtool.command_override = self.command_override  # config file
    else:
        self.newtool.command_override = self.xmlcl
    cite = gxtp.Citations()
    cite.append(gxtp.Citation(type="doi", value="10.1093/bioinformatics/bts573"))
    self.newtool.citations = cite
    safertext = ""
    if self.args.help_text:
        with open(self.args.help_text, "r") as helpfile:
            helptext = helpfile.readlines()
        # readlines() keeps each line ending, so this join leaves a blank
        # line between source lines - kept as the original did.
        safertext = "\n".join([cheetah_escape(x) for x in helptext])
    if len(safertext.strip()) == 0:
        safertext = (
            "Ask the tool author (%s) to rebuild with help text please\n"
            % (self.args.user_email)
        )
    if self.args.script_path:
        if len(safertext) > 0:
            safertext = safertext + "\n\n------\n"  # transition allowed!
        scr = [x for x in self.spacedScript if x.strip()]
        scr.insert(0, "\n\nScript::\n")
        if len(scr) > 300:
            # keep only the head and tail of very long scripts
            scr = (
                scr[:100]
                + [" >300 lines - stuff deleted", " ......"]
                + scr[-100:]
            )
        scr.append("\n")
        safertext = safertext + "\n".join(scr)
    self.newtool.help = safertext
    self.newtool.version_command = f'echo "{self.args.tool_version}"'
    requirements = gxtp.Requirements()
    if self.args.packages:
        for d in self.args.packages.split(","):
            if not d.strip():
                continue  # tolerate a trailing or doubled comma
            d = d.replace("==", ":")
            d = d.replace("=", ":")
            # partition never fails to unpack, even when the spec holds
            # more than one separator
            packg, _, ver = d.partition(":")
            requirements.append(
                gxtp.Requirement("package", packg.strip(), ver.strip())
            )
    self.newtool.requirements = requirements
    if self.args.parampass == "0":
        self.doNoXMLparam()
    else:
        self.doXMLparam()
    self.newtool.outputs = self.toutputs
    self.newtool.inputs = self.tinputs
    if self.args.script_path:
        configfiles = gxtp.Configfiles()
        configfiles.append(
            gxtp.Configfile(name="runme", text="\n".join(self.escapedScript))
        )
        self.newtool.configfiles = configfiles
    tests = gxtp.Tests()
    test_a = gxtp.Test()
    for tp in self.testparam:
        test_a.append(tp)
    tests.append(test_a)
    self.newtool.tests = tests
    self.newtool.add_comment(
        "Created by %s at %s using the Galaxy Tool Factory."
        % (self.args.user_email, timenow())
    )
    self.newtool.add_comment("Source in git at: %s" % (toolFactoryURL))
    exml0 = self.newtool.export()
    exml = exml0.replace(FAKEEXE, "")  # temporary work around until PR accepted
    if (
        self.test_override
    ):  # cannot do this inside galaxyxml as it expects lxml objects for tests
        part1 = exml.split("<tests>")[0]
        part2 = exml.split("</tests>")[1]
        exml = "%s\n%s\n%s" % (part1, "\n".join(self.test_override), part2)
    with open("%s.xml" % self.tool_name, "w") as xf:
        xf.write(exml)
        xf.write("\n")
766 # ready for the tarball | |
767 | |
def run(self):
    """
    generate test outputs by running a command line
    won't work if command or test override in play - planemo is the
    easiest way to generate test outputs for that case so is
    automagically selected

    For parampass "0" the first input file (if any) is connected to the
    child's stdin and the first output file (if any) to its stdout; the
    command line is noted in the runner log.  Otherwise stderr goes to
    the error log and stdout to the STDOUT redirect target when one was
    declared, else to the runner log after a line noting the command.
    Empty log files are removed afterwards.

    Returns the exit code of the child process.
    """
    scl = " ".join(self.cl)
    announce = "## Executing Toolfactory generated command line = %s\n" % scl
    if self.args.parampass != "0":
        # mode "a" creates the file when missing, so no existence test needed
        with open(self.elog, "a") as ste:
            if self.lastclredirect:
                sto = open(self.lastclredirect[1], "wb")  # is name of an output file
            else:
                sto = open(self.tlog, "a")
                sto.write(announce)
            with sto:
                # flush our own text before the child writes to the same file
                sto.flush()
                subp = subprocess.run(
                    self.cl, shell=False, stdout=sto, stderr=ste
                )
        retval = subp.returncode
    else:  # work around special case - stdin and write to stdout
        # The output file is binary tool output, so the note about the
        # command goes to the runner log rather than into that file.
        with open(self.tlog, "a") as tlog:
            tlog.write(announce)
        sti = None
        sto = None
        try:
            if len(self.infiles) > 0:
                sti = open(self.infiles[0]["name"], "rb")
            if len(self.outfiles) > 0:
                sto = open(self.outfiles[0]["name"], "wb")
            # None lets the child inherit this process's stdin / stdout,
            # which are therefore never closed here
            subp = subprocess.run(
                self.cl, shell=False, stdout=sto, stdin=sti
            )
        finally:
            for handle in (sto, sti):
                if handle is not None:
                    handle.close()
        retval = subp.returncode
    for logpath in (self.tlog, self.elog):
        if os.path.isfile(logpath) and os.stat(logpath).st_size == 0:
            os.unlink(logpath)
    logging.debug("run done")
    return retval
823 | |
def shedLoad(self):
    """
    use bioblend to create new repository
    or update existing

    Looks for a repository named ``self.tool_name`` on the tool shed at
    ``args.toolshed_url``; creates it when absent, then uploads the
    tarball at ``self.newtarpath`` to it.  Progress and a connection
    failure are appended to the runner log.
    """
    with open(self.tlog, "a") as sto:
        # NOTE(review): verify=False is passed straight through to bioblend
        # and presumably disables TLS certificate checking - confirm this
        # is only ever pointed at a local tool shed.
        ts = toolshed.ToolShedInstance(
            url=self.args.toolshed_url,
            key=self.args.toolshed_api_key,
            verify=False,
        )
        repos = ts.repositories.get_repositories()
        rnames = [x.get("name", "?") for x in repos]
        rids = [x.get("id", "?") for x in repos]
        tfcat = "ToolFactory generated tools"
        if self.tool_name not in rnames:
            tscat = ts.categories.get_categories()
            cnames = [x.get("name", "?").strip() for x in tscat]
            cids = [x.get("id", "?") for x in tscat]
            catID = None
            if tfcat.strip() in cnames:
                catID = cids[cnames.index(tfcat)]
            # NOTE(review): the lookup above uses the sanitised
            # self.tool_name while creation uses args.tool_name - confirm
            # the two always agree.
            res = ts.repositories.create_repository(
                name=self.args.tool_name,
                synopsis="Synopsis:%s" % self.args.tool_desc,
                description=self.args.tool_desc,
                type="unrestricted",
                remote_repository_url=self.args.toolshed_url,
                homepage_url=None,
                category_ids=catID,
            )
            tid = res.get("id", None)
            sto.write(
                f"#create_repository {self.args.tool_name} tid={tid} res={res}\n"
            )
        else:
            tid = rids[rnames.index(self.tool_name)]
        try:
            res = ts.repositories.update_repository(
                id=tid, tar_ball_path=self.newtarpath, commit_message=None
            )
            # log the repository id, not the builtin id() function
            sto.write(f"#update res id {tid} ={res}\n")
        except ConnectionError:
            sto.write(
                "####### Is the toolshed running and the API key correct? Bioblend shed upload failed\n"
            )
876 | |
def eph_galaxy_load(self):
    """
    use ephemeris to load the new tool from the local toolshed after planemo uploads it

    Runs ``shed-tools install`` for ``self.tool_name`` with the Galaxy
    and tool shed URLs from ``self.args``, sending its stdout and stderr
    to the runner log.

    Returns the exit code of the ``shed-tools`` process.
    """
    cll = [
        "shed-tools",
        "install",
        "-g",
        self.args.galaxy_url,
        "--latest",
        "-a",
        self.args.galaxy_api_key,
        "--name",
        self.tool_name,
        "--owner",
        "fubar",
        "--toolshed",
        self.args.toolshed_url,
        "--section_label",
        "ToolFactory",
    ]
    with open(self.tlog, "a") as tout:
        # NOTE(review): this writes the Galaxy API key into the runner log.
        tout.write("running\n%s\n" % " ".join(cll))
        # flush so the header lands before anything the child process writes
        tout.flush()
        subp = subprocess.run(
            cll,
            cwd=self.ourcwd,
            shell=False,
            stderr=tout,
            stdout=tout,
        )
        tout.write(
            "installed %s - got retcode %d\n" % (self.tool_name, subp.returncode)
        )
    return subp.returncode
915 | |
def writeShedyml(self):
    """for planemo

    Writes ``.shed.yml`` into ``self.tooloutdir`` with the repository
    name, owner (the part of ``args.user_email`` before ``@``), type,
    description, synopsis and category.
    """
    yuser = self.args.user_email.split("@")[0]
    yfname = os.path.join(self.tooloutdir, ".shed.yml")
    odict = {
        "name": self.tool_name,
        "owner": yuser,
        "type": "unrestricted",
        "description": self.args.tool_desc,
        "synopsis": self.args.tool_desc,
        "category": "TF Generated Tools",
    }
    # the context manager closes the file even if yaml.dump raises
    with open(yfname, "w") as yamlf:
        yaml.dump(odict, yamlf, allow_unicode=True)
931 | |
def makeTool(self):
    """write xmls and input samples into place

    Generates the tool XML with makeXML (which itself handles the
    parampass "0" case), copies it into ``self.tooloutdir``, copies the
    staged script there for script based tools, and copies every input
    file to ``<infilename>_sample`` in both the test-data and the report
    directories.
    """
    # parampass is always a string, so the former ``== 0`` test could never
    # select the other branch; makeXML is the one entry point.
    self.makeXML()
    if self.args.script_path:
        # self.sfile is the path prepScript got from tempfile.mkstemp; join
        # only its file name, otherwise an absolute path replaces the tool
        # directory and the copy is skipped.
        stname = os.path.join(self.tooloutdir, os.path.basename(self.sfile))
        if not os.path.exists(stname):
            shutil.copyfile(self.sfile, stname)
    xreal = "%s.xml" % self.tool_name
    xout = os.path.join(self.tooloutdir, xreal)
    shutil.copyfile(xreal, xout)
    for p in self.infiles:
        pth = p["name"]
        sample_name = "%s_sample" % p["infilename"]
        shutil.copyfile(pth, os.path.join(self.testdir, sample_name))
        shutil.copyfile(pth, os.path.join(self.repdir, sample_name))
951 | |
def makeToolTar(self, report_fail=False):
    """move outputs into test-data and prepare the tarball

    For every declared output without a ``<name>_sample`` file in the
    test directory, the file ``<name>`` found there is copied to
    ``<name>_sample`` and to ``<name>.sample`` in the report directory.
    When the file is missing and ``report_fail`` is true a line is
    appended to the runner log.  The tool directory is then archived as
    ``self.newtarpath`` (top level folder ``self.tool_name``, planemo
    test report HTML files left out) and copied to ``args.new_tool``.
    """
    excludeme = "_planemo_test_report.html"

    def exclude_function(tarinfo):
        """tarfile filter: drop planemo test reports, keep the rest."""
        return None if tarinfo.name.endswith(excludeme) else tarinfo

    # the runner log is opened (and so created) even if nothing is reported
    with open(self.tlog, "a") as tout:
        for p in self.outfiles:
            oname = p["name"]
            tdest = os.path.join(self.testdir, "%s_sample" % oname)
            src = os.path.join(self.testdir, oname)
            if os.path.isfile(tdest):
                continue
            if os.path.isfile(src):
                shutil.copyfile(src, tdest)
                dest = os.path.join(self.repdir, "%s.sample" % (oname))
                shutil.copyfile(src, dest)
            elif report_fail:
                tout.write(
                    "###Tool may have failed - output file %s not found in testdir after planemo run %s.\n"
                    % (tdest, self.testdir)
                )
    with tarfile.open(self.newtarpath, "w:gz") as tf:
        tf.add(
            name=self.tooloutdir,
            arcname=self.tool_name,
            filter=exclude_function,
        )
    shutil.copyfile(self.newtarpath, self.args.new_tool)
987 | |
def moveRunOutputs(self):
    """Copy planemo or run outputs into the toolfactory report collection.

    Every regular file in ``self.tooloutdir`` is copied into ``self.repdir``.
    When ``self.args.include_tests`` is set, the regular files in
    ``self.testdir`` are copied too, except those whose name ends in
    ``_planemo_test_report.html``.

    Naming rules for the copies:
    ``.tgz`` and ``.json`` files are skipped;
    ``.yml``, ``.yaml`` and ``.xml`` files have every dot replaced by an
    underscore and ``.txt`` appended;
    names without any dot get ``.txt`` appended;
    everything else keeps its name.
    """

    def report_name(fname):
        """Return the name to use in the collection, or None to skip."""
        if "." not in fname:
            return f"{fname}.txt"
        ext = os.path.splitext(fname)[1]
        if ext in (".tgz", ".json"):
            return None
        if ext in (".yml", ".xml", ".yaml"):
            return f"{fname.replace('.', '_')}.txt"
        return fname

    def copy_files(srcdir, skip_suffix=None):
        """Copy each regular file in srcdir that has a collection name."""
        with os.scandir(srcdir) as outs:
            for entry in outs:
                if not entry.is_file():
                    continue
                if skip_suffix and entry.name.endswith(skip_suffix):
                    continue
                newname = report_name(entry.name)
                if newname is None:
                    continue
                shutil.copyfile(
                    os.path.join(srcdir, entry.name),
                    os.path.join(self.repdir, newname),
                )

    copy_files(self.tooloutdir)
    if self.args.include_tests:
        copy_files(self.testdir, skip_suffix="_planemo_test_report.html")
1027 | |
def planemo_test_once(self):
    """Run ``planemo test`` once on the generated tool; return its exit code.

    planemo is a requirement so it is available for testing (the original
    note adds that a different call is needed inside the biocontainer).
    The run uses ``--update_test_data`` with ``self.testdir`` as the test
    data directory, and the HTML test report is written to
    ``self.repdir``. planemo is started with ``self.tooloutdir`` as its
    working directory and its stdout and stderr are appended to
    ``self.tlog``.

    :return: the return code of the planemo process.
    """
    xreal = "%s.xml" % self.tool_name
    tool_test_path = os.path.join(
        self.repdir, f"{self.tool_name}_planemo_test_report.html"
    )
    # abspath() resolves against this process's working directory, not the
    # cwd= handed to planemo below.
    cll = [
        "planemo",
        "test",
        "--conda_auto_init",
        "--test_data",
        os.path.abspath(self.testdir),
        "--test_output",
        os.path.abspath(tool_test_path),
        "--galaxy_root",
        self.args.galaxy_root,
        "--update_test_data",
        os.path.abspath(xreal),
    ]
    # Append mode creates the log when it is missing; the context manager
    # closes it even when planemo cannot be started.
    with open(self.tlog, "a") as tout:
        p = subprocess.run(
            cll,
            shell=False,
            cwd=self.tooloutdir,
            stderr=tout,
            stdout=tout,
        )
    return p.returncode
1064 | |
1065 | |
def main():
    """Command-line entry point of the Galaxy ToolFactory wrapper.

    It expects to be called by a special purpose tool.xml. The arguments are
    parsed and validated, the ``.shed.yml`` and the tool are written, and
    then, depending on ``--make_Tool``:

    ``generate`` runs the script, collects the outputs and builds the
    tarball;
    any other value runs one planemo test, collects the outputs and builds
    the tarball with missing outputs reported;
    ``gentestinstall`` additionally uploads the tool to the toolshed and
    installs it into Galaxy.

    Validation failures end the process with the message on stderr and exit
    status 1. Explicit checks are used instead of ``assert`` so that they
    still run under ``python -O``.
    """
    parser = argparse.ArgumentParser()
    a = parser.add_argument
    a("--script_path", default=None)
    a("--history_test", default=None)
    a("--cl_suffix", default=None)
    a("--sysexe", default=None)
    a("--packages", default=None)
    a("--tool_name", default="newtool")
    a("--tool_dir", default=None)
    a("--input_files", default=[], action="append")
    a("--output_files", default=[], action="append")
    a("--user_email", default="Unknown")
    a("--bad_user", default=None)
    a("--make_Tool", default="runonly")
    a("--help_text", default=None)
    a("--tool_desc", default=None)
    a("--tool_version", default=None)
    a("--citations", default=None)
    a("--command_override", default=None)
    a("--test_override", default=None)
    a("--additional_parameters", action="append", default=[])
    a("--selecttext_parameters", action="append", default=[])
    a("--edit_additional_parameters", action="store_true", default=False)
    a("--parampass", default="positional")
    a("--tfout", default="./tfout")
    a("--new_tool", default="new_tool")
    a("--galaxy_url", default="http://localhost:8080")
    a("--toolshed_url", default="http://localhost:9009")
    # make sure this is identical to tool_sheds_conf.xml
    # localhost != 127.0.0.1 so validation fails
    a("--toolshed_api_key", default="fakekey")
    a("--galaxy_api_key", default="fakekey")
    a("--galaxy_root", default="/galaxy-central")
    a("--galaxy_venv", default="/galaxy_venv")
    a("--collection", action="append", default=[])
    a("--include_tests", default=False, action="store_true")
    args = parser.parse_args()
    if args.bad_user:
        sys.exit(
            "UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy "
            'admin adds %s to "admin_users" in the galaxy.yml Galaxy configuration file'
            % (args.bad_user, args.bad_user)
        )
    if not args.tool_name:
        sys.exit("## Tool Factory expects a tool name - eg --tool_name=DESeq")
    if not (args.sysexe or args.packages):
        sys.exit(
            "## Tool Factory wrapper expects an interpreter "
            "or an executable package in --sysexe or --packages"
        )
    r = ScriptRunner(args)
    r.writeShedyml()
    r.makeTool()
    if args.make_Tool == "generate":
        r.run()
        r.moveRunOutputs()
        r.makeToolTar()
    else:
        # r.planemo_test(genoutputs=True) fails (see PR), so a single planemo
        # run is used to test the tool here.
        r.planemo_test_once()
        r.moveRunOutputs()
        r.makeToolTar(report_fail=True)
        if args.make_Tool == "gentestinstall":
            r.shedLoad()
            r.eph_galaxy_load()
1136 | |
1137 | |
# Run the wrapper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()