28
|
1 # rgToolFactory.py
|
|
2 # see https://github.com/fubar2/toolfactory
|
|
3 #
|
|
4 # copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
|
|
5 #
|
|
6 # all rights reserved
|
|
7 # Licensed under the LGPL
|
|
8 # suggestions for improvement and bug fixes welcome at https://github.com/fubar2/toolfactory
|
|
9 #
|
|
10 # July 2020: BCC was fun and I feel like rip van winkle after 5 years.
|
|
11 # Decided to
|
|
12 # 1. Fix the toolfactory so it works - done for simplest case
|
|
13 # 2. Fix planemo so the toolfactory function works
|
|
14 # 3. Rewrite bits using galaxyxml functions where that makes sense - done
|
|
15 #
|
|
16 # removed all the old complications including making the new tool use this same script
|
|
17 # galaxyxml now generates the tool xml https://github.com/hexylena/galaxyxml
|
|
18 # No support for automatic HTML file creation from arbitrary outputs
|
|
19 # TODO: add option to run that code as a post execution hook
|
|
20 # TODO: add additional history input parameters - currently only one
|
|
21
|
|
22 import sys
|
|
23 import subprocess
|
|
24 import shutil
|
|
25 import os
|
|
26 import time
|
|
27 import tempfile
|
|
28 import argparse
|
|
29 import tarfile
|
|
30 import re
|
|
31 import galaxyxml.tool as gxt
|
|
32 import galaxyxml.tool.parameters as gxtp
|
|
33 import logging
|
|
34
|
|
35
|
|
# Name of the running script (final path component of argv[0]).
progname = os.path.basename(sys.argv[0])
# Human-readable version tag for this Tool Factory release.
myversion = "V2.1 July 2020"
# Module-wide chatter / debugging switches.
verbose = True
debug = True
# Project home; written into generated tool XML as a comment.
toolFactoryURL = "https://github.com/fubar2/toolfactory"
# Field separator used inside --input_files and --additional_parameters values.
ourdelim = "~~~"
|
|
42
|
|
43
|
|
def timenow():
    """Return the current local time formatted as 'dd/mm/YYYY HH:MM:SS'."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    # strftime() without an explicit time tuple formats the current local time.
    return time.strftime(stamp_format)
|
|
48
|
|
49
|
|
def quote_non_numeric(s):
    """Return *s* unchanged when it parses as a float, else wrapped in double quotes.

    Intended for passing parameters to interpreters such as perl or Rscript,
    where non-numeric values need to arrive pre-quoted.
    """
    try:
        float(s)
    except ValueError:
        # not a number - hand back a double-quoted copy
        return '"%s"' % s
    return s
|
|
59
|
|
60
|
|
# Characters that must not appear raw in the generated tool XML / help text,
# mapped to their replacements. The first three become HTML entities; "$" is
# backslash-escaped rather than turned into an entity.
html_escape_table = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    "$": r"\$",
}


def html_escape(text):
    """Produce entities within text.

    Every character of *text* that is a key of ``html_escape_table`` is
    replaced by its mapped value; all other characters pass through
    unchanged. The replacement is done character by character, so an
    ampersand that is already part of an entity is escaped again.
    """
    return "".join(html_escape_table.get(c, c) for c in text)
|
|
72
|
|
73
|
|
def html_unescape(text):
    """Revert entities within text. Multiple character targets so use replace.

    Turns ``&gt;``, ``&lt;`` and ``&amp;`` back into ``>``, ``<`` and ``&``,
    and a backslash-escaped dollar (``\\$``) back into ``$``.
    """
    t = text.replace('&gt;', '>')
    t = t.replace('&lt;', '<')
    t = t.replace('\\$', '$')
    # "&amp;" must be handled last: doing it first would turn an escaped
    # literal such as "&amp;gt;" into "&gt;" and then wrongly into ">".
    t = t.replace('&amp;', '&')
    return t
|
|
81
|
|
82
|
|
def parse_citations(citations_text):
    """Split a citations blob into a list of ``(kind, text)`` tuples.

    *citations_text* is a string in which every citation is introduced by
    the marker ``**ENTRY**`` followed by a ``doi`` or ``bibtex`` tag and the
    citation body. Blank chunks are ignored.

    Returns a list of ``("doi", body)`` / ``("bibtex", body)`` tuples with
    surrounding whitespace removed from each body. A chunk carrying neither
    tag is kept whole and reported as ``"bibtex"``. An empty or ``None``
    input yields an empty list.
    """
    if not citations_text:
        return []
    citation_tuples = []
    for chunk in citations_text.split("**ENTRY**"):
        # strip first so whitespace/newlines after the marker cannot hide the tag
        entry = chunk.strip()
        if not entry:
            continue
        if entry.startswith("doi"):
            citation_tuples.append(("doi", entry[len("doi"):].strip()))
        elif entry.startswith("bibtex"):
            citation_tuples.append(("bibtex", entry[len("bibtex"):].strip()))
        else:
            # untagged entry: do not blindly chop a tag-sized prefix off it
            citation_tuples.append(("bibtex", entry))
    return citation_tuples
|
|
95
|
|
96
|
|
class ScriptRunner:
    """Wrapper for an arbitrary script
    uses galaxyxml

    The constructor turns the parsed command-line arguments into the command
    line (``self.cl``) used to run the user's script or executable once, and
    collects the input-file metadata needed to describe the tool.
    ``run`` executes that command line, ``makeXML`` writes the Galaxy tool XML
    and ``makeTooltar`` bundles script, XML and test data into a tarball.
    """

    def __init__(self, args=None):
        """
        prepare command line cl for running the tool here
        and prepare elements needed for galaxyxml tool generation

        *args* is the argparse namespace built in ``main``; despite the
        ``None`` default it is dereferenced immediately, so it is required.
        """
        lastclredirect = None
        self.cl = []
        aCL = self.cl.append
        # NOTE(review): after this chdir a *relative* output_dir is joined onto
        # paths again below (elog/tlog/cwd=) - looks safe only for './' or an
        # absolute path; confirm what the caller passes.
        if args.output_dir:  # simplify for the tool tarball
            os.chdir(args.output_dir)
        self.args = args
        # a sanitizer now does this but..
        self.tool_name = re.sub('[^a-zA-Z0-9_]+', '', args.tool_name)
        self.tool_id = self.tool_name
        self.xmlfile = '%s.xml' % self.tool_name
        if self.args.interpreter_name == "Executable":  # binary - no need
            # NOTE(review): this branch never sets self.script / self.sfile,
            # which later code reads - executable-only tools look unfinished.
            aCL(self.args.exe_package)  # this little CL will just run
        else:  # a script has been provided
            with open(self.args.script_path, 'r') as script_source:
                # rstrip removes pesky dos line endings if needed
                rx = [x.rstrip() for x in script_source]
            self.script = '\n'.join(rx)
            fhandle, self.sfile = tempfile.mkstemp(
                prefix=self.tool_name, suffix=".%s" % (args.interpreter_name))
            # use self.sfile as script source for the subprocess; writing
            # through the descriptor mkstemp returned also closes it
            with os.fdopen(fhandle, 'w') as tscript:
                tscript.write(self.script)
            escaped_lines = '\n'.join([' %s' % html_escape(x) for x in rx])
            self.indentedScript = " %s" % escaped_lines  # for restructured text in help
            self.escapedScript = "%s" % escaped_lines
            aCL(self.args.interpreter_name)
            aCL(self.sfile)
        self.elog = os.path.join(self.args.output_dir,
                                 "%s_error.log" % self.tool_name)
        if args.output_dir:  # may not want these complexities
            self.tlog = os.path.join(
                self.args.output_dir, "%s_runner.log" % self.tool_name)
            art = '%s.%s' % (self.tool_name, args.interpreter_name)
            artpath = os.path.join(self.args.output_dir, art)  # need full path
            # keep a copy of the script next to the logs
            with open(artpath, 'w') as artifact:
                artifact.write(self.script)
        self.infile_paths = []
        self.infile_format = []
        self.infile_cl = []
        self.infile_label = []
        self.infile_help = []
        self.infile_name = []
        if self.args.input_files:
            aif = [x.split(ourdelim) for x in self.args.input_files]
            # transpose the input_files array passed as
            # --input_files="$input_files~~~$CL~~~$input_formats~~~$input_label~~~$input_help"
            laif = list(map(list, zip(*aif)))
            (self.infile_paths, self.infile_cl, self.infile_format,
             self.infile_label, self.infile_help) = laif
            # positionals have integers indicating order - need valid internal
            # names
            for i, scl in enumerate(self.infile_cl):
                if scl.isdigit():
                    scl = 'input%s' % scl
                if scl.upper() in ['STDOUT', 'STDIN']:
                    scl = 'input%d' % (i + 1)
                # make a list of internal names for each input file
                self.infile_name.append(scl)
        # list all (cl param) pairs - positional needs sorting by cl index so decorate
        clsuffix = [[self.args.output_cl, self.args.output_tab]]
        if self.args.parampass == '0':  # only need two
            # NOTE(review): run() wires stdin/stdout itself for this mode, so
            # these four tokens reach the script as plain arguments - confirm
            # that is intended.
            aCL('<')
            aCL('%s' % self.infile_paths[0])
            aCL('>')
            aCL('%s' % self.args.output_tab)
        else:
            for i, p in enumerate(self.infile_paths):
                # decorator is cl - sort for positional
                clsuffix.append([self.infile_cl[i], p])
            for p in self.args.additional_parameters:
                psplit = p.split(ourdelim)
                pform = psplit[5]
                if pform == 'STDOUT':
                    lastclredirect = ['>', psplit[1]]
                else:
                    clsuffix.append([pform, psplit[1]])  # cl,value
            clsuffix.sort()
            if self.args.parampass == "positional":
                # inputs in order then params in order TODO fix ordering using
                # self.infile_cl
                for (k, v) in clsuffix:
                    # the command runs as an argument list without a shell, so
                    # a value containing spaces is passed as-is, unquoted
                    aCL(v)
            elif self.args.parampass == "argparse":
                # inputs then params in argparse named form
                for (k, v) in clsuffix:
                    aCL('--%s' % k)
                    if ' ' in v:
                        # NOTE(review): without a shell these quotes become
                        # part of the value - kept as found, please confirm.
                        aCL('"%s"' % v)
                    else:
                        aCL('%s' % v)
            if lastclredirect:
                for v in lastclredirect:
                    aCL(v)  # add the stdout parameter last
        self.test1Output = '%s_test1_output.xls' % self.tool_name
        self.test1HTML = '%s_test1_output.html' % self.tool_name

    @staticmethod
    def _ndash(name):
        """Return the dash count for a command line flag: 2 for long names, 1 for single characters."""
        return 2 if len(name) > 1 else 1

    def makeXML(self):
        """
        Create a Galaxy xml tool wrapper for the new script
        Uses galaxyxml

        Writes the exported XML to ``self.xmlfile`` in the current directory.
        """
        # need interp and executable (?script) or else executable only
        if self.args.interpreter_name:
            exe = "$runMe"  # our dynamic script from the tool builder
            interp = self.args.interpreter_name
        else:
            interp = None
            exe = self.args.exe_package
        assert exe is not None, 'No interpeter or executable passed in to makeXML'
        tool = gxt.Tool(self.args.tool_name, self.tool_id,
                        self.args.tool_version, self.args.tool_desc, exe)
        if interp:
            tool.interpreter = interp
        if self.args.help_text:
            with open(self.args.help_text, 'r') as helpfile:
                # must html escape here too - thanks to Marius van den Beek
                tool.help = ''.join(html_escape(x) for x in helpfile)
        else:
            tool.help = ('Please ask the tool author (%s) for help '
                         'as none was supplied at tool generation\n'
                         % (self.args.user_email))
        tool.version_command = None  # do not want
        inputs = gxtp.Inputs()
        outputs = gxtp.Outputs()
        requirements = gxtp.Requirements()
        testparam = []
        is_positional = (self.args.parampass == 'positional')
        if self.args.include_dependencies == "yes":
            requirements.append(gxtp.Requirement('package', 'ghostscript'))
            requirements.append(gxtp.Requirement('package', 'graphicsmagick'))
        if self.args.interpreter_name:
            # shells are assumed present; every other interpreter becomes a
            # package requirement
            if self.args.interpreter_name not in ['bash', 'sh']:
                requirements.append(gxtp.Requirement(
                    'package', self.args.interpreter_name, self.args.interpreter_version))
        elif self.args.exe_package:  # uses exe not interpreter
            requirements.append(gxtp.Requirement(
                'package', self.args.exe_package, self.args.exe_package_version))
        tool.requirements = requirements
        if self.args.parampass == '0':
            # parampass arrives as a string; stdin can only carry one file
            assert len(
                self.infile_name) <= 1, 'Maximum one "<" if parampass is 0 - more than one input files supplied'
        for i, infpath in enumerate(self.infile_paths):
            newname = self.infile_name[i]
            # fall back to the internal name when no label was supplied
            alab = self.infile_label[i] if len(self.infile_label[i]) > 0 else newname
            aninput = gxtp.DataParam(newname, optional=False, label=alab, help=self.infile_help[i],
                                     format=self.infile_format[i], multiple=False,
                                     num_dashes=self._ndash(newname))
            if self.args.parampass == '0':
                aninput.command_line_override = '< $%s' % newname
            aninput.positional = is_positional
            inputs.append(aninput)
        for parm in self.args.additional_parameters:
            newname, newval, newlabel, newhelp, newtype, newcl = parm.split(
                ourdelim)
            if not len(newlabel) > 0:
                newlabel = newname
            ndash = self._ndash(newname)
            if newtype == "text":
                aparm = gxtp.TextParam(
                    newname, label=newlabel, help=newhelp, value=newval, num_dashes=ndash)
            elif newtype == "integer":
                aparm = gxtp.IntegerParam(
                    newname, label=newlabel, help=newhelp, value=newval, num_dashes=ndash)
            elif newtype == "float":
                aparm = gxtp.FloatParam(
                    newname, label=newlabel, help=newhelp, value=newval, num_dashes=ndash)
            else:
                raise ValueError('Unrecognised parameter type "%s" for '
                                 'additional parameter %s in makeXML' % (newtype, newname))
            aparm.positional = is_positional
            inputs.append(aparm)
            tparm = gxtp.TestParam(newname, value=newval)
            testparam.append(tparm)
        tool.inputs = inputs
        configfiles = gxtp.Configfiles()
        configfiles.append(gxtp.Configfile(name="runMe", text=self.script))
        tool.configfiles = configfiles
        if self.args.output_tab:
            ext = self.args.output_format
            # dash count comes from the output flag itself, not from whatever
            # input or parameter happened to be processed last
            aparm = gxtp.OutputData(
                self.args.output_cl, format=ext,
                num_dashes=self._ndash(self.args.output_cl or ''))
            if is_positional:
                aparm.command_line_override = '> $output1'
            aparm.positional = is_positional
            outputs.append(aparm)
        tool.outputs = outputs
        tests = gxtp.Tests()
        test_a = gxtp.Test()
        # first listed format of the first input names the test data file
        ext = self.infile_format[0].split(',')[0]
        if is_positional:
            param = gxtp.TestParam(
                'input1', value='input1.%s' % ext, ftype=ext)
        else:
            param = gxtp.TestParam(self.infile_name[0], value='%s.%s' % (
                self.infile_name[0], ext), ftype=ext)
        test_a.append(param)
        test_a.append(gxtp.TestParam('job_name', value='test_a'))
        test_a.append(gxtp.TestParam('runMe', value="$runMe"))
        for aparam in testparam:
            test_a.append(aparam)
        test_out = gxtp.TestOutput(
            name=self.args.output_cl, value=self.test1Output)
        test_a.append(test_out)
        tests.append(test_a)
        tool.tests = tests
        tool.add_comment('Created by %s at %s using the Galaxy Tool Factory.' % (
            self.args.user_email, timenow()))
        tool.add_comment('Source in git at: %s' % (toolFactoryURL))
        tool.add_comment(
            'Cite: Creating re-usable tools from scripts doi: 10.1093/bioinformatics/bts573')
        exml = tool.export()
        with open(self.xmlfile, 'w') as xf:
            xf.write(exml)
            xf.write('\n')
        # ready for the tarball

    def makeTooltar(self):
        """
        a tool is a gz tarball with eg
        /toolname/tool.xml /toolname/tool.py /toolname/test-data/test1_in.foo ...

        Runs the command line once first; a non-zero return code aborts the
        build with exit status 1. Otherwise the tarball is written and copied
        to ``args.new_tool``. Returns the return code of the run.
        """
        retval = self.run()
        if retval:
            sys.stderr.write(
                '## Run failed. Cannot build yet. Please fix and retry')
            sys.exit(1)
        tdir = 'tdir_%s' % self.tool_name
        if not os.path.exists(tdir):
            os.mkdir(tdir)
        self.makeXML()
        testdir = os.path.join(tdir, 'test-data')
        if not os.path.exists(testdir):
            os.mkdir(testdir)  # make tests directory
        for i, infile in enumerate(self.infile_paths):
            dest = os.path.join(testdir, '%s.%s' %
                                (self.infile_name[i], self.infile_format[i]))
            if infile != dest:
                shutil.copyfile(infile, dest)
        if self.args.output_tab and os.path.exists(self.args.output_tab):
            shutil.copyfile(self.args.output_tab,
                            os.path.join(testdir, self.test1Output))
        else:
            print('#### no output_tab %s exists' % self.args.output_tab)
        if self.args.output_dir:
            if os.path.exists(self.tlog):
                shutil.copyfile(self.tlog, os.path.join(
                    testdir, 'test1_out.log'))
        # self.sfile is an absolute temp path; joining it onto tdir would
        # discard tdir, so the script is stored under a tool-relative name
        stname = os.path.join(tdir, '%s.%s' % (
            self.tool_name, self.args.interpreter_name))
        if not os.path.exists(stname):
            shutil.copyfile(self.sfile, stname)
        xtname = os.path.join(tdir, self.xmlfile)
        if not os.path.exists(xtname):
            shutil.copyfile(self.xmlfile, xtname)
        tarpath = "%s.tar.gz" % self.tool_name
        with tarfile.open(tarpath, "w:gz") as tar:
            tar.add(tdir, recursive=True, arcname='%s' % self.tool_name)
        shutil.copyfile(tarpath, self.args.new_tool)
        shutil.rmtree(tdir)
        # TODO: replace with optional direct upload to local toolshed?
        return retval

    def run(self):
        """
        Run the prepared command line once and return its exit code.

        Unless parampass is '0', stdout goes to ``self.tlog`` and stderr to
        ``self.elog``; when the command fails, the captured stderr text is
        echoed to this process's stderr. With parampass '0' the first input
        file is fed to stdin and stdout is written to ``args.output_tab``.
        """
        logging.debug('run cl=%s' % str(self.cl))
        scl = ' '.join(self.cl)
        err = None
        if self.args.parampass != '0':
            with open(self.elog, 'wb') as ste, open(self.tlog, 'wb') as sto:
                sto.write(
                    bytes('## Executing Toolfactory generated command line = %s\n' % scl, "utf8"))
                sto.flush()
                p = subprocess.run(self.cl, shell=False, stdout=sto,
                                   stderr=ste, cwd=self.args.output_dir)
            with open(self.elog, 'rb') as tmp_stderr:
                # decode rather than str() the bytes so the real text, not a
                # b'...' repr, is reported
                err = tmp_stderr.read().decode('utf8', errors='replace')
            retval = p.returncode
        else:  # work around special case of simple scripts that take stdin and write to stdout
            with open(self.infile_paths[0], 'rb') as sti, \
                    open(self.args.output_tab, 'wb') as sto:
                p = subprocess.run(self.cl, shell=False, stdout=sto, stdin=sti)
            retval = p.returncode
        if self.args.output_dir:
            if p.returncode != 0 and err:  # problem
                sys.stderr.write(err)
        logging.debug('run done')
        return retval
|
|
439
|
|
440
|
|
def main():
    """
    This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
    <command interpreter="python">rgToolFactory.py --script_path "$scriptPath" --tool_name "foo" --interpreter_name "Rscript"
    </command>

    Parses the command line, validates the required arguments, then either
    builds a tool tarball (--make_Tool) or just runs the script once. A
    non-zero result is passed on as the process exit status.
    """
    parser = argparse.ArgumentParser()
    a = parser.add_argument
    a('--script_path', default='')
    a('--tool_name', default=None)
    a('--interpreter_name', default=None)
    a('--interpreter_version', default=None)
    a('--exe_package', default=None)
    a('--exe_package_version', default=None)
    a('--output_dir', default='./')
    a('--input_files', default=[], action="append")
    a("--input_formats", default="tabular")
    a('--output_tab', default=None)
    a('--output_format', default='tabular')
    a('--output_cl', default=None)
    a('--user_email', default='Unknown')
    a('--bad_user', default=None)
    a('--make_Tool', default=None)
    a('--help_text', default=None)
    a('--tool_desc', default=None)
    a('--new_tool', default=None)
    a('--tool_version', default=None)
    a('--include_dependencies', default=None)
    a('--citations', default=None)
    a('--additional_parameters', dest='additional_parameters',
      action='append', default=[])
    a('--edit_additional_parameters', action="store_true", default=False)
    a('--parampass', default="positional")
    args = parser.parse_args()
    assert not args.bad_user, 'UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to "admin_users" in the Galaxy configuration file' % (
        args.bad_user, args.bad_user)
    assert args.tool_name, '## Tool Factory expects a tool name - eg --tool_name=DESeq'
    assert (args.interpreter_name or args.exe_package), '## Tool Factory wrapper expects an interpreter - eg --interpreter_name=Rscript or an executable package findable by the dependency management package'
    assert args.exe_package or (len(args.script_path) > 0 and os.path.isfile(
        args.script_path)), '## Tool Factory wrapper expects a script path - eg --script_path=foo.R if no executable'
    if args.output_dir:
        try:
            os.makedirs(args.output_dir, exist_ok=True)
        except OSError:
            # best effort only: ScriptRunner changes into this directory and
            # will fail loudly there if it really is unusable
            pass
    # remove quotes we need to deal with spaces in CL params
    args.input_files = [x.replace('"', '').replace("'", '')
                        for x in args.input_files]
    args.additional_parameters = [
        x.replace('"', '') for x in args.additional_parameters]
    r = ScriptRunner(args)
    if args.make_Tool:
        retcode = r.makeTooltar()
    else:
        retcode = r.run()
    if retcode:
        sys.exit(retcode)  # indicate failure to job runner


if __name__ == "__main__":
    main()
|