26
|
1 # rgToolFactory.py
|
|
2 # see https://github.com/fubar2/toolfactory
|
25
|
3 #
|
|
4 # copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
|
|
5 #
|
|
6 # all rights reserved
|
|
7 # Licensed under the LGPL
|
26
|
8 # suggestions for improvement and bug fixes welcome at https://github.com/fubar2/toolfactory
|
25
|
9 #
|
26
|
10 # July 2020: BCC was fun and I feel like rip van winkle after 5 years.
|
|
11 # Decided to
|
|
12 # 1. Fix the toolfactory so it works - done for simplest case
|
|
13 # 2. Fix planemo so the toolfactory function works
|
|
14 # 3. Rewrite bits using galaxyxml functions where that makes sense - done
|
25
|
15 #
|
26
|
16 # removed all the old complications including making the new tool use this same script
|
|
17 # galaxyxml now generates the tool xml https://github.com/hexylena/galaxyxml
|
|
18 # No support for automatic HTML file creation from arbitrary outputs
|
|
19 # TODO: add option to run that code as a post execution hook
|
|
20 # TODO: add additional history input parameters - currently only one
|
25
|
21
|
26
|
22 import sys
|
|
23 import subprocess
|
|
24 import shutil
|
|
25 import os
|
25
|
26 import time
|
26
|
27 import tempfile
|
|
28 import argparse
|
25
|
29 import tarfile
|
|
30 import re
|
|
31 import math
|
26
|
32 import galaxyxml.tool as gxt
|
|
33 import galaxyxml.tool.parameters as gxtp
|
|
34 import logging
|
|
35
|
25
|
36
|
|
# Basename of the running script, taken from the command line.
progname = os.path.basename(sys.argv[0])
# Version banner for this Tool Factory release.
myversion = "V2.1 July 2020"
verbose = True
debug = True
# Home of the Tool Factory source; quoted in comments of generated tool XML.
toolFactoryURL = "https://github.com/fubar2/toolfactory"
# Separator between the fields packed into each --input_files and
# --additional_parameters command line value.
ourdelim = "~~~"
|
25
|
43
|
|
44
|
|
def timenow():
    """Return the current local time as a ``DD/MM/YYYY HH:MM:SS`` string."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    now = time.localtime()
    return time.strftime(stamp_format, now)
|
25
|
49
|
|
def quote_non_numeric(s):
    """Return ``s`` unchanged when it parses as a number, else double-quoted.

    Useful when passing parameters to perl or Rscript.

    :param s: candidate parameter value (normally a string).
    :returns: ``s`` itself if ``float(s)`` succeeds, otherwise ``s`` wrapped
        in double quotes.
    """
    try:
        # Only the conversion can raise; its result is not needed.
        float(s)
    except ValueError:
        return '"%s"' % s
    return s
|
25
|
59
|
|
# Characters that must be escaped before text is embedded in the generated
# tool XML. The previous table mapped "&", ">" and "<" to themselves, so
# html_escape() produced no entities at all. "$" is backslash-escaped.
html_escape_table = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    "$": "\\$",
}


def html_escape(text):
    """Return ``text`` with ``&``, ``>``, ``<`` and ``$`` escaped.

    Each character is looked up in ``html_escape_table``; characters without
    an entry are copied through unchanged.

    :param text: string to escape.
    :returns: the escaped string (empty input gives an empty string).
    """
    return "".join(html_escape_table.get(c, c) for c in text)
|
25
|
70
|
|
71
|
|
def html_unescape(text):
    """Revert the ``&amp;``, ``&gt;``, ``&lt;`` and ``\\$`` escapes in ``text``.

    The targets are multi-character sequences, so ``str.replace`` is used.
    ``&amp;`` is decoded last so that text such as ``&amp;lt;`` becomes
    ``&lt;`` rather than being unescaped twice into ``<``.

    :param text: string that may contain the escapes listed above.
    :returns: the string with those escapes replaced by the plain characters.
    """
    t = text.replace('&gt;', '>').replace('&lt;', '<').replace('\\$', '$')
    return t.replace('&amp;', '&')
|
25
|
76
|
|
def parse_citations(citations_text):
    """Split a citations string into ``(kind, text)`` tuples.

    Entries are separated by the literal marker ``**ENTRY**``. After
    surrounding whitespace is removed, an entry starting with ``doi`` gives
    ``("doi", rest)`` and every other entry gives ``("bibtex", rest)``, where
    ``rest`` is the entry without its leading ``doi``/``bibtex`` tag.

    :param citations_text: the packed citations string; ``None`` or an empty
        string gives an empty list.
    :returns: list of ``(kind, text)`` tuples in input order.
    """
    if not citations_text:
        return []
    citation_tuples = []
    for entry in citations_text.split("**ENTRY**"):
        # Strip first so leading whitespace cannot hide the tag.
        entry = entry.strip()
        if not entry:
            continue
        if entry.startswith("doi"):
            citation_tuples.append(("doi", entry[len("doi"):].strip()))
        elif entry.startswith("bibtex"):
            citation_tuples.append(("bibtex", entry[len("bibtex"):].strip()))
        else:
            # Untagged entry: keep the whole text instead of chopping off
            # the length of a tag that is not there.
            citation_tuples.append(("bibtex", entry))
    return citation_tuples
|
25
|
88
|
26
|
89
|
25
|
90 class ScriptRunner:
|
26
|
91 """Wrapper for an arbitrary script
|
|
92 uses galaxyxml
|
|
93
|
|
94 """
|
25
|
95
|
|
96
|
26
|
97 def __init__(self,args=None):
|
|
98 """
|
|
99 cleanup inputs, setup some outputs
|
|
100
|
|
101 """
|
|
102 lastclredirect = None
|
|
103 self.cl = []
|
|
104 aCL = self.cl.append
|
|
105 if args.output_dir: # simplify for the tool tarball
|
|
106 os.chdir(args.output_dir)
|
|
107 self.args = args
|
|
108 self.tool_name = re.sub('[^a-zA-Z0-9_]+', '', args.tool_name) # a sanitizer now does this but..
|
|
109 self.tool_id = self.tool_name
|
|
110 self.xmlfile = '%s.xml' % self.tool_name
|
|
111 if self.args.interpreter_name == "Executable": # binary - no need
|
|
112 aCL(self.args.exe_package) # this little CL will just run
|
|
113 else: # a script has been provided
|
|
114 rx = open(self.args.script_path,'r').readlines()
|
|
115 rx = [x.rstrip() for x in rx] # remove pesky dos line endings if needed
|
|
116 self.script = '\n'.join(rx)
|
|
117 fhandle,self.sfile = tempfile.mkstemp(prefix=self.tool_name,suffix=".%s" % (args.interpreter_name))
|
|
118 tscript = open(self.sfile,'w') # use self.sfile as script source for Popen
|
|
119 tscript.write(self.script)
|
|
120 tscript.close()
|
|
121 self.indentedScript = " %s" % '\n'.join([' %s' % html_escape(x) for x in rx]) # for restructured text in help
|
|
122 self.escapedScript = "%s" % '\n'.join([' %s' % html_escape(x) for x in rx])
|
|
123 aCL(self.args.interpreter_name)
|
|
124 aCL(self.sfile)
|
|
125 self.elog = os.path.join(self.args.output_dir,"%s_error.log" % self.tool_name)
|
|
126 if args.output_dir: # may not want these complexities
|
|
127 self.tlog = os.path.join(self.args.output_dir,"%s_runner.log" % self.tool_name)
|
|
128 art = '%s.%s' % (self.tool_name,args.interpreter_name)
|
|
129 artpath = os.path.join(self.args.output_dir,art) # need full path
|
|
130 artifact = open(artpath,'w') # use self.sfile as script source for Popen
|
|
131 artifact.write(self.script)
|
|
132 artifact.close()
|
|
133 self.infile_paths = []
|
|
134 self.infile_format = []
|
|
135 self.infile_cl = []
|
|
136 self.infile_label = []
|
|
137 self.infile_help = []
|
|
138 if self.args.input_files:
|
|
139 aif = [x.split(ourdelim) for x in self.args.input_files]
|
|
140 laif = list(map(list, zip(*aif))) # transpose the input_files array --input_files="$input_files~~~$CL~~~$input_formats~~~$input_label~~~$input_help"
|
|
141 self.infile_paths,self.infile_cl,self.infile_format,self.infile_label,self.infile_help = laif
|
|
142 self.infile_name = []
|
|
143 for i,scl in enumerate(self.infile_cl): # positionals have integers indicating order - need valid internal names
|
|
144 if scl.isdigit():
|
|
145 scl = 'input%s' % scl
|
|
146 if scl.upper() in ['STDOUT','STDIN']:
|
|
147 scl = 'input%d' % (i+1)
|
|
148 self.infile_name.append(scl) # make a list of internal names for each input file
|
|
149 clsuffix = [] # list all (cl param) pairs - positional needs sorting by cl index
|
|
150 clsuffix.append([self.args.output_cl,self.args.output_tab])
|
|
151 if self.args.parampass == '0': # only need two
|
|
152 aCL('<')
|
|
153 aCL('%s' % self.infile_paths[0])
|
|
154 aCL('>')
|
|
155 aCL('%s' % self.args.output_tab)
|
|
156 else:
|
|
157 for i,p in enumerate(self.infile_paths):
|
|
158 clsuffix.append([self.infile_cl[i],p]) # decorator is cl - sort for positional
|
|
159 for p in self.args.additional_parameters:
|
|
160 psplit = p.split(ourdelim)
|
|
161 pform = psplit[5]
|
|
162 if pform == 'STDOUT':
|
|
163 lastclredirect = ['>',psplit[1]]
|
|
164 else:
|
|
165 clsuffix.append([pform,psplit[1]]) # cl,value
|
|
166 clsuffix.sort()
|
|
167 if self.args.parampass == "positional":
|
|
168 plist = [] # need to decorate with CL and sort
|
|
169 # inputs in order then params in order TODO fix ordering using self.infile_cl
|
|
170 for (k,v) in clsuffix:
|
|
171 if ' ' in v:
|
|
172 aCL("v")
|
|
173 else:
|
|
174 aCL(v)
|
|
175 elif self.args.parampass == "argparse":
|
|
176 # inputs then params in argparse named form
|
|
177 for (k,v) in clsuffix:
|
|
178 if ' ' in v:
|
|
179 aCL('--%s' % k)
|
|
180 aCL('"%s"' % v)
|
|
181 else:
|
|
182 aCL('--%s' % k)
|
|
183 aCL('%s' % v)
|
|
184 if lastclredirect:
|
|
185 for v in lastclredirect:
|
|
186 aCL(v) # add the stdout parameter last
|
|
187 self.test1Output = '%s_test1_output.xls' % self.tool_name
|
|
188 self.test1HTML = '%s_test1_output.html' % self.tool_name
|
|
189
|
|
190 def makeXML(self):
|
|
191 """
|
|
192 Create a Galaxy xml tool wrapper for the new script
|
|
193 Uses galaxyhtml
|
|
194 .
|
|
195
|
|
196 WRONG - fixme!
|
|
197 <test>
|
|
198 <param ftype="tabular,txt" value="infile.tabular,txt" name="infile"/>
|
|
199 <param value="test_a" name="job_name"/>
|
|
200 <param value="$runMe" name="runMe"/>
|
|
201 <param value="hello world" name="prefix"/>
|
|
202 <output value="reverseargp2_test1_output.xls" name="/home/ross/galaxy/database/objects/7/d/3/dataset_7d32a4f7-e5af-4f6d-9241-8b5347118c73.dat"/>
|
|
203 </test>
|
25
|
204
|
26
|
205 """
|
|
206 # need interp and executable (?script) or else executable only
|
|
207 if self.args.interpreter_name:
|
|
208 exe = "$runMe" # our dynamic script from the tool builder
|
|
209 interp = self.args.interpreter_name
|
|
210 else:
|
|
211 interp = None
|
|
212 exe = self.args.exe_package
|
|
213 assert exe != None, 'No interpeter or executable passed in to makeXML'
|
|
214 tool = gxt.Tool(self.args.tool_name,self.tool_id,self.args.tool_version,self.args.tool_desc,exe)
|
|
215 if interp:
|
|
216 tool.interpreter=interp
|
|
217 if self.args.help_text:
|
|
218 helptext = open(self.args.help_text,'r').readlines()
|
|
219 helptext = [html_escape(x) for x in helptext] # must html escape here too - thanks to Marius van den Beek
|
|
220 tool.help = ''.join([x for x in helptext])
|
|
221 else:
|
|
222 tool.help = 'Please ask the tool author (%s) for help as none was supplied at tool generation\n' % (user_email)
|
|
223 tool.version_command = None # do not want
|
|
224 inputs = gxtp.Inputs()
|
|
225 outputs = gxtp.Outputs()
|
|
226 requirements = gxtp.Requirements()
|
|
227 testparam = []
|
|
228 is_positional = (self.args.parampass == 'positional')
|
|
229 if self.args.include_dependencies == "yes":
|
|
230 requirements.append(gxtp.Requirement('package', 'ghostscript'))
|
|
231 requirements.append(gxtp.Requirement('package', 'graphicsmagick'))
|
|
232 if self.args.interpreter_name:
|
|
233 if self.args.interpreter_name == 'python': # always needed for this runner script
|
|
234 requirements.append(gxtp.Requirement('package', 'python',self.args.interpreter_version))
|
|
235 elif not self.args.interpreter_name in ['bash','sh']:
|
|
236 requirements.append(gxtp.Requirement('package', self.args.interpreter_name,self.args.interpreter_version))
|
|
237 else:
|
|
238 if self.args.exe_package: # uses exe not interpreter
|
|
239 requirements.append(gxtp.Requirement('package', self.args.exe_package,self.args.exe_package_version))
|
|
240 tool.requirements = requirements
|
|
241 for i,infpath in enumerate(self.infile_paths):
|
|
242 if self.args.parampass == 0:
|
|
243 assert len(self.infile_name) == 1,'Maximum one "<" if parampass is 0 - more than one input files supplied'
|
|
244 newname = self.infile_name[i]
|
|
245 if len(newname) > 1:
|
|
246 ndash = 2
|
|
247 else:
|
|
248 ndash = 1
|
|
249 if not len(self.infile_label[i]) > 0:
|
|
250 alab = self.infile_name[i]
|
|
251 else:
|
|
252 alab = self.infile_label[i]
|
|
253 aninput = gxtp.DataParam(self.infile_name[i],optional=False, label=alab, help=self.infile_help[i], \
|
|
254 format=self.infile_format[i],multiple=False,num_dashes=ndash)
|
|
255 if self.args.parampass == '0':
|
|
256 aninput.command_line_override = '< $%s' % self.infile_name[i]
|
|
257 aninput.positional = is_positional
|
|
258 inputs.append(aninput)
|
|
259 for parm in self.args.additional_parameters:
|
|
260 newname,newval,newlabel,newhelp,newtype,newcl = parm.split(ourdelim)
|
|
261 if not len(newlabel) > 0:
|
|
262 newlabel = newname
|
|
263 if len(newname) > 1:
|
|
264 ndash = 2
|
|
265 else:
|
|
266 ndash = 1
|
|
267 if newtype == "text":
|
|
268 aparm = gxtp.TextParam(newname,label=newlabel,help=newhelp,value=newval,num_dashes=ndash)
|
|
269 elif newtype == "integer":
|
|
270 aparm = gxtp.IntegerParam(newname,label=newname,help=newhelp,value=newval,num_dashes=ndash)
|
|
271 elif newtype == "float":
|
|
272 aparm = gxtp.FloatParam(newname,label=newname,help=newhelp,value=newval,num_dashes=ndash)
|
|
273 else:
|
|
274 raise ValueError('Unrecognised parameter type "%s" for additional parameter %s in makeXML' % (newtype,psplit[0]))
|
|
275 aparm.positional = is_positional
|
|
276 inputs.append(aparm)
|
|
277 tparm = gxtp.TestParam(newname,value=newval)
|
|
278 testparam.append(tparm)
|
|
279 tool.inputs = inputs
|
|
280 configfiles = gxtp.Configfiles()
|
|
281 configfiles.append(gxtp.Configfile(name="runMe",text=self.script))
|
|
282 tool.configfiles = configfiles
|
|
283 if self.args.output_tab:
|
|
284 ext = self.args.output_format
|
|
285 aparm = gxtp.OutputData(self.args.output_cl, format=ext,num_dashes=ndash)
|
|
286 if is_positional:
|
|
287 aparm.command_line_override = '> $output1'
|
|
288 aparm.positional = is_positional
|
|
289 outputs.append(aparm)
|
|
290 tool.outputs = outputs
|
|
291 tests = gxtp.Tests()
|
|
292 test_a = gxtp.Test()
|
|
293 ext = self.infile_format[0].split(',')[0]
|
|
294 if is_positional:
|
|
295 param = gxtp.TestParam('input1',value='input1.%s' % ext,ftype=ext)
|
|
296 else:
|
|
297 param = gxtp.TestParam(self.infile_name[0],value='%s.%s' % (self.infile_name[0],ext),ftype=ext)
|
|
298 test_a.append(param)
|
|
299 param = gxtp.TestParam('job_name', value='test_a')
|
|
300 test_a.append(param)
|
|
301 param = gxtp.TestParam('runMe', value="$runMe")
|
|
302 test_a.append(param)
|
|
303 for aparam in testparam:
|
|
304 test_a.append(aparam)
|
|
305 test_out = gxtp.TestOutput(name=self.args.output_cl, value=self.test1Output)
|
|
306 test_a.append(test_out)
|
|
307 tests.append(test_a)
|
|
308 tool.tests = tests
|
|
309 tool.add_comment('Created by %s at %s using the Galaxy Tool Factory.' % (self.args.user_email,timenow()))
|
|
310 tool.add_comment('Source in git at: %s' % (toolFactoryURL))
|
|
311 tool.add_comment('Cite: Creating re-usable tools from scripts doi: 10.1093/bioinformatics/bts573')
|
|
312 exml = tool.export()
|
|
313 xf = open(self.xmlfile,'w')
|
|
314 xf.write(exml)
|
|
315 xf.write('\n')
|
|
316 xf.close()
|
|
317 # ready for the tarball
|
25
|
318
|
|
319
|
26
|
320 def makeTooltar(self):
|
|
321 """
|
|
322 a tool is a gz tarball with eg
|
|
323 /toolname/tool.xml /toolname/tool.py /toolname/test-data/test1_in.foo ...
|
|
324 """
|
|
325 retval = self.run()
|
|
326 if retval:
|
|
327 sys.stderr.write('## Run failed. Cannot build yet. Please fix and retry')
|
|
328 sys.exit(1)
|
|
329 tdir = 'tdir_%s' % self.tool_name
|
|
330 if not os.path.exists(tdir):
|
|
331 os.mkdir(tdir)
|
|
332 self.makeXML()
|
|
333 if self.args.help_text:
|
|
334 hlp = open(self.args.help_text,'r').read()
|
|
335 else:
|
|
336 hlp = 'Please ask the tool author for help as none was supplied at tool generation\n'
|
|
337 readme_dict = {'readme':hlp,'interpreter_name':self.args.interpreter_name,'interpreter_version':self.args.interpreter_version}
|
|
338 testdir = os.path.join(tdir,'test-data')
|
|
339 if not os.path.exists(testdir):
|
|
340 os.mkdir(testdir) # make tests directory
|
|
341 for i,infile in enumerate(self.infile_paths):
|
|
342 dest = os.path.join(testdir,'%s.%s' % (self.infile_name[i],self.infile_format[i]))
|
|
343 if infile != dest:
|
|
344 shutil.copyfile(infile,dest)
|
|
345 if self.args.output_tab and os.path.exists(self.args.output_tab):
|
|
346 shutil.copyfile(self.args.output_tab,os.path.join(testdir,self.test1Output))
|
|
347 else:
|
|
348 print('#### no output_tab %s exists' % self.args.output_tab)
|
|
349 if self.args.output_dir:
|
|
350 if os.path.exists(self.tlog):
|
|
351 shutil.copyfile(self.tlog,os.path.join(testdir,'test1_out.log'))
|
|
352 stname = os.path.join(tdir,self.sfile)
|
|
353 if not os.path.exists(stname):
|
|
354 shutil.copyfile(self.sfile, stname)
|
|
355 xtname = os.path.join(tdir,self.xmlfile)
|
|
356 if not os.path.exists(xtname):
|
|
357 shutil.copyfile(self.xmlfile,xtname)
|
|
358 tarpath = "%s.tar.gz" % self.tool_name
|
|
359 tar = tarfile.open(tarpath, "w:gz")
|
|
360 tar.add(tdir,recursive=True,arcname='%s' % self.tool_name)
|
|
361 tar.close()
|
|
362 shutil.copyfile(tarpath,self.args.new_tool)
|
|
363 shutil.rmtree(tdir)
|
|
364 ## TODO: replace with optional direct upload to local toolshed?
|
|
365 return retval
|
25
|
366
|
|
367
|
26
|
368 def run(self):
|
|
369 """
|
|
370 Some devteam tools have this defensive stderr read so I'm keeping with the faith
|
|
371 Feel free to update.
|
|
372 """
|
|
373 logging.debug('run cl=%s' % str(self.cl))
|
|
374 scl = ' '.join(self.cl)
|
|
375 err = None
|
|
376 if self.args.parampass != '0':
|
|
377 ste = open(self.elog,'wb')
|
|
378 sto = open(self.tlog,'wb')
|
|
379 sto.write(bytes('## Executing Toolfactory generated command line = %s\n' % scl,"utf8"))
|
|
380 sto.flush()
|
|
381 p = subprocess.run(self.cl,shell=False,stdout=sto,stderr=ste,cwd=self.args.output_dir)
|
|
382 sto.close()
|
|
383 ste.close()
|
|
384 tmp_stderr = open(self.elog, 'rb' )
|
|
385 err = ''
|
|
386 buffsize = 1048576
|
|
387 try:
|
|
388 while True:
|
|
389 err += str(tmp_stderr.read( buffsize ))
|
|
390 if not err or len( err ) % buffsize != 0:
|
|
391 break
|
|
392 except OverflowError:
|
|
393 pass
|
|
394 tmp_stderr.close()
|
|
395 retval = p.returncode
|
|
396 else: # work around special case of simple scripts that take stdin and write to stdout
|
|
397 sti = open(self.infile_paths[0],'rb')
|
|
398 sto = open(self.args.output_tab,'wb')
|
|
399 p = subprocess.run(self.cl, shell=False, stdout=sto,stdin=sti) # must use shell to redirect
|
|
400 retval = p.returncode
|
|
401 sto.close()
|
|
402 sti.close()
|
|
403 if self.args.output_dir:
|
|
404 if p.returncode != 0 and err: # problem
|
|
405 sys.stderr.write(err)
|
|
406 logging.debug('run done')
|
|
407 return retval
|
25
|
408
|
|
409
|
|
410
|
|
def main():
    """Parse the command line, then run the script or build the tool.

    This is a Galaxy wrapper. It expects to be called by a special purpose
    tool.xml as:
    <command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
    </command>

    Invalid or unauthorised invocations end with an argparse error (message
    on stderr, non-zero exit). A non-zero return code from the run is passed
    on as this process's exit status.
    """
    parser = argparse.ArgumentParser()
    a = parser.add_argument
    a('--script_path', default='')
    a('--tool_name', default=None)
    a('--interpreter_name', default=None)
    a('--interpreter_version', default=None)
    a('--exe_package', default=None)
    a('--exe_package_version', default=None)
    a('--output_dir', default='./')
    # each value packs path, cl name/position, format, label and help,
    # separated by "~~~"
    a('--input_files', default=[], action="append")
    a("--input_formats", default="tabular")
    a('--output_tab', default=None)
    a('--output_format', default='tabular')
    a('--output_cl', default=None)
    a('--user_email', default='Unknown')
    a('--bad_user', default=None)
    a('--make_Tool', default=None)
    a('--help_text', default=None)
    a('--tool_desc', default=None)
    a('--new_tool', default=None)
    a('--tool_version', default=None)
    a('--include_dependencies', default=None)
    a('--citations', default=None)
    a('--additional_parameters', dest='additional_parameters', action='append', default=[])
    a('--edit_additional_parameters', action="store_true", default=False)
    a('--parampass', default="positional")
    args = parser.parse_args()
    # Explicit checks instead of assert: assert statements are removed when
    # Python runs with -O, which would silently disable the authorisation
    # check below.
    if args.bad_user:
        parser.error('UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to "admin_users" in the Galaxy configuration file' % (args.bad_user, args.bad_user))
    if not args.tool_name:
        parser.error('## Tool Factory expects a tool name - eg --tool_name=DESeq')
    if not (args.interpreter_name or args.exe_package):
        parser.error('## Tool Factory wrapper expects an interpreter - eg --interpreter_name=Rscript or an executable package findable by the dependency management package')
    if not (args.exe_package or (len(args.script_path) > 0 and os.path.isfile(args.script_path))):
        parser.error('## Tool Factory wrapper expects a script path - eg --script_path=foo.R if no executable')
    if args.output_dir:
        os.makedirs(args.output_dir, exist_ok=True)
    args.input_files = [x.replace('"', '').replace("'", '') for x in args.input_files]
    # remove quotes we need to deal with spaces in CL params
    args.additional_parameters = [p.replace('"', '') for p in args.additional_parameters]
    r = ScriptRunner(args)
    if args.make_Tool:
        retcode = r.makeTooltar()
    else:
        retcode = r.run()
    if retcode:
        sys.exit(retcode)  # indicate failure to job runner


if __name__ == "__main__":
    main()
|
25
|
467
|
|
468
|