0
|
1 #!/usr/bin/env python
|
|
2 import logging, os, string, sys, tempfile, glob, shutil, types, urllib, optparse, re
|
|
3 import shlex, subprocess
|
|
4
|
|
5 """
|
|
6 sys.argv
|
|
7 this --galaxy_datasets= --qiime_script
|
|
8
|
|
9 alpha_rarefaction
|
|
10 output html
|
|
11 wf_arare/alpha_rarefaction_plots/rarefaction_plots.html
|
|
12 wf_arare/alpha_rarefaction_plots/html_plots/
|
|
13 wf_arare/alpha_div
|
|
14 wf_arare/alpha_div/alpha_rarefaction_101_0.txt
|
|
15
|
|
16 --galaxy_summary_html=$output_html
|
|
17 --galaxy_summary_template=$output_template
|
|
18 --galaxy_summary_links='label:link,label:link'
|
|
19 --galaxy_outputdir=$output_html.extra_files_path
|
|
20
|
|
21
|
|
22 """
|
|
23
|
|
def stop_err( msg ):
    """Report a fatal error and terminate the process.

    Writes ``msg`` followed by a newline to standard error, then raises
    ``SystemExit`` with status 1 so that the caller's parent process sees a
    failing exit code (a bare ``sys.exit()`` would report success).

    msg -- the message (or any object formattable with ``%s``) to report.
    """
    sys.stderr.write( "%s\n" % msg )
    sys.exit( 1 )
|
|
27
|
|
def _debug(enabled, stream, text):
    """Write *text* and a newline to *stream* when *enabled* is true."""
    if enabled:
        stream.write('%s\n' % text)


def _require_value(opt, val):
    """Return *val*, exiting through stop_err when option *opt* has no value."""
    if val is None:
        stop_err('%s requires a value' % opt)
    return val


def _ensure_dir(opt, path):
    """Create directory *path* if it is missing and return it; exit on failure."""
    if not path:
        stop_err('%s requires a directory path' % opt)
    try:
        if not os.path.exists(path):
            os.makedirs(path)
    except OSError as ex:
        stop_err(ex)
    return path


def _link_or_copy(src, dst):
    """Hard-link *src* to *dst*, falling back to a copy when linking fails."""
    if os.path.exists(dst) and os.path.samefile(src, dst):
        # Already the same file (e.g. linked earlier); copying onto itself
        # would raise.
        return
    try:
        os.link(src, dst)
    except (OSError, AttributeError):
        shutil.copy2(src, dst)


def _read_text(stream, chunk_size=1048576):
    """Rewind *stream* and return its whole content as text, read in chunks."""
    stream.seek(0)
    chunks = []
    while True:
        chunk = stream.read(chunk_size)
        if not chunk:
            # End of file: stop on an empty read rather than on the length of
            # what was accumulated, which loops forever on exact multiples.
            break
        chunks.append(chunk)
    data = b''.join(chunks)
    if not isinstance(data, str):
        data = data.decode('utf-8', 'replace')
    return data


def _run_command(cmdline, cwd=None):
    """Run *cmdline* through the shell; return ``(returncode, stderr_text)``.

    Standard output of the command is captured in a temporary file and
    discarded; standard error is captured and returned.
    """
    # NOTE(review): the command line is a space-joined string executed with
    # shell=True, so arguments containing spaces or shell metacharacters are
    # re-interpreted by the shell. Left unchanged because callers may rely on
    # it -- confirm before switching to an argument list.
    err_file = tempfile.TemporaryFile(dir=cwd, suffix='.err')
    try:
        out_file = tempfile.TemporaryFile(dir=cwd, suffix='.out')
        try:
            proc = subprocess.Popen(args=cmdline, shell=True, cwd=cwd,
                                    stderr=err_file, stdout=out_file)
            returncode = proc.wait()
        finally:
            out_file.close()
        return returncode, _read_text(err_file)
    finally:
        err_file.close()


def _collect_datasets(outputdir, dataset_patterns, new_files_path, debug):
    """Copy files under *outputdir* whose names match a pattern to their target.

    Each item of *dataset_patterns* is ``regex:path``. Items whose path is the
    literal ``None`` are ignored. Matching files are also linked (or copied)
    into *new_files_path* when it is set.
    """
    targets = []
    for output in dataset_patterns:
        if not output:
            continue
        if ':' not in output:
            raise ValueError("Invalid dataset pattern (expected regex:path): %s" % output)
        # Split on the last colon so a regex containing ':' stays intact.
        pattern, path = output.rsplit(':', 1)
        _debug(debug, sys.stdout, '%s -> %s' % (pattern, path))
        if path == 'None':
            continue
        targets.append((pattern, path))
    for root, dirs, files in os.walk(outputdir):
        for fname in files:
            fpath = os.path.join(root, fname)
            for pattern, path in targets:
                matched = re.match(pattern, fname)
                _debug(debug, sys.stdout, 'outdir %s match: %s' % (fname, matched))
                if not matched:
                    continue
                try:
                    shutil.copy2(fpath, path)
                    if new_files_path is not None:
                        _link_or_copy(fpath, os.path.join(new_files_path, fname))
                except Exception as ex:
                    stop_err('%s' % ex)


def _flatten_one_level(outputdir):
    """Move the entries of each sub-directory of *outputdir* up into it.

    Flattening is needed for Galaxy to serve the href links. shutil.move
    raises if an entry of the same name already exists in *outputdir*.
    """
    for dname in os.listdir(outputdir):
        dpath = os.path.join(outputdir, dname)
        if os.path.isdir(dpath):
            for fname in os.listdir(dpath):
                shutil.move(os.path.join(dpath, fname), outputdir)


def _publish_new_datasets(new_dataset_patterns, new_files_path, datasetid, outputdir, debug):
    """Expose matching files in *new_files_path* as dynamic Galaxy datasets.

    See http://bitbucket.org/galaxy/galaxy-central/wiki/ToolsMultipleOutput
    Each item of *new_dataset_patterns* is ``regex:datatype``. The first regex
    group, when present, is used as the dataset name and must be unique per
    output; otherwise the name is derived from the file name.
    """
    for output in new_dataset_patterns:
        if ':' not in output:
            # Without a datatype there is nothing to name the dataset with.
            continue
        pattern, ext = output.split(':', 1)
        for fname in os.listdir(new_files_path):
            m = re.match(pattern, fname)
            if not m:
                continue
            fpath = os.path.join(new_files_path, fname)
            if m.groups():
                root = m.groups()[0]
            else:
                # Remove the ext from the name if it exists, galaxy will add it
                # back later. Remove underscores since galaxy uses them as the
                # field separator for dynamic datasets.
                root = re.sub(r'\.?' + re.escape(ext) + '$', '', fname)
                root = root.replace('_', '').replace('.', '')
            # filename pattern required by galaxy
            fn = "%s_%s_%s_%s_%s" % ('primary', datasetid, root, 'visible', ext)
            target = os.path.join(new_files_path, fn)
            _debug(debug, sys.stdout, '> %s' % fpath)
            _debug(debug, sys.stdout, '< %s' % target)
            _link_or_copy(fpath, target)
            # needed for files with variable output and a directory structure
            if outputdir is not None:
                _link_or_copy(fpath, os.path.join(outputdir, fname))


def _write_log(logfile, directories):
    """Write the names of the files found in *directories* to *logfile*.

    Names containing ``DS_Store`` or ``primary`` are left out, and a name is
    listed only once even when it occurs in several directories.
    """
    seen = set()
    log = open(logfile, 'w')
    try:
        log.write('Tool started. Files created by tool: \n')
        for directory in directories:
            for fname in os.listdir(directory):
                if 'DS_Store' in fname or 'primary' in fname or fname in seen:
                    continue
                seen.add(fname)
                log.write(fname + '\n')
        log.write('Tool Finished.')
    finally:
        log.close()


def _remove_files(directory):
    """Best-effort removal of the entries of *directory*.

    Entries whose name contains ``DS_Store`` or ``primary`` are kept. Errors
    (sub-directories, stale NFS handles, ...) are ignored per entry so one
    failure does not stop the rest of the cleanup.
    """
    try:
        names = os.listdir(directory)
    except EnvironmentError:
        return
    for name in names:
        if 'DS_Store' in name or 'primary' in name:
            continue
        try:
            os.remove(os.path.join(directory, name))
        except EnvironmentError:
            pass


def __main__():
    """Run the wrapped command line and hand its results over to Galaxy.

    Arguments in ``sys.argv`` starting with ``--galaxy_`` configure the
    wrapper; all other arguments are joined with spaces and executed through
    the shell. Recognised options:

    --galaxy_outputdir=DIR         directory searched for results (created if missing)
    --galaxy_datasets=RE:PATH,...  copy files matching RE to PATH
    --galaxy_datasetid=ID          id used in dynamic dataset file names
    --galaxy_new_datasets=RE:EXT,...  files to expose as dynamic datasets
    --galaxy_new_files_path=DIR    directory holding dynamic datasets (created if missing)
    --galaxy_summary_html=FILE     flatten outputdir and write the summary page
    --galaxy_summary_template=FILE template copied to the summary page
    --galaxy_summary_links=...     accepted, currently unused
    --galaxy_logfile=FILE          file receiving the list of created files
    --galaxy_ext_change=FILE, --galaxy_new_ext=EXT, --galaxy_inputdir=DIR
                                   link FILE as DIR/temporary.EXT before running
    --galaxy_debug                 print progress information

    If the command exits with a non-zero status, or collecting the results
    fails, the error is reported through stop_err after cleanup.
    """
    debug = False
    # --galaxy_tmpdir handling is disabled, so the command runs in the current
    # directory and temporary files go to the default location.
    tmp_dir = None
    inputdir = None
    outputdir = None
    dataset_patterns = None
    datasetid = None
    new_dataset_patterns = None
    new_files_path = None
    summary_html = None
    summary_template = None
    ## adds "log file" printing capabilities for primary output in dynamic file output
    logfile = None
    ## added support for correcting file extensions
    newext = None
    extchange = None

    cmd_args = []
    for arg in sys.argv[1:]:
        if not arg.startswith('--galaxy_'):
            cmd_args.append(arg)
            continue
        # Split on the first '=' only, so values may themselves contain '='.
        opt, sep, val = arg.partition('=')
        if not sep:
            val = None
        if opt == '--galaxy_debug':
            debug = True
        elif opt == '--galaxy_outputdir':
            outputdir = _ensure_dir(opt, val)
        elif opt == '--galaxy_new_files_path':
            new_files_path = _ensure_dir(opt, val)
        elif opt == '--galaxy_datasets':
            dataset_patterns = _require_value(opt, val).split(',')
        elif opt == '--galaxy_new_datasets':
            new_dataset_patterns = _require_value(opt, val).split(',')
        elif opt == '--galaxy_datasetid':
            datasetid = val
        elif opt == '--galaxy_summary_html':
            summary_html = val
        elif opt == '--galaxy_summary_template':
            summary_template = val
        elif opt == '--galaxy_logfile':
            logfile = val
        elif opt == '--galaxy_ext_change':
            extchange = val
        elif opt == '--galaxy_new_ext':
            newext = val
        elif opt == '--galaxy_inputdir':
            inputdir = val
        # Any other --galaxy_* option (including --galaxy_summary_links) is
        # accepted and ignored.
    _debug(debug, sys.stdout, '\n : '.join(cmd_args))

    cmdline = ' '.join(cmd_args)
    failure = None
    stderr = ''
    try:
        # allow for changing of file extension for files which require it
        if extchange is not None and inputdir is not None and newext is not None:
            _link_or_copy(extchange, os.path.join(inputdir, 'temporary.' + newext))
        _debug(debug, sys.stdout, cmdline)
        returncode, stderr = _run_command(cmdline, tmp_dir)
        _debug(debug, sys.stderr, stderr)
        if returncode != 0:
            _debug(debug, sys.stderr, "returncode = %d" % returncode)
            raise Exception(stderr)
        # collect results
        if dataset_patterns is not None and outputdir is not None:
            _collect_datasets(outputdir, dataset_patterns, new_files_path, debug)
        # Need to flatten the dir hierarchy in order for galaxy to serve the href links
        if summary_html is not None:
            if outputdir is not None:
                _flatten_one_level(outputdir)
            if summary_template is not None:
                shutil.copy(summary_template, summary_html)
        # Handle the dynamically generated galaxy datasets
        if new_dataset_patterns is not None and new_files_path is not None and datasetid is not None:
            _publish_new_datasets(new_dataset_patterns, new_files_path, datasetid, outputdir, debug)
    except Exception as ex:
        # Remember the failure; it is reported once cleanup has run.
        failure = str(ex)
    finally:
        # The log and the cleanup are best effort: errors are ignored so that
        # e.g. stale NFS handles are not reported as tool failures.
        if logfile is not None:
            log_dirs = [d for d in (outputdir, new_files_path) if d is not None]
            if log_dirs:
                try:
                    _write_log(logfile, log_dirs)
                except EnvironmentError:
                    pass
        # Directories whose path contains 'files' are never emptied --
        # presumably to protect Galaxy's own *_files directories; verify
        # against the tool configuration.
        for directory in (outputdir, inputdir, new_files_path):
            if directory is not None and 'files' not in directory:
                _remove_files(directory)

    if failure is not None:
        if stderr and stderr not in failure:
            failure += stderr
        stop_err('Error running ' + failure)
|
|
310
|
|
# Run the wrapper only when this file is executed as a script.
if __name__ == "__main__":
    __main__()
|
|
312
|