comparison qiime/qiime_wrapper.py @ 0:003162f90751 draft

Uploaded
author azuzolo
date Wed, 06 Jun 2012 16:40:30 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:003162f90751
1 #!/usr/bin/env python
2 import logging, os, string, sys, tempfile, glob, shutil, types, urllib, optparse, re
3 import shlex, subprocess
4
5 """
6 sys.argv
7 this --galaxy_datasets= --quime_script
8
9 alpha_rarefaction
10 output html
11 wf_arare/alpha_rarefaction_plots/rarefaction_plots.html
12 wf_arare/alpha_rarefaction_plots/html_plots/
13 wf_arare/alpha_div
14 wf_arare/alpha_div/alpha_rarefaction_101_0.txt
15
16 --galaxy_summary_html=$output_html
17 --galaxy_summary_template=$output_template
18 --galaxy_summary_links='label:link,label:link'
19 --galaxy_outputdir=$output_html.extra_files_path
20
21
22 """
23
def stop_err(msg):
    """Write *msg* to stderr and terminate the script with a failing status.

    Args:
        msg: The message (or exception object) to report. It is formatted
            with ``%s`` and followed by a newline.

    Raises:
        SystemExit: Always, with exit status 1.
    """
    # Wrap in a tuple so a tuple-valued message is not unpacked by '%'.
    sys.stderr.write("%s\n" % (msg,))
    # A bare sys.exit() would report status 0 (success) right after an error
    # message was printed; exit with a non-zero status instead.
    sys.exit(1)
27
def __main__():
    """Run the wrapped command line and hand its results over to Galaxy.

    Arguments are read from ``sys.argv``. Every argument starting with
    ``--galaxy_`` is consumed by the wrapper; all remaining arguments are
    joined with spaces and executed through the shell.

    Wrapper options (``--galaxy_NAME=VALUE`` unless noted):
        outputdir       directory scanned for results; created if missing
        inputdir        directory that receives the re-suffixed input file
        datasets        comma separated ``regex:path`` pairs; a file under
                        outputdir whose name matches ``regex`` is copied to
                        ``path`` (a path of ``None`` disables the pair)
        datasetid       id used when naming dynamically discovered datasets
        new_datasets    comma separated ``regex:ext`` pairs matched against
                        the files in new_files_path
        new_files_path  directory for dynamically discovered datasets;
                        created if missing
        summary_html    when given, the contents of outputdir's immediate
                        sub-directories are moved up into outputdir
        summary_template  file copied to summary_html
        summary_links   accepted but not used
        logfile         file that receives a listing of the produced files
        ext_change      input file to expose as ``temporary.<new_ext>``
        new_ext         extension used for the ext_change file
        debug           flag (no value needed); enables diagnostic output

    Errors are reported through ``stop_err``. This includes a non-zero exit
    status of the wrapped command, which is reported together with the
    command's captured stderr. The log file and the directory clean-up are
    handled on the way out, whether or not the run succeeded.
    """
    opts, cmd_args = _parse_galaxy_args(sys.argv[1:])
    debug = opts['debug']
    outputdir = opts['outputdir']
    inputdir = opts['inputdir']
    new_files_path = opts['new_files_path']
    datasetid = opts['datasetid']
    summary_html = opts['summary_html']
    summary_template = opts['summary_template']
    logfile = opts['logfile']
    extchange = opts['ext_change']
    newext = opts['new_ext']
    dataset_patterns = None
    if opts['datasets'] is not None:
        dataset_patterns = opts['datasets'].split(',')
    new_dataset_patterns = None
    if opts['new_datasets'] is not None:
        new_dataset_patterns = opts['new_datasets'].split(',')

    for directory in (outputdir, new_files_path):
        if directory is None:
            continue
        try:
            if not os.path.exists(directory):
                os.makedirs(directory)
        except OSError as ex:
            stop_err(ex)

    _debug(debug, '\n : '.join(cmd_args))
    cmdline = ' '.join(cmd_args)
    stderr = ''
    try:
        # Allow changing the file extension for tools that insist on one.
        if extchange is not None and inputdir is not None and newext is not None:
            _link_or_copy(extchange, os.path.join(inputdir, 'temporary.' + newext))
        _debug(debug, cmdline)
        returncode, stderr = _run_command(cmdline)
        _debug(debug, stderr, sys.stderr)
        if returncode != 0:
            _debug(debug, 'returncode = %d' % returncode, sys.stderr)
            # stop_err raises SystemExit, which is not an Exception subclass,
            # so it passes the handler below and still runs the finally part.
            stop_err('Error running %s (return code %d):\n%s'
                     % (cmdline, returncode, stderr))
        # Collect results.
        if dataset_patterns is not None and outputdir is not None:
            _collect_datasets(outputdir, dataset_patterns, new_files_path, debug)
        # The directory hierarchy has to be flattened for Galaxy to serve
        # the href links of the summary page.
        if summary_html is not None:
            if outputdir is not None:
                _flatten_output_dir(outputdir)
            if summary_template is not None:
                shutil.copy(summary_template, summary_html)
        # Handle the dynamically generated Galaxy datasets, see
        # http://bitbucket.org/galaxy/galaxy-central/wiki/ToolsMultipleOutput
        if (new_dataset_patterns is not None and new_files_path is not None
                and datasetid is not None):
            _register_new_datasets(new_dataset_patterns, new_files_path,
                                   datasetid, outputdir, debug)
    except Exception as ex:
        # Report the failure instead of silently dropping it.
        stop_err('Error running %s: %s\n%s' % (cmdline, ex, stderr))
    finally:
        # Best effort only, so that e.g. stale NFS handles are not reported.
        if logfile is not None:
            listed = [d for d in (outputdir, new_files_path) if d is not None]
            if listed:
                try:
                    _write_file_log(logfile, listed)
                except EnvironmentError:
                    pass
        # Directories whose path contains 'files' are left alone.
        for directory in (outputdir, inputdir, new_files_path):
            if directory is not None and 'files' not in directory:
                _remove_files(directory)


def _debug(enabled, text, stream=None):
    """Write *text* and a newline to *stream* (stdout by default) if *enabled*."""
    if enabled:
        (stream or sys.stdout).write('%s\n' % (text,))


def _parse_galaxy_args(argv):
    """Separate the ``--galaxy_*`` wrapper options from the command arguments.

    Returns a ``(opts, cmd_args)`` tuple. ``opts`` maps every known option
    name (without the ``--galaxy_`` prefix) to its value, ``None`` when the
    option was not given; ``opts['debug']`` is a boolean. Unknown
    ``--galaxy_*`` arguments are dropped and value options given without
    ``=`` are ignored.
    """
    prefix = '--galaxy_'
    value_options = ('outputdir', 'inputdir', 'datasets', 'datasetid',
                     'new_datasets', 'new_files_path', 'summary_html',
                     'summary_template', 'summary_links', 'logfile',
                     'ext_change', 'new_ext')
    opts = dict((name, None) for name in value_options)
    opts['debug'] = False
    cmd_args = []
    for arg in argv:
        if not arg.startswith(prefix):
            cmd_args.append(arg)
            continue
        # Split on the first '=' only, so values may contain '=' themselves.
        name, sep, value = arg[len(prefix):].partition('=')
        if name == 'debug':
            opts['debug'] = True
        elif sep and name in opts:
            opts[name] = value
    return opts, cmd_args


def _split_pairs(items):
    """Turn ``left:right`` strings into tuples, skipping entries without ':'."""
    pairs = []
    for item in items:
        left, sep, right = item.partition(':')
        if sep:
            pairs.append((left, right))
    return pairs


def _run_command(cmdline):
    """Run *cmdline* through the shell and return ``(returncode, stderr_text)``.

    The command's stdout is discarded. Its stderr is captured in an anonymous
    temporary file that is removed as soon as it is closed.
    """
    err_file = tempfile.TemporaryFile(suffix='.err')
    try:
        sink = open(os.devnull, 'wb')
        try:
            # NOTE(security): the command line is assembled from sys.argv and
            # executed with shell=True; callers must pass trusted arguments.
            returncode = subprocess.call(cmdline, shell=True,
                                         stdout=sink, stderr=err_file)
        finally:
            sink.close()
        err_file.seek(0)
        captured = err_file.read()
    finally:
        err_file.close()
    if not isinstance(captured, str):
        # Python 3 returns bytes here; Python 2 already has a str.
        captured = captured.decode('utf-8', 'replace')
    return returncode, captured


def _link_or_copy(src, dst):
    """Hard-link *src* to *dst*, falling back to a copy when linking fails."""
    if os.path.exists(dst) and os.path.samefile(src, dst):
        return
    try:
        os.link(src, dst)
    except (OSError, AttributeError):
        shutil.copy2(src, dst)


def _collect_datasets(outputdir, dataset_patterns, new_files_path, debug):
    """Copy files below *outputdir* to the dataset paths whose regex they match."""
    targets = []
    for pattern, path in _split_pairs(dataset_patterns):
        _debug(debug, '%s -> %s' % (pattern, path))
        if path != 'None':
            targets.append((pattern, path))
    for root, _dirs, files in os.walk(outputdir):
        for fname in files:
            fpath = os.path.join(root, fname)
            for pattern, path in targets:
                matched = re.match(pattern, fname)
                _debug(debug, 'outdir %s match: %s' % (fname, matched))
                if not matched:
                    continue
                try:
                    shutil.copy2(fpath, path)
                    if new_files_path is not None:
                        _link_or_copy(fpath, os.path.join(new_files_path, fname))
                except EnvironmentError as ex:
                    stop_err('%s' % ex)


def _flatten_output_dir(outputdir):
    """Move the contents of each immediate sub-directory up into *outputdir*."""
    for dname in os.listdir(outputdir):
        dpath = os.path.join(outputdir, dname)
        if os.path.isdir(dpath):
            for fname in os.listdir(dpath):
                shutil.move(os.path.join(dpath, fname), outputdir)


def _register_new_datasets(new_dataset_patterns, new_files_path, datasetid,
                           outputdir, debug):
    """Expose matching files in *new_files_path* under Galaxy's dataset names.

    Each pattern entry is ``regex:ext``. The first regex group, when there is
    one, is used as the dataset name and has to be unique per output;
    otherwise the name is the file name without the extension, underscores
    and dots. When *outputdir* is given the file is also made available
    there under its original name.
    """
    for pattern, ext in _split_pairs(new_dataset_patterns):
        ext_suffix = re.compile(r'\.?' + re.escape(ext) + '$')
        for fname in os.listdir(new_files_path):
            m = re.match(pattern, fname)
            if not m:
                continue
            fpath = os.path.join(new_files_path, fname)
            if m.groups():
                root = m.groups()[0]
            else:
                # Galaxy adds the extension back later and uses '_' as the
                # field separator of dynamic dataset names.
                root = ext_suffix.sub('', fname).replace('_', '').replace('.', '')
            # File name pattern required by Galaxy.
            fn = "%s_%s_%s_%s_%s" % ('primary', datasetid, root, 'visible', ext)
            target = os.path.join(new_files_path, fn)
            _debug(debug, '> %s' % fpath)
            _debug(debug, '< %s' % target)
            _link_or_copy(fpath, target)
            # Needed for files with variable output and a directory structure.
            if outputdir is not None:
                _link_or_copy(fpath, os.path.join(outputdir, fname))


def _write_file_log(logfile, directories):
    """Write one listing of the produced files per directory to *logfile*.

    Names containing 'DS_Store' or 'primary' are left out of the listing.
    """
    handle = open(logfile, 'w')
    try:
        for index, directory in enumerate(directories):
            if index:
                handle.write('\n')
            handle.write('Tool started. Files created by tool: \n')
            for fname in sorted(os.listdir(directory)):
                if 'DS_Store' not in fname and 'primary' not in fname:
                    handle.write(fname + '\n')
            handle.write('Tool Finished.')
    finally:
        handle.close()


def _remove_files(directory):
    """Remove the files in *directory*, keeping 'DS_Store' and 'primary' names.

    Every removal is attempted on its own; entries that cannot be removed
    (for example sub-directories) are skipped.
    """
    try:
        names = os.listdir(directory)
    except OSError:
        return
    for fname in names:
        if 'DS_Store' in fname or 'primary' in fname:
            continue
        try:
            os.remove(os.path.join(directory, fname))
        except OSError:
            pass
310
# Run the wrapper only when this file is executed as a script.
if __name__ == "__main__":
    __main__()
312