comparison qiime_wrapper.py @ 0:e5c3175506b7 default tip

Initial tool configs for qiime, most need work.
author Jim Johnson <jj@umn.edu>
date Sun, 17 Jul 2011 10:30:11 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e5c3175506b7
1 #!/usr/bin/env python
2 import logging, os, string, sys, tempfile, glob, shutil, types, urllib, optparse, re
3 import shlex, subprocess
4
5 """
6 sys.argv
7 this --galaxy_datasets= --qiime_script
8
9 alpha_rarefaction
10 output html
11 wf_arare/alpha_rarefaction_plots/rarefaction_plots.html
12 wf_arare/alpha_rarefaction_plots/html_plots/
13 wf_arare/alpha_div
14 wf_arare/alpha_div/alpha_rarefaction_101_0.txt
15
16 --galaxy_summary_html=$output_html
17 --galaxy_summary_template=$output_template
18 --galaxy_summary_links='label:link,label:link'
19 --galaxy_outputdir=$output_html.extra_files_path
20
21
22 """
23
def stop_err( msg ):
    """Report a fatal error on stderr and terminate the process.

    msg may be a string or an exception instance; it is rendered with ``%s``.
    Raises SystemExit with status 1 so that the failure is visible through
    the exit code as well as through the stderr text.
    """
    sys.stderr.write( "%s\n" % msg )
    # A bare sys.exit() would exit with status 0, i.e. report success.
    sys.exit( 1 )
27
def _debug(stream, msg):
    """Write msg followed by a newline to stream (valid on Python 2.6+ and 3)."""
    stream.write('%s\n' % msg)


def _ensure_dir(path):
    """Create the directory path if it does not exist and return path.

    Any failure (including a missing value) is reported through stop_err,
    which terminates the process.
    """
    try:
        if not os.path.exists(path):
            os.makedirs(path)
    except Exception as ex:
        stop_err(ex)
    return path


def _parse_galaxy_args(argv):
    """Separate the wrapper's ``--galaxy_*`` options from the wrapped command.

    argv is the argument list without the program name.  Returns a tuple
    ``(opts, cmd_args)``: opts is a dict of the wrapper settings (unset ones
    are None, 'debug' is a bool) and cmd_args is the list of every argument
    that does not start with ``--galaxy_``, in the original order.
    Unrecognised ``--galaxy_*`` options are dropped silently.
    """
    opts = {
        'debug': False,
        'tmp_dir': None,
        'outputdir': None,
        'dataset_patterns': None,
        'datasetid': None,
        'new_dataset_patterns': None,
        'new_files_path': None,
        'summary_html': None,
        'summary_template': None,
        'summary_links': None,
    }
    dir_options = {
        '--galaxy_tmpdir': 'tmp_dir',
        '--galaxy_outputdir': 'outputdir',
    }
    list_options = {
        '--galaxy_datasets': 'dataset_patterns',
        '--galaxy_new_datasets': 'new_dataset_patterns',
    }
    value_options = {
        '--galaxy_datasetid': 'datasetid',
        # This option used to be stored in new_dataset_patterns by mistake.
        '--galaxy_new_files_path': 'new_files_path',
        '--galaxy_summary_html': 'summary_html',
        '--galaxy_summary_template': 'summary_template',
        '--galaxy_summary_links': 'summary_links',
    }
    cmd_args = []
    for arg in argv:
        if not arg.startswith('--galaxy_'):
            cmd_args.append(arg)
            continue
        # Split on the first '=' only, so values may themselves contain '='.
        opt, sep, val = arg.partition('=')
        if not sep:
            val = None
        if opt == '--galaxy_debug':
            opts['debug'] = True
        elif opt in dir_options:
            opts[dir_options[opt]] = _ensure_dir(val)
        elif opt in list_options:
            if val is None:
                stop_err('Option %s requires a value' % opt)
            opts[list_options[opt]] = val.split(',')
        elif opt in value_options:
            opts[value_options[opt]] = val
    return opts, cmd_args


def _read_text(path, buffsize=1048576):
    """Return the content of the file at path as text, read in chunks.

    Reading stops at end of file (an empty chunk), so a file whose size is
    an exact multiple of buffsize terminates normally.  An OverflowError
    while reading is ignored and whatever was read so far is returned.
    """
    chunks = []
    handle = open(path, 'rb')
    try:
        while True:
            chunk = handle.read(buffsize)
            if not chunk:
                break
            chunks.append(chunk)
    except OverflowError:
        pass
    finally:
        handle.close()
    data = b''.join(chunks)
    # On Python 3 the bytes must be decoded; on Python 2 bytes is str already.
    if not isinstance(data, str):
        data = data.decode('utf-8', 'replace')
    return data


def _run_command(cmdline, tmp_dir):
    """Run cmdline through the shell in tmp_dir and return (returncode, stderr).

    stdout and stderr of the command are captured in ``*.out`` / ``*.err``
    files created inside tmp_dir; the files are left in place afterwards.
    """
    # mkstemp hands back an open descriptor, so there is no window in which
    # the capture file has been deleted and must be re-created by name.
    err_fd, err_name = tempfile.mkstemp(dir=tmp_dir, suffix='.err')
    try:
        out_fd, out_name = tempfile.mkstemp(dir=tmp_dir, suffix='.out')
        try:
            # NOTE(review): the command line is the caller's arguments joined
            # with spaces and interpreted by the shell; the original quoting
            # is lost and shell metacharacters are honoured.  Left as is
            # because callers may rely on it - confirm before changing.
            proc = subprocess.Popen(args=cmdline, shell=True, cwd=tmp_dir,
                                    stderr=err_fd, stdout=out_fd)
            returncode = proc.wait()
        finally:
            os.close(out_fd)
    finally:
        os.close(err_fd)
    return returncode, _read_text(err_name)


def _split_pattern(item):
    """Split a ``regex:value`` item at its last colon.

    Splitting at the last colon lets the regex part contain colons.  Raises
    ValueError when the item contains no colon at all.
    """
    pattern, value = item.rsplit(':', 1)
    return pattern, value


def _collect_datasets(outputdir, dataset_patterns, debug):
    """Copy every file under outputdir whose name matches a pattern to its target.

    Each entry of dataset_patterns is ``regex:path``; entries whose path is
    the literal 'None' are skipped.  The regex is matched (re.match) against
    the bare file name at any depth below outputdir.  A failed copy is
    reported through stop_err.
    """
    targets = []
    for output in dataset_patterns:
        pattern, path = _split_pattern(output)
        if debug:
            _debug(sys.stdout, '%s -> %s' % (pattern, path))
        if path == 'None':
            continue
        targets.append((re.compile(pattern), path))
    for root, dirs, files in os.walk(outputdir):
        for fname in files:
            fpath = os.path.join(root, fname)
            for regex, path in targets:
                matched = regex.match(fname)
                if debug:
                    _debug(sys.stdout, 'outdir %s match: %s' % (fname, matched))
                if matched:
                    try:
                        shutil.copy2(fpath, path)
                    except Exception as ex:
                        stop_err('%s' % ex)


def _flatten_outputdir(outputdir):
    """Move the entries of each immediate subdirectory of outputdir up one level.

    Needed to flatten the directory hierarchy so that galaxy can serve the
    href links of the summary page.
    """
    for dname in os.listdir(outputdir):
        dpath = os.path.join(outputdir, dname)
        if os.path.isdir(dpath):
            for fname in os.listdir(dpath):
                shutil.move(os.path.join(dpath, fname), outputdir)


def _publish_new_datasets(outputdir, new_dataset_patterns, new_files_path, datasetid, debug):
    """Expose matching files in outputdir as dynamically generated galaxy datasets.

    See http://bitbucket.org/galaxy/galaxy-central/wiki/ToolsMultipleOutput
    Each entry of new_dataset_patterns is ``regex:datatype``.  Files directly
    inside outputdir whose name matches the regex are hard-linked (or copied
    when linking fails) into new_files_path under the name
    ``primary_<datasetid>_<name>_visible_<datatype>``.  The first regex group
    is used as <name> and must yield a unique name for each output; without a
    group the file name minus extension, underscores and dots is used.
    """
    flist = os.listdir(outputdir)
    for output in new_dataset_patterns:
        pattern, ext = _split_pattern(output)
        for fname in flist:
            m = re.match(pattern, fname)
            fpath = os.path.join(outputdir, fname)
            if not m or not os.path.isfile(fpath):
                continue
            if m.groups():
                root = m.groups()[0]
            else:
                # remove the ext from the name if it exists, galaxy will add it back later
                # remove underscores since galaxy uses that as a field separator for dynamic datasets
                root = re.sub(r'\.?' + re.escape(ext) + '$', '', fname).replace('_', '').replace('.', '')
            # filename pattern required by galaxy
            fn = "%s_%s_%s_%s_%s" % ('primary', datasetid, root, 'visible', ext)
            target = os.path.join(new_files_path, fn)
            if debug:
                _debug(sys.stdout, '> %s' % fpath)
                _debug(sys.stdout, '< %s' % target)
            try:
                os.link(fpath, target)
            except (OSError, AttributeError):
                # Cross-device link, existing target, or no os.link on this platform.
                shutil.copy2(fpath, target)


def __main__():
    """Run the wrapped command from sys.argv and hand its results to galaxy.

    Arguments starting with ``--galaxy_`` configure the wrapper; all other
    arguments are joined with spaces and executed through the shell inside
    the temporary directory.  On success the results found in the output
    directory are copied to the requested dataset paths, the output
    directory is flattened for the html summary, and dynamically generated
    datasets are published.  Any failure is reported through stop_err, which
    terminates the process.
    """
    opts, cmd_args = _parse_galaxy_args(sys.argv[1:])
    debug = opts['debug']
    if debug:
        _debug(sys.stdout, '\n : '.join(cmd_args))
    created_tmp_dir = None
    command_failed = False
    # Defined before the try block so the error handler can always use it.
    stderr = ''
    try:
        cmdline = ' '.join(cmd_args)
        if debug:
            _debug(sys.stdout, cmdline)
        tmp_dir = opts['tmp_dir']
        if tmp_dir is None or not os.path.isdir(tmp_dir):
            tmp_dir = created_tmp_dir = tempfile.mkdtemp()
        outputdir = opts['outputdir']
        if outputdir is None or not os.path.isdir(outputdir):
            outputdir = tmp_dir
        returncode, stderr = _run_command(cmdline, tmp_dir)
        if debug and stderr:
            _debug(sys.stderr, stderr)
        if returncode != 0:
            if debug:
                _debug(sys.stderr, "returncode = %d" % returncode)
            command_failed = True
            raise Exception(stderr)
        # collect results
        if opts['dataset_patterns'] is not None:
            _collect_datasets(outputdir, opts['dataset_patterns'], debug)
        if opts['summary_html'] is not None:
            _flatten_outputdir(outputdir)
            if opts['summary_template'] is not None:
                shutil.copy(opts['summary_template'], opts['summary_html'])
        # Handle the dynamically generated galaxy datasets
        if (opts['new_dataset_patterns'] is not None
                and opts['new_files_path'] is not None
                and opts['datasetid'] is not None):
            _publish_new_datasets(outputdir, opts['new_dataset_patterns'],
                                  opts['new_files_path'], opts['datasetid'], debug)
    except Exception as e:
        # When the command itself failed the exception text already is its
        # stderr; appending stderr again would print it twice.
        msg = str(e) if command_failed else str(e) + stderr
        stop_err('Error running ' + msg)
    finally:
        # Only remove the temporary directory this function created itself;
        # it is kept in debug mode so the captured output can be inspected.
        # Errors are ignored so stale nfs handles are not reported.
        if created_tmp_dir is not None and not debug:
            shutil.rmtree(created_tmp_dir, ignore_errors=True)
208
# Run the wrapper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    __main__()
210