Mercurial > repos > jjohnson > qiime
comparison qiime_wrapper.py @ 0:e5c3175506b7 default tip
Initial tool configs for qiime, most need work.
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Sun, 17 Jul 2011 10:30:11 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e5c3175506b7 |
---|---|
1 #!/usr/bin/env python | |
2 import logging, os, string, sys, tempfile, glob, shutil, types, urllib, optparse, re | |
3 import shlex, subprocess | |
4 | |
5 """ | |
6 sys.argv | |
7 this --galaxy_datasets= --qiime_script | |
8 | |
9 alpha_rarefaction | |
10 output html | |
11 wf_arare/alpha_rarefaction_plots/rarefaction_plots.html | |
12 wf_arare/alpha_rarefaction_plots/html_plots/ | |
13 wf_arare/alpha_div | |
14 wf_arare/alpha_div/alpha_rarefaction_101_0.txt | |
15 | |
16 --galaxy_summary_html=$output_html | |
17 --galaxy_summary_template=$output_template | |
18 --galaxy_summary_links='label:link,label:link' | |
19 --galaxy_outputdir=$output_html.extra_files_path | |
20 | |
21 | |
22 """ | |
23 | |
def stop_err( msg ):
    """Report a fatal error and terminate the process.

    Writes ``msg`` followed by a newline to stderr and then raises
    ``SystemExit`` with status 1, so that the failure is visible both through
    the stderr text and through the process exit code.

    msg -- the message (any object; formatted with ``%s``).
    """
    # Wrap in a 1-tuple so a tuple-valued msg is printed instead of being
    # interpreted as multiple format arguments.
    sys.stderr.write( "%s\n" % ( msg, ) )
    # A bare sys.exit() would exit with status 0, i.e. report success.
    sys.exit( 1 )
27 | |
def _debug_print(enabled, text, stream=None):
    """Write ``text`` plus a newline to ``stream`` (stdout by default) if ``enabled``."""
    if enabled:
        (stream or sys.stdout).write('%s\n' % (text,))


def _ensure_dir(path):
    """Create directory ``path`` if it does not exist and return ``path``.

    Any failure is reported through ``stop_err``, which terminates the process.
    """
    try:
        if not os.path.exists(path):
            os.makedirs(path)
    except Exception as ex:
        stop_err(ex)
    return path


def _parse_galaxy_args(argv):
    """Split ``argv`` into wrapper settings and the wrapped command's arguments.

    Arguments starting with ``--galaxy_`` are consumed by the wrapper; every
    other argument is returned, in order, as part of the command to run.

    Returns a ``(settings, cmd_args)`` tuple where ``settings`` is a dict with
    the keys ``debug``, ``tmp_dir``, ``outputdir``, ``dataset_patterns``,
    ``datasetid``, ``new_dataset_patterns``, ``new_files_path``,
    ``summary_html``, ``summary_template`` and ``summary_links``.
    """
    settings = {
        'debug': False,
        'tmp_dir': None,
        'outputdir': None,
        'dataset_patterns': None,
        'datasetid': None,
        'new_dataset_patterns': None,
        'new_files_path': None,
        'summary_html': None,
        'summary_template': None,
        'summary_links': None,  # accepted but not used by the wrapper yet
    }
    cmd_args = []
    for arg in argv:
        if not arg.startswith('--galaxy_'):
            cmd_args.append(arg)
            continue
        # Split on the first '=' only, so values may themselves contain '='.
        opt, sep, val = arg.partition('=')
        if not sep:
            val = None
        if opt == '--galaxy_tmpdir':
            settings['tmp_dir'] = _ensure_dir(val)
        elif opt == '--galaxy_outputdir':
            settings['outputdir'] = _ensure_dir(val)
        elif opt == '--galaxy_datasets':
            settings['dataset_patterns'] = val.split(',') if val else None
        elif opt == '--galaxy_datasetid':
            settings['datasetid'] = val
        elif opt == '--galaxy_new_datasets':
            settings['new_dataset_patterns'] = val.split(',') if val else None
        elif opt == '--galaxy_new_files_path':
            # Previously stored into new_dataset_patterns by mistake, which
            # left new_files_path unset and disabled new-dataset handling.
            settings['new_files_path'] = val
        elif opt == '--galaxy_summary_html':
            settings['summary_html'] = val
        elif opt == '--galaxy_summary_template':
            settings['summary_template'] = val
        elif opt == '--galaxy_summary_links':
            settings['summary_links'] = val
        elif opt == '--galaxy_debug':
            settings['debug'] = True
    return settings, cmd_args


def _read_captured(path, buffsize=1048576):
    """Return the contents of ``path`` as text, reading ``buffsize`` bytes at a time."""
    chunks = []
    with open(path, 'rb') as handle:
        try:
            while True:
                chunk = handle.read(buffsize)
                # Stop on an empty read; testing the accumulated length
                # instead loops forever when the size is an exact multiple
                # of buffsize.
                if not chunk:
                    break
                chunks.append(chunk)
        except OverflowError:
            pass
    data = b''.join(chunks)
    if not isinstance(data, str):
        # Python 3: bytes must be decoded before being joined with messages.
        data = data.decode('utf-8', 'replace')
    return data


def _run_command(cmdline, tmp_dir, debug):
    """Run ``cmdline`` through the shell in ``tmp_dir`` and return its stderr text.

    stdout and stderr are captured in ``.out`` / ``.err`` files created inside
    ``tmp_dir``.  Raises ``Exception`` carrying the captured stderr (or the
    exit status when stderr is empty) if the command exits non-zero.
    """
    err_fd, err_name = tempfile.mkstemp(dir=tmp_dir, suffix='.err')
    out_fd, _out_name = tempfile.mkstemp(dir=tmp_dir, suffix='.out')
    with os.fdopen(err_fd, 'wb') as tmp_stderr:
        with os.fdopen(out_fd, 'wb') as tmp_stdout:
            # NOTE(review): the command line is assembled by joining argv with
            # spaces and executed with shell=True, so shell metacharacters in
            # the arguments are interpreted by the shell.  Callers must only
            # pass trusted input.
            proc = subprocess.Popen(args=cmdline, shell=True, cwd=tmp_dir,
                                    stderr=tmp_stderr.fileno(),
                                    stdout=tmp_stdout.fileno())
            returncode = proc.wait()
    stderr = _read_captured(err_name)
    _debug_print(debug, stderr, sys.stderr)
    if returncode != 0:
        _debug_print(debug, "returncode = %d" % returncode, sys.stderr)
        raise Exception(stderr or 'command exited with status %d' % returncode)
    return stderr


def _copy_matching_datasets(outputdir, dataset_patterns, debug):
    """Copy files found under ``outputdir`` to their requested dataset paths.

    Each item of ``dataset_patterns`` is ``pattern:path``; every file below
    ``outputdir`` whose name matches the regex ``pattern`` (``re.match``) is
    copied to ``path``.  Items whose path is the literal ``None`` are skipped.
    """
    targets = []
    for output in dataset_patterns:
        # Split on the last ':' so that the regex itself may contain ':'.
        pattern, path = output.rsplit(':', 1)
        _debug_print(debug, '%s -> %s' % (pattern, path))
        if path == 'None':
            continue
        targets.append((pattern, path))
    for root, _dirs, files in os.walk(outputdir):
        for fname in files:
            fpath = os.path.join(root, fname)
            for pattern, path in targets:
                matched = re.match(pattern, fname)
                _debug_print(debug, 'outdir %s match: %s' % (fname, matched))
                if matched:
                    try:
                        shutil.copy2(fpath, path)
                    except Exception as ex:
                        stop_err('%s' % ex)


def _flatten_output(outputdir):
    """Move the contents of each immediate subdirectory of ``outputdir`` up one level.

    The directory hierarchy needs to be flattened in order for galaxy to
    serve the href links of the summary page.
    """
    for dname in os.listdir(outputdir):
        dpath = os.path.join(outputdir, dname)
        if os.path.isdir(dpath):
            for fname in os.listdir(dpath):
                shutil.move(os.path.join(dpath, fname), outputdir)


def _publish_new_datasets(outputdir, new_dataset_patterns, new_files_path, datasetid, debug):
    """Expose matching files in ``outputdir`` as dynamically generated galaxy datasets.

    See http://bitbucket.org/galaxy/galaxy-central/wiki/ToolsMultipleOutput

    Each item of ``new_dataset_patterns`` contains a regex pattern for
    matching filenames and a galaxy datatype, separated by ':'.  The regex
    ``match.groups()[0]`` is used as the id name of the dataset and must
    result in a unique name for each output.  Matching files are hard-linked
    (or copied when linking fails) into ``new_files_path``.
    """
    # Only the top level of outputdir is considered, and only regular files.
    flist = [name for name in sorted(os.listdir(outputdir))
             if os.path.isfile(os.path.join(outputdir, name))]
    for output in new_dataset_patterns:
        pattern, ext = output.rsplit(':', 1)
        for fname in flist:
            m = re.match(pattern, fname)
            if not m:
                continue
            fpath = os.path.join(outputdir, fname)
            if len(m.groups()) > 0:
                root = m.groups()[0]
            else:
                # remove the ext from the name if it exists, galaxy will add it back later
                # remove underscores since galaxy uses that as a field separator for dynamic datasets
                root = re.sub(r'\.?' + re.escape(ext) + '$', '', fname).replace('_', '').replace('.', '')
            # filename pattern required by galaxy
            fn = "%s_%s_%s_%s_%s" % ('primary', datasetid, root, 'visible', ext)
            dest = os.path.join(new_files_path, fn)
            _debug_print(debug, '> %s' % fpath)
            _debug_print(debug, '< %s' % dest)
            try:
                os.link(fpath, dest)
            except (OSError, AttributeError):
                # Hard links may be unavailable or cross-device; copy instead.
                shutil.copy2(fpath, dest)


def __main__():
    """Run a command line on behalf of galaxy and collect its results.

    ``sys.argv[1:]`` is split into wrapper options (``--galaxy_*``) and the
    command to execute (all remaining arguments, joined with spaces and run
    through the shell inside the temporary directory).

    Recognised wrapper options:
      --galaxy_tmpdir=DIR           working directory (created if missing)
      --galaxy_outputdir=DIR        directory searched for results (created if missing)
      --galaxy_datasets=P:PATH,...  copy files matching regex P to PATH
      --galaxy_datasetid=ID         id used when naming new datasets
      --galaxy_new_datasets=P:EXT,...  regex/datatype pairs for new datasets
      --galaxy_new_files_path=DIR   where new datasets are linked/copied
      --galaxy_summary_html=FILE    enables flattening of the output directory
      --galaxy_summary_template=FILE  copied to the summary html file
      --galaxy_summary_links=...    accepted, currently unused
      --galaxy_debug                print diagnostic output

    Any failure is reported through ``stop_err``.
    """
    settings, cmd_args = _parse_galaxy_args(sys.argv[1:])
    debug = settings['debug']
    tmp_dir = settings['tmp_dir']
    outputdir = settings['outputdir']
    dataset_patterns = settings['dataset_patterns']
    datasetid = settings['datasetid']
    new_dataset_patterns = settings['new_dataset_patterns']
    new_files_path = settings['new_files_path']
    summary_html = settings['summary_html']
    summary_template = settings['summary_template']
    # NOTE(review): inputdir is never assigned anything but None, so the
    # cleanup in the finally clause is currently a no-op.
    inputdir = None
    # Defined before the try block so the except handler can always use it.
    stderr = ''
    _debug_print(debug, '\n : '.join(cmd_args))
    try:
        cmdline = ' '.join(cmd_args)
        _debug_print(debug, cmdline)
        if tmp_dir is None or not os.path.isdir(tmp_dir):
            tmp_dir = tempfile.mkdtemp()
        if outputdir is None or not os.path.isdir(outputdir):
            outputdir = tmp_dir
        stderr = _run_command(cmdline, tmp_dir, debug)
        # collect results
        if dataset_patterns is not None:
            _copy_matching_datasets(outputdir, dataset_patterns, debug)
        if summary_html is not None:
            _flatten_output(outputdir)
            if summary_template is not None:
                shutil.copy(summary_template, summary_html)
        if new_dataset_patterns is not None and new_files_path is not None and datasetid is not None:
            _publish_new_datasets(outputdir, new_dataset_patterns, new_files_path, datasetid, debug)
    except Exception as e:
        msg = str(e)
        # Append the command's stderr unless the exception already carries it
        # (a failed command raises with stderr as its message).
        if stderr and stderr not in msg:
            msg += stderr
        stop_err('Error running ' + msg)
    finally:
        # Only remove temporary directories
        # Enclose in try block, so we don't report error on stale nfs handles
        try:
            if inputdir is not None and os.path.exists(inputdir):
                shutil.rmtree(inputdir)
        except Exception:
            pass
208 | |
# Run the wrapper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    __main__()
210 |