Mercurial > repos > jjohnson > qiime
diff qiime_wrapper.py @ 0:e5c3175506b7 default tip
Initial tool configs for qiime, most need work.
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Sun, 17 Jul 2011 10:30:11 -0500 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
"""
Galaxy wrapper that runs a command line and collects the files it produces.

sys.argv
this --galaxy_datasets= --qiime_script

alpha_rarefaction
  output html
    wf_arare/alpha_rarefaction_plots/rarefaction_plots.html
    wf_arare/alpha_rarefaction_plots/html_plots/
    wf_arare/alpha_div
    wf_arare/alpha_div/alpha_rarefaction_101_0.txt

    --galaxy_summary_html=$output_html
    --galaxy_summary_template=$output_template
    --galaxy_summary_links='label:link,label:link'
    --galaxy_outputdir=$output_html.extra_files_path

Every argument that starts with ``--galaxy_`` is consumed by this wrapper;
all remaining arguments are joined with spaces and executed through the shell.
"""
import glob
import logging
import optparse
import os
import re
import shlex
import shutil
import string
import subprocess
import sys
import tempfile
import types
import urllib

GALAXY_PREFIX = '--galaxy_'


def stop_err(msg):
    """Write *msg* to stderr and terminate with a non-zero exit status."""
    sys.stderr.write("%s\n" % msg)
    sys.exit(1)


def _debug(enabled, msg, stream=None):
    """Print *msg* to *stream* (stdout by default) when debugging is on."""
    if enabled:
        (stream or sys.stdout).write("%s\n" % (msg,))


def _ensure_dir(opt, path):
    """Create *path* if it does not exist and return it; abort on failure."""
    if not path:
        stop_err('%s requires a directory path' % opt)
    try:
        if not os.path.exists(path):
            os.makedirs(path)
    except Exception as ex:
        stop_err(ex)
    return path


def _parse_galaxy_args(argv):
    """Split *argv* into wrapper options and the command to execute.

    Returns a ``(options, cmd_args)`` tuple. ``options`` is a dict keyed by
    the option name without the ``--galaxy_`` prefix; ``cmd_args`` holds every
    argument that does not start with that prefix, in order. Unrecognised
    ``--galaxy_*`` arguments are dropped.
    """
    options = {
        'debug': False,
        'tmpdir': None,
        'outputdir': None,
        'datasets': None,
        'datasetid': None,
        'new_datasets': None,
        'new_files_path': None,
        'summary_html': None,
        'summary_template': None,
        'summary_links': None,  # parsed but not used by this script
    }
    cmd_args = []
    for arg in argv:
        if not arg.startswith(GALAXY_PREFIX):
            cmd_args.append(arg)
            continue
        # partition keeps any further '=' characters inside the value
        opt, sep, val = arg.partition('=')
        if not sep:
            val = None
        name = opt[len(GALAXY_PREFIX):]
        if name in ('tmpdir', 'outputdir'):
            options[name] = _ensure_dir(opt, val)
        elif name in ('datasets', 'new_datasets'):
            # comma separated list; a missing or empty value means "none"
            options[name] = val.split(',') if val else None
        elif name in ('datasetid', 'new_files_path', 'summary_html',
                      'summary_template', 'summary_links'):
            options[name] = val
        elif name == 'debug':
            options['debug'] = True
    return options, cmd_args


def _read_text(path, buffsize=1048576):
    """Return the content of *path* as ``str``, reading it in chunks."""
    chunks = []
    with open(path, 'rb') as handle:
        try:
            while True:
                chunk = handle.read(buffsize)
                if not chunk:
                    # stop on end of file, whatever the total size was
                    break
                chunks.append(chunk)
        except OverflowError:
            pass
    data = b''.join(chunks)
    if not isinstance(data, str):
        # Python 3: the file was read as bytes
        data = data.decode('utf-8', 'replace')
    return data


def _run_command(cmdline, work_dir):
    """Run *cmdline* through the shell in *work_dir*.

    stdout and stderr are redirected to ``.out`` / ``.err`` files created in
    *work_dir*. Returns ``(returncode, stderr_text)``.
    """
    err_fd, err_path = tempfile.mkstemp(dir=work_dir, suffix='.err')
    out_fd, out_path = tempfile.mkstemp(dir=work_dir, suffix='.out')
    try:
        # NOTE(review): the arguments are re-joined with spaces and handed to
        # the shell, so any quoting done by the caller is lost -- confirm that
        # the tool configs rely on this before switching to shell=False.
        proc = subprocess.Popen(args=cmdline, shell=True, cwd=work_dir,
                                stderr=err_fd, stdout=out_fd)
        returncode = proc.wait()
    finally:
        os.close(err_fd)
        os.close(out_fd)
    return returncode, _read_text(err_path)


def _collect_datasets(outputdir, dataset_patterns, debug):
    """Copy files found under *outputdir* to their galaxy dataset paths.

    Each item of *dataset_patterns* is ``regex:path``. Every file below
    *outputdir* whose name matches ``regex`` is copied to ``path``; items
    whose path is the string ``None`` are skipped.
    """
    for root, dirs, files in os.walk(outputdir):
        for fname in files:
            fpath = os.path.join(root, fname)
            for output in dataset_patterns:
                # split on the last ':' so the regex itself may contain ':'
                pattern, path = output.rsplit(':', 1)
                _debug(debug, '%s -> %s' % (pattern, path))
                if path == 'None':
                    continue
                matched = re.match(pattern, fname)
                _debug(debug, 'outdir %s match: %s' % (fname, matched))
                if matched:
                    try:
                        shutil.copy2(fpath, path)
                    except Exception as ex:
                        stop_err('%s' % ex)


def _flatten_one_level(outputdir):
    """Move the content of each direct sub-directory up into *outputdir*.

    Need to flatten the dir hierarchy in order for galaxy to serve the href
    links.
    """
    for dname in os.listdir(outputdir):
        dpath = os.path.join(outputdir, dname)
        if os.path.isdir(dpath):
            for fname in os.listdir(dpath):
                shutil.move(os.path.join(dpath, fname), outputdir)


def _publish_new_datasets(outputdir, new_dataset_patterns, new_files_path,
                          datasetid, debug):
    """Handle the dynamically generated galaxy datasets.

    http://bitbucket.org/galaxy/galaxy-central/wiki/ToolsMultipleOutput
    Each item of *new_dataset_patterns* contains a regex pattern for matching
    filenames and a galaxy datatype (separated by ':'). The regex
    match.groups()[0] is used as the id name of the dataset, and must result
    in a unique name for each output. Matching files in *outputdir* are
    hard-linked (or copied) into *new_files_path*.
    """
    flist = os.listdir(outputdir)
    for output in new_dataset_patterns:
        pattern, ext = output.rsplit(':', 1)
        for fname in flist:
            m = re.match(pattern, fname)
            fpath = os.path.join(outputdir, fname)
            if not m or not os.path.isfile(fpath):
                continue
            if m.groups():
                root = m.groups()[0]
            else:
                # remove the ext from the name if it exists, galaxy will add
                # it back later; remove underscores since galaxy uses them as
                # the field separator for dynamic datasets
                root = re.sub(r'\.?' + re.escape(ext) + '$', '', fname)
                root = root.replace('_', '').replace('.', '')
            # filename pattern required by galaxy
            fn = "%s_%s_%s_%s_%s" % ('primary', datasetid, root, 'visible', ext)
            target = os.path.join(new_files_path, fn)
            _debug(debug, '> %s' % fpath)
            _debug(debug, '< %s' % target)
            try:
                os.link(fpath, target)
            except (OSError, AttributeError):
                # no hard links here (other filesystem / platform): copy
                shutil.copy2(fpath, target)


def __main__():
    """Run the wrapped command and hand its results over to galaxy.

    Reads ``sys.argv``: ``--galaxy_*`` arguments configure the wrapper, the
    rest form the command line. On any failure the error (and the command's
    stderr) is reported through ``stop_err``.
    """
    options, cmd_args = _parse_galaxy_args(sys.argv[1:])
    debug = options['debug']
    _debug(debug, '\n : '.join(cmd_args))
    own_tmp_dir = None  # set only when this function created the directory
    stderr = ''
    try:
        cmdline = ' '.join(cmd_args)
        _debug(debug, cmdline)
        tmp_dir = options['tmpdir']
        if tmp_dir is None or not os.path.isdir(tmp_dir):
            tmp_dir = own_tmp_dir = tempfile.mkdtemp()
        outputdir = options['outputdir']
        if outputdir is None or not os.path.isdir(outputdir):
            outputdir = tmp_dir

        returncode, stderr = _run_command(cmdline, tmp_dir)
        _debug(debug, stderr, sys.stderr)
        if returncode != 0:
            _debug(debug, "returncode = %d" % returncode, sys.stderr)
            raise Exception(stderr)

        # collect results
        if options['datasets'] is not None:
            _collect_datasets(outputdir, options['datasets'], debug)

        if options['summary_html'] is not None:
            ## move everything up one level
            _flatten_one_level(outputdir)
            if options['summary_template'] is not None:
                shutil.copy(options['summary_template'],
                            options['summary_html'])

        if (options['new_datasets'] is not None
                and options['new_files_path'] is not None
                and options['datasetid'] is not None):
            _publish_new_datasets(outputdir, options['new_datasets'],
                                  options['new_files_path'],
                                  options['datasetid'], debug)
    except Exception as e:
        msg = str(e)
        if stderr and stderr not in msg:
            # do not repeat stderr when it already is the error message
            msg += stderr
        stop_err('Error running ' + msg)
    finally:
        # Only remove temporary directories
        # Enclose in try block, so we don't report error on stale nfs handles
        if own_tmp_dir is not None:
            try:
                shutil.rmtree(own_tmp_dir)
            except OSError:
                pass


if __name__ == "__main__":
    __main__()