Mercurial > repos > jjohnson > qiime
view qiime_wrapper.py @ 0:e5c3175506b7 default tip
Initial tool configs for qiime, most need work.
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Sun, 17 Jul 2011 10:30:11 -0500 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python
"""Galaxy wrapper that runs a QIIME command line and collects its outputs.

Every argument that starts with ``--galaxy_`` is consumed by this wrapper;
all remaining arguments are joined with single spaces and run through the
shell inside the working directory.

Wrapper options::

    --galaxy_tmpdir=DIR              working directory (created if missing;
                                     a fresh temp dir is used when omitted)
    --galaxy_outputdir=DIR           directory searched for result files
                                     (created if missing; defaults to the
                                     working directory)
    --galaxy_datasets=PAT:PATH,...   copy each file whose name matches the
                                     regex PAT to PATH ('None' skips it)
    --galaxy_datasetid=ID            id used in dynamic dataset file names
    --galaxy_new_datasets=PAT:EXT,...  regex / galaxy datatype pairs for
                                     dynamically generated datasets
    --galaxy_new_files_path=DIR      where dynamic datasets are written
    --galaxy_summary_html=FILE       html summary; when given, the output
                                     directory is flattened by one level
    --galaxy_summary_template=FILE   copied to the summary html file
    --galaxy_summary_links=label:link,label:link
                                     accepted, currently unused
    --galaxy_debug                   verbose tracing

Example outputs of the alpha_rarefaction script::

    wf_arare/alpha_rarefaction_plots/rarefaction_plots.html
    wf_arare/alpha_rarefaction_plots/html_plots/
    wf_arare/alpha_div
    wf_arare/alpha_div/alpha_rarefaction_101_0.txt

Example wrapper arguments::

    --galaxy_summary_html=$output_html
    --galaxy_summary_template=$output_template
    --galaxy_summary_links='label:link,label:link'
    --galaxy_outputdir=$output_html.extra_files_path
"""
import logging
import os
import string
import sys
import tempfile
import glob
import shutil
import types
import urllib
import optparse
import re
import shlex
import subprocess

# Wrapper options that must be given as ``--option=value``.
_VALUE_OPTIONS = (
    '--galaxy_tmpdir',
    '--galaxy_outputdir',
    '--galaxy_datasets',
    '--galaxy_datasetid',
    '--galaxy_new_datasets',
    '--galaxy_new_files_path',
    '--galaxy_summary_html',
    '--galaxy_summary_template',
    '--galaxy_summary_links',
)


def stop_err(msg):
    """Write *msg* to stderr and terminate with a non-zero exit status."""
    sys.stderr.write("%s\n" % msg)
    sys.exit(1)


def _log(msg, stream=None):
    """Write one debug line to *stream* (stdout when not given)."""
    (stream or sys.stdout).write('%s\n' % msg)


def _ensure_dir(path):
    """Create directory *path* if it does not exist; exit via stop_err on failure."""
    try:
        if not os.path.exists(path):
            os.makedirs(path)
    except OSError as ex:
        stop_err(ex)
    return path


def _split_spec(spec):
    """Split a 'pattern:target' item on its LAST colon.

    Splitting on the last colon lets the regex part contain ':' itself
    (for example a ``(?:...)`` group).

    Raises:
        ValueError: if *spec* has no colon or an empty pattern.
    """
    pattern, sep, target = spec.rpartition(':')
    if not sep or not pattern:
        raise ValueError("expected 'pattern:target' but got %r" % spec)
    return pattern, target


def _parse_galaxy_args(argv):
    """Separate wrapper options from the command to run.

    Args:
        argv: the argument list without the program name.

    Returns:
        (opts, cmd_args): a dict of wrapper settings and the list of
        arguments that make up the wrapped command line.  Unrecognised
        ``--galaxy_*`` options are dropped.
    """
    opts = {
        'debug': False,
        'tmp_dir': None,
        'outputdir': None,
        'dataset_patterns': None,
        'datasetid': None,
        'new_dataset_patterns': None,
        'new_files_path': None,
        'summary_html': None,
        'summary_template': None,
        'summary_links': None,
    }
    cmd_args = []
    for arg in argv:
        if not arg.startswith('--galaxy_'):
            cmd_args.append(arg)
            continue
        # partition keeps any further '=' characters inside the value
        opt, sep, val = arg.partition('=')
        if opt == '--galaxy_debug':
            opts['debug'] = True
            continue
        if opt not in _VALUE_OPTIONS:
            continue
        if not sep:
            stop_err('Option %s requires a value' % opt)
        if opt == '--galaxy_tmpdir':
            opts['tmp_dir'] = _ensure_dir(val)
        elif opt == '--galaxy_outputdir':
            opts['outputdir'] = _ensure_dir(val)
        elif opt == '--galaxy_datasets':
            opts['dataset_patterns'] = [item for item in val.split(',') if item]
        elif opt == '--galaxy_datasetid':
            opts['datasetid'] = val
        elif opt == '--galaxy_new_datasets':
            opts['new_dataset_patterns'] = [item for item in val.split(',') if item]
        elif opt == '--galaxy_new_files_path':
            opts['new_files_path'] = val
        elif opt == '--galaxy_summary_html':
            opts['summary_html'] = val
        elif opt == '--galaxy_summary_template':
            opts['summary_template'] = val
        elif opt == '--galaxy_summary_links':
            opts['summary_links'] = val
    return opts, cmd_args


def _read_text(path, buffsize=1048576):
    """Return the content of *path* as text, reading it in chunks."""
    chunks = []
    handle = open(path, 'rb')
    try:
        try:
            while True:
                chunk = handle.read(buffsize)
                if not chunk:
                    break
                chunks.append(chunk)
        except OverflowError:
            # keep whatever was read so far, as the wrapper always did
            pass
    finally:
        handle.close()
    data = b''.join(chunks)
    if not isinstance(data, str):
        # Python 3: bytes -> str; undecodable bytes must not hide the error
        data = data.decode('utf-8', 'replace')
    return data


def _run_command(cmdline, cwd):
    """Run *cmdline* through the shell in *cwd*.

    stdout and stderr are captured in temporary files inside *cwd* so that
    very large outputs cannot dead-lock a pipe; the files are removed again
    before returning.

    Returns:
        (returncode, stderr_text)
    """
    err_fd, err_name = tempfile.mkstemp(dir=cwd, suffix='.err')
    out_fd, out_name = tempfile.mkstemp(dir=cwd, suffix='.out')
    try:
        try:
            # NOTE: the command line is assembled by the Galaxy tool config and
            # is intentionally interpreted by the shell.
            proc = subprocess.Popen(args=cmdline, shell=True, cwd=cwd,
                                    stderr=err_fd, stdout=out_fd)
            returncode = proc.wait()
        finally:
            os.close(err_fd)
            os.close(out_fd)
        stderr = _read_text(err_name)
    finally:
        for name in (err_name, out_name):
            try:
                os.remove(name)
            except OSError:
                pass
    return returncode, stderr


def _collect_datasets(outputdir, dataset_patterns, debug=False):
    """Copy every file under *outputdir* whose name matches a pattern to its dataset path."""
    targets = []
    for spec in dataset_patterns:
        pattern, path = _split_spec(spec)
        if debug:
            _log('%s -> %s' % (pattern, path))
        if path == 'None':
            continue
        targets.append((re.compile(pattern), path))
    for root, _dirs, files in os.walk(outputdir):
        for fname in files:
            fpath = os.path.join(root, fname)
            for regex, path in targets:
                matched = regex.match(fname)
                if debug:
                    _log('outdir %s match: %s' % (fname, matched))
                if matched:
                    try:
                        shutil.copy2(fpath, path)
                    except (IOError, OSError, shutil.Error) as ex:
                        stop_err('%s' % ex)


def _flatten_outputdir(outputdir):
    """Move the content of each sub-directory of *outputdir* up one level.

    Galaxy can only serve href links to files located directly in the
    dataset's extra files directory.
    """
    for dname in os.listdir(outputdir):
        dpath = os.path.join(outputdir, dname)
        if os.path.isdir(dpath):
            for fname in os.listdir(dpath):
                shutil.move(os.path.join(dpath, fname), outputdir)


def _publish_new_datasets(outputdir, new_dataset_patterns, new_files_path,
                          datasetid, debug=False):
    """Expose matching files in *outputdir* as dynamically generated datasets.

    See http://bitbucket.org/galaxy/galaxy-central/wiki/ToolsMultipleOutput
    Each item of *new_dataset_patterns* holds a regex for matching file names
    and a galaxy datatype, separated by ':'.  The first regex group is used
    as the dataset name and must be unique for each output.
    """
    fnames = sorted(name for name in os.listdir(outputdir)
                    if os.path.isfile(os.path.join(outputdir, name)))
    for spec in new_dataset_patterns:
        pattern, ext = _split_spec(spec)
        regex = re.compile(pattern)
        for fname in fnames:
            m = regex.match(fname)
            if not m:
                continue
            fpath = os.path.join(outputdir, fname)
            root = m.group(1) if m.groups() else None
            if root is None:
                # remove the ext from the name if it exists, galaxy will add it back later
                # remove underscores since galaxy uses them as field separator for dynamic datasets
                root = re.sub(r'\.?' + re.escape(ext) + '$', '', fname)
                root = root.replace('_', '').replace('.', '')
            # filename pattern required by galaxy
            fn = "%s_%s_%s_%s_%s" % ('primary', datasetid, root, 'visible', ext)
            dest = os.path.join(new_files_path, fn)
            if debug:
                _log('> %s' % fpath)
                _log('< %s' % dest)
            try:
                os.link(fpath, dest)
            except (OSError, AttributeError):
                # hard links unavailable (other filesystem, existing file, platform)
                shutil.copy2(fpath, dest)


def __main__():
    """Parse sys.argv, run the wrapped command and collect its results.

    Exits through stop_err (non-zero status, message on stderr) when the
    command fails or a result cannot be collected.
    """
    opts, cmd_args = _parse_galaxy_args(sys.argv[1:])
    debug = opts['debug']
    if debug:
        _log('\n : '.join(cmd_args))
    stderr = ''
    created_tmp_dir = None
    try:
        cmdline = ' '.join(cmd_args)
        if debug:
            _log(cmdline)
        tmp_dir = opts['tmp_dir']
        if tmp_dir is None or not os.path.isdir(tmp_dir):
            tmp_dir = created_tmp_dir = tempfile.mkdtemp()
        outputdir = opts['outputdir']
        if outputdir is None or not os.path.isdir(outputdir):
            outputdir = tmp_dir
        returncode, stderr = _run_command(cmdline, tmp_dir)
        if debug:
            _log(stderr, sys.stderr)
        if returncode != 0:
            if debug:
                _log("returncode = %d" % returncode, sys.stderr)
            raise Exception(stderr or
                            'command exited with return code %d' % returncode)
        # collect results
        if opts['dataset_patterns'] is not None:
            _collect_datasets(outputdir, opts['dataset_patterns'], debug)
        # Need to flatten the dir hierarchy in order for galaxy to serve the href links
        if opts['summary_html'] is not None:
            _flatten_outputdir(outputdir)
            if opts['summary_template'] is not None:
                shutil.copy(opts['summary_template'], opts['summary_html'])
        # Handle the dynamically generated galaxy datasets
        if (opts['new_dataset_patterns'] is not None
                and opts['new_files_path'] is not None
                and opts['datasetid'] is not None):
            _publish_new_datasets(outputdir, opts['new_dataset_patterns'],
                                  opts['new_files_path'], opts['datasetid'],
                                  debug)
    except Exception as e:
        msg = str(e)
        if stderr and stderr not in msg:
            msg += stderr
        stop_err('Error running ' + msg)
    finally:
        # Only remove the temporary directory this wrapper created itself
        # (kept in debug mode for inspection).
        # Enclose in try block, so we don't report error on stale nfs handles
        if created_tmp_dir is not None and not debug:
            try:
                shutil.rmtree(created_tmp_dir)
            except OSError:
                pass


if __name__ == "__main__":
    __main__()