Mercurial > repos > azuzolo > qiime1_3_0
diff qiime/qiime_wrapper.py @ 0:003162f90751 draft
Uploaded
author | azuzolo |
---|---|
date | Wed, 06 Jun 2012 16:40:30 -0400 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
"""Galaxy wrapper that runs a QIIME command line and gathers its outputs.

sys.argv
    this --galaxy_datasets= --quime_script

Arguments that start with ``--galaxy_`` are consumed by this wrapper; every
other argument is joined with single spaces and executed through the shell.

Wrapper options (all optional):

    --galaxy_outputdir=DIR          created if missing; searched for results
    --galaxy_inputdir=DIR           receives the renamed input (see below)
    --galaxy_ext_change=FILE        file to expose as DIR/temporary.<ext>
    --galaxy_new_ext=EXT            extension used for --galaxy_ext_change
    --galaxy_datasets=P:PATH,...    copy files matching regex P to PATH
    --galaxy_new_files_path=DIR     created if missing; dynamic datasets
    --galaxy_new_datasets=P:EXT,... regex/datatype pairs for dynamic datasets
    --galaxy_datasetid=ID           id embedded in dynamic dataset names
    --galaxy_summary_html=FILE      enables flattening of the output dir
    --galaxy_summary_template=FILE  copied onto the summary html file
    --galaxy_summary_links=...      accepted but not used by this script
    --galaxy_logfile=FILE           existing file that receives a file list
    --galaxy_debug                  print progress information

alpha_rarefaction
    output html
        wf_arare/alpha_rarefaction_plots/rarefaction_plots.html
        wf_arare/alpha_rarefaction_plots/html_plots/
        wf_arare/alpha_div
        wf_arare/alpha_div/alpha_rarefaction_101_0.txt

    --galaxy_summary_html=$output_html
    --galaxy_summary_template=$output_template
    --galaxy_summary_links='label:link,label:link'
    --galaxy_outputdir=$output_html.extra_files_path

The syntax used here is valid on both Python 2.6+ and Python 3.
"""
import glob
import logging
import optparse
import os
import re
import shlex
import shutil
import string
import subprocess
import sys
import tempfile
import types
import urllib

# Directory entries whose name contains one of these markers are neither
# listed in the log file nor deleted during cleanup.
_PROTECTED_MARKERS = ('DS_Store', 'primary')

# --galaxy_* options whose value is stored unchanged (None when no '=').
_VALUE_OPTIONS = {
    '--galaxy_datasetid': 'datasetid',
    '--galaxy_summary_html': 'summary_html',
    '--galaxy_summary_template': 'summary_template',
    '--galaxy_summary_links': 'summary_links',
    '--galaxy_logfile': 'logfile',
    '--galaxy_ext_change': 'extchange',
    '--galaxy_new_ext': 'newext',
    '--galaxy_inputdir': 'inputdir',
}

# --galaxy_* options whose value is a comma separated list.
_LIST_OPTIONS = {
    '--galaxy_datasets': 'dataset_patterns',
    '--galaxy_new_datasets': 'new_dataset_patterns',
}

# --galaxy_* options naming a directory that is created when missing.
_DIR_OPTIONS = {
    '--galaxy_outputdir': 'outputdir',
    '--galaxy_new_files_path': 'new_files_path',
}


def stop_err(msg):
    """Write *msg* to stderr and exit the process with status 1."""
    sys.stderr.write("%s\n" % msg)
    # A bare sys.exit() would report success (status 0) to the caller.
    sys.exit(1)


def _debug(enabled, msg, stream=None):
    """Write *msg* to *stream* (stdout by default) when *enabled* is true."""
    if enabled:
        (stream or sys.stdout).write("%s\n" % msg)


def _is_protected(name):
    """Return True when *name* contains one of the protected markers."""
    return any(marker in name for marker in _PROTECTED_MARKERS)


def _ensure_dir(path):
    """Create *path* if it does not exist; abort through stop_err on failure."""
    try:
        if not os.path.exists(path):
            os.makedirs(path)
    except OSError as ex:
        stop_err(ex)
    return path


def _parse_args(argv):
    """Split *argv* into a dict of wrapper options and the command words."""
    opts = {'debug': False}
    for table in (_VALUE_OPTIONS, _LIST_OPTIONS, _DIR_OPTIONS):
        for key in table.values():
            opts[key] = None
    cmd_args = []
    for arg in argv:
        if not arg.startswith('--galaxy_'):
            cmd_args.append(arg)
            continue
        # Split on the first '=' only so that values may contain '='.
        opt, sep, val = arg.partition('=')
        if not sep:
            val = None
        if opt == '--galaxy_debug':
            opts['debug'] = True
        elif opt in _VALUE_OPTIONS:
            opts[_VALUE_OPTIONS[opt]] = val
        elif opt in _LIST_OPTIONS or opt in _DIR_OPTIONS:
            if val is None:
                stop_err('%s requires a value' % opt)
            if opt in _LIST_OPTIONS:
                opts[_LIST_OPTIONS[opt]] = val.split(',')
            else:
                opts[_DIR_OPTIONS[opt]] = _ensure_dir(val)
        # Any other --galaxy_* argument is dropped: it is neither an option
        # of this wrapper nor part of the wrapped command.
    return opts, cmd_args


def _link_or_copy(src, dst):
    """Hard-link *src* to *dst*, falling back to a copy when linking fails."""
    try:
        os.link(src, dst)
    except (OSError, AttributeError):
        try:
            shutil.copy2(src, dst)
        except shutil.Error:
            # copy2 raises shutil.Error when src and dst are already the
            # same file (e.g. an existing hard link): nothing left to do.
            pass


def _run_command(cmdline, cwd=None):
    """Run *cmdline* through the shell and return (returncode, stderr text).

    stderr is spooled to an anonymous temporary file so that very large
    output cannot block the child; stdout of the command is discarded.
    """
    # NOTE(security): the command is a space-joined argv run with shell=True,
    # so quoting of the original arguments is lost and shell metacharacters
    # are interpreted. Kept because tool definitions may rely on shell
    # syntax; only pass trusted arguments.
    with tempfile.TemporaryFile(suffix='.err') as err_file:
        with open(os.devnull, 'wb') as sink:
            proc = subprocess.Popen(args=cmdline, shell=True, cwd=cwd,
                                    stderr=err_file, stdout=sink)
            returncode = proc.wait()
        err_file.seek(0)
        captured = err_file.read()
    if not isinstance(captured, str):
        # Python 3 yields bytes here; Python 2 already yields str.
        captured = captured.decode('utf-8', 'replace')
    return returncode, captured


def _collect_datasets(outputdir, dataset_patterns, new_files_path, debug):
    """Copy files below *outputdir* that match a pattern to its dataset path."""
    targets = []
    for output in dataset_patterns:
        # The path is the part after the LAST ':' so the regex may contain ':'.
        pattern, sep, path = output.rpartition(':')
        if not sep:
            _debug(debug, 'ignoring malformed dataset pattern: %r' % output)
            continue
        _debug(debug, '%s -> %s' % (pattern, path))
        if path == 'None':
            continue
        targets.append((re.compile(pattern), path))
    for root, _dirs, files in os.walk(outputdir):
        for fname in files:
            fpath = os.path.join(root, fname)
            for regex, path in targets:
                matched = regex.match(fname)
                _debug(debug, 'outdir %s match: %s' % (fname, matched))
                if not matched:
                    continue
                try:
                    shutil.copy2(fpath, path)
                    if new_files_path is not None:
                        _link_or_copy(fpath,
                                      os.path.join(new_files_path, fname))
                except (IOError, OSError, shutil.Error) as ex:
                    stop_err('%s' % ex)


def _flatten_outputdir(outputdir):
    """Move the contents of each first-level subdirectory up into *outputdir*."""
    for dname in os.listdir(outputdir):
        dpath = os.path.join(outputdir, dname)
        if os.path.isdir(dpath):
            for fname in os.listdir(dpath):
                shutil.move(os.path.join(dpath, fname), outputdir)


def _register_new_datasets(new_files_path, new_dataset_patterns, datasetid,
                           outputdir, debug):
    """Expose matching files under Galaxy's dynamic-dataset file names.

    http://bitbucket.org/galaxy/galaxy-central/wiki/ToolsMultipleOutput
    Each list item contains a regex pattern for matching file names and a
    galaxy datatype, separated by ':'. The first regex group, when present,
    is used as the name of the dataset and must be unique for each output.
    """
    for output in new_dataset_patterns:
        if ':' not in output:
            _debug(debug, 'ignoring malformed new-dataset pattern: %r' % output)
            continue
        pattern, ext = output.split(':', 1)
        for fname in os.listdir(new_files_path):
            m = re.match(pattern, fname)
            if not m:
                continue
            fpath = os.path.join(new_files_path, fname)
            if m.groups():
                root = m.groups()[0]
            else:
                # remove the ext from the name if it exists, galaxy will add
                # it back later; remove underscores since galaxy uses them as
                # the field separator for dynamic datasets
                stem = re.sub(r'\.?' + re.escape(ext) + '$', '', fname)
                root = stem.replace('_', '').replace('.', '')
            # filename pattern required by galaxy
            fn = "%s_%s_%s_%s_%s" % ('primary', datasetid, root, 'visible', ext)
            _debug(debug, '> %s' % fpath)
            _debug(debug, '< %s' % os.path.join(new_files_path, fn))
            _link_or_copy(fpath, os.path.join(new_files_path, fn))
            # needed for files with variable output and a directory structure
            if outputdir is not None:
                _link_or_copy(fpath, os.path.join(outputdir, fname))


def _write_log(logfile, directory):
    """Overwrite the existing *logfile* with the unprotected names in *directory*."""
    # 'r+' keeps the original contract: the log file must already exist.
    with open(logfile, 'r+') as log:
        log.write('Tool started. Files created by tool: \n')
        for fname in os.listdir(directory):
            if not _is_protected(fname):
                log.write(fname + '\n')
        log.write('Tool Finished.')
        # Drop whatever longer content was in the file before this write.
        log.truncate()


def _remove_files(directory):
    """Delete the unprotected non-directory entries directly in *directory*."""
    for fname in os.listdir(directory):
        if _is_protected(fname):
            continue
        target = os.path.join(directory, fname)
        if os.path.isdir(target) and not os.path.islink(target):
            continue
        try:
            os.remove(target)
        except OSError:
            # Best effort: e.g. stale NFS handles must not abort the cleanup.
            pass


def _finalize(opts):
    """Write the log file and clean the working directories; never raises."""
    logfile = opts['logfile']
    if logfile is not None:
        # Same order as before: when both directories are set, the listing
        # of new_files_path is the one that remains in the log.
        for directory in (opts['outputdir'], opts['new_files_path']):
            if directory is None:
                continue
            try:
                _write_log(logfile, directory)
            except (IOError, OSError):
                pass
    # Directories whose path contains 'files' are left untouched
    # (presumably Galaxy's "*_files" extra-files directories - confirm).
    for directory in (opts['outputdir'], opts['inputdir'],
                      opts['new_files_path']):
        if directory is None or 'files' in directory:
            continue
        try:
            _remove_files(directory)
        except OSError:
            pass


def _run_and_collect(opts, cmd_args):
    """Run the wrapped command, then gather datasets and summary files."""
    debug = opts['debug']
    outputdir = opts['outputdir']
    new_files_path = opts['new_files_path']

    # allow for changing of file extension for files which require it
    if None not in (opts['extchange'], opts['inputdir'], opts['newext']):
        _link_or_copy(opts['extchange'],
                      os.path.join(opts['inputdir'],
                                   'temporary.' + opts['newext']))

    cmdline = ' '.join(cmd_args)
    _debug(debug, cmdline)
    returncode, stderr = _run_command(cmdline)
    _debug(debug, stderr, sys.stderr)
    if returncode != 0:
        _debug(debug, 'returncode = %d' % returncode, sys.stderr)
        raise RuntimeError('command exited with status %d\n%s'
                           % (returncode, stderr))

    # collect results
    if opts['dataset_patterns'] is not None and outputdir is not None:
        _collect_datasets(outputdir, opts['dataset_patterns'],
                          new_files_path, debug)

    # Need to flatten the dir hierarchy in order for galaxy to serve the
    # href links
    if opts['summary_html'] is not None:
        if outputdir is not None:
            _flatten_outputdir(outputdir)
        if opts['summary_template'] is not None:
            shutil.copy(opts['summary_template'], opts['summary_html'])

    # Handle the dynamically generated galaxy datasets
    if (opts['new_dataset_patterns'] is not None
            and new_files_path is not None
            and opts['datasetid'] is not None):
        _register_new_datasets(new_files_path, opts['new_dataset_patterns'],
                               opts['datasetid'], outputdir, debug)


def __main__():
    """Entry point: parse sys.argv, run the command, collect, clean up.

    Exits with status 1 (after the log/cleanup step has run) when the
    wrapped command returns a non-zero status or when collecting its
    outputs fails; the reason is written to stderr.
    """
    opts, cmd_args = _parse_args(sys.argv[1:])
    _debug(opts['debug'], '\n : '.join(cmd_args))
    failure = None
    try:
        _run_and_collect(opts, cmd_args)
    except Exception as ex:
        # Remember the problem; it is reported once cleanup has finished.
        failure = str(ex)
    finally:
        _finalize(opts)
    if failure is not None:
        stop_err('Error in QIIME wrapper: %s' % failure)


if __name__ == "__main__":
    __main__()