Mercurial > repos > azuzolo > qiime1_3_0
view qiime/qiime_wrapper.py @ 1:2c1d19ebac20 draft default tip
Deleted selected files
author | azuzolo |
---|---|
date | Wed, 06 Jun 2012 16:41:00 -0400 |
parents | 003162f90751 |
children |
line wrap: on
line source
#!/usr/bin/env python
"""Galaxy wrapper that runs a QIIME command line and collects its outputs.

Every argument that starts with ``--galaxy_`` is consumed by the wrapper; all
remaining arguments are joined with single spaces and executed through the
shell.

Wrapper options (given as ``--option=value``):

--galaxy_outputdir=DIR        directory searched for results (created if missing)
--galaxy_inputdir=DIR         directory that receives the ``temporary.<ext>`` file
--galaxy_datasets=P:F,...     copy files under outputdir whose name matches
                              regex P to path F (F == 'None' disables the entry)
--galaxy_datasetid=ID         id used when naming dynamically created datasets
--galaxy_new_datasets=P:E,... regex P / galaxy datatype E pairs for dynamic datasets
--galaxy_new_files_path=DIR   directory scanned for dynamic datasets (created if missing)
--galaxy_summary_html=F       when set, outputdir is flattened by one level
--galaxy_summary_template=F   file copied onto the summary html
--galaxy_summary_links=...    accepted, currently unused
--galaxy_logfile=F            existing file that receives a listing of created files
--galaxy_ext_change=F         file to expose under a different extension
--galaxy_new_ext=EXT          extension used for the --galaxy_ext_change file
--galaxy_debug                print diagnostics

Notes kept from the original source:

sys.argv
this --galaxy_datasets= --quime_script

alpha_rarefaction
 output html
 wf_arare/alpha_rarefaction_plots/rarefaction_plots.html
 wf_arare/alpha_rarefaction_plots/html_plots/
 wf_arare/alpha_div
 wf_arare/alpha_div/alpha_rarefaction_101_0.txt

 --galaxy_summary_html=$output_html
 --galaxy_summary_template=$output_template
 --galaxy_summary_links='label:link,label:link'
 --galaxy_outputdir=$output_html.extra_files_path
"""
import logging, os, string, sys, tempfile, glob, shutil, types, urllib, optparse, re
import shlex, subprocess

# Chunk size used when reading back the captured stderr of the command.
_READ_CHUNK = 1048576

# Options whose value is stored unchanged: command line flag -> option key.
_VALUE_OPTIONS = {
    '--galaxy_datasetid': 'datasetid',
    '--galaxy_summary_html': 'summary_html',
    '--galaxy_summary_template': 'summary_template',
    '--galaxy_summary_links': 'summary_links',
    '--galaxy_logfile': 'logfile',
    '--galaxy_ext_change': 'extchange',
    '--galaxy_new_ext': 'newext',
    '--galaxy_inputdir': 'inputdir',
}
# Options whose value is a comma separated list.
_LIST_OPTIONS = {
    '--galaxy_datasets': 'dataset_patterns',
    '--galaxy_new_datasets': 'new_dataset_patterns',
}
# Options naming a directory that is created when it does not exist yet.
_DIR_OPTIONS = {
    '--galaxy_outputdir': 'outputdir',
    '--galaxy_new_files_path': 'new_files_path',
}


def stop_err( msg ):
    """Write msg to stderr and terminate the process via sys.exit()."""
    sys.stderr.write( "%s\n" % msg )
    sys.exit()


def _parse_args(argv):
    """Split argv into wrapper options and the command to execute.

    Returns a tuple (opts, cmd_args): opts maps every option key to its value
    (None when the option was not given, 'debug' is a bool) and cmd_args is
    the list of arguments that do not start with '--galaxy_'.
    Unknown '--galaxy_*' arguments are consumed and ignored.
    """
    keys = (list(_VALUE_OPTIONS.values()) + list(_LIST_OPTIONS.values())
            + list(_DIR_OPTIONS.values()))
    opts = dict.fromkeys(keys)
    opts['debug'] = False
    cmd_args = []
    for arg in argv:
        if not arg.startswith('--galaxy_'):
            cmd_args.append(arg)
            continue
        # Split on the first '=' only: values (paths, regexes) may contain '='.
        if arg.find('=') > 0:
            opt, val = arg.split('=', 1)
        else:
            opt, val = arg, None
        if opt == '--galaxy_debug':
            opts['debug'] = True
        elif opt in _VALUE_OPTIONS:
            opts[_VALUE_OPTIONS[opt]] = val
        elif opt in _LIST_OPTIONS or opt in _DIR_OPTIONS:
            if val is None:
                stop_err('%s requires a value' % opt)
            if opt in _LIST_OPTIONS:
                opts[_LIST_OPTIONS[opt]] = val.split(',')
            else:
                try:
                    if not os.path.exists(val):
                        os.makedirs(val)
                except OSError as ex:
                    stop_err(ex)
                opts[_DIR_OPTIONS[opt]] = val
    return opts, cmd_args


def _link_or_copy(src, dst):
    """Hard-link src to dst, falling back to a copy when linking fails."""
    # A previous step may already have linked the very same file here;
    # copying a file onto itself would raise, so treat that case as done.
    if os.path.exists(dst) and os.path.samefile(src, dst):
        return
    try:
        os.link(src, dst)
    except (OSError, AttributeError):
        shutil.copy2(src, dst)


def _run_command(cmdline, cwd=None):
    """Run cmdline through the shell and return (returncode, stderr_text).

    stdout and stderr are captured in anonymous temporary files (created in
    cwd when given) so that large outputs cannot block the child process;
    stdout is discarded.
    """
    # NOTE(review): cmdline is built by joining the caller's arguments with
    # spaces and is interpreted by the shell, so quoting is lost and shell
    # metacharacters are honoured. Kept as is because callers may rely on it.
    with tempfile.TemporaryFile(dir=cwd, suffix='.err') as err_file:
        with tempfile.TemporaryFile(dir=cwd, suffix='.out') as out_file:
            proc = subprocess.Popen(args=cmdline, shell=True, cwd=cwd,
                                    stderr=err_file, stdout=out_file)
            returncode = proc.wait()
        # Read stderr back in chunks, stopping at the first empty read.
        err_file.seek(0)
        chunks = []
        while True:
            chunk = err_file.read(_READ_CHUNK)
            if not chunk:
                break
            chunks.append(chunk)
    data = b''.join(chunks)
    if not isinstance(data, str):
        # Python 3: the file is binary, the rest of the script wants text.
        data = data.decode('utf-8', 'replace')
    return returncode, data


def _collect_datasets(outputdir, dataset_patterns, new_files_path, debug):
    """Copy files below outputdir that match a 'regex:path' entry to path.

    Matching files are additionally hard-linked into new_files_path when it
    is set. Any copy/link failure terminates the wrapper through stop_err().
    """
    for root, dirs, files in os.walk(outputdir):
        for fname in files:
            fpath = os.path.join(root, fname)
            for output in dataset_patterns:
                if ':' not in output:
                    continue
                # The target path is the part after the LAST colon, so the
                # regex itself may contain ':' (e.g. non-capturing groups).
                pattern, path = output.rsplit(':', 1)
                if debug:
                    sys.stdout.write('%s -> %s\n' % (pattern, path))
                if path == 'None':
                    continue
                matched = re.match(pattern, fname)
                if debug:
                    sys.stdout.write('outdir %s match: %s\n' % (fname, matched))
                if not matched:
                    continue
                try:
                    shutil.copy2(fpath, path)
                    if new_files_path is not None:
                        os.link(fpath, os.path.join(new_files_path, fname))
                except Exception as ex:
                    stop_err('%s' % ex)


def _flatten_one_level(outputdir):
    """Move the content of every direct subdirectory up into outputdir.

    Needed so that galaxy can serve the href links of the summary page.
    """
    for dname in os.listdir(outputdir):
        dpath = os.path.join(outputdir, dname)
        if os.path.isdir(dpath):
            for fname in os.listdir(dpath):
                shutil.move(os.path.join(dpath, fname), outputdir)


def _publish_new_datasets(new_dataset_patterns, new_files_path, datasetid,
                          outputdir, debug):
    """Expose files in new_files_path as dynamically generated galaxy datasets.

    http://bitbucket.org/galaxy/galaxy-central/wiki/ToolsMultipleOutput
    Each entry of new_dataset_patterns is 'regex:datatype'. For every file in
    new_files_path whose name matches the regex a link (or copy) named
    'primary_<datasetid>_<name>_visible_<datatype>' is created next to it.
    <name> is the first regex group when the regex has groups and must be
    unique per output. The file is also linked into outputdir when that is set
    (needed for files with variable output and a directory structure).
    """
    for output in new_dataset_patterns:
        if ':' not in output:
            continue
        pattern, ext = output.split(':', 1)
        for fname in os.listdir(new_files_path):
            m = re.match(pattern, fname)
            if not m:
                continue
            fpath = os.path.join(new_files_path, fname)
            if len(m.groups()) > 0:
                base = m.groups()[0]
            else:
                # remove the ext from the name if it exists, galaxy will add
                # it back later; remove underscores since galaxy uses them as
                # field separator for dynamic datasets
                base = re.sub(r'\.?' + re.escape(ext) + '$', '', fname)
                base = base.replace('_', '').replace('.', '')
            # filename pattern required by galaxy
            fn = "%s_%s_%s_%s_%s" % ('primary', datasetid, base, 'visible', ext)
            if debug:
                sys.stdout.write('> %s\n' % fpath)
                sys.stdout.write('< %s\n' % os.path.join(new_files_path, fn))
            _link_or_copy(fpath, os.path.join(new_files_path, fn))
            if outputdir is not None:
                _link_or_copy(fpath, os.path.join(outputdir, fname))


def _write_file_log(logfile, directory):
    """Write the names of the files found in directory into logfile.

    logfile must already exist (it is opened with 'r+'). Names containing
    'DS_Store' or 'primary' are left out.
    """
    with open(logfile, 'r+') as logwrite:
        logwrite.write('Tool started. Files created by tool: \n')
        for fname in os.listdir(directory):
            if 'DS_Store' not in fname and 'primary' not in fname:
                logwrite.write(fname + '\n')
        logwrite.write('Tool Finished.')
        # Drop whatever a previous, longer listing left behind.
        logwrite.truncate()


def _remove_files(directory):
    """Best-effort removal of the entries of directory.

    Names containing 'DS_Store' or 'primary' are kept; entries that cannot be
    removed (e.g. subdirectories) are skipped instead of aborting the cleanup.
    """
    for fname in os.listdir(directory):
        if 'DS_Store' in fname or 'primary' in fname:
            continue
        try:
            os.remove(os.path.join(directory, fname))
        except OSError:
            pass


def __main__():
    """Parse sys.argv, run the wrapped command and collect its results.

    Failures while running the command or collecting results are not
    reported (only printed in debug mode); log writing and cleanup always
    run afterwards.
    """
    opts, cmd_args = _parse_args(sys.argv[1:])
    debug = opts['debug']
    inputdir = opts['inputdir']
    outputdir = opts['outputdir']
    new_files_path = opts['new_files_path']
    logfile = opts['logfile']
    # There is no option to set a temporary directory: temporary files go to
    # the default location and the command inherits the working directory.
    tmp_dir = None
    if debug:
        sys.stdout.write('%s\n' % '\n : '.join(cmd_args))
    stderr = ''
    try:
        # allow for changing of file extension for files which require it
        if (opts['extchange'] is not None and inputdir is not None
                and opts['newext'] is not None):
            _link_or_copy(opts['extchange'],
                          os.path.join(inputdir, 'temporary.' + opts['newext']))
        cmdline = ' '.join(cmd_args)
        if debug:
            sys.stdout.write('%s\n' % cmdline)
        returncode, stderr = _run_command(cmdline, tmp_dir)
        if debug:
            sys.stderr.write('%s\n' % stderr)
        if returncode != 0:
            if debug:
                sys.stderr.write('returncode = %d\n' % returncode)
            raise Exception(stderr)
        # collect results
        if opts['dataset_patterns'] is not None and outputdir is not None:
            _collect_datasets(outputdir, opts['dataset_patterns'],
                              new_files_path, debug)
        # Need to flatten the dir hierarchy in order for galaxy to serve the
        # href links
        if opts['summary_html'] is not None:
            if outputdir is not None:
                _flatten_one_level(outputdir)
            if opts['summary_template'] is not None:
                shutil.copy(opts['summary_template'], opts['summary_html'])
        # Handle the dynamically generated galaxy datasets
        if (opts['new_dataset_patterns'] is not None
                and new_files_path is not None
                and opts['datasetid'] is not None):
            _publish_new_datasets(opts['new_dataset_patterns'], new_files_path,
                                  opts['datasetid'], outputdir, debug)
    except Exception as e:
        # NOTE(review): errors were deliberately not reported by the original
        # code (its stop_err call was disabled); that is preserved here, the
        # message is only shown in debug mode.
        if debug:
            sys.stderr.write('Error running %s\n' % e)
    finally:
        # Each step is guarded on its own so that one failure (e.g. a stale
        # nfs handle) neither gets reported nor prevents the remaining steps.
        if logfile is not None:
            for directory in (outputdir, new_files_path):
                if directory is not None:
                    try:
                        _write_file_log(logfile, directory)
                    except (IOError, OSError):
                        pass
        # Only directories without 'files' in their path are emptied.
        for directory in (outputdir, inputdir, new_files_path):
            if directory is not None and 'files' not in directory:
                try:
                    _remove_files(directory)
                except OSError:
                    pass


if __name__ == "__main__":
    __main__()