view qiime/qiime_wrapper.py @ 1:2c1d19ebac20 draft default tip

Deleted selected files
author azuzolo
date Wed, 06 Jun 2012 16:41:00 -0400
parents 003162f90751
children
line wrap: on
line source

#!/usr/bin/env python
import logging, os, string, sys, tempfile, glob, shutil, types, urllib, optparse, re
import shlex, subprocess

"""
sys.argv usage:
this  --galaxy_datasets=...  [other --galaxy_* options]  qiime_script [script args]

alpha_rarefaction 
  output html 
    wf_arare/alpha_rarefaction_plots/rarefaction_plots.html
    wf_arare/alpha_rarefaction_plots/html_plots/
    wf_arare/alpha_div
    wf_arare/alpha_div/alpha_rarefaction_101_0.txt

    --galaxy_summary_html=$output_html
    --galaxy_summary_template=$output_template
    --galaxy_summary_links='label:link,label:link'
    --galaxy_outputdir=$output_html.extra_files_path
    
    
"""

def stop_err( msg ):
    """Write ``msg`` to stderr and terminate the process with a failure status.

    ``msg`` may be a string or any object (e.g. an exception instance); it is
    formatted with ``%s``.  Raises ``SystemExit`` with exit code 1 so callers
    that inspect the exit status see the failure, not only the stderr text.
    """
    # Wrap in a 1-tuple so a tuple-valued ``msg`` is printed rather than
    # being mistaken for multiple format arguments.
    sys.stderr.write( "%s\n" % ( msg, ) )
    sys.exit( 1 )

def _link_or_copy(src, dst):
    """Hard-link ``src`` to ``dst``; fall back to ``shutil.copy2`` if linking fails."""
    try:
        os.link(src, dst)
    except (OSError, AttributeError):
        # OSError: e.g. cross-device link or existing target.
        # AttributeError: platform without os.link.
        shutil.copy2(src, dst)


def _ensure_dir(path):
    """Create ``path`` if it does not exist and return it; abort via stop_err on failure."""
    try:
        if not os.path.exists(path):
            os.makedirs(path)
    except Exception as ex:
        stop_err(ex)
    return path


def _is_transient(fname):
    """Return True for file names that are listed in the log and removed on cleanup."""
    return 'DS_Store' not in fname and 'primary' not in fname


def _parse_args(argv):
    """Split ``argv`` into wrapper options and the wrapped command.

    Arguments starting with ``--galaxy_`` are consumed here (unknown ones are
    silently dropped); everything else is returned, in order, as the command.

    Returns ``(opts, cmd_args)`` where ``opts`` maps the option name without
    its ``--galaxy_`` prefix to its value (``None`` when not given).
    """
    prefix = '--galaxy_'
    opts = dict.fromkeys((
        'outputdir', 'datasets', 'datasetid', 'new_datasets', 'new_files_path',
        'summary_html', 'summary_template', 'summary_links', 'logfile',
        'ext_change', 'new_ext', 'inputdir',
    ))
    opts['debug'] = False
    cmd_args = []
    for arg in argv:
        if not arg.startswith(prefix):
            cmd_args.append(arg)
            continue
        # Split on the first '=' only, so values may themselves contain '='.
        name, sep, val = arg[len(prefix):].partition('=')
        if not sep:
            val = None
        if name == 'debug':
            opts['debug'] = True
        elif name in ('outputdir', 'new_files_path'):
            opts[name] = _ensure_dir(val)
        elif name in ('datasets', 'new_datasets'):
            opts[name] = val.split(',') if val is not None else None
        elif name in opts:
            opts[name] = val
    return opts, cmd_args


def _run_command(cmdline, tmp_dir):
    """Run ``cmdline`` through the shell and return ``(returncode, stderr_text)``.

    stdout and stderr of the child are redirected to temporary files that are
    removed again when this function returns; stdout is discarded.
    """
    # NOTE(review): the command is a space-joined string executed with
    # shell=True; this presumably relies on the arguments coming from a
    # trusted tool configuration -- confirm before exposing to other callers.
    with tempfile.NamedTemporaryFile(dir=tmp_dir, suffix='.err') as err_file:
        with tempfile.NamedTemporaryFile(dir=tmp_dir, suffix='.out') as out_file:
            proc = subprocess.Popen(args=cmdline, shell=True, cwd=tmp_dir,
                                    stderr=err_file.fileno(),
                                    stdout=out_file.fileno())
            returncode = proc.wait()
        # The child wrote through the shared descriptor; rewind before reading.
        err_file.seek(0)
        captured = err_file.read()
    if not isinstance(captured, str):
        # Python 3 yields bytes here; Python 2 already has a str.
        captured = captured.decode('utf-8', 'replace')
    return returncode, captured


def _collect_datasets(outputdir, dataset_patterns, new_files_path, debug):
    """Copy files under ``outputdir`` whose name matches a ``pattern:path`` entry.

    Each entry of ``dataset_patterns`` is ``regex:destination``.  Entries with
    no ':' or with destination ``None`` are skipped.  A matching file is copied
    to the destination and, when ``new_files_path`` is set, hard-linked there
    under its own name.  Copy/link failures abort through stop_err.
    """
    rules = []
    for output in dataset_patterns:
        pattern, sep, path = output.partition(':')
        if debug:
            sys.stdout.write('%s -> %s\n' % (pattern, path))
        if not sep or path == 'None':
            continue
        rules.append((re.compile(pattern), path))
    for root, dirs, files in os.walk(outputdir):
        for fname in files:
            fpath = os.path.join(root, fname)
            for regex, path in rules:
                matched = regex.match(fname)
                if debug:
                    sys.stdout.write('outdir %s match: %s\n' % (fname, matched))
                if not matched:
                    continue
                try:
                    shutil.copy2(fpath, path)
                    if new_files_path is not None:
                        os.link(fpath, os.path.join(new_files_path, fname))
                except Exception as ex:
                    stop_err('%s' % ex)


def _flatten_one_level(outputdir):
    """Move the contents of every direct subdirectory of ``outputdir`` into it."""
    for dname in os.listdir(outputdir):
        dpath = os.path.join(outputdir, dname)
        if os.path.isdir(dpath):
            for fname in os.listdir(dpath):
                shutil.move(os.path.join(dpath, fname), outputdir)


def _publish_new_datasets(patterns, new_files_path, datasetid, outputdir, debug):
    """Expose files in ``new_files_path`` as dynamically discovered datasets.

    Each entry of ``patterns`` is ``regex:ext``.  A file whose name matches is
    linked (or copied) to ``primary_<datasetid>_<root>_visible_<ext>`` in the
    same directory, where ``root`` is the first regex group if the pattern has
    one, otherwise the file name with the extension, underscores and dots
    removed.  When ``outputdir`` is set the file is also linked there.
    See http://bitbucket.org/galaxy/galaxy-central/wiki/ToolsMultipleOutput
    """
    for output in patterns:
        if ':' not in output:
            # No datatype given: nothing sensible can be named, skip the entry.
            continue
        pattern, ext = output.split(':', 1)
        regex = re.compile(pattern)
        ext_suffix = re.compile(r'\.?' + re.escape(ext) + '$')
        for fname in os.listdir(new_files_path):
            m = regex.match(fname)
            if not m:
                continue
            fpath = os.path.join(new_files_path, fname)
            if m.groups():
                root = m.groups()[0]
            else:
                # Underscore is the field separator in the target file name,
                # so it must not appear inside the root.
                root = ext_suffix.sub('', fname).replace('_', '').replace('.', '')
            fn = "%s_%s_%s_%s_%s" % ('primary', datasetid, root, 'visible', ext)
            target = os.path.join(new_files_path, fn)
            if debug:
                sys.stdout.write('> %s\n' % fpath)
                sys.stdout.write('< %s\n' % target)
            _link_or_copy(fpath, target)
            # Needed for files with variable output and a directory structure.
            if outputdir is not None:
                _link_or_copy(fpath, os.path.join(outputdir, fname))


def _write_log(logfile, directories):
    """Best-effort: write a listing of each given directory into ``logfile``.

    ``logfile`` must already exist (it is opened ``r+``).  ``None`` entries in
    ``directories`` and directories that cannot be listed are skipped.
    """
    if logfile is None:
        return
    sections = []
    for directory in directories:
        if directory is None:
            continue
        try:
            names = os.listdir(directory)
        except OSError:
            continue
        body = ''.join(fname + '\n' for fname in names if _is_transient(fname))
        sections.append('Tool started. Files created by tool: \n' + body
                        + 'Tool Finished.')
    if not sections:
        return
    try:
        with open(logfile, 'r+') as log:
            # One write, so a second section no longer overwrites the first.
            log.write('\n'.join(sections))
    except (IOError, OSError):
        pass


def _purge_directory(directory):
    """Best-effort removal of transient files directly inside ``directory``.

    Does nothing when ``directory`` is ``None`` or its path contains ``files``.
    Entries that cannot be removed (e.g. subdirectories, stale NFS handles)
    are skipped without aborting the rest of the cleanup.
    """
    if directory is None or 'files' in directory:
        return
    try:
        names = os.listdir(directory)
    except OSError:
        return
    for fname in names:
        if not _is_transient(fname):
            continue
        try:
            os.remove(os.path.join(directory, fname))
        except OSError:
            pass


def __main__():
    """Run the wrapped command from ``sys.argv`` and post-process its outputs.

    Arguments starting with ``--galaxy_`` configure the wrapper; all other
    arguments are joined with spaces and executed through the shell.

    Recognised options: ``outputdir``, ``datasets``, ``datasetid``,
    ``new_datasets``, ``new_files_path``, ``summary_html``,
    ``summary_template``, ``summary_links`` (accepted but not used),
    ``debug``, ``logfile``, ``ext_change``, ``new_ext`` and ``inputdir``.

    Steps, in order:
      1. optionally link/copy ``ext_change`` to ``<inputdir>/temporary.<new_ext>``
      2. run the command, capturing stderr
      3. copy outputs matching ``datasets`` entries to their destinations
      4. for ``summary_html``: flatten ``outputdir`` one level and copy the template
      5. publish ``new_datasets`` matches under the ``primary_...`` naming scheme
      6. always: write the log file and remove transient files

    Failures in steps 1-5 (including a non-zero exit status of the command)
    are not fatal: they end processing and are reported on stderr only when
    ``--galaxy_debug`` is given.  Explicit ``stop_err`` calls still terminate.
    """
    opts, cmd_args = _parse_args(sys.argv[1:])
    debug = opts['debug']
    inputdir = opts['inputdir']
    outputdir = opts['outputdir']
    dataset_patterns = opts['datasets']
    datasetid = opts['datasetid']
    new_dataset_patterns = opts['new_datasets']
    new_files_path = opts['new_files_path']
    summary_html = opts['summary_html']
    summary_template = opts['summary_template']
    logfile = opts['logfile']
    extchange = opts['ext_change']
    newext = opts['new_ext']
    # No option sets a temp dir: None selects the default temp location for
    # the capture files and leaves the command's working directory unchanged.
    tmp_dir = None

    if debug:
        sys.stdout.write('\n : '.join(cmd_args) + '\n')
    try:
        # Allow changing the file extension for inputs that require it.
        if extchange is not None and inputdir is not None and newext is not None:
            _link_or_copy(extchange, os.path.join(inputdir, 'temporary.' + newext))

        cmdline = ' '.join(cmd_args)
        if debug:
            sys.stdout.write(cmdline + '\n')
        returncode, stderr = _run_command(cmdline, tmp_dir)
        if debug:
            sys.stderr.write(stderr + '\n')
        if returncode != 0:
            if debug:
                sys.stderr.write("returncode = %d\n" % returncode)
            raise Exception(stderr)

        # Collect results.
        if dataset_patterns is not None and outputdir is not None:
            _collect_datasets(outputdir, dataset_patterns, new_files_path, debug)

        # The directory hierarchy is flattened so the summary page's href
        # links can be served from a single directory.
        if summary_html is not None:
            if outputdir is not None:
                _flatten_one_level(outputdir)
            if summary_template is not None:
                shutil.copy(summary_template, summary_html)

        # Handle the dynamically generated galaxy datasets.
        if (new_dataset_patterns is not None and new_files_path is not None
                and datasetid is not None):
            _publish_new_datasets(new_dataset_patterns, new_files_path,
                                  datasetid, outputdir, debug)
    except Exception as e:
        # Deliberately non-fatal; surface the reason only in debug mode.
        if debug:
            sys.stderr.write('%s\n' % e)
    finally:
        # Log first (it lists the files), then remove the transient files.
        # Both steps are best-effort and independent of each other.
        _write_log(logfile, (outputdir, new_files_path))
        for directory in (outputdir, inputdir, new_files_path):
            _purge_directory(directory)
		
# Run the wrapper only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()