diff qiime/qiime_wrapper.py @ 0:003162f90751 draft

Uploaded
author azuzolo
date Wed, 06 Jun 2012 16:40:30 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/qiime/qiime_wrapper.py	Wed Jun 06 16:40:30 2012 -0400
@@ -0,0 +1,312 @@
+#!/usr/bin/env python
+import logging, os, string, sys, tempfile, glob, shutil, types, urllib, optparse, re
+import shlex, subprocess
+
+"""
+sys.argv
+this  --galaxy_datasets=   --qiime_script 
+
+alpha_rarefaction 
+  output html 
+    wf_arare/alpha_rarefaction_plots/rarefaction_plots.html
+    wf_arare/alpha_rarefaction_plots/html_plots/
+    wf_arare/alpha_div
+    wf_arare/alpha_div/alpha_rarefaction_101_0.txt
+
+    --galaxy_summary_html=$output_html
+    --galaxy_summary_template=$output_template
+    --galaxy_summary_links='label:link,label:link'
+    --galaxy_outputdir=$output_html.extra_files_path
+    
+    
+"""
+
+def stop_err( msg ):
+	sys.stderr.write( "%s\n" % msg )
+	sys.exit()
+
+def __main__():
+	debug = False
+	tmp_dir = None
+	inputdir = None
+	outputdir = None
+	dataset_patterns = None
+	datasetid = None
+	new_dataset_patterns = None
+	new_files_path = None
+	summary_html=None
+	summary_template=None
+	summary_links=None
+    ## adds "log file" printing capabilities for primary output in dynamic file output 
+	logfile = None
+    ## added support for correcting file extensions
+    	newext = None
+	extchange = None	
+    ## check if there are files to generate
+	cmd_args = []
+	for arg in sys.argv[1:]:
+		if arg.startswith('--galaxy_'):
+			(opt,val) = arg.split('=') if arg.find('=') > 0 else (arg,None)
+			'''
+			if opt == '--galaxy_tmpdir':
+				try:
+					if not os.path.exists(val):
+						os.makedirs(val)
+					tmp_dir = val
+				except Exception, ex:
+					stop_err(ex)
+			'''
+			if opt == '--galaxy_outputdir':
+				try:
+					if not os.path.exists(val):
+						os.makedirs(val)
+					outputdir = val
+				except Exception, ex:
+					stop_err(ex)
+			if opt == '--galaxy_datasets':
+				dataset_patterns = val.split(',')
+			if opt == '--galaxy_datasetid':
+				datasetid = val
+			if opt == '--galaxy_new_datasets':
+				new_dataset_patterns = val.split(',')
+			if opt == '--galaxy_new_files_path':
+				if not os.path.exists(val):
+					os.makedirs(val)
+				new_files_path = val
+			if opt == '--galaxy_summary_html':
+				summary_html=val
+			if opt == '--galaxy_summary_template':
+				summary_template=val
+			if opt == '--galaxy_summary_links':
+				summary_links=val
+			if opt == '--galaxy_debug':
+				debug = True
+			if opt == '--galaxy_logfile':
+				logfile = val
+			if opt == '--galaxy_ext_change':
+				extchange = val
+			if opt == '--galaxy_new_ext':
+				newext = val
+			if opt == '--galaxy_inputdir':
+				inputdir = val
+		else:
+			cmd_args.append(arg)
+	if debug: print >> sys.stdout, '\n : '.join(cmd_args) 
+	try:	
+		stderr = ''
+		# allow for changing of file extension for files which require it
+		if extchange != None and inputdir != None and newext != None:
+			#newfile = os.path.join(inputdir,"temporary."+newext)
+			try:
+				os.link(extchange,inputdir+"/temporary."+newext)
+			except:
+				shutil.copy2(extchange,inputdir+"/temporary."+newext)
+		cmdline = ' '.join(cmd_args)
+		if debug: print >> sys.stdout, cmdline
+		'''
+		if tmp_dir == None or not os.path.isdir(tmp_dir):
+			tmp_dir = tempfile.mkdtemp()
+		if outputdir == None or not os.path.isdir(outputdir):
+			outputdir = tmp_dir
+		'''
+		tmp_stderr_name = tempfile.NamedTemporaryFile( dir=tmp_dir,suffix='.err' ).name
+		tmp_stderr = open( tmp_stderr_name, 'wb' )
+		tmp_stdout_name = tempfile.NamedTemporaryFile( dir=tmp_dir,suffix='.out' ).name
+		tmp_stdout = open( tmp_stdout_name, 'wb' )
+		proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno(), stdout=tmp_stdout.fileno() )
+		returncode = proc.wait()
+		tmp_stderr.close()
+        # get stderr, allowing for case where it's very large
+		tmp_stderr = open( tmp_stderr_name, 'rb' )
+		buffsize = 1048576
+		try:
+			while True:
+				stderr += tmp_stderr.read( buffsize )
+				if not stderr or len( stderr ) % buffsize != 0:
+					break
+			if debug: print >> sys.stderr, stderr
+		except OverflowError:
+			pass
+		tmp_stderr.close()
+		if returncode != 0:
+			if debug: print >> sys.stderr, "returncode = %d" % returncode 
+			raise Exception, stderr
+			#raise Exception, sys.stderr
+        # collect results
+		if dataset_patterns != None:
+			for root, dirs, files in os.walk(outputdir):
+				for fname in files:
+					fpath = os.path.join(root,fname)
+					if dataset_patterns != None:
+						for output in dataset_patterns:
+							(pattern,path) = output.split(':')
+							if debug: print >> sys.stdout, '%s -> %s' % (pattern,path)
+							if path == None or path == 'None':
+								continue
+							if debug: print >> sys.stdout, 'outdir %s match: %s' % (fname,re.match(pattern,fname))
+							if re.match(pattern,fname):
+								found = True
+								# flist.remove(fname)
+								try:
+									shutil.copy2(fpath, path)
+									if new_files_path != None:
+										os.link(fpath, os.path.join(new_files_path,fname))
+								except Exception, ex:
+									stop_err('%s' % ex)
+		# move result to outdir 
+		# Need to flatten the dir hierachy in order for galaxy to serve the href links
+		if summary_html != None:
+			"""
+			for root, dirs, files in os.walk(outputdir):
+				if root != outputdir:
+					for fname in files:
+						fpath = os.path.join(root,fname)
+			"""
+			## move everything up one level
+			dlist = os.listdir(outputdir)
+			for dname in dlist:
+				dpath = os.path.join(outputdir,dname)
+				if os.path.isdir(dpath):
+					flist = os.listdir(dpath)
+					for fname in flist:
+						fpath = os.path.join(dpath,fname)
+						shutil.move(fpath,outputdir)
+			if summary_template != None:
+				shutil.copy(summary_template,summary_html)
+		"""
+		flist = os.listdir(outputdir)
+		if debug: print >> sys.stdout, 'outputdir: %s' % outputdir
+		if debug: print >> sys.stdout, 'files: %s' % ','.join(flist)
+		if dataset_patterns != None:
+			for output in dataset_patterns:
+				(pattern,path) = output.split(':')
+				if debug: print >> sys.stdout, '%s -> %s' % (pattern,path)
+				if path == None or path == 'None':
+					continue
+				for fname in flist:
+					if debug: print >> sys.stdout, 'outdir %s match: %s' % (fname,re.match(pattern,fname))
+					if re.match(pattern,fname):
+						found = True
+						flist.remove(fname)
+						fpath = os.path.join(outputdir,fname)
+						try:
+							shutil.copy2(fpath, path)
+						except Exception, ex:
+							stop_err('%s' % ex)
+		"""
+    	# Handle the dynamically generated galaxy datasets
+    	# http://bitbucket.org/galaxy/galaxy-central/wiki/ToolsMultipleOutput
+    	# --new_datasets = specifies files to be found in the new_file_path
+    	# The list items are separated by commas
+    	# Each item conatins: a regex pattern for matching filenames and  a galaxy datatype (separated by :)
+    	# The regex match.groups()[0] is used as the id name of the dataset, and must result in  unique name for each output
+		# The --galaxy_output flag is used for instances where data needs to be copied to the extra_files_path for later
+		# directory use
+		if new_dataset_patterns != None and new_files_path != None and datasetid != None:
+			for output in new_dataset_patterns:
+				if ':' in output: pattern,ext = output.split(':',1)
+				flist = os.listdir(new_files_path)
+				for fname in flist:
+					m = re.match(pattern,fname)
+					if m:
+						fpath = os.path.join(new_files_path,fname)
+						if len(m.groups()) > 0:
+							root = m.groups()[0]
+						else:
+                       	# remove  the ext from the name if it exists, galaxy will add back later
+                   	   	# remove underscores since galaxy uses that as a field separator for dynamic datasets
+							root = re.sub('\.?'+ext+'$','',fname).replace('_','').replace('.','')
+	                   	# filename pattern required by galaxy 
+						fn = "%s_%s_%s_%s_%s" % ( 'primary', datasetid, root, 'visible', ext )
+						if debug:  print >> sys.stdout, '> %s' % fpath
+						if debug:  print >> sys.stdout, '< %s' % os.path.join(new_files_path,fn)
+						try:
+							os.link(fpath, os.path.join(new_files_path,fn))
+							# needed for files with variable output and a directory structure
+							if outputdir != None:
+								os.link(fpath, os.path.join(outputdir,fname))
+							# clean out files from tmp directory, may be unnecessary
+							#os.remove(fpath)
+						except:
+							shutil.copy2(fpath, os.path.join(new_files_path,fn))
+							# needed for files with variable output and a directory structure
+							if outputdir != None:
+								os.link(fpath, os.path.join(outputdir,fname))
+						
+		print "bob" + logfile
+		'''
+		if logfile != None:
+			print "bleep"
+			if outputdir != None:
+				print "beep"
+				logwrite = open(logfile, 'w+')
+				logwrite.write('Tool started. Files created by tool: \n')
+				flist = os.listdir(outputdir)
+				for fname in flist:
+					if 'DS_Store' not in fname:
+						logwrite.write(fname+'\n')
+				logwrite.write('Tool Finished.')
+				logwrite.close()
+			if new_files_path != None:
+				print "boop"
+				logwrite = open(logfile, 'w+')
+				if len(logfile.readline() > 0):
+					logwrite.write('Tool started. Files created by tool: \n')
+					flist = os.listdir(new_files_path)
+					for fname in flist:
+						if 'DS_Store' not in fname:
+							logwrite.write(fname+'\n')
+					logwrite.write('Tool Finished.')
+					logwrite.close()
+		'''
+	except Exception, e:
+		msg = str(e) + stderr
+		#msg = str(e) + str(sys.stderr)
+		#stop_err( 'Error running  ' + msg)
+	finally:
+        # Only remove temporary directories and files from temporary directory
+        # Enclose in try block, so we don't report error on stale nfs handles
+		try:
+			if logfile != None:
+				if outputdir != None:
+					logwrite = open(logfile, 'r+')
+					logwrite.write('Tool started. Files created by tool: \n')
+					flist = os.listdir(outputdir)
+					for fname in flist:
+						if 'DS_Store' not in fname and 'primary' not in fname:
+							logwrite.write(fname+'\n')
+					logwrite.write('Tool Finished.')
+					logwrite.close()
+				if new_files_path != None:
+					logwrite = open(logfile, 'r+')
+					logwrite.write('Tool started. Files created by tool: \n')
+					flist = os.listdir(new_files_path)
+					for fname in flist:
+						if 'DS_Store' not in fname and 'primary' not in fname:
+							logwrite.write(fname+'\n')
+					logwrite.write('Tool Finished.')
+					logwrite.close()
+			if tmp_dir != None and os.path.exists(tmp_dir) and os.path.isfile(tmp_dir):
+				#shutil.rmtree(tmp_dir)
+				pass
+			if outputdir != None and 'files' not in outputdir:
+				flist = os.listdir(outputdir)
+				for fname in flist:
+					if 'DS_Store' not in fname and 'primary' not in fname:
+						os.remove(os.path.join(outputdir,fname))
+			if inputdir != None and 'files' not in inputdir:
+				flist = os.listdir(inputdir)
+				for fname in flist:
+					if 'DS_Store' not in fname and 'primary' not in fname:
+						os.remove(os.path.join(inputdir,fname))
+			if new_files_path != None and 'files' not in new_files_path:
+				flist = os.listdir(new_files_path)
+				for fname in flist:
+					if 'DS_Store' not in fname and 'primary' not in fname:
+						os.remove(os.path.join(new_files_path,fname))
+
+		except:
+			pass
+		
+if __name__ == "__main__": __main__()
+