diff qiime_wrapper.py @ 0:e5c3175506b7 default tip

Initial tool configs for qiime, most need work.
author Jim Johnson <jj@umn.edu>
date Sun, 17 Jul 2011 10:30:11 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/qiime_wrapper.py	Sun Jul 17 10:30:11 2011 -0500
@@ -0,0 +1,210 @@
+#!/usr/bin/env python
+import logging, os, string, sys, tempfile, glob, shutil, types, urllib, optparse, re
+import shlex, subprocess
+
+"""
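+Usage (sys.argv):
+  qiime_wrapper.py [--galaxy_<option>=<value> ...] <qiime_script> [script arguments ...]
+  e.g.  this  --galaxy_datasets=<pattern:path,...>  <qiime_script> ...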
+
+alpha_rarefaction 
+  output html 
+    wf_arare/alpha_rarefaction_plots/rarefaction_plots.html
+    wf_arare/alpha_rarefaction_plots/html_plots/
+    wf_arare/alpha_div
+    wf_arare/alpha_div/alpha_rarefaction_101_0.txt
+
+    --galaxy_summary_html=$output_html
+    --galaxy_summary_template=$output_template
+    --galaxy_summary_links='label:link,label:link'
+    --galaxy_outputdir=$output_html.extra_files_path
+    
+    
+"""
+
+def stop_err( msg ):
+    sys.stderr.write( "%s\n" % msg )
+    sys.exit()
+
+def __main__():
+    debug = False
+    tmp_dir = None
+    inputdir = None
+    outputdir = None
+    dataset_patterns = None
+    datasetid = None
+    new_dataset_patterns = None
+    new_files_path = None
+    summary_html=None
+    summary_template=None
+    summary_links=None
+    ## check if there are files to generate
+    cmd_args = []
+    for arg in sys.argv[1:]:
+        if arg.startswith('--galaxy_'):
+            (opt,val) = arg.split('=') if arg.find('=') > 0 else (arg,None)
+            if opt == '--galaxy_tmpdir':
+                try:
+                    if not os.path.exists(val):
+                        os.makedirs(val)
+                    tmp_dir = val
+                except Exception, ex:
+                    stop_err(ex)
+            if opt == '--galaxy_outputdir':
+                try:
+                    if not os.path.exists(val):
+                        os.makedirs(val)
+                    outputdir = val
+                except Exception, ex:
+                    stop_err(ex)
+            if opt == '--galaxy_datasets':
+                dataset_patterns = val.split(',')
+            if opt == '--galaxy_datasetid':
+                datasetid = val
+            if opt == '--galaxy_new_datasets':
+                new_dataset_patterns = val.split(',')
+            if opt == '--galaxy_new_files_path':
+                new_dataset_patterns = val
+            if opt == '--galaxy_summary_html':
+                summary_html=val
+            if opt == '--galaxy_summary_template':
+                summary_template=val
+            if opt == '--galaxy_summary_links':
+                summary_links=val
+            if opt == '--galaxy_debug':
+                debug = True
+        else:
+            cmd_args.append(arg)
+    if debug: print >> sys.stdout, '\n : '.join(cmd_args) 
+    try:
+        cmdline = ' '.join(cmd_args)
+        if debug: print >> sys.stdout, cmdline 
+        if tmp_dir == None or not os.path.isdir(tmp_dir):
+            tmp_dir = tempfile.mkdtemp()
+        if outputdir == None or not os.path.isdir(outputdir):
+            outputdir = tmp_dir
+        tmp_stderr_name = tempfile.NamedTemporaryFile( dir=tmp_dir,suffix='.err' ).name
+        tmp_stderr = open( tmp_stderr_name, 'wb' )
+        tmp_stdout_name = tempfile.NamedTemporaryFile( dir=tmp_dir,suffix='.out' ).name
+        tmp_stdout = open( tmp_stdout_name, 'wb' )
+        proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno(), stdout=tmp_stdout.fileno() )
+        returncode = proc.wait()
+        tmp_stderr.close()
+        # get stderr, allowing for case where it's very large
+        tmp_stderr = open( tmp_stderr_name, 'rb' )
+        stderr = ''
+        buffsize = 1048576
+        try:
+            while True:
+                stderr += tmp_stderr.read( buffsize )
+                if not stderr or len( stderr ) % buffsize != 0:
+                    break
+            if debug: print >> sys.stderr, stderr
+        except OverflowError:
+            pass
+        tmp_stderr.close()
+        if returncode != 0:
+            if debug: print >> sys.stderr, "returncode = %d" % returncode 
+            raise Exception, stderr
+        # collect results
+        if dataset_patterns != None:
+            for root, dirs, files in os.walk(outputdir):
+                for fname in files:
+                    fpath = os.path.join(root,fname)
+                    if dataset_patterns != None:
+                        for output in dataset_patterns:
+                            (pattern,path) = output.split(':')
+                            if debug: print >> sys.stdout, '%s -> %s' % (pattern,path)
+                            if path == None or path == 'None':
+                                continue
+                            if debug: print >> sys.stdout, 'outdir %s match: %s' % (fname,re.match(pattern,fname))
+                            if re.match(pattern,fname):
+                                found = True
+                                # flist.remove(fname)
+                                try:
+                                    shutil.copy2(fpath, path)
+                                except Exception, ex:
+                                    stop_err('%s' % ex)
+        # move result to outdir 
+        # Need to flatten the dir hierachy in order for galaxy to serve the href links
+        if summary_html != None:
+            """
+            for root, dirs, files in os.walk(outputdir):
+                if root != outputdir:
+                    for fname in files:
+                        fpath = os.path.join(root,fname)
+            """
+            ## move everything up one level
+            dlist = os.listdir(outputdir)
+            for dname in dlist:
+                dpath = os.path.join(outputdir,dname)
+                if os.path.isdir(dpath):
+                    flist = os.listdir(dpath)
+                    for fname in flist:
+                        fpath = os.path.join(dpath,fname)
+                        shutil.move(fpath,outputdir)
+            if summary_template != None:
+                shutil.copy(summary_template,summary_html)
+        """
+        flist = os.listdir(outputdir)
+        if debug: print >> sys.stdout, 'outputdir: %s' % outputdir
+        if debug: print >> sys.stdout, 'files: %s' % ','.join(flist)
+        if dataset_patterns != None:
+            for output in dataset_patterns:
+                (pattern,path) = output.split(':')
+                if debug: print >> sys.stdout, '%s -> %s' % (pattern,path)
+                if path == None or path == 'None':
+                    continue
+                for fname in flist:
+                    if debug: print >> sys.stdout, 'outdir %s match: %s' % (fname,re.match(pattern,fname))
+                    if re.match(pattern,fname):
+                        found = True
+                        flist.remove(fname)
+                        fpath = os.path.join(outputdir,fname)
+                        try:
+                            shutil.copy2(fpath, path)
+                        except Exception, ex:
+                            stop_err('%s' % ex)
+        """
+        # Handle the dynamically generated galaxy datasets
+        # http://bitbucket.org/galaxy/galaxy-central/wiki/ToolsMultipleOutput
+        # --new_datasets=   specifies files to copy to the new_file_path
+        # The list items are separated by commas
+        # Each item  conatins:   a regex pattern for matching filenames and  a galaxy datatype (separated by :)
+        # The regex match.groups()[0] is used as the id name of the dataset, and must result in  unique name for each output
+        if new_dataset_patterns != None and new_files_path != None and datasetid != None:
+            for output in new_dataset_patterns(','):
+                (pattern,ext) = output.split(':');
+                for fname in flist:
+                    m = re.match(pattern,fname)
+                    if m:
+                        fpath = os.path.join(outputdir,fname)
+                        if len(m.groups()) > 0:
+                            root = m.groups()[0]
+                        else:
+                            # remove  the ext from the name if it exists, galaxy will add back later
+                            # remove underscores since galaxy uses that as a field separator for dynamic datasets
+                            root = re.sub('\.?'+ext+'$','',fname).replace('_','').replace('.','')
+                        # filename pattern required by galaxy 
+                        fn = "%s_%s_%s_%s_%s" % ( 'primary', datasetid, root, 'visible', ext )
+                        if debug:  print >> sys.stdout, '> %s' % fpath
+                        if debug:  print >> sys.stdout, '< %s' % os.path.join(new_files_path,fn)
+                        try:
+                            os.link(fpath, os.path.join(new_files_path,fn))
+                        except:
+                            shutil.copy2(fpath, os.path.join(new_files_path,fn))
+
+    except Exception, e:
+        msg = str(e) + stderr
+        stop_err( 'Error running  ' + msg)
+    finally:
+        # Only remove temporary directories
+        # Enclose in try block, so we don't report error on stale nfs handles
+        
+        try:
+            if inputdir != None and os.path.exists(inputdir):
+                shutil.rmtree(inputdir)
+        except:
+            pass
+
+if __name__ == "__main__": __main__()
+