view qiime_wrapper.py @ 0:e5c3175506b7 default tip

Initial tool configs for qiime, most need work.
author Jim Johnson <jj@umn.edu>
date Sun, 17 Jul 2011 10:30:11 -0500
parents
children
line wrap: on
line source

#!/usr/bin/env python
import logging, os, string, sys, tempfile, glob, shutil, types, urllib, optparse, re
import shlex, subprocess

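"""
Usage notes (sys.argv):
    qiime_wrapper.py  --galaxy_datasets=...  [other --galaxy_* options]  qiime_script [script arguments]

Arguments that start with --galaxy_ are consumed by this wrapper; all other
arguments are joined with spaces and executed as the command line.

Example: alpha_rarefaction
  output html
    wf_arare/alpha_rarefaction_plots/rarefaction_plots.html
    wf_arare/alpha_rarefaction_plots/html_plots/
    wf_arare/alpha_div
    wf_arare/alpha_div/alpha_rarefaction_101_0.txt

  wrapper options
    --galaxy_summary_html=$output_html
    --galaxy_summary_template=$output_template
    --galaxy_summary_links='label:link,label:link'
    --galaxy_outputdir=$output_html.extra_files_path
"""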

def stop_err( msg ):
    """Write *msg* to stderr and terminate the process with a failure status.

    msg -- the error to report; any object is accepted and is rendered
           with ``%s`` formatting (callers pass both strings and exceptions).

    Raises SystemExit with exit code 1, so that the failure is visible to
    callers that inspect the exit status as well as to those that only
    look at stderr.
    """
    # Wrap msg in a 1-tuple so that a tuple-valued msg is not mistaken for
    # a sequence of format arguments.
    sys.stderr.write( "%s\n" % (msg,) )
    sys.exit(1)

def _debug(stream, text):
    """Write one line of debug output to *stream*."""
    stream.write('%s\n' % (text,))


def _require_value(opt, val):
    """Return *val*; abort through stop_err() if *opt* was given without '=value'."""
    if val is None:
        stop_err('Option %s requires a value' % opt)
    return val


def _ensure_dir(opt, val):
    """Return the directory named by option *opt*, creating it if it is missing."""
    path = _require_value(opt, val)
    try:
        if not os.path.exists(path):
            os.makedirs(path)
    except Exception as ex:
        stop_err(ex)
    return path


def _parse_wrapper_args(argv):
    """Separate the wrapper's own ``--galaxy_*`` options from the command to run.

    Returns ``(opts, cmd_args)``: *opts* is a dict of wrapper settings and
    *cmd_args* is the list of all remaining arguments in their original order.
    Unrecognised ``--galaxy_*`` options are dropped silently.
    """
    opts = {
        'debug': False,
        'tmp_dir': None,
        'outputdir': None,
        'dataset_patterns': None,
        'datasetid': None,
        'new_dataset_patterns': None,
        'new_files_path': None,
        'summary_html': None,
        'summary_template': None,
        'summary_links': None,
    }
    cmd_args = []
    for arg in argv:
        if not arg.startswith('--galaxy_'):
            cmd_args.append(arg)
            continue
        # Split on the first '=' only, so a value may itself contain '='.
        opt, sep, val = arg.partition('=')
        if not sep:
            val = None
        if opt == '--galaxy_tmpdir':
            opts['tmp_dir'] = _ensure_dir(opt, val)
        elif opt == '--galaxy_outputdir':
            opts['outputdir'] = _ensure_dir(opt, val)
        elif opt == '--galaxy_datasets':
            opts['dataset_patterns'] = _require_value(opt, val).split(',')
        elif opt == '--galaxy_datasetid':
            opts['datasetid'] = val
        elif opt == '--galaxy_new_datasets':
            opts['new_dataset_patterns'] = _require_value(opt, val).split(',')
        elif opt == '--galaxy_new_files_path':
            opts['new_files_path'] = val
        elif opt == '--galaxy_summary_html':
            opts['summary_html'] = val
        elif opt == '--galaxy_summary_template':
            opts['summary_template'] = val
        elif opt == '--galaxy_summary_links':
            opts['summary_links'] = val
        elif opt == '--galaxy_debug':
            opts['debug'] = True
    return opts, cmd_args


def _read_text(path, chunk_size=1048576):
    """Return the contents of *path* as text, reading it in chunks."""
    chunks = []
    handle = open(path, 'rb')
    try:
        while True:
            chunk = handle.read(chunk_size)
            if not chunk:
                # An empty read is the only reliable end-of-file signal.
                break
            chunks.append(chunk)
    finally:
        handle.close()
    data = b''.join(chunks)
    if not isinstance(data, str):
        # Python 3: bytes must be decoded before being mixed with str.
        data = data.decode('utf-8', 'replace')
    return data


def _run_command(cmdline, workdir):
    """Run *cmdline* through the shell with *workdir* as its working directory.

    stdout and stderr of the command are redirected to '.out' / '.err'
    files created inside *workdir*.  Returns ``(returncode, stderr_text)``.
    """
    err_fd, err_path = tempfile.mkstemp(suffix='.err', dir=workdir)
    err_file = os.fdopen(err_fd, 'wb')
    try:
        out_fd, _out_path = tempfile.mkstemp(suffix='.out', dir=workdir)
        out_file = os.fdopen(out_fd, 'wb')
        try:
            # NOTE(security): the command line is handed to the shell as-is;
            # it must only ever be built from trusted tool configuration.
            proc = subprocess.Popen(args=cmdline, shell=True, cwd=workdir,
                                    stderr=err_file.fileno(),
                                    stdout=out_file.fileno())
            returncode = proc.wait()
        finally:
            out_file.close()
    finally:
        err_file.close()
    return returncode, _read_text(err_path)


def _collect_datasets(outputdir, dataset_patterns, debug=False):
    """Copy files found under *outputdir* to the paths named in *dataset_patterns*.

    Each item has the form ``regex:path``.  Every file anywhere below
    *outputdir* whose name matches ``regex`` (anchored at the start, as with
    re.match) is copied to ``path``.  Items whose path is the literal string
    'None' are skipped.
    """
    targets = []
    for item in dataset_patterns:
        # Split on the last ':' so the regex itself may contain colons.
        pattern, path = item.rsplit(':', 1)
        if debug:
            _debug(sys.stdout, '%s -> %s' % (pattern, path))
        if path == 'None':
            continue
        targets.append((re.compile(pattern), path))
    for root, _dirs, files in os.walk(outputdir):
        for fname in files:
            fpath = os.path.join(root, fname)
            for regex, path in targets:
                matched = regex.match(fname)
                if debug:
                    _debug(sys.stdout, 'outdir %s match: %s' % (fname, matched))
                if matched:
                    try:
                        shutil.copy2(fpath, path)
                    except Exception as ex:
                        stop_err('%s' % ex)


def _flatten_outputdir(outputdir):
    """Move the contents of each immediate subdirectory of *outputdir* up one level."""
    for dname in os.listdir(outputdir):
        dpath = os.path.join(outputdir, dname)
        if os.path.isdir(dpath):
            for fname in os.listdir(dpath):
                shutil.move(os.path.join(dpath, fname), outputdir)


def _export_new_datasets(outputdir, new_dataset_patterns, new_files_path,
                         datasetid, debug=False):
    """Publish files at the top level of *outputdir* as dynamically discovered datasets.

    Each item of *new_dataset_patterns* has the form ``regex:datatype``.
    A matching file is hard-linked (or copied, if linking fails) into
    *new_files_path* under the name
    ``primary_<datasetid>_<name>_visible_<datatype>``.  ``<name>`` is the
    first regex group when there is one; otherwise it is the file name with
    the datatype suffix, underscores and dots removed.
    See http://bitbucket.org/galaxy/galaxy-central/wiki/ToolsMultipleOutput
    """
    flist = os.listdir(outputdir)
    for item in new_dataset_patterns:
        pattern, ext = item.rsplit(':', 1)
        regex = re.compile(pattern)
        for fname in flist:
            m = regex.match(fname)
            if not m:
                continue
            fpath = os.path.join(outputdir, fname)
            if not os.path.isfile(fpath):
                continue
            groups = m.groups()
            if groups and groups[0]:
                root = groups[0]
            else:
                # Remove the extension from the name if present (it is part
                # of the generated file name anyway) and remove underscores,
                # which serve as the field separator in that file name.
                root = re.sub(r'\.?' + re.escape(ext) + '$', '', fname)
                root = root.replace('_', '').replace('.', '')
            fn = "%s_%s_%s_%s_%s" % ('primary', datasetid, root, 'visible', ext)
            dest = os.path.join(new_files_path, fn)
            if debug:
                _debug(sys.stdout, '> %s' % fpath)
                _debug(sys.stdout, '< %s' % dest)
            try:
                os.link(fpath, dest)
            except (OSError, AttributeError):
                # Linking can fail (e.g. across filesystems, or where
                # os.link is unavailable); fall back to a plain copy.
                shutil.copy2(fpath, dest)


def __main__():
    """Run a command line and hand its output files over to Galaxy.

    Reads sys.argv[1:].  Arguments starting with ``--galaxy_`` configure the
    wrapper; all other arguments are joined with spaces and executed through
    the shell, with the temporary directory as working directory.

    Wrapper options:
      --galaxy_tmpdir=DIR            working directory (created if missing);
                                     a fresh temporary directory is created
                                     and removed afterwards when not given
      --galaxy_outputdir=DIR         directory searched for results (created
                                     if missing); defaults to the tmp dir
      --galaxy_datasets=RE:PATH,...  copy files matching RE to PATH
      --galaxy_datasetid=ID          dataset id used to name new datasets
      --galaxy_new_datasets=RE:EXT,...  files to publish as new datasets
      --galaxy_new_files_path=DIR    directory receiving the new datasets
      --galaxy_summary_html=PATH     flatten the output directory by one
                                     level and write the summary page here
      --galaxy_summary_template=PATH file copied to the summary html path
      --galaxy_summary_links=...     accepted, currently not used
      --galaxy_debug                 print diagnostic output

    Any failure, including a non-zero exit status of the command, is
    reported through stop_err().
    """
    opts, cmd_args = _parse_wrapper_args(sys.argv[1:])
    debug = opts['debug']
    if debug:
        _debug(sys.stdout, '\n : '.join(cmd_args))
    # Bound before the try block so the error handler can always read it.
    stderr = ''
    created_tmp_dir = None
    try:
        cmdline = ' '.join(cmd_args)
        if debug:
            _debug(sys.stdout, cmdline)
        tmp_dir = opts['tmp_dir']
        if tmp_dir is None or not os.path.isdir(tmp_dir):
            tmp_dir = created_tmp_dir = tempfile.mkdtemp()
        outputdir = opts['outputdir']
        if outputdir is None or not os.path.isdir(outputdir):
            outputdir = tmp_dir
        returncode, stderr = _run_command(cmdline, tmp_dir)
        if debug:
            _debug(sys.stderr, stderr)
        if returncode != 0:
            if debug:
                _debug(sys.stderr, "returncode = %d" % returncode)
            raise Exception(stderr or
                            'command exited with return code %d' % returncode)
        # collect results
        if opts['dataset_patterns'] is not None:
            _collect_datasets(outputdir, opts['dataset_patterns'], debug)
        # Need to flatten the dir hierarchy in order for galaxy to serve the
        # href links
        if opts['summary_html'] is not None:
            _flatten_outputdir(outputdir)
            if opts['summary_template'] is not None:
                shutil.copy(opts['summary_template'], opts['summary_html'])
        # Handle the dynamically generated galaxy datasets
        if (opts['new_dataset_patterns'] is not None
                and opts['new_files_path'] is not None
                and opts['datasetid'] is not None):
            _export_new_datasets(outputdir, opts['new_dataset_patterns'],
                                 opts['new_files_path'], opts['datasetid'],
                                 debug)
    except Exception as e:
        msg = str(e)
        # Append the command's stderr unless the message already carries it.
        if stderr and stderr not in msg:
            msg += stderr
        stop_err('Error running  ' + msg)
    finally:
        # Only remove the temporary directory this function created itself.
        # Errors are ignored so stale nfs handles are not reported.
        if created_tmp_dir is not None:
            shutil.rmtree(created_tmp_dir, ignore_errors=True)

# Run the wrapper only when this file is executed as a script.
if __name__ == "__main__":
    __main__()