| 
6
 | 
     1 #!/usr/bin/env python
 | 
| 
 | 
     2 #Dan Blankenberg
 | 
| 
 | 
     3 
 | 
| 
 | 
     4 """
 | 
| 
 | 
      5 A wrapper script for configuring and running the Strelka workflow.
 | 
| 
 | 
     6 """
 | 
| 
 | 
     7 
 | 
| 
 | 
     8 from __future__ import print_function
 | 
| 
 | 
     9 import sys, argparse, os, tempfile, subprocess, shutil
 | 
| 
 | 
    10 from binascii import unhexlify
 | 
| 
 | 
    11 from string import Template
 | 
| 
 | 
    12 from galaxy import eggs
 | 
| 
 | 
    13 #import pkg_resources; pkg_resources.require( "bx-python" )
 | 
| 
 | 
    14 
 | 
| 
 | 
    15 #GALAXY_EXT_TO_GATK_EXT = { 'gatk_interval':'intervals', 'bam_index':'bam.bai', 'gatk_dbsnp':'dbSNP', 'picard_interval_list':'interval_list' } #items not listed here will use the galaxy extension as-is
 | 
| 
 | 
    16 #GALAXY_EXT_TO_GATK_FILE_TYPE = GALAXY_EXT_TO_GATK_EXT #for now, these are the same, but could be different if needed
 | 
| 
 | 
    17 #DEFAULT_GATK_PREFIX = "gatk_file"
 | 
| 
 | 
    18 #CHUNK_SIZE = 2**20 #1mb
 | 
| 
 | 
    19 #
 | 
| 
 | 
    20 #
 | 
| 
 | 
    21 def cleanup_before_exit( tmp_dir ):
 | 
| 
 | 
    22     if tmp_dir and os.path.exists( tmp_dir ):
 | 
| 
 | 
    23         shutil.rmtree( tmp_dir )
 | 
| 
 | 
    24 
 | 
| 
 | 
    25 def _create_config(args, config_path):
 | 
| 
 | 
    26     conf_file = open(config_path, "w")
 | 
| 
 | 
    27     conf_file.write("[user]\n")
 | 
| 
 | 
    28     for option in args:
 | 
| 
 | 
    29         if not option in ["tumorBam", "normalBam", "refFile", "configFile"] and args[option]!=None:
 | 
| 
 | 
    30     	    conf_file.write("%s=%s\n" % (option, args[option]))
 | 
| 
 | 
    31     conf_file.close()
 | 
| 
 | 
    32 
 | 
| 
 | 
    33 def my_Popen(cmd, prefix_for_stderr_name, tmp_dir, msg_error):
 | 
| 
 | 
    34     stderr_name = tempfile.NamedTemporaryFile( prefix = prefix_for_stderr_name ).name
 | 
| 
 | 
    35     proc = subprocess.Popen( args=cmd, shell=True, stderr=open( stderr_name, 'wb' ) )
 | 
| 
 | 
    36     return_code = proc.wait()                          
 | 
| 
 | 
    37     if return_code:
 | 
| 
 | 
    38 	for line in open( stderr_name ):
 | 
| 
 | 
    39            print(line, file=sys.stderr)
 | 
| 
 | 
    40 	os.unlink( stderr_name ) #clean up
 | 
| 
 | 
    41  	cleanup_before_exit( tmp_dir )
 | 
| 
 | 
    42  	raise Exception( msg_error )
 | 
| 
 | 
    43     else:
 | 
| 
 | 
    44         os.unlink( stderr_name )
 | 
| 
 | 
    45 
 | 
| 
 | 
    46 def index_bam_files( bam_filenames, tmp_dir ):
 | 
| 
 | 
    47     for bam_filename in bam_filenames:
 | 
| 
 | 
    48         bam_index_filename = "%s.bai" % bam_filename
 | 
| 
 | 
    49         print("bam_filename is: " + bam_filename + " bam_index_filename is: " + bam_index_filename + " test is: %s" % os.path.exists(bam_index_filename))
 | 
| 
 | 
    50         if not os.path.exists( bam_index_filename ):
 | 
| 
 | 
    51             #need to index this bam file
 | 
| 
 | 
    52             command = 'samtools index %s %s' % ( bam_filename, bam_index_filename )
 | 
| 
 | 
    53             my_Popen( command, "bam_index_stderr", tmp_dir, "Error during indexation of fasta file :" + bam_filename)
 | 
| 
 | 
    54 
 | 
| 
 | 
    55 def index_fasta_files( fasta_filenames, tmp_dir ):
 | 
| 
 | 
    56     for fasta_filename in fasta_filenames:
 | 
| 
 | 
    57         fasta_index_filename = "%s.fai" % fasta_filename
 | 
| 
 | 
    58         print("fasta_filename is: " + fasta_filename + " fasta_index_filename is: " + fasta_index_filename + " test is: %s" % os.path.exists(fasta_index_filename))
 | 
| 
 | 
    59         if not os.path.exists( fasta_index_filename ):
 | 
| 
 | 
    60             #need to index this bam file
 | 
| 
 | 
    61             command = 'samtools faidx %s %s' % ( fasta_filename, fasta_index_filename )
 | 
| 
 | 
    62             my_Popen( command, "fasta_index_stderr", tmp_dir, "Error during indexation of fasta file :" + fasta_filename)
 | 
| 
 | 
    63 
 | 
| 
 | 
    64 def __main__():
 | 
| 
 | 
    65     #Parse Command Line  OPTPARSE DEPRECIATED USE ARGPARSE INSTEAD
 | 
| 
 | 
    66     #MKTEMP DEPRECIATED USE MKDTlizations#EMP INSTEAD
 | 
| 
 | 
    67         
 | 
| 
 | 
    68     root_dir= "/home/galaxyusr/data/galaxy_dist/tools/strelka2"
 | 
| 
 | 
    69     expected_dir="for_tests"
 | 
| 
 | 
    70     job_dir=os.getcwd()
 | 
| 
 | 
    71     analysis_dir=job_dir + "/StrelkaAnalysis"
 | 
| 
 | 
    72     config_script=root_dir + "/configureStrelkaWorkflow.pl"
 | 
| 
 | 
    73     tmp_dir = "tmp"  #tempfile.mkdtemp( prefix='tmp-strelkaAnalysis-' )
 | 
| 
 | 
    74     config_ini = "%s/config.ini" % (tmp_dir)
 | 
| 
 | 
    75 
 | 
| 
 | 
    76     print("root_dir: " + root_dir + "\njob_dir :" + job_dir + "\nanalysis_dir :" + analysis_dir + "\nconfig_script :" + config_script + "\ntmp_dir :" + tmp_dir + "\nconfig_ini :" +  config_ini)
 | 
| 
 | 
    77    
 | 
| 
 | 
    78     #manage parsing
 | 
| 
 | 
    79     parser = argparse.ArgumentParser()                                             
 | 
| 
 | 
    80     parser.add_argument( '-t', '--tumorBam', help='path to tumor bam file', required = False )
 | 
| 
 | 
    81     parser.add_argument( '-n', '--normalBam', help='path to tumor bam file', required = False )   
 | 
| 
 | 
    82     parser.add_argument( '-r', '--refFile', help='path to tumor bam file', required = False )
 | 
| 
 | 
    83     parser.add_argument( '-c', '--configFile', help='path to tumor bam file', required = False )
 | 
| 
 | 
    84     parser.add_argument( '--depthFilterMultiple', help='path to tumor bam file', required = False )
 | 
| 
 | 
    85     parser.add_argument( '--snvMaxFilteredBasecallFrac', help='path to tumor bam file', required = False )
 | 
| 
 | 
    86     parser.add_argument( '--snvMaxSpanningDeletionFrac', help='path to tumor bam file', required = False )
 | 
| 
 | 
    87     parser.add_argument( '--indelMaxRefRepeat', help='path to tumor bam file', required = False )
 | 
| 
 | 
    88     parser.add_argument( '--indelMaxWindowFilteredBasecallFrac', help='path to tumor bam file', required = False )
 | 
| 
 | 
    89     parser.add_argument( '--indelMaxIntHpolLength', help='path to tumor bam file', required = False )
 | 
| 
 | 
    90     parser.add_argument( '--ssnvPrior', help='path to tumor bam file', required = False )
 | 
| 
 | 
    91     parser.add_argument( '--sindelPrior', help='path to tumor bam file', required = False )
 | 
| 
 | 
    92     parser.add_argument( '--ssnvNoise', help='path to tumor bam file', required = False )
 | 
| 
 | 
    93     parser.add_argument( '--sindelNoise', help='path to tumor bam file', required = False )
 | 
| 
 | 
    94     parser.add_argument( '--ssnvNoiseStrandBiasFrac', help='path to tumor bam file', required = False )
 | 
| 
 | 
    95     parser.add_argument( '--minTier1Mapq', help='path to tumor bam file', required = False )
 | 
| 
 | 
    96     parser.add_argument( '--minTier2Mapq', help='path to tumor bam file', required = False )
 | 
| 
 | 
    97     parser.add_argument( '--ssnvQuality_LowerBound', help='path to tumor bam file', required = False )
 | 
| 
 | 
    98     parser.add_argument( '--sindelQuality_LowerBound', help='path to tumor bam file', required = False )
 | 
| 
 | 
    99     parser.add_argument( '--isWriteRealignedBam', help='path to tumor bam file', required = False )
 | 
| 
 | 
   100     parser.add_argument( '--binSize', help='path to tumor bam file', required = False )
 | 
| 
 | 
   101     parser.add_argument( '--extraStrelkaArguments', help='path to tumor bam file', required = False )
 | 
| 
 | 
   102     parser.add_argument( '--isSkipDepthFilters', help='path to tumor bam file', required = False )
 | 
| 
 | 
   103     parser.add_argument( '--maxInputDepth', help='path to tumor bam file', required = False )
 | 
| 
 | 
   104     args = parser.parse_args()
 | 
| 
 | 
   105 
 | 
| 
 | 
   106     #verifying eveything's ok
 | 
| 
 | 
   107     if not os.path.isfile(config_script):
 | 
| 
 | 
   108     	sys.exit("ERROR: The strelka workflow must be built prior to running. See installation instructions in '$root_dir/README'")
 | 
| 
 | 
   109     print("configuring...", file=sys.stdout)
 | 
| 
 | 
   110     if os.path.exists(analysis_dir):
 | 
| 
 | 
   111 	sys.exit("'" + analysis_dir + "' already exist, if you are executing this tool from galaxy it should not happen")
 | 
| 
 | 
   112     
 | 
| 
 | 
   113 
 | 
| 
 | 
   114     # creating index if needed
 | 
| 
 | 
   115     os.environ['PATH']= root_dir + "/opt/samtools:" + os.environ['PATH']
 | 
| 
 | 
   116     bam_filenames = [ args.tumorBam, args.normalBam ]
 | 
| 
 | 
   117     index_bam_files( bam_filenames, tmp_dir )
 | 
| 
 | 
   118     fasta_files = [ args.refFile ]
 | 
| 
 | 
   119     index_fasta_files( fasta_files, tmp_dir )
 | 
| 
 | 
   120     
 | 
| 
 | 
   121     #creating config file if needed
 | 
| 
 | 
   122     if args.configFile == "Custom":
 | 
| 
 | 
   123     	_create_config(vars(args), config_ini)
 | 
| 
 | 
   124     elif args.configFile == "Default":
 | 
| 
 | 
   125         cmdbash="cp %s %s" % (root_dir + "/strelka_config.sample", config_ini)
 | 
| 
 | 
   126         my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed")
 | 
| 
 | 
   127     else:
 | 
| 
 | 
   128     	if not os.path.exists(args.configFile):
 | 
| 
 | 
   129 	     print( "The path to your configuration File seems to be wrong, use another one or custom option", file=sys.stderr)
 | 
| 
 | 
   130     	cmdbash="cp %s %s" % (args.configFile, config_ini)
 | 
| 
 | 
   131         my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed")
 | 
| 
 | 
   132 
 | 
| 
 | 
   133 
 | 
| 
 | 
   134 
 | 
| 
 | 
   135 
 | 
| 
 | 
   136     #configuration of workflow
 | 
| 
 | 
   137     cmd="%s --tumor=%s --normal=%s --ref=%s --config=%s --output-dir=%s" % (config_script, args.tumorBam, args.normalBam, args.refFile, config_ini, analysis_dir)
 | 
| 
 | 
   138     print( "**** Starting configuration.")
 | 
| 
 | 
   139     print( "**** Configuration cmd: '" + cmd + "'")
 | 
| 
 | 
   140     my_Popen( cmd, "cinfugation_stderr", tmp_dir, "Error during configuration !")
 | 
| 
 | 
   141     print("completed configuration")
 | 
| 
 | 
   142     
 | 
| 
 | 
   143     #run the workflow !
 | 
| 
 | 
   144     cmd="make -C " + analysis_dir
 | 
| 
 | 
   145     print("**** starting workflow.")
 | 
| 
 | 
   146     print("**** workflow cmd: '" + cmd + "'")
 | 
| 
 | 
   147     my_Popen( cmd, "workflow_stderr", tmp_dir, "Error during workflow execution !")   
 | 
| 
 | 
   148     print("**** completed workflow execution")
 | 
| 
 | 
   149 
 | 
| 
 | 
   150 
 | 
| 
 | 
   151 
 | 
| 
 | 
   152 
 | 
| 
 | 
   153 
 | 
| 
 | 
   154 
 | 
| 
 | 
   155 
 | 
| 
 | 
   156 
 | 
| 
 | 
   157 
 | 
| 
 | 
   158 
 | 
| 
 | 
   159 
 | 
| 
 | 
   160 
 | 
| 
 | 
   161 
 | 
| 
 | 
   162 
 | 
| 
 | 
   163 #bam_filenames = []
 | 
| 
 | 
   164 #    if options.datasets:
 | 
| 
 | 
   165 #        for ( dataset_arg, filename, galaxy_ext, prefix ) in options.datasets:
 | 
| 
 | 
   166 #            gatk_filename = filename_from_galaxy( filename, galaxy_ext, target_dir = tmp_dir, prefix = prefix )#return the link to the dataset that has been created in the function
 | 
| 
 | 
   167 #            if dataset_arg:
 | 
| 
 | 
   168 #                cmd = '%s %s "%s"' % ( cmd, gatk_filetype_argument_substitution( dataset_arg, galaxy_ext ), gatk_filename )
 | 
| 
 | 
   169 #            if galaxy_ext == "bam":
 | 
| 
 | 
   170 #                bam_filenames.append( gatk_filename )
 | 
| 
 | 
   171 #    #set up stdout and stderr output options
 | 
| 
 | 
   172 #    stdout = open_file_from_option( options.stdout, mode = 'wb' )
 | 
| 
 | 
   173 #    stderr = open_file_from_option( options.stderr, mode = 'wb' )
 | 
| 
 | 
   174 #    #if no stderr file is specified, we'll use our own
 | 
| 
 | 
   175 #    if stderr is None:
 | 
| 
 | 
   176 #        stderr = tempfile.NamedTemporaryFile( prefix="strelka-stderr-", dir=tmp_dir )
 | 
| 
 | 
   177 #    
 | 
| 
 | 
   178 #    proc = subprocess.Popen( args=cmd, stdout=stdout, stderr=stderr, shell=True, cwd=tmp_dir )
 | 
| 
 | 
   179 #    return_code = proc.wait()
 | 
| 
 | 
   180 #    
 | 
| 
 | 
   181 #    if return_code:
 | 
| 
 | 
   182 #        stderr_target = sys.stderr
 | 
| 
 | 
   183 #    else:
 | 
| 
 | 
   184 #        stderr_target = sys.stdout
 | 
| 
 | 
   185 #    stderr.flush()
 | 
| 
 | 
   186 #    stderr.seek(0)
 | 
| 
 | 
   187 #    while True:
 | 
| 
 | 
   188 #        chunk = stderr.read( CHUNK_SIZE )
 | 
| 
 | 
   189 #        if chunk:
 | 
| 
 | 
   190 #            stderr_target.write( chunk )
 | 
| 
 | 
   191 #        else:
 | 
| 
 | 
   192 #            break
 | 
| 
 | 
   193 #    stderr.close()
 | 
| 
 | 
   194 #    #generate html reports
 | 
| 
 | 
   195 #    if options.html_report_from_directory:
 | 
| 
 | 
   196 #        for ( html_filename, html_dir ) in options.html_report_from_directory:
 | 
| 
 | 
   197 #            html_report_from_directory( open( html_filename, 'wb' ), html_dir )
 | 
| 
 | 
   198 #    
 | 
| 
 | 
   199 #    cleanup_before_exit( tmp_dir )
 | 
| 
 | 
   200 
 | 
| 
 | 
   201 if __name__=="__main__": __main__()
 |