Mercurial > repos > mini > strelka
diff strelka_wrapper.py @ 6:87568e5a7d4f
Testing strelka version 0.0.1
author | mini |
---|---|
date | Fri, 26 Sep 2014 13:24:13 +0200 |
parents | |
children | 3e8541ece3c7 |
line wrap: on
line diff
#!/usr/bin/env python
#Dan Blankenberg

"""
A wrapper script that configures and runs the Strelka workflow on a
tumor/normal BAM pair.

Steps performed by ``__main__``:

1. build missing ``.bai`` / ``.fai`` indexes with samtools,
2. write or copy the Strelka configuration file,
3. run ``configureStrelkaWorkflow.pl``,
4. run ``make`` inside the generated analysis directory.
"""

from __future__ import print_function

import argparse
import os
import shutil
import subprocess
import sys
import tempfile
from binascii import unhexlify
from string import Template

try:
    # Legacy Galaxy egg bootstrap.  Nothing below uses it directly, so the
    # script stays importable and runnable outside a Galaxy checkout.
    from galaxy import eggs
except ImportError:
    eggs = None

# Installation directory used when the STRELKA_ROOT_DIR environment variable
# is not set.
DEFAULT_ROOT_DIR = "/home/galaxyusr/data/galaxy_dist/tools/strelka2"

# Command-line options that describe the inputs of the wrapper itself and
# therefore must not be written into the Strelka configuration file.
_NON_CONFIG_OPTIONS = ("tumorBam", "normalBam", "refFile", "configFile")

# Options forwarded verbatim as "name=value" lines of a "Custom" configuration.
_STRELKA_CONFIG_OPTIONS = (
    "depthFilterMultiple",
    "snvMaxFilteredBasecallFrac",
    "snvMaxSpanningDeletionFrac",
    "indelMaxRefRepeat",
    "indelMaxWindowFilteredBasecallFrac",
    "indelMaxIntHpolLength",
    "ssnvPrior",
    "sindelPrior",
    "ssnvNoise",
    "sindelNoise",
    "ssnvNoiseStrandBiasFrac",
    "minTier1Mapq",
    "minTier2Mapq",
    "ssnvQuality_LowerBound",
    "sindelQuality_LowerBound",
    "isWriteRealignedBam",
    "binSize",
    "extraStrelkaArguments",
    "isSkipDepthFilters",
    "maxInputDepth",
)


def cleanup_before_exit( tmp_dir ):
    """Remove ``tmp_dir`` and everything below it, if it is set and exists."""
    if tmp_dir and os.path.exists( tmp_dir ):
        shutil.rmtree( tmp_dir )


def _create_config(args, config_path):
    """Write a Strelka configuration file from parsed command-line options.

    ``args`` is a mapping of option name to value (e.g. ``vars(namespace)``).
    Every option that is set (not ``None``) and is not one of the wrapper's
    own input options is written as ``name=value`` under a ``[user]`` section
    of ``config_path``.
    """
    # The context manager closes the file even if a write fails.
    with open(config_path, "w") as conf_file:
        conf_file.write("[user]\n")
        for option, value in args.items():
            if option not in _NON_CONFIG_OPTIONS and value is not None:
                conf_file.write("%s=%s\n" % (option, value))


def my_Popen(cmd, prefix_for_stderr_name, tmp_dir, msg_error):
    """Run ``cmd`` and wait for it; raise if it fails.

    ``cmd`` may be a shell command string (run with ``shell=True``, as
    before) or an argv list/tuple (run without a shell, so arguments that
    contain spaces or shell metacharacters are passed through untouched).

    The command's stderr is captured in a temporary file whose name starts
    with ``prefix_for_stderr_name``.  On a non-zero exit status, or if the
    program cannot be started, the captured stderr is replayed on
    ``sys.stderr``, ``tmp_dir`` is removed and ``Exception(msg_error)`` is
    raised.  Returns ``None`` on success.
    """
    use_shell = not isinstance(cmd, (list, tuple))
    failed = False
    # Keep the temporary file object open for the whole run instead of
    # reopening it by name: no name race, no leaked handles, and the file is
    # deleted automatically when the block exits.
    with tempfile.NamedTemporaryFile( prefix=prefix_for_stderr_name, mode="w+" ) as stderr_file:
        try:
            proc = subprocess.Popen( args=cmd, shell=use_shell, stderr=stderr_file )
            failed = bool( proc.wait() )
        except OSError as err:
            # Without a shell, a missing or non-executable program raises here
            # instead of producing a non-zero exit status.
            sys.stderr.write( "%s\n" % err )
            failed = True
        if failed:
            stderr_file.seek( 0 )
            for line in stderr_file:
                # Lines already end with a newline; write them unchanged.
                sys.stderr.write( line )
    if failed:
        cleanup_before_exit( tmp_dir )
        raise Exception( msg_error )


def index_bam_files( bam_filenames, tmp_dir ):
    """Create a ``.bai`` index next to every BAM file that lacks one.

    Runs ``samtools index`` through ``my_Popen``, so a failure removes
    ``tmp_dir`` and raises ``Exception``.
    """
    for bam_filename in bam_filenames:
        bam_index_filename = "%s.bai" % bam_filename
        index_exists = os.path.exists( bam_index_filename )
        print("bam_filename is: %s bam_index_filename is: %s test is: %s"
              % ( bam_filename, bam_index_filename, index_exists ))
        if not index_exists:
            #need to index this bam file
            command = [ "samtools", "index", bam_filename, bam_index_filename ]
            my_Popen( command, "bam_index_stderr", tmp_dir, "Error during indexation of bam file :%s" % bam_filename )


def index_fasta_files( fasta_filenames, tmp_dir ):
    """Create a ``.fai`` index next to every FASTA file that lacks one.

    Runs ``samtools faidx`` through ``my_Popen``, so a failure removes
    ``tmp_dir`` and raises ``Exception``.
    """
    for fasta_filename in fasta_filenames:
        fasta_index_filename = "%s.fai" % fasta_filename
        index_exists = os.path.exists( fasta_index_filename )
        print("fasta_filename is: %s fasta_index_filename is: %s test is: %s"
              % ( fasta_filename, fasta_index_filename, index_exists ))
        if not index_exists:
            #need to index this fasta file
            # Any extra positional argument of "samtools faidx" is a region to
            # extract, not an output name, so only the FASTA path is passed;
            # samtools writes the index to the FASTA path plus ".fai".
            command = [ "samtools", "faidx", fasta_filename ]
            my_Popen( command, "fasta_index_stderr", tmp_dir, "Error during indexation of fasta file :%s" % fasta_filename )


def _build_parser():
    """Return the argument parser for the wrapper's command line."""
    parser = argparse.ArgumentParser(
        description="Configure and run the Strelka workflow on a tumor/normal BAM pair." )
    parser.add_argument( '-t', '--tumorBam', help='path to tumor bam file', required=True )
    parser.add_argument( '-n', '--normalBam', help='path to normal bam file', required=True )
    parser.add_argument( '-r', '--refFile', help='path to reference fasta file', required=True )
    parser.add_argument( '-c', '--configFile', default="Default",
                         help='"Custom" to build the configuration from the options below, '
                              '"Default" to use the sample configuration shipped with strelka, '
                              'or the path to an existing configuration file (default: Default)' )
    for option in _STRELKA_CONFIG_OPTIONS:
        parser.add_argument( '--' + option, required=False,
                             help="value written as '%s' in the [user] section of a Custom configuration" % option )
    return parser


def _prepare_config(args, root_dir, config_ini, tmp_dir):
    """Create ``config_ini`` according to ``args.configFile``."""
    if args.configFile == "Custom":
        _create_config( vars(args), config_ini )
        return
    if args.configFile == "Default":
        source = root_dir + "/strelka_config.sample"
        msg_error = "Error during the copy of default config file, maybe it was removed"
    else:
        source = args.configFile
        msg_error = "Error during the copy of the configuration file '%s'" % source
        if not os.path.exists( source ):
            # Stop here instead of letting the copy fail with a less helpful message.
            cleanup_before_exit( tmp_dir )
            sys.exit( "The path to your configuration File seems to be wrong, use another one or custom option" )
    try:
        shutil.copyfile( source, config_ini )
    except ( IOError, OSError, shutil.Error ):
        cleanup_before_exit( tmp_dir )
        raise Exception( msg_error )


def __main__():
    """Entry point: index the inputs, configure Strelka and run the workflow.

    The Strelka installation directory is read from the STRELKA_ROOT_DIR
    environment variable and falls back to ``DEFAULT_ROOT_DIR``.  Exits via
    ``sys.exit`` on a bad set-up and raises ``Exception`` when an external
    command fails.
    """
    # Command line is parsed with argparse (optparse is deprecated); scratch
    # files live in a "tmp" directory below the current working directory.
    root_dir = os.environ.get( "STRELKA_ROOT_DIR", DEFAULT_ROOT_DIR )
    job_dir = os.getcwd()
    analysis_dir = job_dir + "/StrelkaAnalysis"
    config_script = root_dir + "/configureStrelkaWorkflow.pl"
    tmp_dir = "tmp"
    config_ini = "%s/config.ini" % (tmp_dir)

    print("root_dir: " + root_dir + "\njob_dir :" + job_dir + "\nanalysis_dir :" + analysis_dir + "\nconfig_script :" + config_script + "\ntmp_dir :" + tmp_dir + "\nconfig_ini :" + config_ini)

    #manage parsing
    args = _build_parser().parse_args()

    #verifying everything's ok
    if not os.path.isfile(config_script):
        sys.exit("ERROR: The strelka workflow must be built prior to running. See installation instructions in '%s/README'" % root_dir)
    print("configuring...", file=sys.stdout)
    if os.path.exists(analysis_dir):
        sys.exit("'" + analysis_dir + "' already exist, if you are executing this tool from galaxy it should not happen")

    # The configuration file is written below tmp_dir, so it has to exist.
    if not os.path.isdir( tmp_dir ):
        os.makedirs( tmp_dir )

    # creating index if needed
    samtools_dir = root_dir + "/opt/samtools"
    current_path = os.environ.get( 'PATH' )
    # Avoid a trailing separator (which would mean "current directory") when PATH is unset.
    os.environ['PATH'] = ( samtools_dir + os.pathsep + current_path ) if current_path else samtools_dir
    index_bam_files( [ args.tumorBam, args.normalBam ], tmp_dir )
    index_fasta_files( [ args.refFile ], tmp_dir )

    #creating config file if needed
    _prepare_config( args, root_dir, config_ini, tmp_dir )

    #configuration of workflow
    cmd = [
        config_script,
        "--tumor=%s" % args.tumorBam,
        "--normal=%s" % args.normalBam,
        "--ref=%s" % args.refFile,
        "--config=%s" % config_ini,
        "--output-dir=%s" % analysis_dir,
    ]
    print( "**** Starting configuration.")
    print( "**** Configuration cmd: '" + " ".join(cmd) + "'")
    my_Popen( cmd, "cinfugation_stderr", tmp_dir, "Error during configuration !")
    print("completed configuration")

    #run the workflow !
    cmd = [ "make", "-C", analysis_dir ]
    print("**** starting workflow.")
    print("**** workflow cmd: '" + " ".join(cmd) + "'")
    my_Popen( cmd, "workflow_stderr", tmp_dir, "Error during workflow execution !")
    print("**** completed workflow execution")


if __name__=="__main__": __main__()