Mercurial > repos > mini > strelka
comparison strelka_wrapper.py @ 6:87568e5a7d4f
Testing strelka version 0.0.1
| author | mini |
|---|---|
| date | Fri, 26 Sep 2014 13:24:13 +0200 |
| parents | |
| children | 3e8541ece3c7 |
comparison
equal
deleted
inserted
replaced
| 5:07cbbd662111 | 6:87568e5a7d4f |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 #Dan Blankenberg | |
| 3 | |
| 4 """ | |
| 5 A wrapper script for configuring and running the Strelka workflow on a tumor/normal BAM pair. | |
| 6 """ | |
| 7 | |
| 8 from __future__ import print_function | |
| 9 import sys, argparse, os, tempfile, subprocess, shutil | |
| 10 from binascii import unhexlify | |
| 11 from string import Template | |
| 12 from galaxy import eggs | |
| 13 #import pkg_resources; pkg_resources.require( "bx-python" ) | |
| 14 | |
| 15 #GALAXY_EXT_TO_GATK_EXT = { 'gatk_interval':'intervals', 'bam_index':'bam.bai', 'gatk_dbsnp':'dbSNP', 'picard_interval_list':'interval_list' } #items not listed here will use the galaxy extension as-is | |
| 16 #GALAXY_EXT_TO_GATK_FILE_TYPE = GALAXY_EXT_TO_GATK_EXT #for now, these are the same, but could be different if needed | |
| 17 #DEFAULT_GATK_PREFIX = "gatk_file" | |
| 18 #CHUNK_SIZE = 2**20 #1mb | |
| 19 # | |
| 20 # | |
def cleanup_before_exit( tmp_dir ):
    """Delete the temporary directory tree when there is one to delete.

    A falsy ``tmp_dir`` (``None`` or an empty string) and a path that does
    not exist are both ignored.
    """
    if not tmp_dir:
        return
    if not os.path.exists( tmp_dir ):
        return
    shutil.rmtree( tmp_dir )
| 24 | |
def _create_config(args, config_path):
    """Write a ``[user]`` ini section built from the given options.

    Args:
        args: mapping of option name to value (``__main__`` passes
            ``vars()`` of the parsed argparse namespace).
        config_path: path of the ini file to write; an existing file is
            overwritten.

    Options that name the input files or the config mode, and options whose
    value is ``None``, are not written.
    """
    # These describe the inputs of the wrapper itself, not config settings.
    skipped = ("tumorBam", "normalBam", "refFile", "configFile")
    # "with" guarantees the file is closed even if a write fails.
    with open(config_path, "w") as conf_file:
        conf_file.write("[user]\n")
        # Sorted so the generated file does not depend on dict ordering.
        for option in sorted(args):
            if option not in skipped and args[option] is not None:
                conf_file.write("%s=%s\n" % (option, args[option]))
| 32 | |
def my_Popen(cmd, prefix_for_stderr_name, tmp_dir, msg_error):
    """Run a shell command and raise if it exits with a non-zero status.

    Args:
        cmd: command line, executed through the shell (``shell=True``).
        prefix_for_stderr_name: prefix of the temporary file that captures
            the command's stderr.
        tmp_dir: directory handed to ``cleanup_before_exit`` on failure.
        msg_error: message of the exception raised on failure.

    Raises:
        Exception: with ``msg_error`` when the command's return code is
            non-zero. The captured stderr is copied to ``sys.stderr`` and
            ``tmp_dir`` is cleaned up first.
    """
    # mkstemp creates the file and keeps it; taking only the .name of a
    # NamedTemporaryFile lets the file be deleted and the name reused.
    stderr_fd, stderr_name = tempfile.mkstemp( prefix = prefix_for_stderr_name )
    try:
        stderr_file = os.fdopen( stderr_fd, 'wb' )
        try:
            proc = subprocess.Popen( args=cmd, shell=True, stderr=stderr_file )
            return_code = proc.wait()
        finally:
            stderr_file.close()
        if return_code:
            with open( stderr_name ) as captured:
                for line in captured:
                    # Lines keep their own newline; write() avoids doubling it.
                    sys.stderr.write( line )
    finally:
        os.unlink( stderr_name ) #clean up
    if return_code:
        cleanup_before_exit( tmp_dir )
        raise Exception( msg_error )
| 45 | |
def index_bam_files( bam_filenames, tmp_dir ):
    """Create a ``.bai`` index for every BAM file that does not have one.

    Args:
        bam_filenames: iterable of BAM file paths.
        tmp_dir: directory passed on to ``my_Popen``.

    The index is looked for, and created, at ``<bam path>.bai``. Indexing
    runs ``samtools index`` through ``my_Popen``; any exception it raises
    propagates to the caller.
    """
    for bam_filename in bam_filenames:
        bam_index_filename = "%s.bai" % bam_filename
        print("bam_filename is: %s bam_index_filename is: %s test is: %s" % ( bam_filename, bam_index_filename, os.path.exists( bam_index_filename ) ))
        if not os.path.exists( bam_index_filename ):
            #need to index this bam file
            command = 'samtools index %s %s' % ( bam_filename, bam_index_filename )
            my_Popen( command, "bam_index_stderr", tmp_dir, "Error during indexation of bam file :" + bam_filename)
| 54 | |
def index_fasta_files( fasta_filenames, tmp_dir ):
    """Create a ``.fai`` index for every FASTA file that does not have one.

    Args:
        fasta_filenames: iterable of FASTA file paths.
        tmp_dir: directory passed on to ``my_Popen``.

    The index is looked for at ``<fasta path>.fai``. Indexing runs
    ``samtools faidx`` through ``my_Popen``; any exception it raises
    propagates to the caller.
    """
    for fasta_filename in fasta_filenames:
        fasta_index_filename = "%s.fai" % fasta_filename
        print("fasta_filename is: %s fasta_index_filename is: %s test is: %s" % ( fasta_filename, fasta_index_filename, os.path.exists( fasta_index_filename ) ))
        if not os.path.exists( fasta_index_filename ):
            #need to index this fasta file
            # "samtools faidx" reads any extra positional argument as a
            # region to extract, not as an output name, so only the FASTA
            # path is passed; the index is written to <fasta path>.fai.
            command = 'samtools faidx %s' % fasta_filename
            my_Popen( command, "fasta_index_stderr", tmp_dir, "Error during indexation of fasta file :" + fasta_filename)
| 63 | |
def __main__():
    """Configure and run the Strelka workflow from the command line.

    Steps, in order:
      1. parse the command line and check that the four mandatory inputs
         (tumor BAM, normal BAM, reference, config mode) were given;
      2. check that the configuration script exists and that the analysis
         directory does not exist yet;
      3. index the BAM and FASTA inputs when their index is missing;
      4. produce ``config.ini`` in the temporary directory, either from the
         command-line options (``Custom``), from the sample file under the
         install root (``Default``) or by copying the given file;
      5. run the configuration script, then ``make -C`` on the analysis
         directory.

    Raises:
        SystemExit: on missing arguments, a missing configuration script,
            an already existing analysis directory or a missing
            user-supplied config file.
        Exception: propagated from ``my_Popen`` when an external command
            fails.
    """
    #NOTE: optparse is deprecated, use argparse instead;
    #tempfile.mktemp is deprecated, use tempfile.mkdtemp instead

    root_dir = "/home/galaxyusr/data/galaxy_dist/tools/strelka2"
    job_dir = os.getcwd()
    analysis_dir = job_dir + "/StrelkaAnalysis"
    config_script = root_dir + "/configureStrelkaWorkflow.pl"
    tmp_dir = "tmp" #tempfile.mkdtemp( prefix='tmp-strelkaAnalysis-' )
    config_ini = "%s/config.ini" % (tmp_dir)

    print("root_dir: " + root_dir + "\njob_dir :" + job_dir + "\nanalysis_dir :" + analysis_dir + "\nconfig_script :" + config_script + "\ntmp_dir :" + tmp_dir + "\nconfig_ini :" + config_ini)

    #manage parsing
    parser = argparse.ArgumentParser()
    parser.add_argument( '-t', '--tumorBam', help='path to the tumor bam file', required = False )
    parser.add_argument( '-n', '--normalBam', help='path to the normal bam file', required = False )
    parser.add_argument( '-r', '--refFile', help='path to the reference fasta file', required = False )
    parser.add_argument( '-c', '--configFile', help='"Custom", "Default" or the path to an existing config file', required = False )
    # Every option below is copied verbatim into the [user] section of the
    # generated config file when --configFile is "Custom".
    config_options = [
        'depthFilterMultiple', 'snvMaxFilteredBasecallFrac', 'snvMaxSpanningDeletionFrac',
        'indelMaxRefRepeat', 'indelMaxWindowFilteredBasecallFrac', 'indelMaxIntHpolLength',
        'ssnvPrior', 'sindelPrior', 'ssnvNoise', 'sindelNoise', 'ssnvNoiseStrandBiasFrac',
        'minTier1Mapq', 'minTier2Mapq', 'ssnvQuality_LowerBound', 'sindelQuality_LowerBound',
        'isWriteRealignedBam', 'binSize', 'extraStrelkaArguments', 'isSkipDepthFilters',
        'maxInputDepth',
    ]
    for option in config_options:
        parser.add_argument( '--' + option, help="value of '" + option + "' in a Custom config file", required = False )
    args = parser.parse_args()

    # These four are used unconditionally below; report a missing one here
    # instead of failing later with a TypeError on None.
    mandatory = ( 'tumorBam', 'normalBam', 'refFile', 'configFile' )
    missing = [ '--' + name for name in mandatory if getattr( args, name ) is None ]
    if missing:
        parser.error( "missing required argument(s): " + ", ".join( missing ) )

    #verifying everything's ok
    if not os.path.isfile(config_script):
        sys.exit("ERROR: The strelka workflow must be built prior to running. See installation instructions in '%s/README'" % root_dir)
    print("configuring...", file=sys.stdout)
    if os.path.exists(analysis_dir):
        sys.exit("'" + analysis_dir + "' already exist, if you are executing this tool from galaxy it should not happen")

    # creating index if needed
    os.environ['PATH']= root_dir + "/opt/samtools:" + os.environ['PATH']
    bam_filenames = [ args.tumorBam, args.normalBam ]
    index_bam_files( bam_filenames, tmp_dir )
    fasta_files = [ args.refFile ]
    index_fasta_files( fasta_files, tmp_dir )

    # config.ini is written inside tmp_dir, so the directory has to exist.
    if not os.path.isdir( tmp_dir ):
        os.makedirs( tmp_dir )

    #creating config file if needed
    if args.configFile == "Custom":
        _create_config(vars(args), config_ini)
    elif args.configFile == "Default":
        cmdbash="cp %s %s" % (root_dir + "/strelka_config.sample", config_ini)
        my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed")
    else:
        if not os.path.exists(args.configFile):
            # Stop here: copying a file that does not exist can only fail.
            cleanup_before_exit( tmp_dir )
            sys.exit( "The path to your configuration File seems to be wrong, use another one or custom option" )
        cmdbash="cp %s %s" % (args.configFile, config_ini)
        my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of your config file")

    #configuration of workflow
    cmd="%s --tumor=%s --normal=%s --ref=%s --config=%s --output-dir=%s" % (config_script, args.tumorBam, args.normalBam, args.refFile, config_ini, analysis_dir)
    print( "**** Starting configuration.")
    print( "**** Configuration cmd: '" + cmd + "'")
    my_Popen( cmd, "cinfugation_stderr", tmp_dir, "Error during configuration !")
    print("completed configuration")

    #run the workflow !
    cmd="make -C " + analysis_dir
    print("**** starting workflow.")
    print("**** workflow cmd: '" + cmd + "'")
    my_Popen( cmd, "workflow_stderr", tmp_dir, "Error during workflow execution !")
    print("**** completed workflow execution")
| 149 | |
| 150 | |
| 151 | |
| 152 | |
| 153 | |
| 154 | |
| 155 | |
| 156 | |
| 157 | |
| 158 | |
| 159 | |
| 160 | |
| 161 | |
| 162 | |
| 163 #bam_filenames = [] | |
| 164 # if options.datasets: | |
| 165 # for ( dataset_arg, filename, galaxy_ext, prefix ) in options.datasets: | |
| 166 # gatk_filename = filename_from_galaxy( filename, galaxy_ext, target_dir = tmp_dir, prefix = prefix )#return the link to the dataset that has been created in the function | |
| 167 # if dataset_arg: | |
| 168 # cmd = '%s %s "%s"' % ( cmd, gatk_filetype_argument_substitution( dataset_arg, galaxy_ext ), gatk_filename ) | |
| 169 # if galaxy_ext == "bam": | |
| 170 # bam_filenames.append( gatk_filename ) | |
| 171 # #set up stdout and stderr output options | |
| 172 # stdout = open_file_from_option( options.stdout, mode = 'wb' ) | |
| 173 # stderr = open_file_from_option( options.stderr, mode = 'wb' ) | |
| 174 # #if no stderr file is specified, we'll use our own | |
| 175 # if stderr is None: | |
| 176 # stderr = tempfile.NamedTemporaryFile( prefix="strelka-stderr-", dir=tmp_dir ) | |
| 177 # | |
| 178 # proc = subprocess.Popen( args=cmd, stdout=stdout, stderr=stderr, shell=True, cwd=tmp_dir ) | |
| 179 # return_code = proc.wait() | |
| 180 # | |
| 181 # if return_code: | |
| 182 # stderr_target = sys.stderr | |
| 183 # else: | |
| 184 # stderr_target = sys.stdout | |
| 185 # stderr.flush() | |
| 186 # stderr.seek(0) | |
| 187 # while True: | |
| 188 # chunk = stderr.read( CHUNK_SIZE ) | |
| 189 # if chunk: | |
| 190 # stderr_target.write( chunk ) | |
| 191 # else: | |
| 192 # break | |
| 193 # stderr.close() | |
| 194 # #generate html reports | |
| 195 # if options.html_report_from_directory: | |
| 196 # for ( html_filename, html_dir ) in options.html_report_from_directory: | |
| 197 # html_report_from_directory( open( html_filename, 'wb' ), html_dir ) | |
| 198 # | |
| 199 # cleanup_before_exit( tmp_dir ) | |
| 200 | |
# Run the wrapper only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()
