6
|
1 #!/usr/bin/env python
|
|
2 #Dan Blankenberg
|
|
3
|
|
4 """
|
|
5 A wrapper script for configuring and running the Strelka workflow (configureStrelkaWorkflow.pl followed by make).
|
|
6 """
|
|
7
|
|
8 from __future__ import print_function
|
|
9 import sys, argparse, os, tempfile, subprocess, shutil
|
|
10 from binascii import unhexlify
|
|
11 from string import Template
|
|
12 from galaxy import eggs
|
|
13 #import pkg_resources; pkg_resources.require( "bx-python" )
|
|
14
|
|
15 #GALAXY_EXT_TO_GATK_EXT = { 'gatk_interval':'intervals', 'bam_index':'bam.bai', 'gatk_dbsnp':'dbSNP', 'picard_interval_list':'interval_list' } #items not listed here will use the galaxy extension as-is
|
|
16 #GALAXY_EXT_TO_GATK_FILE_TYPE = GALAXY_EXT_TO_GATK_EXT #for now, these are the same, but could be different if needed
|
|
17 #DEFAULT_GATK_PREFIX = "gatk_file"
|
|
18 #CHUNK_SIZE = 2**20 #1mb
|
|
19 #
|
|
20 #
|
|
def cleanup_before_exit( tmp_dir ):
    """Delete the temporary directory tree ``tmp_dir``.

    Nothing happens when ``tmp_dir`` is empty/None or when the path does
    not exist on disk.
    """
    if not tmp_dir:
        return
    if not os.path.exists( tmp_dir ):
        return
    shutil.rmtree( tmp_dir )
|
|
24
|
|
def _create_config(args, config_path):
    """Write the user-supplied options to an ini-style config file.

    The file starts with a ``[user]`` section header followed by one
    ``option=value`` line per entry of ``args``.

    Args:
        args: mapping of option name to value (the caller passes
            ``vars()`` of the parsed argparse namespace).
        config_path: path of the config file to (over)write.

    Entries whose value is None are skipped, as are the options that only
    drive this wrapper itself (input paths, config mode, script path).
    """
    # Options consumed by the wrapper; they must not end up in the config.
    wrapper_only = ("tumorBam", "normalBam", "refFile", "configFile", "scriptPath")
    # The context manager closes the file even if a write fails.
    with open(config_path, "w") as conf_file:
        conf_file.write("[user]\n")
        for option in args:
            value = args[option]
            if option not in wrapper_only and value is not None:
                conf_file.write("%s=%s\n" % (option, value))
|
|
32
|
|
def my_Popen(cmd, prefix_for_stderr_name, tmp_dir, msg_error):
    """Run ``cmd`` through the shell and raise if it exits non-zero.

    The child's stderr is captured in an anonymous temporary file. On
    failure the captured text is relayed verbatim to ``sys.stderr``, the
    working directory is removed via ``cleanup_before_exit`` and an
    exception is raised. On success the captured stderr is discarded.

    Args:
        cmd: shell command line (a single string, run with ``shell=True``).
        prefix_for_stderr_name: name prefix for the temporary capture file.
        tmp_dir: directory handed to ``cleanup_before_exit`` on failure.
        msg_error: message of the exception raised on failure.

    Raises:
        Exception: carrying ``msg_error`` when the command fails.
    """
    # NOTE(review): callers build ``cmd`` by string interpolation and it is
    # executed by the shell, so paths containing spaces or shell
    # metacharacters are not safe here.
    #
    # The temporary file is kept open for the whole lifetime of the child,
    # so there is no window in which its name could be reused, and the
    # ``with`` block closes and removes it on every path.
    with tempfile.TemporaryFile( mode='w+', prefix=prefix_for_stderr_name ) as stderr_file:
        return_code = subprocess.Popen( args=cmd, shell=True, stderr=stderr_file ).wait()
        if return_code:
            stderr_file.seek( 0 )
            for line in stderr_file:
                # Lines keep their own newline; write() avoids doubling it.
                sys.stderr.write( line )
    if return_code:
        cleanup_before_exit( tmp_dir )
        raise Exception( msg_error )
|
|
45
|
|
def index_bam_files( bam_filenames, tmp_dir ):
    """Create a ``<bam>.bai`` index for every BAM file that lacks one.

    Args:
        bam_filenames: iterable of BAM file paths.
        tmp_dir: working directory, passed on to ``my_Popen`` so it can be
            cleaned up if indexing fails.

    Files whose ``.bai`` index already exists are left untouched. Missing
    indexes are built with ``samtools index``; a failure is reported by
    ``my_Popen``.
    """
    for bam_filename in bam_filenames:
        bam_index_filename = "%s.bai" % bam_filename
        index_exists = os.path.exists( bam_index_filename )
        print("bam_filename is: " + bam_filename + " bam_index_filename is: " + bam_index_filename + " test is: %s" % index_exists)
        if index_exists:
            continue
        # No index next to this bam file yet: build one.
        command = 'samtools index %s %s' % ( bam_filename, bam_index_filename )
        my_Popen( command, "bam_index_stderr", tmp_dir, "Error during indexation of bam file :" + bam_filename)
|
|
54
|
|
def index_fasta_files( fasta_filenames, tmp_dir ):
    """Create a ``<fasta>.fai`` index for every FASTA file that lacks one.

    Args:
        fasta_filenames: iterable of FASTA file paths.
        tmp_dir: working directory, passed on to ``my_Popen`` so it can be
            cleaned up if indexing fails.

    Files whose ``.fai`` index already exists are left untouched. Missing
    indexes are built with ``samtools faidx``; a failure is reported by
    ``my_Popen``.
    """
    for fasta_filename in fasta_filenames:
        fasta_index_filename = "%s.fai" % fasta_filename
        index_exists = os.path.exists( fasta_index_filename )
        print("fasta_filename is: " + fasta_filename + " fasta_index_filename is: " + fasta_index_filename + " test is: %s" % index_exists)
        if index_exists:
            continue
        # No index next to this fasta file yet: build one.
        # ``samtools faidx`` writes <fasta>.fai by itself; any further
        # positional argument is interpreted as a region to extract, so the
        # index file name must not be passed on the command line.
        command = 'samtools faidx %s' % fasta_filename
        my_Popen( command, "fasta_index_stderr", tmp_dir, "Error during indexation of fasta file :" + fasta_filename)
|
|
63
|
|
def __main__():
    """Configure and run the Strelka workflow from the command line.

    Steps, in order:
      1. parse the command line and check the mandatory inputs;
      2. locate the Strelka installation and check that the analysis
         directory does not exist yet;
      3. index the BAM and FASTA inputs when their indexes are missing;
      4. prepare ``tmp/config.ini`` (generated from the options, copied
         from the bundled sample, or copied from a user-supplied file);
      5. run ``configureStrelkaWorkflow.pl`` and then ``make`` in the
         analysis directory.

    Exits through ``sys.exit``/``parser.error`` on invalid input and lets
    the exception raised by ``my_Popen`` propagate when a step fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument( '-t', '--tumorBam', help='path to the tumor bam file', required = False )
    parser.add_argument( '-n', '--normalBam', help='path to the normal bam file', required = False )
    parser.add_argument( '-r', '--refFile', help='path to the reference fasta file', required = False )
    parser.add_argument( '-c', '--configFile', help="'Default', 'Custom', or the path to an existing config file", required = False )
    # Tuning options: they are only written to the generated config file
    # (see _create_config) when --configFile is 'Custom'. Declaration order
    # is kept because it determines the order of the namespace entries.
    strelka_options = (
        'depthFilterMultiple',
        'snvMaxFilteredBasecallFrac',
        'snvMaxSpanningDeletionFrac',
        'indelMaxRefRepeat',
        'indelMaxWindowFilteredBasecallFrac',
        'indelMaxIntHpolLength',
        'ssnvPrior',
        'sindelPrior',
        'ssnvNoise',
        'sindelNoise',
        'ssnvNoiseStrandBiasFrac',
        'minTier1Mapq',
        'minTier2Mapq',
        'ssnvQuality_LowerBound',
        'sindelQuality_LowerBound',
        'isWriteRealignedBam',
        'binSize',
        'extraStrelkaArguments',
        'isSkipDepthFilters',
        'maxInputDepth',
    )
    for option_name in strelka_options:
        parser.add_argument( '--' + option_name, help="value of '" + option_name + "' in the generated config file (used when --configFile is 'Custom')", required = False )
    parser.add_argument( '--scriptPath', help='directory containing configureStrelkaWorkflow.pl', required = False )
    args = parser.parse_args()

    # These inputs are used unconditionally below, so fail early with a
    # clear message instead of crashing later on a None value.
    mandatory = ( ( '--tumorBam', args.tumorBam ), ( '--normalBam', args.normalBam ), ( '--refFile', args.refFile ), ( '--configFile', args.configFile ) )
    missing = [ flag for flag, value in mandatory if value is None ]
    if missing:
        parser.error( "missing required argument(s): " + ", ".join( missing ) )

    # Strelka installation directory: taken from --scriptPath, falling back
    # to a module-level SCRIPT_PATH when one has been defined.
    root_dir = args.scriptPath
    if root_dir is None:
        root_dir = globals().get( "SCRIPT_PATH" )
    if root_dir is None:
        parser.error( "the strelka installation directory is unknown, use --scriptPath" )

    job_dir = os.getcwd()
    analysis_dir = job_dir + "/StrelkaAnalysis"
    config_script = root_dir + "/configureStrelkaWorkflow.pl"
    tmp_dir = "tmp" #tempfile.mkdtemp( prefix='tmp-strelkaAnalysis-' )
    config_ini = "%s/config.ini" % (tmp_dir)

    print("root_dir: " + root_dir + "\njob_dir :" + job_dir + "\nanalysis_dir :" + analysis_dir + "\nconfig_script :" + config_script + "\ntmp_dir :" + tmp_dir + "\nconfig_ini :" + config_ini)

    # verifying everything's ok
    if not os.path.isfile(config_script):
        sys.exit("ERROR: The strelka workflow must be built prior to running. See installation instructions in '%s/README'" % root_dir)
    print("configuring...", file=sys.stdout)
    if os.path.exists(analysis_dir):
        sys.exit("'" + analysis_dir + "' already exist, if you are executing this tool from galaxy it should not happen")

    # config.ini is written inside tmp_dir, so the directory has to exist.
    if not os.path.isdir( tmp_dir ):
        os.makedirs( tmp_dir )

    # creating index if needed (samtools bundled with strelka goes first in PATH)
    os.environ['PATH'] = root_dir + "/opt/samtools:" + os.environ.get( 'PATH', '' )
    bam_filenames = [ args.tumorBam, args.normalBam ]
    index_bam_files( bam_filenames, tmp_dir )
    fasta_files = [ args.refFile ]
    index_fasta_files( fasta_files, tmp_dir )

    # creating config file if needed
    if args.configFile == "Custom":
        _create_config(vars(args), config_ini)
    elif args.configFile == "Default":
        cmdbash = "cp %s %s" % (root_dir + "/strelka_config.sample", config_ini)
        my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed")
    else:
        if not os.path.exists(args.configFile):
            # Nothing to copy: stop here rather than letting cp fail.
            cleanup_before_exit( tmp_dir )
            sys.exit( "The path to your configuration File seems to be wrong, use another one or custom option" )
        cmdbash = "cp %s %s" % (args.configFile, config_ini)
        my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of your config file")

    # configuration of workflow
    cmd = "%s --tumor=%s --normal=%s --ref=%s --config=%s --output-dir=%s" % (config_script, args.tumorBam, args.normalBam, args.refFile, config_ini, analysis_dir)
    print( "**** Starting configuration.")
    print( "**** Configuration cmd: '" + cmd + "'")
    my_Popen( cmd, "cinfugation_stderr", tmp_dir, "Error during configuration !")
    print("completed configuration")

    # run the workflow !
    cmd = "make -C " + analysis_dir
    print("**** starting workflow.")
    print("**** workflow cmd: '" + cmd + "'")
    my_Popen( cmd, "workflow_stderr", tmp_dir, "Error during workflow execution !")
    print("**** completed workflow execution")
|
|
150
|
|
151
|
|
152
|
|
153
|
|
154
|
|
155
|
|
156
|
|
157
|
|
158
|
|
159
|
|
160
|
|
161
|
|
162
|
|
163
|
|
164 #bam_filenames = []
|
|
165 # if options.datasets:
|
|
166 # for ( dataset_arg, filename, galaxy_ext, prefix ) in options.datasets:
|
|
167 # gatk_filename = filename_from_galaxy( filename, galaxy_ext, target_dir = tmp_dir, prefix = prefix )#return the link to the dataset that has been created in the function
|
|
168 # if dataset_arg:
|
|
169 # cmd = '%s %s "%s"' % ( cmd, gatk_filetype_argument_substitution( dataset_arg, galaxy_ext ), gatk_filename )
|
|
170 # if galaxy_ext == "bam":
|
|
171 # bam_filenames.append( gatk_filename )
|
|
172 # #set up stdout and stderr output options
|
|
173 # stdout = open_file_from_option( options.stdout, mode = 'wb' )
|
|
174 # stderr = open_file_from_option( options.stderr, mode = 'wb' )
|
|
175 # #if no stderr file is specified, we'll use our own
|
|
176 # if stderr is None:
|
|
177 # stderr = tempfile.NamedTemporaryFile( prefix="strelka-stderr-", dir=tmp_dir )
|
|
178 #
|
|
179 # proc = subprocess.Popen( args=cmd, stdout=stdout, stderr=stderr, shell=True, cwd=tmp_dir )
|
|
180 # return_code = proc.wait()
|
|
181 #
|
|
182 # if return_code:
|
|
183 # stderr_target = sys.stderr
|
|
184 # else:
|
|
185 # stderr_target = sys.stdout
|
|
186 # stderr.flush()
|
|
187 # stderr.seek(0)
|
|
188 # while True:
|
|
189 # chunk = stderr.read( CHUNK_SIZE )
|
|
190 # if chunk:
|
|
191 # stderr_target.write( chunk )
|
|
192 # else:
|
|
193 # break
|
|
194 # stderr.close()
|
|
195 # #generate html reports
|
|
196 # if options.html_report_from_directory:
|
|
197 # for ( html_filename, html_dir ) in options.html_report_from_directory:
|
|
198 # html_report_from_directory( open( html_filename, 'wb' ), html_dir )
|
|
199 #
|
|
200 # cleanup_before_exit( tmp_dir )
|
|
201
|
|
# Run the wrapper only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()
|