comparison strelka_wrapper.py @ 10:f66f23fcc83a

Modified layout
author mini
date Tue, 30 Sep 2014 11:08:39 +0200
parents 2cee4ed50058
children 137e05f24336
comparison
equal deleted inserted replaced
8:2cee4ed50058 10:f66f23fcc83a
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 #Dan Blankenberg 2 #Gregoire Seguin-Henry (Engineer IT)
3 #Amine Sbitti (Data Scientist)
4 #Ludovic Marie-Sainte (Project Manager)
5 #For Geviteam 2014
3 6
4 """ 7 """
5 A wrapper script for running the GenomeAnalysisTK.jar commands. 8 A wrapper script for running the GenomeAnalysisTK.jar commands.
6 """ 9 """
7 10
8 from __future__ import print_function 11 from __future__ import print_function
9 import sys, argparse, os, tempfile, subprocess, shutil 12 import sys, argparse, os, tempfile, subprocess, shutil
10 from binascii import unhexlify 13 from binascii import unhexlify
11 from string import Template 14 from string import Template
12 from galaxy import eggs 15 from galaxy import eggs
13 #import pkg_resources; pkg_resources.require( "bx-python" )
14 16
15 #GALAXY_EXT_TO_GATK_EXT = { 'gatk_interval':'intervals', 'bam_index':'bam.bai', 'gatk_dbsnp':'dbSNP', 'picard_interval_list':'interval_list' } #items not listed here will use the galaxy extension as-is
16 #GALAXY_EXT_TO_GATK_FILE_TYPE = GALAXY_EXT_TO_GATK_EXT #for now, these are the same, but could be different if needed
17 #DEFAULT_GATK_PREFIX = "gatk_file"
18 #CHUNK_SIZE = 2**20 #1mb
19 #
20 #
21 def cleanup_before_exit( tmp_dir ): 17 def cleanup_before_exit( tmp_dir ):
22 if tmp_dir and os.path.exists( tmp_dir ): 18 if tmp_dir and os.path.exists( tmp_dir ):
23 shutil.rmtree( tmp_dir ) 19 shutil.rmtree( tmp_dir )
24 20
25 def _create_config(args, config_path): 21 def _create_config(args, config_path):
60 #need to index this bam file 56 #need to index this bam file
61 command = 'samtools faidx %s %s' % ( fasta_filename, fasta_index_filename ) 57 command = 'samtools faidx %s %s' % ( fasta_filename, fasta_index_filename )
62 my_Popen( command, "fasta_index_stderr", tmp_dir, "Error during indexation of fasta file :" + fasta_filename) 58 my_Popen( command, "fasta_index_stderr", tmp_dir, "Error during indexation of fasta file :" + fasta_filename)
63 59
64 def __main__(): 60 def __main__():
65 #Parse Command Line OPTPARSE DEPRECIATED USE ARGPARSE INSTEAD 61
66 #MKTEMP DEPRECIATED USE MKDTlizations#EMP INSTEAD 62 #Manage options
67 #manage parsing
68 parser = argparse.ArgumentParser() 63 parser = argparse.ArgumentParser()
69 parser.add_argument( '-t', '--tumorBam', help='path to tumor bam file', required = False ) 64 parser.add_argument( '-t', '--tumorBam', help='path to tumor bam file', required = False )
70 parser.add_argument( '-n', '--normalBam', help='path to tumor bam file', required = False ) 65 parser.add_argument( '-n', '--normalBam', help='path to tumor bam file', required = False )
71 parser.add_argument( '-r', '--refFile', help='path to tumor bam file', required = False ) 66 parser.add_argument( '-r', '--refFile', help='path to tumor bam file', required = False )
72 parser.add_argument( '-c', '--configFile', help='path to tumor bam file', required = False ) 67 parser.add_argument( '-c', '--configFile', help='path to tumor bam file', required = False )
96 root_dir= args.scriptPath 91 root_dir= args.scriptPath
97 expected_dir="for_tests" 92 expected_dir="for_tests"
98 job_dir=os.getcwd() 93 job_dir=os.getcwd()
99 analysis_dir=job_dir + "/StrelkaAnalysis" 94 analysis_dir=job_dir + "/StrelkaAnalysis"
100 config_script=root_dir + "/configureStrelkaWorkflow.pl" 95 config_script=root_dir + "/configureStrelkaWorkflow.pl"
101 tmp_dir = "tmp" #tempfile.mkdtemp( prefix='tmp-strelkaAnalysis-' ) 96 tmp_dir = tempfile.mkdtemp( prefix='tmp-strelkaAnalysis-' )
102 config_ini = "%s/config.ini" % (tmp_dir) 97 config_ini = "%s/config.ini" % (tmp_dir)
103 98
104 print("root_dir: " + root_dir + "\njob_dir :" + job_dir + "\nanalysis_dir :" + analysis_dir + "\nconfig_script :" + config_script + "\ntmp_dir :" + tmp_dir + "\nconfig_ini :" + config_ini) 99 #print("root_dir: " + root_dir + "\njob_dir :" + job_dir + "\nanalysis_dir :" + analysis_dir + "\nconfig_script :" + config_script + "\ntmp_dir :" + tmp_dir + "\nconfig_ini :" + config_ini)
105 100
106 101
107 #verifying eveything's ok 102 #verifying eveything's ok
108 if not os.path.isfile(config_script): 103 if not os.path.isfile(config_script):
109 sys.exit("ERROR: The strelka workflow must be built prior to running. See installation instructions in '$root_dir/README'") 104 sys.exit("ERROR: The strelka workflow must be built prior to running. See installation instructions in '$root_dir/README'")
111 if os.path.exists(analysis_dir): 106 if os.path.exists(analysis_dir):
112 sys.exit("'" + analysis_dir + "' already exist, if you are executing this tool from galaxy it should not happen") 107 sys.exit("'" + analysis_dir + "' already exist, if you are executing this tool from galaxy it should not happen")
113 108
114 109
115 # creating index if needed 110 # creating index if needed
116 os.environ['PATH']= root_dir + "/opt/samtools:" + os.environ['PATH']
117 bam_filenames = [ args.tumorBam, args.normalBam ] 111 bam_filenames = [ args.tumorBam, args.normalBam ]
118 index_bam_files( bam_filenames, tmp_dir ) 112 index_bam_files( bam_filenames, tmp_dir )
119 fasta_files = [ args.refFile ] 113 fasta_files = [ args.refFile ]
120 index_fasta_files( fasta_files, tmp_dir ) 114 index_fasta_files( fasta_files, tmp_dir )
121 115
127 my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed") 121 my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed")
128 else: 122 else:
129 if not os.path.exists(args.configFile): 123 if not os.path.exists(args.configFile):
130 print( "The path to your configuration File seems to be wrong, use another one or custom option", file=sys.stderr) 124 print( "The path to your configuration File seems to be wrong, use another one or custom option", file=sys.stderr)
131 cmdbash="cp %s %s" % (args.configFile, config_ini) 125 cmdbash="cp %s %s" % (args.configFile, config_ini)
132 my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed") 126 my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of the selected config file")
133 127
134 128
135 129
136 130
137 #configuration of workflow 131 #configuration of workflow
146 print("**** starting workflow.") 140 print("**** starting workflow.")
147 print("**** workflow cmd: '" + cmd + "'") 141 print("**** workflow cmd: '" + cmd + "'")
148 my_Popen( cmd, "workflow_stderr", tmp_dir, "Error during workflow execution !") 142 my_Popen( cmd, "workflow_stderr", tmp_dir, "Error during workflow execution !")
149 print("**** completed workflow execution") 143 print("**** completed workflow execution")
150 144
151
152
153
154
155
156
157
158
159
160
161
162
163
164 #bam_filenames = []
165 # if options.datasets:
166 # for ( dataset_arg, filename, galaxy_ext, prefix ) in options.datasets:
167 # gatk_filename = filename_from_galaxy( filename, galaxy_ext, target_dir = tmp_dir, prefix = prefix )#return the link to the dataset that has been created in the function
168 # if dataset_arg:
169 # cmd = '%s %s "%s"' % ( cmd, gatk_filetype_argument_substitution( dataset_arg, galaxy_ext ), gatk_filename )
170 # if galaxy_ext == "bam":
171 # bam_filenames.append( gatk_filename )
172 # #set up stdout and stderr output options
173 # stdout = open_file_from_option( options.stdout, mode = 'wb' )
174 # stderr = open_file_from_option( options.stderr, mode = 'wb' )
175 # #if no stderr file is specified, we'll use our own
176 # if stderr is None:
177 # stderr = tempfile.NamedTemporaryFile( prefix="strelka-stderr-", dir=tmp_dir )
178 #
179 # proc = subprocess.Popen( args=cmd, stdout=stdout, stderr=stderr, shell=True, cwd=tmp_dir )
180 # return_code = proc.wait()
181 #
182 # if return_code:
183 # stderr_target = sys.stderr
184 # else:
185 # stderr_target = sys.stdout
186 # stderr.flush()
187 # stderr.seek(0)
188 # while True:
189 # chunk = stderr.read( CHUNK_SIZE )
190 # if chunk:
191 # stderr_target.write( chunk )
192 # else:
193 # break
194 # stderr.close()
195 # #generate html reports
196 # if options.html_report_from_directory:
197 # for ( html_filename, html_dir ) in options.html_report_from_directory:
198 # html_report_from_directory( open( html_filename, 'wb' ), html_dir )
199 #
200 # cleanup_before_exit( tmp_dir )
201
202 if __name__=="__main__": __main__()