Mercurial > repos > mini > strelka
comparison strelka_wrapper.py @ 10:f66f23fcc83a
Modified layout
author | mini |
---|---|
date | Tue, 30 Sep 2014 11:08:39 +0200 |
parents | 2cee4ed50058 |
children | 137e05f24336 |
comparison
equal
deleted
inserted
replaced
8:2cee4ed50058 | 10:f66f23fcc83a |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 #Dan Blankenberg | 2 #Gregoire Seguin-Henry (Engineer IT) |
3 #Amine Sbitti (Data Scientist) | |
4 #Ludovic Marie-Sainte (Project Manager) | |
5 #For Geviteam 2014 | |
3 | 6 |
4 """ | 7 """ |
5 A wrapper script for running the GenomeAnalysisTK.jar commands. | 8 A wrapper script for running the GenomeAnalysisTK.jar commands. |
6 """ | 9 """ |
7 | 10 |
8 from __future__ import print_function | 11 from __future__ import print_function |
9 import sys, argparse, os, tempfile, subprocess, shutil | 12 import sys, argparse, os, tempfile, subprocess, shutil |
10 from binascii import unhexlify | 13 from binascii import unhexlify |
11 from string import Template | 14 from string import Template |
12 from galaxy import eggs | 15 from galaxy import eggs |
13 #import pkg_resources; pkg_resources.require( "bx-python" ) | |
14 | 16 |
15 #GALAXY_EXT_TO_GATK_EXT = { 'gatk_interval':'intervals', 'bam_index':'bam.bai', 'gatk_dbsnp':'dbSNP', 'picard_interval_list':'interval_list' } #items not listed here will use the galaxy extension as-is | |
16 #GALAXY_EXT_TO_GATK_FILE_TYPE = GALAXY_EXT_TO_GATK_EXT #for now, these are the same, but could be different if needed | |
17 #DEFAULT_GATK_PREFIX = "gatk_file" | |
18 #CHUNK_SIZE = 2**20 #1mb | |
19 # | |
20 # | |
21 def cleanup_before_exit( tmp_dir ): | 17 def cleanup_before_exit( tmp_dir ): |
22 if tmp_dir and os.path.exists( tmp_dir ): | 18 if tmp_dir and os.path.exists( tmp_dir ): |
23 shutil.rmtree( tmp_dir ) | 19 shutil.rmtree( tmp_dir ) |
24 | 20 |
25 def _create_config(args, config_path): | 21 def _create_config(args, config_path): |
60 #need to index this fasta file | 56 #need to index this fasta file |
61 command = 'samtools faidx %s %s' % ( fasta_filename, fasta_index_filename ) | 57 command = 'samtools faidx %s %s' % ( fasta_filename, fasta_index_filename ) |
62 my_Popen( command, "fasta_index_stderr", tmp_dir, "Error during indexation of fasta file :" + fasta_filename) | 58 my_Popen( command, "fasta_index_stderr", tmp_dir, "Error during indexation of fasta file :" + fasta_filename) |
63 | 59 |
64 def __main__(): | 60 def __main__(): |
65 #Parse Command Line OPTPARSE DEPRECATED USE ARGPARSE INSTEAD | 61 |
66 #MKTEMP DEPRECATED USE MKDTEMP INSTEAD | 62 #Manage options |
67 #manage parsing | |
68 parser = argparse.ArgumentParser() | 63 parser = argparse.ArgumentParser() |
69 parser.add_argument( '-t', '--tumorBam', help='path to tumor bam file', required = False ) | 64 parser.add_argument( '-t', '--tumorBam', help='path to tumor bam file', required = False ) |
70 parser.add_argument( '-n', '--normalBam', help='path to tumor bam file', required = False ) | 65 parser.add_argument( '-n', '--normalBam', help='path to tumor bam file', required = False ) |
71 parser.add_argument( '-r', '--refFile', help='path to tumor bam file', required = False ) | 66 parser.add_argument( '-r', '--refFile', help='path to tumor bam file', required = False ) |
72 parser.add_argument( '-c', '--configFile', help='path to tumor bam file', required = False ) | 67 parser.add_argument( '-c', '--configFile', help='path to tumor bam file', required = False ) |
96 root_dir= args.scriptPath | 91 root_dir= args.scriptPath |
97 expected_dir="for_tests" | 92 expected_dir="for_tests" |
98 job_dir=os.getcwd() | 93 job_dir=os.getcwd() |
99 analysis_dir=job_dir + "/StrelkaAnalysis" | 94 analysis_dir=job_dir + "/StrelkaAnalysis" |
100 config_script=root_dir + "/configureStrelkaWorkflow.pl" | 95 config_script=root_dir + "/configureStrelkaWorkflow.pl" |
101 tmp_dir = "tmp" #tempfile.mkdtemp( prefix='tmp-strelkaAnalysis-' ) | 96 tmp_dir = tempfile.mkdtemp( prefix='tmp-strelkaAnalysis-' ) |
102 config_ini = "%s/config.ini" % (tmp_dir) | 97 config_ini = "%s/config.ini" % (tmp_dir) |
103 | 98 |
104 print("root_dir: " + root_dir + "\njob_dir :" + job_dir + "\nanalysis_dir :" + analysis_dir + "\nconfig_script :" + config_script + "\ntmp_dir :" + tmp_dir + "\nconfig_ini :" + config_ini) | 99 #print("root_dir: " + root_dir + "\njob_dir :" + job_dir + "\nanalysis_dir :" + analysis_dir + "\nconfig_script :" + config_script + "\ntmp_dir :" + tmp_dir + "\nconfig_ini :" + config_ini) |
105 | 100 |
106 | 101 |
107 #verifying everything's ok | 102 #verifying everything's ok |
108 if not os.path.isfile(config_script): | 103 if not os.path.isfile(config_script): |
109 sys.exit("ERROR: The strelka workflow must be built prior to running. See installation instructions in '$root_dir/README'") | 104 sys.exit("ERROR: The strelka workflow must be built prior to running. See installation instructions in '$root_dir/README'") |
111 if os.path.exists(analysis_dir): | 106 if os.path.exists(analysis_dir): |
112 sys.exit("'" + analysis_dir + "' already exist, if you are executing this tool from galaxy it should not happen") | 107 sys.exit("'" + analysis_dir + "' already exist, if you are executing this tool from galaxy it should not happen") |
113 | 108 |
114 | 109 |
115 # creating index if needed | 110 # creating index if needed |
116 os.environ['PATH']= root_dir + "/opt/samtools:" + os.environ['PATH'] | |
117 bam_filenames = [ args.tumorBam, args.normalBam ] | 111 bam_filenames = [ args.tumorBam, args.normalBam ] |
118 index_bam_files( bam_filenames, tmp_dir ) | 112 index_bam_files( bam_filenames, tmp_dir ) |
119 fasta_files = [ args.refFile ] | 113 fasta_files = [ args.refFile ] |
120 index_fasta_files( fasta_files, tmp_dir ) | 114 index_fasta_files( fasta_files, tmp_dir ) |
121 | 115 |
127 my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed") | 121 my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed") |
128 else: | 122 else: |
129 if not os.path.exists(args.configFile): | 123 if not os.path.exists(args.configFile): |
130 print( "The path to your configuration File seems to be wrong, use another one or custom option", file=sys.stderr) | 124 print( "The path to your configuration File seems to be wrong, use another one or custom option", file=sys.stderr) |
131 cmdbash="cp %s %s" % (args.configFile, config_ini) | 125 cmdbash="cp %s %s" % (args.configFile, config_ini) |
132 my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed") | 126 my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of the selected config file") |
133 | 127 |
134 | 128 |
135 | 129 |
136 | 130 |
137 #configuration of workflow | 131 #configuration of workflow |
146 print("**** starting workflow.") | 140 print("**** starting workflow.") |
147 print("**** workflow cmd: '" + cmd + "'") | 141 print("**** workflow cmd: '" + cmd + "'") |
148 my_Popen( cmd, "workflow_stderr", tmp_dir, "Error during workflow execution !") | 142 my_Popen( cmd, "workflow_stderr", tmp_dir, "Error during workflow execution !") |
149 print("**** completed workflow execution") | 143 print("**** completed workflow execution") |
150 | 144 |
151 | |
152 | |
153 | |
154 | |
155 | |
156 | |
157 | |
158 | |
159 | |
160 | |
161 | |
162 | |
163 | |
164 #bam_filenames = [] | |
165 # if options.datasets: | |
166 # for ( dataset_arg, filename, galaxy_ext, prefix ) in options.datasets: | |
167 # gatk_filename = filename_from_galaxy( filename, galaxy_ext, target_dir = tmp_dir, prefix = prefix )#return the link to the dataset that has been created in the function | |
168 # if dataset_arg: | |
169 # cmd = '%s %s "%s"' % ( cmd, gatk_filetype_argument_substitution( dataset_arg, galaxy_ext ), gatk_filename ) | |
170 # if galaxy_ext == "bam": | |
171 # bam_filenames.append( gatk_filename ) | |
172 # #set up stdout and stderr output options | |
173 # stdout = open_file_from_option( options.stdout, mode = 'wb' ) | |
174 # stderr = open_file_from_option( options.stderr, mode = 'wb' ) | |
175 # #if no stderr file is specified, we'll use our own | |
176 # if stderr is None: | |
177 # stderr = tempfile.NamedTemporaryFile( prefix="strelka-stderr-", dir=tmp_dir ) | |
178 # | |
179 # proc = subprocess.Popen( args=cmd, stdout=stdout, stderr=stderr, shell=True, cwd=tmp_dir ) | |
180 # return_code = proc.wait() | |
181 # | |
182 # if return_code: | |
183 # stderr_target = sys.stderr | |
184 # else: | |
185 # stderr_target = sys.stdout | |
186 # stderr.flush() | |
187 # stderr.seek(0) | |
188 # while True: | |
189 # chunk = stderr.read( CHUNK_SIZE ) | |
190 # if chunk: | |
191 # stderr_target.write( chunk ) | |
192 # else: | |
193 # break | |
194 # stderr.close() | |
195 # #generate html reports | |
196 # if options.html_report_from_directory: | |
197 # for ( html_filename, html_dir ) in options.html_report_from_directory: | |
198 # html_report_from_directory( open( html_filename, 'wb' ), html_dir ) | |
199 # | |
200 # cleanup_before_exit( tmp_dir ) | |
201 | |
202 if __name__=="__main__": __main__() |