Mercurial > repos > mini > strelka
comparison strelka_wrapper.py @ 10:f66f23fcc83a
Modified layout
| author | mini |
|---|---|
| date | Tue, 30 Sep 2014 11:08:39 +0200 |
| parents | 2cee4ed50058 |
| children | 137e05f24336 |
Comparison legend (row status):
- equal
- deleted
- inserted
- replaced
| 8:2cee4ed50058 | 10:f66f23fcc83a |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 #Dan Blankenberg | 2 #Gregoire Seguin-Henry (Engineer IT) |
| 3 #Amine Sbitti (Data Scientist) | |
| 4 #Ludovic Marie-Sainte (Project Manager) | |
| 5 #For Geviteam 2014 | |
| 3 | 6 |
| 4 """ | 7 """ |
| 5 A wrapper script for running the GenomeAnalysisTK.jar commands. | 8 A wrapper script for running the GenomeAnalysisTK.jar commands. |
| 6 """ | 9 """ |
| 7 | 10 |
| 8 from __future__ import print_function | 11 from __future__ import print_function |
| 9 import sys, argparse, os, tempfile, subprocess, shutil | 12 import sys, argparse, os, tempfile, subprocess, shutil |
| 10 from binascii import unhexlify | 13 from binascii import unhexlify |
| 11 from string import Template | 14 from string import Template |
| 12 from galaxy import eggs | 15 from galaxy import eggs |
| 13 #import pkg_resources; pkg_resources.require( "bx-python" ) | |
| 14 | 16 |
| 15 #GALAXY_EXT_TO_GATK_EXT = { 'gatk_interval':'intervals', 'bam_index':'bam.bai', 'gatk_dbsnp':'dbSNP', 'picard_interval_list':'interval_list' } #items not listed here will use the galaxy extension as-is | |
| 16 #GALAXY_EXT_TO_GATK_FILE_TYPE = GALAXY_EXT_TO_GATK_EXT #for now, these are the same, but could be different if needed | |
| 17 #DEFAULT_GATK_PREFIX = "gatk_file" | |
| 18 #CHUNK_SIZE = 2**20 #1mb | |
| 19 # | |
| 20 # | |
| 21 def cleanup_before_exit( tmp_dir ): | 17 def cleanup_before_exit( tmp_dir ): |
| 22 if tmp_dir and os.path.exists( tmp_dir ): | 18 if tmp_dir and os.path.exists( tmp_dir ): |
| 23 shutil.rmtree( tmp_dir ) | 19 shutil.rmtree( tmp_dir ) |
| 24 | 20 |
| 25 def _create_config(args, config_path): | 21 def _create_config(args, config_path): |
| 60 #need to index this bam file | 56 #need to index this bam file |
| 61 command = 'samtools faidx %s %s' % ( fasta_filename, fasta_index_filename ) | 57 command = 'samtools faidx %s %s' % ( fasta_filename, fasta_index_filename ) |
| 62 my_Popen( command, "fasta_index_stderr", tmp_dir, "Error during indexation of fasta file :" + fasta_filename) | 58 my_Popen( command, "fasta_index_stderr", tmp_dir, "Error during indexation of fasta file :" + fasta_filename) |
| 63 | 59 |
| 64 def __main__(): | 60 def __main__(): |
| 65 #Parse Command Line OPTPARSE DEPRECIATED USE ARGPARSE INSTEAD | 61 |
| 66 #MKTEMP DEPRECIATED USE MKDTlizations#EMP INSTEAD | 62 #Manage options |
| 67 #manage parsing | |
| 68 parser = argparse.ArgumentParser() | 63 parser = argparse.ArgumentParser() |
| 69 parser.add_argument( '-t', '--tumorBam', help='path to tumor bam file', required = False ) | 64 parser.add_argument( '-t', '--tumorBam', help='path to tumor bam file', required = False ) |
| 70 parser.add_argument( '-n', '--normalBam', help='path to tumor bam file', required = False ) | 65 parser.add_argument( '-n', '--normalBam', help='path to tumor bam file', required = False ) |
| 71 parser.add_argument( '-r', '--refFile', help='path to tumor bam file', required = False ) | 66 parser.add_argument( '-r', '--refFile', help='path to tumor bam file', required = False ) |
| 72 parser.add_argument( '-c', '--configFile', help='path to tumor bam file', required = False ) | 67 parser.add_argument( '-c', '--configFile', help='path to tumor bam file', required = False ) |
| 96 root_dir= args.scriptPath | 91 root_dir= args.scriptPath |
| 97 expected_dir="for_tests" | 92 expected_dir="for_tests" |
| 98 job_dir=os.getcwd() | 93 job_dir=os.getcwd() |
| 99 analysis_dir=job_dir + "/StrelkaAnalysis" | 94 analysis_dir=job_dir + "/StrelkaAnalysis" |
| 100 config_script=root_dir + "/configureStrelkaWorkflow.pl" | 95 config_script=root_dir + "/configureStrelkaWorkflow.pl" |
| 101 tmp_dir = "tmp" #tempfile.mkdtemp( prefix='tmp-strelkaAnalysis-' ) | 96 tmp_dir = tempfile.mkdtemp( prefix='tmp-strelkaAnalysis-' ) |
| 102 config_ini = "%s/config.ini" % (tmp_dir) | 97 config_ini = "%s/config.ini" % (tmp_dir) |
| 103 | 98 |
| 104 print("root_dir: " + root_dir + "\njob_dir :" + job_dir + "\nanalysis_dir :" + analysis_dir + "\nconfig_script :" + config_script + "\ntmp_dir :" + tmp_dir + "\nconfig_ini :" + config_ini) | 99 #print("root_dir: " + root_dir + "\njob_dir :" + job_dir + "\nanalysis_dir :" + analysis_dir + "\nconfig_script :" + config_script + "\ntmp_dir :" + tmp_dir + "\nconfig_ini :" + config_ini) |
| 105 | 100 |
| 106 | 101 |
| 107 #verifying eveything's ok | 102 #verifying eveything's ok |
| 108 if not os.path.isfile(config_script): | 103 if not os.path.isfile(config_script): |
| 109 sys.exit("ERROR: The strelka workflow must be built prior to running. See installation instructions in '$root_dir/README'") | 104 sys.exit("ERROR: The strelka workflow must be built prior to running. See installation instructions in '$root_dir/README'") |
| 111 if os.path.exists(analysis_dir): | 106 if os.path.exists(analysis_dir): |
| 112 sys.exit("'" + analysis_dir + "' already exist, if you are executing this tool from galaxy it should not happen") | 107 sys.exit("'" + analysis_dir + "' already exist, if you are executing this tool from galaxy it should not happen") |
| 113 | 108 |
| 114 | 109 |
| 115 # creating index if needed | 110 # creating index if needed |
| 116 os.environ['PATH']= root_dir + "/opt/samtools:" + os.environ['PATH'] | |
| 117 bam_filenames = [ args.tumorBam, args.normalBam ] | 111 bam_filenames = [ args.tumorBam, args.normalBam ] |
| 118 index_bam_files( bam_filenames, tmp_dir ) | 112 index_bam_files( bam_filenames, tmp_dir ) |
| 119 fasta_files = [ args.refFile ] | 113 fasta_files = [ args.refFile ] |
| 120 index_fasta_files( fasta_files, tmp_dir ) | 114 index_fasta_files( fasta_files, tmp_dir ) |
| 121 | 115 |
| 127 my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed") | 121 my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed") |
| 128 else: | 122 else: |
| 129 if not os.path.exists(args.configFile): | 123 if not os.path.exists(args.configFile): |
| 130 print( "The path to your configuration File seems to be wrong, use another one or custom option", file=sys.stderr) | 124 print( "The path to your configuration File seems to be wrong, use another one or custom option", file=sys.stderr) |
| 131 cmdbash="cp %s %s" % (args.configFile, config_ini) | 125 cmdbash="cp %s %s" % (args.configFile, config_ini) |
| 132 my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed") | 126 my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of the selected config file") |
| 133 | 127 |
| 134 | 128 |
| 135 | 129 |
| 136 | 130 |
| 137 #configuration of workflow | 131 #configuration of workflow |
| 146 print("**** starting workflow.") | 140 print("**** starting workflow.") |
| 147 print("**** workflow cmd: '" + cmd + "'") | 141 print("**** workflow cmd: '" + cmd + "'") |
| 148 my_Popen( cmd, "workflow_stderr", tmp_dir, "Error during workflow execution !") | 142 my_Popen( cmd, "workflow_stderr", tmp_dir, "Error during workflow execution !") |
| 149 print("**** completed workflow execution") | 143 print("**** completed workflow execution") |
| 150 | 144 |
| 151 | |
| 152 | |
| 153 | |
| 154 | |
| 155 | |
| 156 | |
| 157 | |
| 158 | |
| 159 | |
| 160 | |
| 161 | |
| 162 | |
| 163 | |
| 164 #bam_filenames = [] | |
| 165 # if options.datasets: | |
| 166 # for ( dataset_arg, filename, galaxy_ext, prefix ) in options.datasets: | |
| 167 # gatk_filename = filename_from_galaxy( filename, galaxy_ext, target_dir = tmp_dir, prefix = prefix )#return the link to the dataset that has been created in the function | |
| 168 # if dataset_arg: | |
| 169 # cmd = '%s %s "%s"' % ( cmd, gatk_filetype_argument_substitution( dataset_arg, galaxy_ext ), gatk_filename ) | |
| 170 # if galaxy_ext == "bam": | |
| 171 # bam_filenames.append( gatk_filename ) | |
| 172 # #set up stdout and stderr output options | |
| 173 # stdout = open_file_from_option( options.stdout, mode = 'wb' ) | |
| 174 # stderr = open_file_from_option( options.stderr, mode = 'wb' ) | |
| 175 # #if no stderr file is specified, we'll use our own | |
| 176 # if stderr is None: | |
| 177 # stderr = tempfile.NamedTemporaryFile( prefix="strelka-stderr-", dir=tmp_dir ) | |
| 178 # | |
| 179 # proc = subprocess.Popen( args=cmd, stdout=stdout, stderr=stderr, shell=True, cwd=tmp_dir ) | |
| 180 # return_code = proc.wait() | |
| 181 # | |
| 182 # if return_code: | |
| 183 # stderr_target = sys.stderr | |
| 184 # else: | |
| 185 # stderr_target = sys.stdout | |
| 186 # stderr.flush() | |
| 187 # stderr.seek(0) | |
| 188 # while True: | |
| 189 # chunk = stderr.read( CHUNK_SIZE ) | |
| 190 # if chunk: | |
| 191 # stderr_target.write( chunk ) | |
| 192 # else: | |
| 193 # break | |
| 194 # stderr.close() | |
| 195 # #generate html reports | |
| 196 # if options.html_report_from_directory: | |
| 197 # for ( html_filename, html_dir ) in options.html_report_from_directory: | |
| 198 # html_report_from_directory( open( html_filename, 'wb' ), html_dir ) | |
| 199 # | |
| 200 # cleanup_before_exit( tmp_dir ) | |
| 201 | |
| 202 if __name__=="__main__": __main__() |
