| 
6
 | 
     1 #!/usr/bin/env python
 | 
| 
10
 | 
     2 #Gregoire Seguin-Henry (Engineer IT)
 | 
| 
 | 
     3 #Amine Sbitti (Data Scientist)
 | 
| 
 | 
     4 #Ludovic Marie-Sainte (Project Manager)
 | 
| 
 | 
     5 #For Geviteam 2014
 | 
| 
6
 | 
     6 
 | 
| 
 | 
     7 """
 | 
| 
 | 
A wrapper script for configuring and running the Strelka workflow.
 | 
| 
 | 
     9 """
 | 
| 
 | 
    10 from __future__ import print_function
 | 
| 
 | 
    11 import sys, argparse, os, tempfile, subprocess, shutil
 | 
| 
 | 
    12 from binascii import unhexlify
 | 
| 
 | 
    13 from string import Template
 | 
| 
 | 
    14 from galaxy import eggs
 | 
| 
 | 
    15 
 | 
| 
 | 
    16 def cleanup_before_exit( tmp_dir ):
 | 
| 
 | 
    17     if tmp_dir and os.path.exists( tmp_dir ):
 | 
| 
 | 
    18         shutil.rmtree( tmp_dir )
 | 
| 
 | 
    19 
 | 
| 
 | 
    20 def _create_config(args, config_path):
 | 
| 
 | 
    21     conf_file = open(config_path, "w")
 | 
| 
 | 
    22     conf_file.write("[user]\n")
 | 
| 
22
 | 
    23     args2 = vars(args)
 | 
| 
 | 
    24     for option in args2:
 | 
| 
 | 
    25         if not option in ["tumorBam", "normalBam", "refFile", "configFile", "scriptPath", "a", "b", "c", "d", "e", "extraStrelkaArguments"] and args2[option]!=None:
 | 
| 
 | 
    26 	    conf_file.write("%s=%s\n" % (option, args2[option]))
 | 
| 
 | 
    27     if args.extraStrelkaArguments == "yes":
 | 
| 
 | 
    28     	conf_file.write("extraStrelkaArguments=")
 | 
| 
 | 
    29    	if args.a:
 | 
| 
 | 
    30             conf_file.write("--ignore-conflicting-read-names ")
 | 
| 
 | 
    31     	if args.b != None:
 | 
| 
 | 
    32             conf_file.write("-used-allele-count-min-qscore %s " % (args.b))
 | 
| 
 | 
    33     	if args.c != None:
 | 
| 
 | 
    34             conf_file.write("--candidate-indel-input-vcf %s " % (args.c))
 | 
| 
 | 
    35     	if args.d != None:
 | 
| 
 | 
    36             conf_file.write("--force-output-vcf %s " % (args.d))
 | 
| 
 | 
    37     	if args.e != None:
 | 
| 
 | 
    38             conf_file.write("-min-small-candidate-indel-read-frac %s " % (args.e))
 | 
| 
 | 
    39     	conf_file.write("\n")
 | 
| 
6
 | 
    40     conf_file.close()
 | 
| 
 | 
    41 
 | 
| 
 | 
    42 def my_Popen(cmd, prefix_for_stderr_name, tmp_dir, msg_error):
 | 
| 
 | 
    43     stderr_name = tempfile.NamedTemporaryFile( prefix = prefix_for_stderr_name ).name
 | 
| 
 | 
    44     proc = subprocess.Popen( args=cmd, shell=True, stderr=open( stderr_name, 'wb' ) )
 | 
| 
 | 
    45     return_code = proc.wait()                          
 | 
| 
 | 
    46     if return_code:
 | 
| 
 | 
    47 	for line in open( stderr_name ):
 | 
| 
 | 
    48            print(line, file=sys.stderr)
 | 
| 
 | 
    49 	os.unlink( stderr_name ) #clean up
 | 
| 
 | 
    50  	cleanup_before_exit( tmp_dir )
 | 
| 
 | 
    51  	raise Exception( msg_error )
 | 
| 
 | 
    52     else:
 | 
| 
 | 
    53         os.unlink( stderr_name )
 | 
| 
 | 
    54 
 | 
| 
 | 
    55 def index_bam_files( bam_filenames, tmp_dir ):
 | 
| 
 | 
    56     for bam_filename in bam_filenames:
 | 
| 
 | 
    57         bam_index_filename = "%s.bai" % bam_filename
 | 
| 
 | 
    58         print("bam_filename is: " + bam_filename + " bam_index_filename is: " + bam_index_filename + " test is: %s" % os.path.exists(bam_index_filename))
 | 
| 
 | 
    59         if not os.path.exists( bam_index_filename ):
 | 
| 
 | 
    60             #need to index this bam file
 | 
| 
 | 
    61             command = 'samtools index %s %s' % ( bam_filename, bam_index_filename )
 | 
| 
 | 
    62             my_Popen( command, "bam_index_stderr", tmp_dir, "Error during indexation of fasta file :" + bam_filename)
 | 
| 
 | 
    63 
 | 
| 
 | 
    64 def index_fasta_files( fasta_filenames, tmp_dir ):
 | 
| 
 | 
    65     for fasta_filename in fasta_filenames:
 | 
| 
 | 
    66         fasta_index_filename = "%s.fai" % fasta_filename
 | 
| 
 | 
    67         print("fasta_filename is: " + fasta_filename + " fasta_index_filename is: " + fasta_index_filename + " test is: %s" % os.path.exists(fasta_index_filename))
 | 
| 
 | 
    68         if not os.path.exists( fasta_index_filename ):
 | 
| 
 | 
    69             #need to index this bam file
 | 
| 
 | 
    70             command = 'samtools faidx %s %s' % ( fasta_filename, fasta_index_filename )
 | 
| 
 | 
    71             my_Popen( command, "fasta_index_stderr", tmp_dir, "Error during indexation of fasta file :" + fasta_filename)
 | 
| 
 | 
    72 
 | 
| 
 | 
    73 def __main__():
 | 
| 
10
 | 
    74     #Manage options
 | 
| 
17
 | 
    75     print(os.environ['PATH'])
 | 
| 
6
 | 
    76     parser = argparse.ArgumentParser()                                             
 | 
| 
22
 | 
    77     parser.add_argument( '--tumorBam', help='path to tumor bam file', required = False )
 | 
| 
 | 
    78     parser.add_argument( '--normalBam', help='', required = False )   
 | 
| 
 | 
    79     parser.add_argument( '--refFile', help='', required = False )
 | 
| 
 | 
    80     parser.add_argument( '--configFile', help='', required = False )
 | 
| 
18
 | 
    81     parser.add_argument( '--depthFilterMultiple', help='', required = False )
 | 
| 
 | 
    82     parser.add_argument( '--snvMaxFilteredBasecallFrac', help='', required = False )
 | 
| 
 | 
    83     parser.add_argument( '--snvMaxSpanningDeletionFrac', help='', required = False )
 | 
| 
 | 
    84     parser.add_argument( '--indelMaxRefRepeat', help='', required = False )
 | 
| 
 | 
    85     parser.add_argument( '--indelMaxWindowFilteredBasecallFrac', help='', required = False )
 | 
| 
 | 
    86     parser.add_argument( '--indelMaxIntHpolLength', help='', required = False )
 | 
| 
 | 
    87     parser.add_argument( '--ssnvPrior', help='', required = False )
 | 
| 
 | 
    88     parser.add_argument( '--sindelPrior', help='', required = False )
 | 
| 
 | 
    89     parser.add_argument( '--ssnvNoise', help='', required = False )
 | 
| 
 | 
    90     parser.add_argument( '--sindelNoise', help='', required = False )
 | 
| 
 | 
    91     parser.add_argument( '--ssnvNoiseStrandBiasFrac', help='', required = False )
 | 
| 
 | 
    92     parser.add_argument( '--minTier1Mapq', help='', required = False )
 | 
| 
 | 
    93     parser.add_argument( '--minTier2Mapq', help='', required = False )
 | 
| 
 | 
    94     parser.add_argument( '--ssnvQuality_LowerBound', help='', required = False )
 | 
| 
 | 
    95     parser.add_argument( '--sindelQuality_LowerBound', help='', required = False )
 | 
| 
 | 
    96     parser.add_argument( '--isWriteRealignedBam', help='', required = False )
 | 
| 
6
 | 
    97     parser.add_argument( '--binSize', help='path to tumor bam file', required = False )
 | 
| 
18
 | 
    98     parser.add_argument( '--extraStrelkaArguments', help='', required = False )
 | 
| 
 | 
    99     parser.add_argument( '--isSkipDepthFilters', help='', required = False )
 | 
| 
 | 
   100     parser.add_argument( '--maxInputDepth', help='', required = False )
 | 
| 
 | 
   101     parser.add_argument( '--scriptPath', help='', required = False )
 | 
| 
22
 | 
   102     parser.add_argument( '-a', action="store_true", help='', required = False )
 | 
| 
 | 
   103     parser.add_argument( '-b', help='', required = False )
 | 
| 
 | 
   104     parser.add_argument( '-c', help='', required = False )
 | 
| 
 | 
   105     parser.add_argument( '-d', help='', required = False )
 | 
| 
 | 
   106     parser.add_argument( '-e', help='', required = False )
 | 
| 
6
 | 
   107     args = parser.parse_args()
 | 
| 
 | 
   108 
 | 
| 
8
 | 
   109     root_dir= args.scriptPath
 | 
| 
7
 | 
   110     expected_dir="for_tests"
 | 
| 
 | 
   111     job_dir=os.getcwd()
 | 
| 
 | 
   112     analysis_dir=job_dir + "/StrelkaAnalysis"
 | 
| 
 | 
   113     config_script=root_dir + "/configureStrelkaWorkflow.pl"
 | 
| 
9
 | 
   114     tmp_dir = tempfile.mkdtemp( prefix='tmp-strelkaAnalysis-' )
 | 
| 
7
 | 
   115     config_ini = "%s/config.ini" % (tmp_dir)
 | 
| 
 | 
   116 
 | 
| 
14
 | 
   117     print("root_dir: " + root_dir + "\njob_dir :" + job_dir + "\nanalysis_dir :" + analysis_dir + "\nconfig_script :" + config_script + "\ntmp_dir :" + tmp_dir + "\nconfig_ini :" +  config_ini)
 | 
| 
7
 | 
   118 
 | 
| 
 | 
   119 
 | 
| 
6
 | 
   120     #verifying eveything's ok
 | 
| 
 | 
   121     if not os.path.isfile(config_script):
 | 
| 
 | 
   122     	sys.exit("ERROR: The strelka workflow must be built prior to running. See installation instructions in '$root_dir/README'")
 | 
| 
 | 
   123     print("configuring...", file=sys.stdout)
 | 
| 
 | 
   124     if os.path.exists(analysis_dir):
 | 
| 
 | 
   125 	sys.exit("'" + analysis_dir + "' already exist, if you are executing this tool from galaxy it should not happen")
 | 
| 
 | 
   126     
 | 
| 
 | 
   127 
 | 
| 
 | 
   128     # creating index if needed
 | 
| 
 | 
   129     bam_filenames = [ args.tumorBam, args.normalBam ]
 | 
| 
 | 
   130     index_bam_files( bam_filenames, tmp_dir )
 | 
| 
 | 
   131     fasta_files = [ args.refFile ]
 | 
| 
 | 
   132     index_fasta_files( fasta_files, tmp_dir )
 | 
| 
 | 
   133     
 | 
| 
 | 
   134     #creating config file if needed
 | 
| 
 | 
   135     if args.configFile == "Custom":
 | 
| 
22
 | 
   136     	_create_config(args, config_ini)
 | 
| 
18
 | 
   137     elif args.configFile in ["strelka_config_bwa_default.ini", "strelka_config_isaac_default.ini", "strelka_config_eland_default.ini"]:
 | 
| 
 | 
   138         cmdbash="cp %s %s" % (root_dir + "/lib/" + args.configFile, config_ini)
 | 
| 
6
 | 
   139         my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed")
 | 
| 
 | 
   140     else:
 | 
| 
 | 
   141     	if not os.path.exists(args.configFile):
 | 
| 
 | 
   142 	     print( "The path to your configuration File seems to be wrong, use another one or custom option", file=sys.stderr)
 | 
| 
 | 
   143     	cmdbash="cp %s %s" % (args.configFile, config_ini)
 | 
| 
9
 | 
   144         my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of the selected config file")
 | 
| 
6
 | 
   145 
 | 
| 
 | 
   146 
 | 
| 
 | 
   147 
 | 
| 
 | 
   148 
 | 
| 
 | 
   149     #configuration of workflow
 | 
| 
 | 
   150     cmd="%s --tumor=%s --normal=%s --ref=%s --config=%s --output-dir=%s" % (config_script, args.tumorBam, args.normalBam, args.refFile, config_ini, analysis_dir)
 | 
| 
 | 
   151     print( "**** Starting configuration.")
 | 
| 
 | 
   152     print( "**** Configuration cmd: '" + cmd + "'")
 | 
| 
 | 
   153     my_Popen( cmd, "cinfugation_stderr", tmp_dir, "Error during configuration !")
 | 
| 
 | 
   154     print("completed configuration")
 | 
| 
 | 
   155     
 | 
| 
 | 
   156     #run the workflow !
 | 
| 
 | 
   157     cmd="make -C " + analysis_dir
 | 
| 
 | 
   158     print("**** starting workflow.")
 | 
| 
 | 
   159     print("**** workflow cmd: '" + cmd + "'")
 | 
| 
 | 
   160     my_Popen( cmd, "workflow_stderr", tmp_dir, "Error during workflow execution !")   
 | 
| 
 | 
   161     print("**** completed workflow execution")
 | 
| 
17
 | 
   162     
 | 
| 
 | 
   163     cmdbash="cp %s %s" % (config_ini, analysis_dir + "/config.ini")
 | 
| 
 | 
   164     my_Popen(cmdbash, "copy_final_conf_file_err", tmp_dir, "Error during the copy of conf file after job is done, quite strange...")  
 | 
| 
 | 
   165 
 | 
| 
6
 | 
   166 
 | 
| 
14
 | 
   167 if __name__=='__main__':
 | 
| 
 | 
   168     __main__()
 |