| 0 | 1 #!/usr/bin/python | 
|  | 2 | 
|  | 3 # ----------------------------------------------------------------------# | 
|  | 4 # Copyright (c) 2011, Richard Lupat & Jason Li. | 
|  | 5 # | 
|  | 6 # > Source License < | 
|  | 7 # This file is part of CONTRA. | 
|  | 8 # | 
|  | 9 #    CONTRA is free software: you can redistribute it and/or modify | 
|  | 10 #    it under the terms of the GNU General Public License as published by | 
|  | 11 #    the Free Software Foundation, either version 3 of the License, or | 
|  | 12 #    (at your option) any later version. | 
|  | 13 # | 
|  | 14 #    CONTRA is distributed in the hope that it will be useful, | 
|  | 15 #    but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 16 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|  | 17 #    GNU General Public License for more details. | 
|  | 18 # | 
|  | 19 #    You should have received a copy of the GNU General Public License | 
|  | 20 #    along with CONTRA.  If not, see <http://www.gnu.org/licenses/>. | 
|  | 21 # | 
|  | 22 # | 
|  | 23 #-----------------------------------------------------------------------# | 
|  | 24 # Last Updated : 23 July 2012 16:43PM | 
|  | 25 | 
|  | 26 | 
|  | 27 import os | 
|  | 28 from optparse import OptionParser | 
|  | 29 import sys | 
|  | 30 import subprocess | 
|  | 31 import shlex | 
|  | 32 from multiprocessing import Process, Manager | 
|  | 33 | 
|  | 34 from scripts.assign_bin_number_v2 import * | 
|  | 35 from scripts.average_count import * | 
|  | 36 from scripts.cn_apply_threshold import * | 
|  | 37 from scripts.convert_gene_coordinate import * | 
|  | 38 from scripts.convert_targeted_regions import * | 
|  | 39 from scripts.split_chromosome import * | 
|  | 40 from scripts.vcf_out import * | 
|  | 41 from scripts.get_chr_length import * | 
|  | 42 from scripts.count_libsize import * | 
|  | 43 from scripts.target_breakdown import * | 
|  | 44 | 
|  | 45 #Absolute Path | 
|  | 46 scriptPath = os.path.realpath(os.path.dirname(sys.argv[0])) | 
|  | 47 | 
|  | 48 class Params: | 
|  | 49 	""" | 
|  | 50 	Class for top-level system parameters | 
|  | 51 	""" | 
|  | 52 | 
|  | 53 	def __init__(self): | 
|  | 54 		# command-line option definition | 
|  | 55 		self.parser = OptionParser() | 
|  | 56 		self.parser.add_option("-t", "--target", | 
|  | 57 			help="Target region definition file [REQUIRED] [BED Format]", | 
|  | 58 			action="store", type="string", dest="target") | 
|  | 59 		self.parser.add_option("-s", "--test", | 
|  | 60 			help="Alignment file for the test sample [REQUIRED] [BAM/SAM]", | 
|  | 61 			action="store", type="string", dest="test") | 
|  | 62 		self.parser.add_option("-c", "--control", | 
|  | 63 			help="Alignment file for the control sample [REQUIRED] [BAM/SAM]", | 
|  | 64 			action="store", type="string", dest="control") | 
|  | 65 		self.parser.add_option("-f", "--fasta", | 
|  | 66 			help="Reference Genome [REQUIRED][FASTA]", | 
|  | 67 			action="store", type="string", dest="fasta") | 
|  | 68 		self.parser.add_option("-o", "--outFolder", | 
|  | 69 			help="the output folder path name to store the output of analysis [REQUIRED]", | 
|  | 70 			action="store", type="string", dest="outFolder") | 
|  | 71 		self.parser.add_option("--numBin", | 
|  | 72 			help="Numbers of bins to group regions. User can specify multiple experiments with different number of bins (comma separated) [20]", | 
|  | 73 			action="store", type="string", dest="numBin", default="20") | 
|  | 74 		self.parser.add_option("--minReadDepth", | 
|  | 75 			help="The threshold for minimum read depth for each bases [10]", | 
|  | 76 			action="store", type="string", dest="minReadDepth", default=10) | 
|  | 77 		self.parser.add_option("--minNBases", | 
|  | 78 			help="The threshold for minimum number of bases for each target regions [10]", | 
|  | 79 			action="store", type="string", dest="minNBases", default= 10) | 
|  | 80 		self.parser.add_option("--sam", | 
|  | 81 			help="If the specified, test and control sample are in SAM [False]", | 
|  | 82 			action="store_true", dest="sam", default="False") | 
|  | 83 		self.parser.add_option("--bed", | 
|  | 84 			help="if specified, control will be in BED format [False]", | 
|  | 85 			action="store_true", dest = "bedInput", default="False") | 
|  | 86 		self.parser.add_option("--pval", | 
|  | 87 			help="The p-value threshold for filtering [0.05]. Applies to Adjusted P-Value.", | 
|  | 88 			action="store", type="string", dest="pval", default=0.05) | 
|  | 89 		self.parser.add_option("--sampleName", | 
|  | 90 			help ="The name to be appended to the front of default output name ['']", | 
|  | 91 			action="store", type="string", dest="sampleName", default='') | 
|  | 92 		self.parser.add_option("--nomultimapped", | 
|  | 93 			help="The option to remove multi-mapped reads [False]", | 
|  | 94 			action="store_true", dest="nomultimapped",default="False") | 
|  | 95 		self.parser.add_option("-p", "--plot", | 
|  | 96 			help="Plots log-ratio distribution for each bin [False]", | 
|  | 97 			action="store_true", dest="plot", default="False") | 
|  | 98 		self.parser.add_option("--minExon", | 
|  | 99 			help="Minimum number of Exons in one bin (if less than this, bin that contains small number of exons" | 
|  | 100 				+"will be moved to the adjacent bins) [2000] ", | 
|  | 101 			action="store", type="string", dest="minExon", default="2000") | 
|  | 102 		self.parser.add_option("--minControlRdForCall", | 
|  | 103 			help="Minimum control readdepth for call [5]", | 
|  | 104 			action="store", type="string", dest="minControl", default="5") | 
|  | 105 | 
|  | 106 		self.parser.add_option("--minTestRdForCall", | 
|  | 107 			help="Minimum test readdepth for call [0]", | 
|  | 108 			action="store", type="string", dest="minTest", default="0") | 
|  | 109 | 
|  | 110 		self.parser.add_option("--minAvgForCall", | 
|  | 111 			help="Minimum average coverage for call [20]", | 
|  | 112 			action="store", type="string", dest="minAvg", default="20") | 
|  | 113 | 
|  | 114 		self.parser.add_option("--maxRegionSize", | 
|  | 115 			help="Maximum Region Size in target region (for breaking large region into smaller region. By default, maxRegionSize 0 means no breakdown) [0]", | 
|  | 116 			action="store", type="string", dest="maxRegionSize", default="0") | 
|  | 117 | 
|  | 118 		self.parser.add_option("--targetRegionSize", | 
|  | 119 			help="Target Region Size for breakdown (if maxRegionSize is non zero) [200]", | 
|  | 120 			action="store", type="string", dest="targetRegionSize", default="200") | 
|  | 121 | 
|  | 122 		self.parser.add_option("-l", "--largeDeletion", | 
|  | 123                         help="if specified, CONTRA will run large deletion analysis (CBS). User must have DNAcopy R-library installed to run the analysis. [False]", | 
|  | 124                         action="store_true", dest = "large", default="False") | 
|  | 125 | 
|  | 126 		self.parser.add_option("--smallSegment", | 
|  | 127 			help="CBS segment size for calling large variations [1]", | 
|  | 128 			action="store", type="string", dest="smallSegment", default="1") | 
|  | 129 | 
|  | 130 		self.parser.add_option("--largeSegment", | 
|  | 131 			help="CBS segment size for calling large variations [25]", | 
|  | 132 			action="store", type="string", dest="largeSegment", default="25") | 
|  | 133 | 
|  | 134 		self.parser.add_option("--lrCallStart", | 
|  | 135 			help="Log ratios start range that will be used to call CNV [-0.3]", | 
|  | 136 			action="store", type="string", dest="lrs", default="-0.3") | 
|  | 137 | 
|  | 138 		self.parser.add_option("--lrCallEnd", | 
|  | 139 			help="Log ratios end range that will be used to call CNV [0.3]", | 
|  | 140 			action="store", type="string", dest="lre", default="0.3") | 
|  | 141 | 
|  | 142 		self.parser.add_option("--passSize", | 
|  | 143 			help="Size of exons that passed the p-value threshold compare to the original exon size [0.35]", | 
|  | 144 			action="store", type="string", dest="passSize", default="0.35") | 
|  | 145 | 
|  | 146 | 
|  | 147 		# required parameters list | 
|  | 148 		self.ERRORLIST = [] | 
|  | 149 | 
|  | 150 		# change system parameters based on any command line | 
|  | 151 		(options, args) = self.parser.parse_args() | 
|  | 152 		if options.target: | 
|  | 153 			self.TARGET = options.target | 
|  | 154 		else: | 
|  | 155 			#self.parser.print_help() | 
|  | 156 			#self.parser.error("--target not supplied") | 
|  | 157 			self.ERRORLIST.append("target") | 
|  | 158 | 
|  | 159 		if options.test: | 
|  | 160 			self.TEST = options.test | 
|  | 161 		else: | 
|  | 162 			#self.parser.error("--test not supplied") | 
|  | 163 			self.ERRORLIST.append("test") | 
|  | 164 | 
|  | 165 		if options.control: | 
|  | 166 			self.CONTROL = options.control | 
|  | 167 		else: | 
|  | 168 			#self.parser.error("--control not supplied") | 
|  | 169 			self.ERRORLIST.append("control") | 
|  | 170 | 
|  | 171 		if options.fasta: | 
|  | 172 			self.FASTA = options.fasta | 
|  | 173 		else: | 
|  | 174 			#self.parser.error("--fasta not supplied") | 
|  | 175 			self.ERRORLIST.append("fasta") | 
|  | 176 | 
|  | 177 		if options.outFolder: | 
|  | 178 			self.OUTFOLDER = options.outFolder | 
|  | 179 		else: | 
|  | 180 			#self.parser.error("--outFolder not supplied") | 
|  | 181 			self.ERRORLIST.append("outfolder") | 
|  | 182 | 
|  | 183 		if len(self.ERRORLIST) != 0: | 
|  | 184 			self.parser.print_help() | 
|  | 185 			self.parser.error("Missing required parameters") | 
|  | 186 | 
|  | 187 		if options.numBin: | 
|  | 188 			binsNumber = options.numBin.split(",") | 
|  | 189 			try: | 
|  | 190 				self.NUMBIN = [int(j) for j in binsNumber] | 
|  | 191 			except: | 
|  | 192 				self.NUMBIN = [20] | 
|  | 193 		if options.minReadDepth: | 
|  | 194 			self.MINREADDEPTH = int(options.minReadDepth) | 
|  | 195 		if options.minNBases: | 
|  | 196 			self.MINNBASES = int(options.minNBases) | 
|  | 197 		if options.sam: | 
|  | 198 			self.SAM = str(options.sam) | 
|  | 199 		if options.pval: | 
|  | 200 			self.PVAL = options.pval | 
|  | 201 		if options.sampleName: | 
|  | 202 			self.SAMPLENAME = options.sampleName | 
|  | 203 		else: | 
|  | 204 			self.SAMPLENAME = 'No-SampleName' | 
|  | 205 		if options.nomultimapped: | 
|  | 206 			self.NOMULTIMAPPED = str(options.nomultimapped) | 
|  | 207 		if options.plot: | 
|  | 208 			self.PLOT = str(options.plot) | 
|  | 209 		if options.bedInput: | 
|  | 210 			self.BEDINPUT = options.bedInput | 
|  | 211 		if options.minExon: | 
|  | 212 			self.MINEXON	= int(options.minExon) | 
|  | 213 		if options.minControl: | 
|  | 214 			self.MINCONTROL = options.minControl | 
|  | 215 		if options.minTest: | 
|  | 216 			self.MINTEST = options.minTest | 
|  | 217 		if options.minAvg: | 
|  | 218 			self.MINAVG  = options.minAvg | 
|  | 219 		if options.maxRegionSize: | 
|  | 220 			self.MAXREGIONSIZE = int(options.maxRegionSize) | 
|  | 221 		if options.targetRegionSize: | 
|  | 222 			self.TARGETREGIONSIZE = int(options.targetRegionSize) | 
|  | 223 		if options.large: | 
|  | 224 			self.LARGE	= str(options.large) | 
|  | 225 		if options.smallSegment: | 
|  | 226 			self.SMALLSEGMENT = options.smallSegment | 
|  | 227 		if options.largeSegment: | 
|  | 228 			self.LARGESEGMENT = options.largeSegment | 
|  | 229 		if options.lre: | 
|  | 230 			self.LRE	= options.lre | 
|  | 231 		if options.lrs: | 
|  | 232 			self.LRS	= options.lrs | 
|  | 233 		if options.passSize: | 
|  | 234 			self.PASSSIZE	= options.passSize | 
|  | 235 | 
|  | 236 	def repeat(self): | 
|  | 237 		# params test | 
|  | 238 		print "target		:", self.TARGET | 
|  | 239 		print "test		:", self.TEST | 
|  | 240 		print "control		:", self.CONTROL | 
|  | 241 		print "fasta		:", self.FASTA | 
|  | 242 		print "outfolder	:", self.OUTFOLDER | 
|  | 243 		print "numBin		:", self.NUMBIN | 
|  | 244 		print "minreaddepth	:", self.MINREADDEPTH | 
|  | 245 		print "minNBases	:", self.MINNBASES | 
|  | 246 		print "sam		:", self.SAM | 
|  | 247 		print "pval		:", self.PVAL | 
|  | 248 		print "sampleName	:", self.SAMPLENAME | 
|  | 249 		print "nomultimapped	:", self.NOMULTIMAPPED | 
|  | 250 		print "plot		:", self.PLOT | 
|  | 251 		print "bedInput		:", self.BEDINPUT | 
|  | 252 		print "minExon		:", self.MINEXON | 
|  | 253 		print "largeDeletion	:", self.LARGE | 
|  | 254 def checkOutputFolder(outF): | 
|  | 255 	print "Creating Output Folder :", | 
|  | 256 | 
|  | 257 	if outF[len(outF)-1] == "/": | 
|  | 258 		outF = outF[:len(outF)-1] | 
|  | 259 | 
|  | 260 	try: | 
|  | 261 		os.mkdir(outF) | 
|  | 262 	except: | 
|  | 263 		print "cannot create folder '%s'" %outF | 
|  | 264 		print "if folder already exist, please specify other folder" | 
|  | 265 		sys.exit(1) | 
|  | 266 | 
|  | 267 	try: | 
|  | 268 		os.mkdir(outF+"/table") | 
|  | 269 		os.mkdir(outF+"/plot") | 
|  | 270 		os.mkdir(outF+"/buf") | 
|  | 271 		os.mkdir(outF+"/buf/ctrData/") | 
|  | 272 		os.mkdir(outF+"/buf/testData/") | 
|  | 273 	except: | 
|  | 274 		print "[ERROR: CANNOT CREATE SUBFOLDERS]" | 
|  | 275 		sys.exit(1) | 
|  | 276 | 
|  | 277 	print " Done." | 
|  | 278 | 
|  | 279 	return outF | 
|  | 280 | 
|  | 281 | 
|  | 282 #BEDINPUT | 
|  | 283 def countTotalReads3(params, folder): | 
|  | 284 	tempFileName    = folder + "/temp.txt" | 
|  | 285         tempReadFile    = open(tempFileName, "w") | 
|  | 286 	libsize		= get_libsize(params.BEDINPUT) | 
|  | 287 	tempReadFile.write(libsize) | 
|  | 288         #tempReadFile.write(params.CONTROLREADCOUNT) | 
|  | 289         tempReadFile.close() | 
|  | 290 | 
|  | 291 | 
|  | 292 def countTotalReads(params, folder): | 
|  | 293 	if 'testData' in folder: | 
|  | 294 		inF = params.TEST | 
|  | 295 	else: | 
|  | 296 		inF = params.CONTROL | 
|  | 297 | 
|  | 298 	# Get Total ReadCount | 
|  | 299 	getreadcount = os.system("samtools view %s | wc -l > %s/temp.txt" %(inF,folder)) | 
|  | 300 | 
|  | 301 def samToBam(samfile, bamfile): | 
|  | 302 	args = shlex.split("samtools view -bS %s -o %s" %(samfile, bamfile)) | 
|  | 303 	samtobam = subprocess.call(args) | 
|  | 304 | 
|  | 305 	return bamfile | 
|  | 306 | 
|  | 307 def removeMultiMapped(inF, newBAM): | 
|  | 308         # Get New BAM Files with mapping quality > 0 | 
|  | 309         args = shlex.split("samtools view -bq 1 %s -o %s" %(inF, newBAM)) | 
|  | 310 	removeMM = subprocess.call(args) | 
|  | 311         print "Multi mapped reads removed. " | 
|  | 312 | 
|  | 313 | 
|  | 314 #BEDINPUT | 
|  | 315 def convertBamSimple(params, folder, targetList, genomeFile): | 
|  | 316 	if 'testData' in folder: | 
|  | 317                 inF = params.TEST | 
|  | 318                 print "Converting TEST Sample... " | 
|  | 319         else: | 
|  | 320                 inF = params.CONTROL | 
|  | 321                 print "Converting CONTROL Sample... " | 
|  | 322 | 
|  | 323 	#Copy file to working folder | 
|  | 324 	os.system("cp %s %s" %(inF, folder+"sample.BEDGRAPH")) | 
|  | 325 | 
|  | 326 	# Split Bedgraph by its chromosomes | 
|  | 327         splitByChromosome(folder) | 
|  | 328 | 
|  | 329         # Slice the coverage files to only cover the targeted regions | 
|  | 330         print "Getting targeted regions DOC..." | 
|  | 331         convertGeneCoordinate(targetList, folder) | 
|  | 332 | 
|  | 333 	# LIBSIZE | 
|  | 334 	libsize = str(get_libsize(folder+"geneRefCoverage2.txt")) | 
|  | 335 	tempLibSize = open(folder + "/temp.txt", "w") | 
|  | 336 	tempLibSize.write(libsize) | 
|  | 337 	tempLibSize.close() | 
|  | 338 | 
|  | 339 	print "Targeted regions pre-processing: Done" | 
|  | 340 | 
|  | 341 | 
|  | 342 def convertBam(params, folder, targetList, genomeFile): | 
|  | 343 	if 'testData' in folder: | 
|  | 344 		inF = params.TEST | 
|  | 345 		print "Converting TEST Sample... " | 
|  | 346 	else: | 
|  | 347 		inF = params.CONTROL | 
|  | 348 		print "Converting CONTROL Sample... " | 
|  | 349 | 
|  | 350 	# Convert BAM Files to BEDGRAPH | 
|  | 351 	bedgraph = folder + "sample.BEDGRAPH" | 
|  | 352 	args = shlex.split("genomeCoverageBed -ibam %s -bga -g %s" %(inF, genomeFile)) | 
|  | 353 	#output = subprocess.Popen(args, stdout = subprocess.PIPE).communicate()[0] | 
|  | 354 	iOutFile = open(bedgraph, "w") | 
|  | 355 	#iOutFile.write(output) | 
|  | 356 	output	= subprocess.Popen(args, stdout = iOutFile).wait() | 
|  | 357 	iOutFile.close() | 
|  | 358 | 
|  | 359 	# Split Bedgraph by its chromosomes | 
|  | 360 	splitByChromosome(folder) | 
|  | 361 | 
|  | 362 	# Slice the coverage files to only cover the targeted regions | 
|  | 363 	print "Getting targeted regions DOC..." | 
|  | 364 	convertGeneCoordinate(targetList, folder) | 
|  | 365 | 
|  | 366 	# LIBSIZE | 
|  | 367         libsize = str(get_libsize(folder+"geneRefCoverage2.txt")) | 
|  | 368 	tempLibSize = open(folder + "temp.txt", "w") | 
|  | 369 	tempLibSize.write(libsize) | 
|  | 370 	tempLibSize.close() | 
|  | 371 | 
|  | 372 	print "Targeted regions pre-processing: Done" | 
|  | 373 | 
|  | 374 def analysisPerBin(params, num_bin, outFolder, targetList): | 
|  | 375 	import shutil | 
|  | 376 | 
|  | 377 	bufLoc = outFolder + "/buf" | 
|  | 378 	# Assign bin number to the median and average file | 
|  | 379 	numBin = assignBin(num_bin, bufLoc+"/average.txt", bufLoc+"/bin", targetList, params.MINEXON) | 
|  | 380 | 
|  | 381 	#copy bin_boundary to plot folder | 
|  | 382 	#outBounFile = os.path.join(outFolder,  "plot", "bin_boundary"+str(num_bin)) | 
|  | 383 	#curBounFile = os.path.join(bufLoc, "bin" + str(num_bin) + ".boundaries.txt") | 
|  | 384 	#shutil.copy(curBounFile, outBounFile) | 
|  | 385 | 
|  | 386 | 
|  | 387 	print "Significance Test ...  " | 
|  | 388 	rScriptName = os.path.join(scriptPath, "scripts", "cn_analysis.v3.R") | 
|  | 389 	args = shlex.split("Rscript %s %s %s %s %s %s %s %s %s %s %s" | 
|  | 390 		%(rScriptName, num_bin, params.MINREADDEPTH, params.MINNBASES, outFolder, params.SAMPLENAME,params.PLOT, numBin, params.MINCONTROL, params.MINTEST, params.MINAVG)) | 
|  | 391 	rscr = subprocess.call(args) | 
|  | 392 | 
|  | 393 | 
|  | 394 	print "Generating Output Files ... " | 
|  | 395 	# Analysis of CNV | 
|  | 396 	tNameList = os.listdir(outFolder+"/table/") | 
|  | 397 	if num_bin > 1: | 
|  | 398 		tNameId = str(num_bin) + "bins" | 
|  | 399 	else: | 
|  | 400 		tNameId = str(num_bin) + "bin" | 
|  | 401         for tName in tNameList: | 
|  | 402 		if tNameId in tName: | 
|  | 403 			break | 
|  | 404 | 
|  | 405         if "CNATable" in tName: | 
|  | 406 		tName = tName[:len(tName)-4] | 
|  | 407 		tableName = outFolder + "/table/" + tName | 
|  | 408 		bufTable  = bufLoc + "/" + tName | 
|  | 409 		applyThreshold(tableName, bufTable, params.PVAL, 100000) #params.MAXGAP = 100000 | 
|  | 410 | 
|  | 411 		# Large Region CBS | 
|  | 412 		if (params.LARGE != "False"): | 
|  | 413 		  rScriptName2 = os.path.join(scriptPath, "scripts", "large_region_cbs.R") | 
|  | 414 | 
|  | 415 		  args = shlex.split("Rscript %s %s %s %s %s %s %s %s %s" | 
|  | 416 			%(rScriptName2, tableName+".txt", params.SMALLSEGMENT, params.LARGESEGMENT, params.PVAL, params.PASSSIZE, params.LRS, params.LRE, bufLoc)) | 
|  | 417 		  rscr2 = subprocess.call(args) | 
|  | 418 | 
|  | 419 		# Generate the DNA sequence (for VCF file) | 
|  | 420 		bedFile  = bufTable + ".BED" | 
|  | 421 		bedFasta = bufTable + ".fastaOut.txt" | 
|  | 422 		fastaFile = params.FASTA | 
|  | 423 		args = shlex.split("fastaFromBed -fi %s -bed %s -fo %s -name" | 
|  | 424 				%(fastaFile, bedFile, bedFasta)) | 
|  | 425 		fastaBED = subprocess.call(args) | 
|  | 426 | 
|  | 427 		# Write VCF | 
|  | 428 		print  "Creating VCF file ... " | 
|  | 429 		vcfFile = tableName + ".vcf" | 
|  | 430 		vcf_out(bedFasta, vcfFile) | 
|  | 431 | 
|  | 432 		print "%s created. " %(vcfFile) | 
|  | 433 | 
|  | 434         else: | 
|  | 435 		print "Table not found" | 
|  | 436 | 
|  | 437 def removeTempFolder(tempFolderPath): | 
|  | 438 	import shutil | 
|  | 439 | 
|  | 440 	shutil.rmtree(tempFolderPath) | 
|  | 441 | 
|  | 442 	print "Temp Folder Removed" | 
|  | 443 | 
|  | 444 | 
|  | 445 def main(): | 
|  | 446 	# option handling | 
|  | 447 	params = Params() | 
|  | 448 	params.repeat() | 
|  | 449 | 
|  | 450 	# output folder handling | 
|  | 451 	outFolder = checkOutputFolder(params.OUTFOLDER) | 
|  | 452 	bufLoc = outFolder + "/buf" | 
|  | 453 | 
|  | 454 	# convert target file | 
|  | 455 	sorted_target = os.path.join(bufLoc, "target.BED") | 
|  | 456 	os.system("sort -k1,1 -k2n %s > %s" %(params.TARGET, sorted_target)) | 
|  | 457 | 
|  | 458 	# target breakdown | 
|  | 459 	if params.MAXREGIONSIZE > 0: | 
|  | 460 		new_target = os.path.join(bufLoc, "target_breakdown.BED") | 
|  | 461 		target_breakdown(sorted_target, params.MAXREGIONSIZE, params.TARGETREGIONSIZE, new_target) | 
|  | 462 		sorted_target = new_target | 
|  | 463 | 
|  | 464 	targetList = convertTarget(sorted_target) | 
|  | 465 | 
|  | 466 	# convert sam to bam if -sam specified | 
|  | 467 	if (params.SAM == "True"): | 
|  | 468 		print "Pre-processing SAM files" | 
|  | 469 | 
|  | 470 		test_bam = bufLoc + "/test.BAM" | 
|  | 471 		ctr_bam  = bufLoc + "/control.BAM" | 
|  | 472 | 
|  | 473 		samTest = Process(target= samToBam, args=(params.TEST, test_bam)) | 
|  | 474 		if params.BEDINPUT == "False": | 
|  | 475 			samCtr = Process(target= samToBam, args=(params.CONTROL, ctr_bam)) | 
|  | 476 | 
|  | 477 		samTest.start() | 
|  | 478 		if params.BEDINPUT == "False": | 
|  | 479 			samCtr.start() | 
|  | 480 | 
|  | 481 		samTest.join() | 
|  | 482 		if params.BEDINPUT == "False": | 
|  | 483 			samCtr.join() | 
|  | 484 | 
|  | 485 		params.TEST = test_bam | 
|  | 486 		if params.BEDINPUT == "False": | 
|  | 487 			params.CONTROL = ctr_bam | 
|  | 488 | 
|  | 489 	# remove multi mapped reads if --nomultimapped is specified | 
|  | 490 	if (params.NOMULTIMAPPED == "True"): | 
|  | 491 		print "Removing multi-mapped reads" | 
|  | 492 | 
|  | 493 		test_bam = bufLoc + "/test_reliable.BAM" | 
|  | 494                 ctr_bam  = bufLoc + "/control_reliable.BAM" | 
|  | 495 | 
|  | 496                 bamTest = Process(target= removeMultiMapped, args=(params.TEST, test_bam)) | 
|  | 497 		if params.BEDINPUT == "False": | 
|  | 498                 	bamCtr = Process(target= removeMultiMapped, args=(params.CONTROL, ctr_bam)) | 
|  | 499 | 
|  | 500                 bamTest.start() | 
|  | 501 		if params.BEDINPUT == "False": | 
|  | 502 	                bamCtr.start() | 
|  | 503 | 
|  | 504                 bamTest.join() | 
|  | 505 		if params.BEDINPUT == "False": | 
|  | 506 	                bamCtr.join() | 
|  | 507 | 
|  | 508                 params.TEST = test_bam | 
|  | 509 		if params.BEDINPUT == "False": | 
|  | 510 	                params.CONTROL = ctr_bam | 
|  | 511 | 
|  | 512 	# Get Chromosome Length | 
|  | 513 	genomeFile = bufLoc + '/sample.Genome' | 
|  | 514 	get_genome(params.TEST, genomeFile) | 
|  | 515 | 
|  | 516 	# spawn bam converting scripts | 
|  | 517 	pTest = Process(target= convertBam, | 
|  | 518 			args=(params, bufLoc+'/testData/', targetList, genomeFile)) | 
|  | 519 | 
|  | 520 	#BEDINPUT | 
|  | 521 	if params.BEDINPUT == "False": | 
|  | 522 | 
|  | 523 		cTest = Process(target= convertBam, | 
|  | 524 			args=(params, bufLoc+'/ctrData/' , targetList, genomeFile)) | 
|  | 525 	else: | 
|  | 526 		cTest = Process(target= convertBamSimple, | 
|  | 527 			args=(params, bufLoc+'/ctrData/', targetList, genomeFile)) | 
|  | 528 	# start the processes | 
|  | 529 	pTest.start() | 
|  | 530 	cTest.start() | 
|  | 531 | 
|  | 532 	# wait for all the processes to finish before continuing | 
|  | 533 	pTest.join() | 
|  | 534 	cTest.join() | 
|  | 535 | 
|  | 536 	# Get the read depth count from temporary folder | 
|  | 537 	for folder in [bufLoc+'/testData/', bufLoc+'/ctrData/']: | 
|  | 538 		if 'testData' in folder: | 
|  | 539 			t1 = int(file.readlines(open(folder+"temp.txt"))[0].strip("\n")) | 
|  | 540 		else: | 
|  | 541 			n1 = int(file.readlines(open(folder+"temp.txt"))[0].strip("\n")) | 
|  | 542 	print "Test file read depth 	= ", t1 | 
|  | 543 	print "Control file read depth 	= ", n1 | 
|  | 544 	print "Pre-processing Completed. " | 
|  | 545 | 
|  | 546 	# Get the Average of the Log Ratio | 
|  | 547 	print "Getting the Log Ratio ... " | 
|  | 548 	testListName = bufLoc + '/testData/geneRefCoverage.txt' | 
|  | 549 	controlListName = bufLoc + '/ctrData/geneRefCoverage.txt' | 
|  | 550 	avOut = bufLoc + "/average.txt" | 
|  | 551 	averageCount(testListName, controlListName, avOut, t1, n1, params.MINREADDEPTH, params.MINNBASES) | 
|  | 552 | 
|  | 553 	# Analysis. [Bin, significance test, large deletion, vcf output] | 
|  | 554 	print "Binning ... " | 
|  | 555 	binProc = [] | 
|  | 556 	for numBin in params.NUMBIN: | 
|  | 557 		binProc.append(Process(target= analysisPerBin, args=(params,numBin,outFolder,targetList))) | 
|  | 558 | 
|  | 559 	for proc in binProc: | 
|  | 560 		proc.start() | 
|  | 561 | 
|  | 562 	for proc in binProc: | 
|  | 563 		proc.join() | 
|  | 564 | 
|  | 565 	# Removed Temp Folder | 
|  | 566 	removeTempFolder(bufLoc) | 
|  | 567 | 
|  | 568 if __name__ == "__main__": | 
|  | 569 	main() | 
|  | 570 	print "Done... " |