0
|
1 #!/usr/bin/python
|
|
2
|
|
3 # ----------------------------------------------------------------------#
|
|
4 # Copyright (c) 2011, Richard Lupat & Jason Li.
|
|
5 #
|
|
6 # > Source License <
|
|
7 # This file is part of CONTRA.
|
|
8 #
|
|
9 # CONTRA is free software: you can redistribute it and/or modify
|
|
10 # it under the terms of the GNU General Public License as published by
|
|
11 # the Free Software Foundation, either version 3 of the License, or
|
|
12 # (at your option) any later version.
|
|
13 #
|
|
14 # CONTRA is distributed in the hope that it will be useful,
|
|
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
17 # GNU General Public License for more details.
|
|
18 #
|
|
19 # You should have received a copy of the GNU General Public License
|
|
20 # along with CONTRA. If not, see <http://www.gnu.org/licenses/>.
|
|
21 #
|
|
22 #
|
|
23 #-----------------------------------------------------------------------#
|
|
24 # Last Updated : 23 July 2012 16:43PM
|
|
25
|
|
26
|
|
27 import os
|
|
28 from optparse import OptionParser
|
|
29 import sys
|
|
30 import subprocess
|
|
31 import shlex
|
|
32 from multiprocessing import Process, Manager
|
|
33
|
|
34 from scripts.assign_bin_number_v2 import *
|
|
35 from scripts.average_count import *
|
|
36 from scripts.cn_apply_threshold import *
|
|
37 from scripts.convert_gene_coordinate import *
|
|
38 from scripts.convert_targeted_regions import *
|
|
39 from scripts.split_chromosome import *
|
|
40 from scripts.vcf_out import *
|
|
41 from scripts.get_chr_length import *
|
|
42 from scripts.count_libsize import *
|
|
43 from scripts.target_breakdown import *
|
|
44
|
|
45 #Absolute Path
|
|
46 scriptPath = os.path.realpath(os.path.dirname(sys.argv[0]))
|
|
47
|
|
48 class Params:
|
|
49 """
|
|
50 Class for top-level system parameters
|
|
51 """
|
|
52
|
|
53 def __init__(self):
|
|
54 # command-line option definition
|
|
55 self.parser = OptionParser()
|
|
56 self.parser.add_option("-t", "--target",
|
|
57 help="Target region definition file [REQUIRED] [BED Format]",
|
|
58 action="store", type="string", dest="target")
|
|
59 self.parser.add_option("-s", "--test",
|
|
60 help="Alignment file for the test sample [REQUIRED] [BAM/SAM]",
|
|
61 action="store", type="string", dest="test")
|
|
62 self.parser.add_option("-c", "--control",
|
|
63 help="Alignment file for the control sample [REQUIRED] [BAM/SAM]",
|
|
64 action="store", type="string", dest="control")
|
|
65 self.parser.add_option("-f", "--fasta",
|
|
66 help="Reference Genome [REQUIRED][FASTA]",
|
|
67 action="store", type="string", dest="fasta")
|
|
68 self.parser.add_option("-o", "--outFolder",
|
|
69 help="the output folder path name to store the output of analysis [REQUIRED]",
|
|
70 action="store", type="string", dest="outFolder")
|
|
71 self.parser.add_option("--numBin",
|
|
72 help="Numbers of bins to group regions. User can specify multiple experiments with different number of bins (comma separated) [20]",
|
|
73 action="store", type="string", dest="numBin", default="20")
|
|
74 self.parser.add_option("--minReadDepth",
|
|
75 help="The threshold for minimum read depth for each bases [10]",
|
|
76 action="store", type="string", dest="minReadDepth", default=10)
|
|
77 self.parser.add_option("--minNBases",
|
|
78 help="The threshold for minimum number of bases for each target regions [10]",
|
|
79 action="store", type="string", dest="minNBases", default= 10)
|
|
80 self.parser.add_option("--sam",
|
|
81 help="If the specified, test and control sample are in SAM [False]",
|
|
82 action="store_true", dest="sam", default="False")
|
|
83 self.parser.add_option("--bed",
|
|
84 help="if specified, control will be in BED format [False]",
|
|
85 action="store_true", dest = "bedInput", default="False")
|
|
86 self.parser.add_option("--pval",
|
|
87 help="The p-value threshold for filtering [0.05]. Applies to Adjusted P-Value.",
|
|
88 action="store", type="string", dest="pval", default=0.05)
|
|
89 self.parser.add_option("--sampleName",
|
|
90 help ="The name to be appended to the front of default output name ['']",
|
|
91 action="store", type="string", dest="sampleName", default='')
|
|
92 self.parser.add_option("--nomultimapped",
|
|
93 help="The option to remove multi-mapped reads [False]",
|
|
94 action="store_true", dest="nomultimapped",default="False")
|
|
95 self.parser.add_option("-p", "--plot",
|
|
96 help="Plots log-ratio distribution for each bin [False]",
|
|
97 action="store_true", dest="plot", default="False")
|
|
98 self.parser.add_option("--minExon",
|
|
99 help="Minimum number of Exons in one bin (if less than this, bin that contains small number of exons"
|
|
100 +"will be moved to the adjacent bins) [2000] ",
|
|
101 action="store", type="string", dest="minExon", default="2000")
|
|
102 self.parser.add_option("--minControlRdForCall",
|
|
103 help="Minimum control readdepth for call [5]",
|
|
104 action="store", type="string", dest="minControl", default="5")
|
|
105
|
|
106 self.parser.add_option("--minTestRdForCall",
|
|
107 help="Minimum test readdepth for call [0]",
|
|
108 action="store", type="string", dest="minTest", default="0")
|
|
109
|
|
110 self.parser.add_option("--minAvgForCall",
|
|
111 help="Minimum average coverage for call [20]",
|
|
112 action="store", type="string", dest="minAvg", default="20")
|
|
113
|
|
114 self.parser.add_option("--maxRegionSize",
|
|
115 help="Maximum Region Size in target region (for breaking large region into smaller region. By default, maxRegionSize 0 means no breakdown) [0]",
|
|
116 action="store", type="string", dest="maxRegionSize", default="0")
|
|
117
|
|
118 self.parser.add_option("--targetRegionSize",
|
|
119 help="Target Region Size for breakdown (if maxRegionSize is non zero) [200]",
|
|
120 action="store", type="string", dest="targetRegionSize", default="200")
|
|
121
|
|
122 self.parser.add_option("-l", "--largeDeletion",
|
|
123 help="if specified, CONTRA will run large deletion analysis (CBS). User must have DNAcopy R-library installed to run the analysis. [False]",
|
|
124 action="store_true", dest = "large", default="False")
|
|
125
|
|
126 self.parser.add_option("--smallSegment",
|
|
127 help="CBS segment size for calling large variations [1]",
|
|
128 action="store", type="string", dest="smallSegment", default="1")
|
|
129
|
|
130 self.parser.add_option("--largeSegment",
|
|
131 help="CBS segment size for calling large variations [25]",
|
|
132 action="store", type="string", dest="largeSegment", default="25")
|
|
133
|
|
134 self.parser.add_option("--lrCallStart",
|
|
135 help="Log ratios start range that will be used to call CNV [-0.3]",
|
|
136 action="store", type="string", dest="lrs", default="-0.3")
|
|
137
|
|
138 self.parser.add_option("--lrCallEnd",
|
|
139 help="Log ratios end range that will be used to call CNV [0.3]",
|
|
140 action="store", type="string", dest="lre", default="0.3")
|
|
141
|
|
142 self.parser.add_option("--passSize",
|
|
143 help="Size of exons that passed the p-value threshold compare to the original exon size [0.35]",
|
|
144 action="store", type="string", dest="passSize", default="0.35")
|
|
145
|
|
146
|
|
147 # required parameters list
|
|
148 self.ERRORLIST = []
|
|
149
|
|
150 # change system parameters based on any command line
|
|
151 (options, args) = self.parser.parse_args()
|
|
152 if options.target:
|
|
153 self.TARGET = options.target
|
|
154 else:
|
|
155 #self.parser.print_help()
|
|
156 #self.parser.error("--target not supplied")
|
|
157 self.ERRORLIST.append("target")
|
|
158
|
|
159 if options.test:
|
|
160 self.TEST = options.test
|
|
161 else:
|
|
162 #self.parser.error("--test not supplied")
|
|
163 self.ERRORLIST.append("test")
|
|
164
|
|
165 if options.control:
|
|
166 self.CONTROL = options.control
|
|
167 else:
|
|
168 #self.parser.error("--control not supplied")
|
|
169 self.ERRORLIST.append("control")
|
|
170
|
|
171 if options.fasta:
|
|
172 self.FASTA = options.fasta
|
|
173 else:
|
|
174 #self.parser.error("--fasta not supplied")
|
|
175 self.ERRORLIST.append("fasta")
|
|
176
|
|
177 if options.outFolder:
|
|
178 self.OUTFOLDER = options.outFolder
|
|
179 else:
|
|
180 #self.parser.error("--outFolder not supplied")
|
|
181 self.ERRORLIST.append("outfolder")
|
|
182
|
|
183 if len(self.ERRORLIST) != 0:
|
|
184 self.parser.print_help()
|
|
185 self.parser.error("Missing required parameters")
|
|
186
|
|
187 if options.numBin:
|
|
188 binsNumber = options.numBin.split(",")
|
|
189 try:
|
|
190 self.NUMBIN = [int(j) for j in binsNumber]
|
|
191 except:
|
|
192 self.NUMBIN = [20]
|
|
193 if options.minReadDepth:
|
|
194 self.MINREADDEPTH = int(options.minReadDepth)
|
|
195 if options.minNBases:
|
|
196 self.MINNBASES = int(options.minNBases)
|
|
197 if options.sam:
|
|
198 self.SAM = str(options.sam)
|
|
199 if options.pval:
|
|
200 self.PVAL = options.pval
|
|
201 if options.sampleName:
|
|
202 self.SAMPLENAME = options.sampleName
|
|
203 else:
|
|
204 self.SAMPLENAME = 'No-SampleName'
|
|
205 if options.nomultimapped:
|
|
206 self.NOMULTIMAPPED = str(options.nomultimapped)
|
|
207 if options.plot:
|
|
208 self.PLOT = str(options.plot)
|
|
209 if options.bedInput:
|
|
210 self.BEDINPUT = options.bedInput
|
|
211 if options.minExon:
|
|
212 self.MINEXON = int(options.minExon)
|
|
213 if options.minControl:
|
|
214 self.MINCONTROL = options.minControl
|
|
215 if options.minTest:
|
|
216 self.MINTEST = options.minTest
|
|
217 if options.minAvg:
|
|
218 self.MINAVG = options.minAvg
|
|
219 if options.maxRegionSize:
|
|
220 self.MAXREGIONSIZE = int(options.maxRegionSize)
|
|
221 if options.targetRegionSize:
|
|
222 self.TARGETREGIONSIZE = int(options.targetRegionSize)
|
|
223 if options.large:
|
|
224 self.LARGE = str(options.large)
|
|
225 if options.smallSegment:
|
|
226 self.SMALLSEGMENT = options.smallSegment
|
|
227 if options.largeSegment:
|
|
228 self.LARGESEGMENT = options.largeSegment
|
|
229 if options.lre:
|
|
230 self.LRE = options.lre
|
|
231 if options.lrs:
|
|
232 self.LRS = options.lrs
|
|
233 if options.passSize:
|
|
234 self.PASSSIZE = options.passSize
|
|
235
|
|
236 def repeat(self):
|
|
237 # params test
|
|
238 print "target :", self.TARGET
|
|
239 print "test :", self.TEST
|
|
240 print "control :", self.CONTROL
|
|
241 print "fasta :", self.FASTA
|
|
242 print "outfolder :", self.OUTFOLDER
|
|
243 print "numBin :", self.NUMBIN
|
|
244 print "minreaddepth :", self.MINREADDEPTH
|
|
245 print "minNBases :", self.MINNBASES
|
|
246 print "sam :", self.SAM
|
|
247 print "pval :", self.PVAL
|
|
248 print "sampleName :", self.SAMPLENAME
|
|
249 print "nomultimapped :", self.NOMULTIMAPPED
|
|
250 print "plot :", self.PLOT
|
|
251 print "bedInput :", self.BEDINPUT
|
|
252 print "minExon :", self.MINEXON
|
|
253 print "largeDeletion :", self.LARGE
|
|
254 def checkOutputFolder(outF):
|
|
255 print "Creating Output Folder :",
|
|
256
|
|
257 if outF[len(outF)-1] == "/":
|
|
258 outF = outF[:len(outF)-1]
|
|
259
|
|
260 try:
|
|
261 os.mkdir(outF)
|
|
262 except:
|
|
263 print "cannot create folder '%s'" %outF
|
|
264 print "if folder already exist, please specify other folder"
|
|
265 sys.exit(1)
|
|
266
|
|
267 try:
|
|
268 os.mkdir(outF+"/table")
|
|
269 os.mkdir(outF+"/plot")
|
|
270 os.mkdir(outF+"/buf")
|
|
271 os.mkdir(outF+"/buf/ctrData/")
|
|
272 os.mkdir(outF+"/buf/testData/")
|
|
273 except:
|
|
274 print "[ERROR: CANNOT CREATE SUBFOLDERS]"
|
|
275 sys.exit(1)
|
|
276
|
|
277 print " Done."
|
|
278
|
|
279 return outF
|
|
280
|
|
281
|
|
282 #BEDINPUT
|
|
283 def countTotalReads3(params, folder):
|
|
284 tempFileName = folder + "/temp.txt"
|
|
285 tempReadFile = open(tempFileName, "w")
|
|
286 libsize = get_libsize(params.BEDINPUT)
|
|
287 tempReadFile.write(libsize)
|
|
288 #tempReadFile.write(params.CONTROLREADCOUNT)
|
|
289 tempReadFile.close()
|
|
290
|
|
291
|
|
292 def countTotalReads(params, folder):
|
|
293 if 'testData' in folder:
|
|
294 inF = params.TEST
|
|
295 else:
|
|
296 inF = params.CONTROL
|
|
297
|
|
298 # Get Total ReadCount
|
|
299 getreadcount = os.system("samtools view %s | wc -l > %s/temp.txt" %(inF,folder))
|
|
300
|
|
301 def samToBam(samfile, bamfile):
|
|
302 args = shlex.split("samtools view -bS %s -o %s" %(samfile, bamfile))
|
|
303 samtobam = subprocess.call(args)
|
|
304
|
|
305 return bamfile
|
|
306
|
|
307 def removeMultiMapped(inF, newBAM):
|
|
308 # Get New BAM Files with mapping quality > 0
|
|
309 args = shlex.split("samtools view -bq 1 %s -o %s" %(inF, newBAM))
|
|
310 removeMM = subprocess.call(args)
|
|
311 print "Multi mapped reads removed. "
|
|
312
|
|
313
|
|
314 #BEDINPUT
|
|
315 def convertBamSimple(params, folder, targetList, genomeFile):
|
|
316 if 'testData' in folder:
|
|
317 inF = params.TEST
|
|
318 print "Converting TEST Sample... "
|
|
319 else:
|
|
320 inF = params.CONTROL
|
|
321 print "Converting CONTROL Sample... "
|
|
322
|
|
323 #Copy file to working folder
|
|
324 os.system("cp %s %s" %(inF, folder+"sample.BEDGRAPH"))
|
|
325
|
|
326 # Split Bedgraph by its chromosomes
|
|
327 splitByChromosome(folder)
|
|
328
|
|
329 # Slice the coverage files to only cover the targeted regions
|
|
330 print "Getting targeted regions DOC..."
|
|
331 convertGeneCoordinate(targetList, folder)
|
|
332
|
|
333 # LIBSIZE
|
|
334 libsize = str(get_libsize(folder+"geneRefCoverage2.txt"))
|
|
335 tempLibSize = open(folder + "/temp.txt", "w")
|
|
336 tempLibSize.write(libsize)
|
|
337 tempLibSize.close()
|
|
338
|
|
339 print "Targeted regions pre-processing: Done"
|
|
340
|
|
341
|
|
342 def convertBam(params, folder, targetList, genomeFile):
|
|
343 if 'testData' in folder:
|
|
344 inF = params.TEST
|
|
345 print "Converting TEST Sample... "
|
|
346 else:
|
|
347 inF = params.CONTROL
|
|
348 print "Converting CONTROL Sample... "
|
|
349
|
|
350 # Convert BAM Files to BEDGRAPH
|
|
351 bedgraph = folder + "sample.BEDGRAPH"
|
|
352 args = shlex.split("genomeCoverageBed -ibam %s -bga -g %s" %(inF, genomeFile))
|
|
353 #output = subprocess.Popen(args, stdout = subprocess.PIPE).communicate()[0]
|
|
354 iOutFile = open(bedgraph, "w")
|
|
355 #iOutFile.write(output)
|
|
356 output = subprocess.Popen(args, stdout = iOutFile).wait()
|
|
357 iOutFile.close()
|
|
358
|
|
359 # Split Bedgraph by its chromosomes
|
|
360 splitByChromosome(folder)
|
|
361
|
|
362 # Slice the coverage files to only cover the targeted regions
|
|
363 print "Getting targeted regions DOC..."
|
|
364 convertGeneCoordinate(targetList, folder)
|
|
365
|
|
366 # LIBSIZE
|
|
367 libsize = str(get_libsize(folder+"geneRefCoverage2.txt"))
|
|
368 tempLibSize = open(folder + "temp.txt", "w")
|
|
369 tempLibSize.write(libsize)
|
|
370 tempLibSize.close()
|
|
371
|
|
372 print "Targeted regions pre-processing: Done"
|
|
373
|
|
374 def analysisPerBin(params, num_bin, outFolder, targetList):
|
|
375 import shutil
|
|
376
|
|
377 bufLoc = outFolder + "/buf"
|
|
378 # Assign bin number to the median and average file
|
|
379 numBin = assignBin(num_bin, bufLoc+"/average.txt", bufLoc+"/bin", targetList, params.MINEXON)
|
|
380
|
|
381 #copy bin_boundary to plot folder
|
|
382 #outBounFile = os.path.join(outFolder, "plot", "bin_boundary"+str(num_bin))
|
|
383 #curBounFile = os.path.join(bufLoc, "bin" + str(num_bin) + ".boundaries.txt")
|
|
384 #shutil.copy(curBounFile, outBounFile)
|
|
385
|
|
386
|
|
387 print "Significance Test ... "
|
|
388 rScriptName = os.path.join(scriptPath, "scripts", "cn_analysis.v3.R")
|
|
389 args = shlex.split("Rscript %s %s %s %s %s %s %s %s %s %s %s"
|
|
390 %(rScriptName, num_bin, params.MINREADDEPTH, params.MINNBASES, outFolder, params.SAMPLENAME,params.PLOT, numBin, params.MINCONTROL, params.MINTEST, params.MINAVG))
|
|
391 rscr = subprocess.call(args)
|
|
392
|
|
393
|
|
394 print "Generating Output Files ... "
|
|
395 # Analysis of CNV
|
|
396 tNameList = os.listdir(outFolder+"/table/")
|
|
397 if num_bin > 1:
|
|
398 tNameId = str(num_bin) + "bins"
|
|
399 else:
|
|
400 tNameId = str(num_bin) + "bin"
|
|
401 for tName in tNameList:
|
|
402 if tNameId in tName:
|
|
403 break
|
|
404
|
|
405 if "CNATable" in tName:
|
|
406 tName = tName[:len(tName)-4]
|
|
407 tableName = outFolder + "/table/" + tName
|
|
408 bufTable = bufLoc + "/" + tName
|
|
409 applyThreshold(tableName, bufTable, params.PVAL, 100000) #params.MAXGAP = 100000
|
|
410
|
|
411 # Large Region CBS
|
|
412 if (params.LARGE != "False"):
|
|
413 rScriptName2 = os.path.join(scriptPath, "scripts", "large_region_cbs.R")
|
|
414
|
|
415 args = shlex.split("Rscript %s %s %s %s %s %s %s %s %s"
|
|
416 %(rScriptName2, tableName+".txt", params.SMALLSEGMENT, params.LARGESEGMENT, params.PVAL, params.PASSSIZE, params.LRS, params.LRE, bufLoc))
|
|
417 rscr2 = subprocess.call(args)
|
|
418
|
|
419 # Generate the DNA sequence (for VCF file)
|
|
420 bedFile = bufTable + ".BED"
|
|
421 bedFasta = bufTable + ".fastaOut.txt"
|
|
422 fastaFile = params.FASTA
|
|
423 args = shlex.split("fastaFromBed -fi %s -bed %s -fo %s -name"
|
|
424 %(fastaFile, bedFile, bedFasta))
|
|
425 fastaBED = subprocess.call(args)
|
|
426
|
|
427 # Write VCF
|
|
428 print "Creating VCF file ... "
|
|
429 vcfFile = tableName + ".vcf"
|
|
430 vcf_out(bedFasta, vcfFile)
|
|
431
|
|
432 print "%s created. " %(vcfFile)
|
|
433
|
|
434 else:
|
|
435 print "Table not found"
|
|
436
|
|
437 def removeTempFolder(tempFolderPath):
|
|
438 import shutil
|
|
439
|
|
440 shutil.rmtree(tempFolderPath)
|
|
441
|
|
442 print "Temp Folder Removed"
|
|
443
|
|
444
|
|
445 def main():
|
|
446 # option handling
|
|
447 params = Params()
|
|
448 params.repeat()
|
|
449
|
|
450 # output folder handling
|
|
451 outFolder = checkOutputFolder(params.OUTFOLDER)
|
|
452 bufLoc = outFolder + "/buf"
|
|
453
|
|
454 # convert target file
|
|
455 sorted_target = os.path.join(bufLoc, "target.BED")
|
|
456 os.system("sort -k1,1 -k2n %s > %s" %(params.TARGET, sorted_target))
|
|
457
|
|
458 # target breakdown
|
|
459 if params.MAXREGIONSIZE > 0:
|
|
460 new_target = os.path.join(bufLoc, "target_breakdown.BED")
|
|
461 target_breakdown(sorted_target, params.MAXREGIONSIZE, params.TARGETREGIONSIZE, new_target)
|
|
462 sorted_target = new_target
|
|
463
|
|
464 targetList = convertTarget(sorted_target)
|
|
465
|
|
466 # convert sam to bam if -sam specified
|
|
467 if (params.SAM == "True"):
|
|
468 print "Pre-processing SAM files"
|
|
469
|
|
470 test_bam = bufLoc + "/test.BAM"
|
|
471 ctr_bam = bufLoc + "/control.BAM"
|
|
472
|
|
473 samTest = Process(target= samToBam, args=(params.TEST, test_bam))
|
|
474 if params.BEDINPUT == "False":
|
|
475 samCtr = Process(target= samToBam, args=(params.CONTROL, ctr_bam))
|
|
476
|
|
477 samTest.start()
|
|
478 if params.BEDINPUT == "False":
|
|
479 samCtr.start()
|
|
480
|
|
481 samTest.join()
|
|
482 if params.BEDINPUT == "False":
|
|
483 samCtr.join()
|
|
484
|
|
485 params.TEST = test_bam
|
|
486 if params.BEDINPUT == "False":
|
|
487 params.CONTROL = ctr_bam
|
|
488
|
|
489 # remove multi mapped reads if --nomultimapped is specified
|
|
490 if (params.NOMULTIMAPPED == "True"):
|
|
491 print "Removing multi-mapped reads"
|
|
492
|
|
493 test_bam = bufLoc + "/test_reliable.BAM"
|
|
494 ctr_bam = bufLoc + "/control_reliable.BAM"
|
|
495
|
|
496 bamTest = Process(target= removeMultiMapped, args=(params.TEST, test_bam))
|
|
497 if params.BEDINPUT == "False":
|
|
498 bamCtr = Process(target= removeMultiMapped, args=(params.CONTROL, ctr_bam))
|
|
499
|
|
500 bamTest.start()
|
|
501 if params.BEDINPUT == "False":
|
|
502 bamCtr.start()
|
|
503
|
|
504 bamTest.join()
|
|
505 if params.BEDINPUT == "False":
|
|
506 bamCtr.join()
|
|
507
|
|
508 params.TEST = test_bam
|
|
509 if params.BEDINPUT == "False":
|
|
510 params.CONTROL = ctr_bam
|
|
511
|
|
512 # Get Chromosome Length
|
|
513 genomeFile = bufLoc + '/sample.Genome'
|
|
514 get_genome(params.TEST, genomeFile)
|
|
515
|
|
516 # spawn bam converting scripts
|
|
517 pTest = Process(target= convertBam,
|
|
518 args=(params, bufLoc+'/testData/', targetList, genomeFile))
|
|
519
|
|
520 #BEDINPUT
|
|
521 if params.BEDINPUT == "False":
|
|
522
|
|
523 cTest = Process(target= convertBam,
|
|
524 args=(params, bufLoc+'/ctrData/' , targetList, genomeFile))
|
|
525 else:
|
|
526 cTest = Process(target= convertBamSimple,
|
|
527 args=(params, bufLoc+'/ctrData/', targetList, genomeFile))
|
|
528 # start the processes
|
|
529 pTest.start()
|
|
530 cTest.start()
|
|
531
|
|
532 # wait for all the processes to finish before continuing
|
|
533 pTest.join()
|
|
534 cTest.join()
|
|
535
|
|
536 # Get the read depth count from temporary folder
|
|
537 for folder in [bufLoc+'/testData/', bufLoc+'/ctrData/']:
|
|
538 if 'testData' in folder:
|
|
539 t1 = int(file.readlines(open(folder+"temp.txt"))[0].strip("\n"))
|
|
540 else:
|
|
541 n1 = int(file.readlines(open(folder+"temp.txt"))[0].strip("\n"))
|
|
542 print "Test file read depth = ", t1
|
|
543 print "Control file read depth = ", n1
|
|
544 print "Pre-processing Completed. "
|
|
545
|
|
546 # Get the Average of the Log Ratio
|
|
547 print "Getting the Log Ratio ... "
|
|
548 testListName = bufLoc + '/testData/geneRefCoverage.txt'
|
|
549 controlListName = bufLoc + '/ctrData/geneRefCoverage.txt'
|
|
550 avOut = bufLoc + "/average.txt"
|
|
551 averageCount(testListName, controlListName, avOut, t1, n1, params.MINREADDEPTH, params.MINNBASES)
|
|
552
|
|
553 # Analysis. [Bin, significance test, large deletion, vcf output]
|
|
554 print "Binning ... "
|
|
555 binProc = []
|
|
556 for numBin in params.NUMBIN:
|
|
557 binProc.append(Process(target= analysisPerBin, args=(params,numBin,outFolder,targetList)))
|
|
558
|
|
559 for proc in binProc:
|
|
560 proc.start()
|
|
561
|
|
562 for proc in binProc:
|
|
563 proc.join()
|
|
564
|
|
565 # Removed Temp Folder
|
|
566 removeTempFolder(bufLoc)
|
|
567
|
|
568 if __name__ == "__main__":
|
|
569 main()
|
|
570 print "Done... "
|