annotate Contra/contra.py @ 3:94362f37962e

Uploaded
author fcaramia
date Thu, 13 Sep 2012 02:43:53 -0400
parents 7564f3b1e675
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
1 #!/usr/bin/python
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
2
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
3 # ----------------------------------------------------------------------#
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
4 # Copyright (c) 2011, Richard Lupat & Jason Li.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
5 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
6 # > Source License <
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
7 # This file is part of CONTRA.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
8 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
9 # CONTRA is free software: you can redistribute it and/or modify
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
10 # it under the terms of the GNU General Public License as published by
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
11 # the Free Software Foundation, either version 3 of the License, or
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
12 # (at your option) any later version.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
13 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
14 # CONTRA is distributed in the hope that it will be useful,
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
17 # GNU General Public License for more details.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
18 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
19 # You should have received a copy of the GNU General Public License
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
20 # along with CONTRA. If not, see <http://www.gnu.org/licenses/>.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
21 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
22 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
23 #-----------------------------------------------------------------------#
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
24 # Last Updated : 23 July 2012 16:43PM
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
25
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
26
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
27 import os
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
28 from optparse import OptionParser
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
29 import sys
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
30 import subprocess
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
31 import shlex
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
32 from multiprocessing import Process, Manager
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
33
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
34 from scripts.assign_bin_number_v2 import *
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
35 from scripts.average_count import *
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
36 from scripts.cn_apply_threshold import *
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
37 from scripts.convert_gene_coordinate import *
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
38 from scripts.convert_targeted_regions import *
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
39 from scripts.split_chromosome import *
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
40 from scripts.vcf_out import *
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
41 from scripts.get_chr_length import *
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
42 from scripts.count_libsize import *
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
43 from scripts.target_breakdown import *
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
44
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
45 #Absolute Path
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
46 scriptPath = os.path.realpath(os.path.dirname(sys.argv[0]))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
47
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
48 class Params:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
49 """
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
50 Class for top-level system parameters
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
51 """
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
52
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
53 def __init__(self):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
54 # command-line option definition
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
55 self.parser = OptionParser()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
56 self.parser.add_option("-t", "--target",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
57 help="Target region definition file [REQUIRED] [BED Format]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
58 action="store", type="string", dest="target")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
59 self.parser.add_option("-s", "--test",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
60 help="Alignment file for the test sample [REQUIRED] [BAM/SAM]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
61 action="store", type="string", dest="test")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
62 self.parser.add_option("-c", "--control",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
63 help="Alignment file for the control sample [REQUIRED] [BAM/SAM]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
64 action="store", type="string", dest="control")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
65 self.parser.add_option("-f", "--fasta",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
66 help="Reference Genome [REQUIRED][FASTA]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
67 action="store", type="string", dest="fasta")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
68 self.parser.add_option("-o", "--outFolder",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
69 help="the output folder path name to store the output of analysis [REQUIRED]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
70 action="store", type="string", dest="outFolder")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
71 self.parser.add_option("--numBin",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
72 help="Numbers of bins to group regions. User can specify multiple experiments with different number of bins (comma separated) [20]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
73 action="store", type="string", dest="numBin", default="20")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
74 self.parser.add_option("--minReadDepth",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
75 help="The threshold for minimum read depth for each bases [10]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
76 action="store", type="string", dest="minReadDepth", default=10)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
77 self.parser.add_option("--minNBases",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
78 help="The threshold for minimum number of bases for each target regions [10]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
79 action="store", type="string", dest="minNBases", default= 10)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
80 self.parser.add_option("--sam",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
81 help="If the specified, test and control sample are in SAM [False]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
82 action="store_true", dest="sam", default="False")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
83 self.parser.add_option("--bed",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
84 help="if specified, control will be in BED format [False]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
85 action="store_true", dest = "bedInput", default="False")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
86 self.parser.add_option("--pval",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
87 help="The p-value threshold for filtering [0.05]. Applies to Adjusted P-Value.",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
88 action="store", type="string", dest="pval", default=0.05)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
89 self.parser.add_option("--sampleName",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
90 help ="The name to be appended to the front of default output name ['']",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
91 action="store", type="string", dest="sampleName", default='')
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
92 self.parser.add_option("--nomultimapped",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
93 help="The option to remove multi-mapped reads [False]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
94 action="store_true", dest="nomultimapped",default="False")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
95 self.parser.add_option("-p", "--plot",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
96 help="Plots log-ratio distribution for each bin [False]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
97 action="store_true", dest="plot", default="False")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
98 self.parser.add_option("--minExon",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
99 help="Minimum number of Exons in one bin (if less than this, bin that contains small number of exons"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
100 +"will be moved to the adjacent bins) [2000] ",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
101 action="store", type="string", dest="minExon", default="2000")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
102 self.parser.add_option("--minControlRdForCall",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
103 help="Minimum control readdepth for call [5]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
104 action="store", type="string", dest="minControl", default="5")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
105
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
106 self.parser.add_option("--minTestRdForCall",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
107 help="Minimum test readdepth for call [0]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
108 action="store", type="string", dest="minTest", default="0")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
109
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
110 self.parser.add_option("--minAvgForCall",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
111 help="Minimum average coverage for call [20]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
112 action="store", type="string", dest="minAvg", default="20")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
113
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
114 self.parser.add_option("--maxRegionSize",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
115 help="Maximum Region Size in target region (for breaking large region into smaller region. By default, maxRegionSize 0 means no breakdown) [0]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
116 action="store", type="string", dest="maxRegionSize", default="0")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
117
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
118 self.parser.add_option("--targetRegionSize",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
119 help="Target Region Size for breakdown (if maxRegionSize is non zero) [200]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
120 action="store", type="string", dest="targetRegionSize", default="200")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
121
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
122 self.parser.add_option("-l", "--largeDeletion",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
123 help="if specified, CONTRA will run large deletion analysis (CBS). User must have DNAcopy R-library installed to run the analysis. [False]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
124 action="store_true", dest = "large", default="False")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
125
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
126 self.parser.add_option("--smallSegment",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
127 help="CBS segment size for calling large variations [1]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
128 action="store", type="string", dest="smallSegment", default="1")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
129
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
130 self.parser.add_option("--largeSegment",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
131 help="CBS segment size for calling large variations [25]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
132 action="store", type="string", dest="largeSegment", default="25")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
133
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
134 self.parser.add_option("--lrCallStart",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
135 help="Log ratios start range that will be used to call CNV [-0.3]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
136 action="store", type="string", dest="lrs", default="-0.3")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
137
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
138 self.parser.add_option("--lrCallEnd",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
139 help="Log ratios end range that will be used to call CNV [0.3]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
140 action="store", type="string", dest="lre", default="0.3")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
141
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
142 self.parser.add_option("--passSize",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
143 help="Size of exons that passed the p-value threshold compare to the original exon size [0.35]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
144 action="store", type="string", dest="passSize", default="0.35")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
145
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
146
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
147 # required parameters list
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
148 self.ERRORLIST = []
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
149
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
150 # change system parameters based on any command line
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
151 (options, args) = self.parser.parse_args()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
152 if options.target:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
153 self.TARGET = options.target
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
154 else:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
155 #self.parser.print_help()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
156 #self.parser.error("--target not supplied")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
157 self.ERRORLIST.append("target")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
158
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
159 if options.test:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
160 self.TEST = options.test
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
161 else:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
162 #self.parser.error("--test not supplied")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
163 self.ERRORLIST.append("test")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
164
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
165 if options.control:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
166 self.CONTROL = options.control
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
167 else:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
168 #self.parser.error("--control not supplied")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
169 self.ERRORLIST.append("control")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
170
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
171 if options.fasta:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
172 self.FASTA = options.fasta
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
173 else:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
174 #self.parser.error("--fasta not supplied")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
175 self.ERRORLIST.append("fasta")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
176
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
177 if options.outFolder:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
178 self.OUTFOLDER = options.outFolder
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
179 else:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
180 #self.parser.error("--outFolder not supplied")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
181 self.ERRORLIST.append("outfolder")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
182
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
183 if len(self.ERRORLIST) != 0:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
184 self.parser.print_help()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
185 self.parser.error("Missing required parameters")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
186
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
187 if options.numBin:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
188 binsNumber = options.numBin.split(",")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
189 try:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
190 self.NUMBIN = [int(j) for j in binsNumber]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
191 except:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
192 self.NUMBIN = [20]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
193 if options.minReadDepth:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
194 self.MINREADDEPTH = int(options.minReadDepth)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
195 if options.minNBases:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
196 self.MINNBASES = int(options.minNBases)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
197 if options.sam:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
198 self.SAM = str(options.sam)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
199 if options.pval:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
200 self.PVAL = options.pval
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
201 if options.sampleName:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
202 self.SAMPLENAME = options.sampleName
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
203 else:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
204 self.SAMPLENAME = 'No-SampleName'
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
205 if options.nomultimapped:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
206 self.NOMULTIMAPPED = str(options.nomultimapped)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
207 if options.plot:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
208 self.PLOT = str(options.plot)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
209 if options.bedInput:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
210 self.BEDINPUT = options.bedInput
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
211 if options.minExon:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
212 self.MINEXON = int(options.minExon)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
213 if options.minControl:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
214 self.MINCONTROL = options.minControl
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
215 if options.minTest:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
216 self.MINTEST = options.minTest
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
217 if options.minAvg:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
218 self.MINAVG = options.minAvg
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
219 if options.maxRegionSize:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
220 self.MAXREGIONSIZE = int(options.maxRegionSize)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
221 if options.targetRegionSize:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
222 self.TARGETREGIONSIZE = int(options.targetRegionSize)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
223 if options.large:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
224 self.LARGE = str(options.large)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
225 if options.smallSegment:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
226 self.SMALLSEGMENT = options.smallSegment
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
227 if options.largeSegment:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
228 self.LARGESEGMENT = options.largeSegment
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
229 if options.lre:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
230 self.LRE = options.lre
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
231 if options.lrs:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
232 self.LRS = options.lrs
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
233 if options.passSize:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
234 self.PASSSIZE = options.passSize
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
235
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
236 def repeat(self):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
237 # params test
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
238 print "target :", self.TARGET
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
239 print "test :", self.TEST
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
240 print "control :", self.CONTROL
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
241 print "fasta :", self.FASTA
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
242 print "outfolder :", self.OUTFOLDER
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
243 print "numBin :", self.NUMBIN
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
244 print "minreaddepth :", self.MINREADDEPTH
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
245 print "minNBases :", self.MINNBASES
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
246 print "sam :", self.SAM
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
247 print "pval :", self.PVAL
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
248 print "sampleName :", self.SAMPLENAME
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
249 print "nomultimapped :", self.NOMULTIMAPPED
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
250 print "plot :", self.PLOT
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
251 print "bedInput :", self.BEDINPUT
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
252 print "minExon :", self.MINEXON
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
253 print "largeDeletion :", self.LARGE
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
254 def checkOutputFolder(outF):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
255 print "Creating Output Folder :",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
256
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
257 if outF[len(outF)-1] == "/":
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
258 outF = outF[:len(outF)-1]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
259
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
260 try:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
261 os.mkdir(outF)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
262 except:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
263 print "cannot create folder '%s'" %outF
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
264 print "if folder already exist, please specify other folder"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
265 sys.exit(1)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
266
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
267 try:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
268 os.mkdir(outF+"/table")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
269 os.mkdir(outF+"/plot")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
270 os.mkdir(outF+"/buf")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
271 os.mkdir(outF+"/buf/ctrData/")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
272 os.mkdir(outF+"/buf/testData/")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
273 except:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
274 print "[ERROR: CANNOT CREATE SUBFOLDERS]"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
275 sys.exit(1)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
276
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
277 print " Done."
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
278
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
279 return outF
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
280
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
281
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
282 #BEDINPUT
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
283 def countTotalReads3(params, folder):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
284 tempFileName = folder + "/temp.txt"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
285 tempReadFile = open(tempFileName, "w")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
286 libsize = get_libsize(params.BEDINPUT)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
287 tempReadFile.write(libsize)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
288 #tempReadFile.write(params.CONTROLREADCOUNT)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
289 tempReadFile.close()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
290
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
291
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
292 def countTotalReads(params, folder):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
293 if 'testData' in folder:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
294 inF = params.TEST
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
295 else:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
296 inF = params.CONTROL
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
297
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
298 # Get Total ReadCount
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
299 getreadcount = os.system("samtools view %s | wc -l > %s/temp.txt" %(inF,folder))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
300
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
301 def samToBam(samfile, bamfile):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
302 args = shlex.split("samtools view -bS %s -o %s" %(samfile, bamfile))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
303 samtobam = subprocess.call(args)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
304
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
305 return bamfile
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
306
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
307 def removeMultiMapped(inF, newBAM):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
308 # Get New BAM Files with mapping quality > 0
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
309 args = shlex.split("samtools view -bq 1 %s -o %s" %(inF, newBAM))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
310 removeMM = subprocess.call(args)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
311 print "Multi mapped reads removed. "
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
312
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
313
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
314 #BEDINPUT
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
315 def convertBamSimple(params, folder, targetList, genomeFile):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
316 if 'testData' in folder:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
317 inF = params.TEST
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
318 print "Converting TEST Sample... "
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
319 else:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
320 inF = params.CONTROL
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
321 print "Converting CONTROL Sample... "
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
322
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
323 #Copy file to working folder
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
324 os.system("cp %s %s" %(inF, folder+"sample.BEDGRAPH"))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
325
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
326 # Split Bedgraph by its chromosomes
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
327 splitByChromosome(folder)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
328
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
329 # Slice the coverage files to only cover the targeted regions
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
330 print "Getting targeted regions DOC..."
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
331 convertGeneCoordinate(targetList, folder)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
332
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
333 # LIBSIZE
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
334 libsize = str(get_libsize(folder+"geneRefCoverage2.txt"))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
335 tempLibSize = open(folder + "/temp.txt", "w")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
336 tempLibSize.write(libsize)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
337 tempLibSize.close()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
338
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
339 print "Targeted regions pre-processing: Done"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
340
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
341
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
342 def convertBam(params, folder, targetList, genomeFile):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
343 if 'testData' in folder:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
344 inF = params.TEST
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
345 print "Converting TEST Sample... "
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
346 else:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
347 inF = params.CONTROL
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
348 print "Converting CONTROL Sample... "
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
349
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
350 # Convert BAM Files to BEDGRAPH
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
351 bedgraph = folder + "sample.BEDGRAPH"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
352 args = shlex.split("genomeCoverageBed -ibam %s -bga -g %s" %(inF, genomeFile))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
353 #output = subprocess.Popen(args, stdout = subprocess.PIPE).communicate()[0]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
354 iOutFile = open(bedgraph, "w")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
355 #iOutFile.write(output)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
356 output = subprocess.Popen(args, stdout = iOutFile).wait()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
357 iOutFile.close()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
358
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
359 # Split Bedgraph by its chromosomes
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
360 splitByChromosome(folder)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
361
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
362 # Slice the coverage files to only cover the targeted regions
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
363 print "Getting targeted regions DOC..."
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
364 convertGeneCoordinate(targetList, folder)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
365
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
366 # LIBSIZE
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
367 libsize = str(get_libsize(folder+"geneRefCoverage2.txt"))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
368 tempLibSize = open(folder + "temp.txt", "w")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
369 tempLibSize.write(libsize)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
370 tempLibSize.close()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
371
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
372 print "Targeted regions pre-processing: Done"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
373
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
374 def analysisPerBin(params, num_bin, outFolder, targetList):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
375 import shutil
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
376
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
377 bufLoc = outFolder + "/buf"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
378 # Assign bin number to the median and average file
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
379 numBin = assignBin(num_bin, bufLoc+"/average.txt", bufLoc+"/bin", targetList, params.MINEXON)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
380
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
381 #copy bin_boundary to plot folder
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
382 #outBounFile = os.path.join(outFolder, "plot", "bin_boundary"+str(num_bin))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
383 #curBounFile = os.path.join(bufLoc, "bin" + str(num_bin) + ".boundaries.txt")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
384 #shutil.copy(curBounFile, outBounFile)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
385
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
386
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
387 print "Significance Test ... "
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
388 rScriptName = os.path.join(scriptPath, "scripts", "cn_analysis.v3.R")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
389 args = shlex.split("Rscript %s %s %s %s %s %s %s %s %s %s %s"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
390 %(rScriptName, num_bin, params.MINREADDEPTH, params.MINNBASES, outFolder, params.SAMPLENAME,params.PLOT, numBin, params.MINCONTROL, params.MINTEST, params.MINAVG))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
391 rscr = subprocess.call(args)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
392
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
393
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
394 print "Generating Output Files ... "
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
395 # Analysis of CNV
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
396 tNameList = os.listdir(outFolder+"/table/")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
397 if num_bin > 1:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
398 tNameId = str(num_bin) + "bins"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
399 else:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
400 tNameId = str(num_bin) + "bin"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
401 for tName in tNameList:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
402 if tNameId in tName:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
403 break
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
404
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
405 if "CNATable" in tName:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
406 tName = tName[:len(tName)-4]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
407 tableName = outFolder + "/table/" + tName
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
408 bufTable = bufLoc + "/" + tName
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
409 applyThreshold(tableName, bufTable, params.PVAL, 100000) #params.MAXGAP = 100000
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
410
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
411 # Large Region CBS
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
412 if (params.LARGE != "False"):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
413 rScriptName2 = os.path.join(scriptPath, "scripts", "large_region_cbs.R")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
414
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
415 args = shlex.split("Rscript %s %s %s %s %s %s %s %s %s"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
416 %(rScriptName2, tableName+".txt", params.SMALLSEGMENT, params.LARGESEGMENT, params.PVAL, params.PASSSIZE, params.LRS, params.LRE, bufLoc))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
417 rscr2 = subprocess.call(args)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
418
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
419 # Generate the DNA sequence (for VCF file)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
420 bedFile = bufTable + ".BED"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
421 bedFasta = bufTable + ".fastaOut.txt"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
422 fastaFile = params.FASTA
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
423 args = shlex.split("fastaFromBed -fi %s -bed %s -fo %s -name"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
424 %(fastaFile, bedFile, bedFasta))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
425 fastaBED = subprocess.call(args)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
426
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
427 # Write VCF
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
428 print "Creating VCF file ... "
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
429 vcfFile = tableName + ".vcf"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
430 vcf_out(bedFasta, vcfFile)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
431
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
432 print "%s created. " %(vcfFile)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
433
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
434 else:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
435 print "Table not found"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
436
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
437 def removeTempFolder(tempFolderPath):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
438 import shutil
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
439
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
440 shutil.rmtree(tempFolderPath)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
441
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
442 print "Temp Folder Removed"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
443
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
444
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
445 def main():
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
446 # option handling
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
447 params = Params()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
448 params.repeat()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
449
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
450 # output folder handling
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
451 outFolder = checkOutputFolder(params.OUTFOLDER)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
452 bufLoc = outFolder + "/buf"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
453
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
454 # convert target file
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
455 sorted_target = os.path.join(bufLoc, "target.BED")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
456 os.system("sort -k1,1 -k2n %s > %s" %(params.TARGET, sorted_target))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
457
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
458 # target breakdown
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
459 if params.MAXREGIONSIZE > 0:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
460 new_target = os.path.join(bufLoc, "target_breakdown.BED")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
461 target_breakdown(sorted_target, params.MAXREGIONSIZE, params.TARGETREGIONSIZE, new_target)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
462 sorted_target = new_target
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
463
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
464 targetList = convertTarget(sorted_target)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
465
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
466 # convert sam to bam if -sam specified
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
467 if (params.SAM == "True"):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
468 print "Pre-processing SAM files"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
469
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
470 test_bam = bufLoc + "/test.BAM"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
471 ctr_bam = bufLoc + "/control.BAM"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
472
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
473 samTest = Process(target= samToBam, args=(params.TEST, test_bam))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
474 if params.BEDINPUT == "False":
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
475 samCtr = Process(target= samToBam, args=(params.CONTROL, ctr_bam))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
476
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
477 samTest.start()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
478 if params.BEDINPUT == "False":
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
479 samCtr.start()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
480
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
481 samTest.join()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
482 if params.BEDINPUT == "False":
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
483 samCtr.join()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
484
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
485 params.TEST = test_bam
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
486 if params.BEDINPUT == "False":
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
487 params.CONTROL = ctr_bam
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
488
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
489 # remove multi mapped reads if --nomultimapped is specified
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
490 if (params.NOMULTIMAPPED == "True"):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
491 print "Removing multi-mapped reads"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
492
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
493 test_bam = bufLoc + "/test_reliable.BAM"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
494 ctr_bam = bufLoc + "/control_reliable.BAM"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
495
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
496 bamTest = Process(target= removeMultiMapped, args=(params.TEST, test_bam))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
497 if params.BEDINPUT == "False":
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
498 bamCtr = Process(target= removeMultiMapped, args=(params.CONTROL, ctr_bam))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
499
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
500 bamTest.start()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
501 if params.BEDINPUT == "False":
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
502 bamCtr.start()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
503
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
504 bamTest.join()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
505 if params.BEDINPUT == "False":
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
506 bamCtr.join()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
507
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
508 params.TEST = test_bam
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
509 if params.BEDINPUT == "False":
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
510 params.CONTROL = ctr_bam
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
511
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
512 # Get Chromosome Length
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
513 genomeFile = bufLoc + '/sample.Genome'
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
514 get_genome(params.TEST, genomeFile)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
515
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
516 # spawn bam converting scripts
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
517 pTest = Process(target= convertBam,
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
518 args=(params, bufLoc+'/testData/', targetList, genomeFile))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
519
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
520 #BEDINPUT
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
521 if params.BEDINPUT == "False":
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
522
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
523 cTest = Process(target= convertBam,
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
524 args=(params, bufLoc+'/ctrData/' , targetList, genomeFile))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
525 else:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
526 cTest = Process(target= convertBamSimple,
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
527 args=(params, bufLoc+'/ctrData/', targetList, genomeFile))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
528 # start the processes
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
529 pTest.start()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
530 cTest.start()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
531
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
532 # wait for all the processes to finish before continuing
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
533 pTest.join()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
534 cTest.join()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
535
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
536 # Get the read depth count from temporary folder
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
537 for folder in [bufLoc+'/testData/', bufLoc+'/ctrData/']:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
538 if 'testData' in folder:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
539 t1 = int(file.readlines(open(folder+"temp.txt"))[0].strip("\n"))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
540 else:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
541 n1 = int(file.readlines(open(folder+"temp.txt"))[0].strip("\n"))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
542 print "Test file read depth = ", t1
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
543 print "Control file read depth = ", n1
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
544 print "Pre-processing Completed. "
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
545
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
546 # Get the Average of the Log Ratio
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
547 print "Getting the Log Ratio ... "
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
548 testListName = bufLoc + '/testData/geneRefCoverage.txt'
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
549 controlListName = bufLoc + '/ctrData/geneRefCoverage.txt'
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
550 avOut = bufLoc + "/average.txt"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
551 averageCount(testListName, controlListName, avOut, t1, n1, params.MINREADDEPTH, params.MINNBASES)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
552
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
553 # Analysis. [Bin, significance test, large deletion, vcf output]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
554 print "Binning ... "
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
555 binProc = []
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
556 for numBin in params.NUMBIN:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
557 binProc.append(Process(target= analysisPerBin, args=(params,numBin,outFolder,targetList)))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
558
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
559 for proc in binProc:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
560 proc.start()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
561
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
562 for proc in binProc:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
563 proc.join()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
564
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
565 # Removed Temp Folder
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
566 removeTempFolder(bufLoc)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
567
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
568 if __name__ == "__main__":
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
569 main()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
570 print "Done... "