0
|
1 # ----------------------------------------------------------------------#
|
|
2 # Copyright (c) 2011, Richard Lupat & Jason Li.
|
|
3 #
|
|
4 # > Source License <
|
|
5 # This file is part of CONTRA.
|
|
6 #
|
|
7 # CONTRA is free software: you can redistribute it and/or modify
|
|
8 # it under the terms of the GNU General Public License as published by
|
|
9 # the Free Software Foundation, either version 3 of the License, or
|
|
10 # (at your option) any later version.
|
|
11 #
|
|
12 # CONTRA is distributed in the hope that it will be useful,
|
|
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
15 # GNU General Public License for more details.
|
|
16 #
|
|
17 # You should have received a copy of the GNU General Public License
|
|
18 # along with CONTRA. If not, see <http://www.gnu.org/licenses/>.
|
|
19 #
|
|
20 #
|
|
21 #-----------------------------------------------------------------------#
|
|
22 # Last Updated : 03 Oct 2011 11:00AM
|
|
23
|
|
24 import sys
|
|
25
|
|
def outputwrite(output, gene, chr, a, b, c, id, n, sOri, eOri):
    """Write one tab-separated record, terminated by a newline, to *output*.

    The columns are emitted in the order: gene, chr, a, b, c, id, n, sOri,
    eOri.  ``id`` and ``n`` are converted with ``str()``; every other field
    must already be a string.
    """
    columns = (gene, chr, a, b, c, str(id), str(n), sOri, eOri)
    output.write("\t".join(columns) + "\n")
|
|
30
|
|
def outputwrite2(output2, chr, c, sOri, eOri):
    """Write one tab-separated record, terminated by a newline, to *output2*.

    The columns are emitted in the order: chr, sOri, eOri, c.  All four
    fields must already be strings.
    """
    columns = (chr, sOri, eOri, c)
    output2.write("\t".join(columns) + "\n")
|
|
33
|
|
34
|
|
def _writeGeneSegment(output, output2, target, exonIdx, segStart, segEnd, covValue, refStart):
    """Write one coverage segment to both output files.

    Returns the reference coordinate at which the segment ends, i.e.
    ``int(segEnd) - int(segStart) + refStart``; the caller passes it back in
    as ``refStart`` of the next segment.
    """
    refEnd = int(segEnd) - int(segStart) + refStart
    outputwrite(output, target.gene, target.chr, str(refStart), str(refEnd),
                covValue, target.id, exonIdx, segStart, segEnd)
    outputwrite2(output2, target.chr, covValue, segStart, segEnd)
    return refEnd


def convertGeneCoordinate(targetList, bufLocFolder):
    """Split every target exon along coverage-file rows and write the pieces.

    For each target, the file ``<bufLocFolder>chr/<target.chr>.txt`` is read;
    each of its rows is whitespace-split and columns 1 and 2 are used as the
    integer start/end of the row, column 3 as the value copied to the output.
    Each exon (``target.oriStart`` / ``target.oriEnd`` are comma-separated
    lists, ``target.numberExon`` entries are used) is cut into one segment per
    overlapping row.

    Two files are (re)written in ``bufLocFolder``:

    * ``geneRefCoverage.txt``  - gene, chr, ref start, ref end, value, id,
      exon index, segment start, segment end
    * ``geneRefCoverage2.txt`` - chr, segment start, segment end, value

    The ref start/end columns are a running coordinate that begins at 1 and
    restarts at 1 whenever ``target.id`` differs from the id of the previously
    written segment.

    Targets whose chromosome name is longer than 5 characters or ends in "M"
    are skipped.

    NOTE(review): the row cursor is only reset when the chromosome changes, so
    targets are presumably expected to arrive grouped by chromosome and in
    coordinate order - confirm against the caller.
    """
    inputfile2 = bufLocFolder + "chr/"
    outputfile = bufLocFolder + "geneRefCoverage.txt"
    outputfile2 = bufLocFolder + "geneRefCoverage2.txt"

    rn = 1          # running reference coordinate of the next segment
    prevId = None   # target.id of the most recently written segment
    prevchr = ""
    # Context managers guarantee both outputs are closed even on an exception.
    with open(outputfile, "w") as output:
        with open(outputfile2, "w") as output2:
            for target in targetList:
                chrom = target.chr
                starts = target.oriStart.split(",")
                ends = target.oriEnd.split(",")

                if len(chrom) > 5 or chrom[-1] == "M":
                    continue

                if prevchr != chrom:
                    print(chrom)  # progress checking
                    prevchr = chrom
                    t = 0
                    # Load the coverage rows and release the handle at once.
                    with open(inputfile2 + chrom + ".txt", "r") as covHandle:
                        covFile = covHandle.readlines()

                for n in range(target.numberExon):
                    if t >= len(covFile):
                        break
                    cov = covFile[t].split()

                    # Move the cursor to the row whose [start, end) holds the
                    # exon start.
                    # NOTE(review): t is not bounds-checked here; stepping
                    # below 0 wraps to the end of the list and stepping past
                    # the last row raises IndexError.
                    exonStart = int(starts[n])
                    while exonStart < int(cov[1]) or exonStart >= int(cov[2]):
                        if int(cov[1]) > exonStart:
                            t -= 1
                        else:
                            t += 1
                        cov = covFile[t].split()

                    # While the exon end lies outside the current row, emit
                    # the part up to the row end and continue from the start
                    # of the next row.
                    exonEnd = int(ends[n])
                    while exonEnd < int(cov[1]) or exonEnd > int(cov[2]):
                        if target.id != prevId:
                            rn = 1
                        rn = _writeGeneSegment(output, output2, target, n,
                                               starts[n], cov[2], cov[3], rn)
                        prevId = target.id

                        # get to the next line of inputfile
                        t += 1
                        cov = covFile[t].split()
                        starts[n] = cov[1]

                    # Final (or only) segment of this exon.
                    if target.id != prevId:
                        rn = 1
                    rn = _writeGeneSegment(output, output2, target, n,
                                           starts[n], ends[n], cov[3], rn)
                    prevId = target.id
|
|
114
|
|
115
|
|
def convertGeneCoordinate2(targetList, bufLocFolder):
    """Split every target exon along coverage-file rows and write a per-target average.

    For each target, the file ``<bufLocFolder>chr/<target.chr>.txt`` is read;
    each of its rows is whitespace-split and columns 1 and 2 are used as the
    integer start/end of the row, column 3 as an integer read depth.  Each
    exon (``target.oriStart`` / ``target.oriEnd`` are comma-separated lists,
    ``target.numberExon`` entries are used) is cut into one segment per
    overlapping row.

    Two files are (re)written in ``bufLocFolder``:

    * ``geneRefCoverage.txt`` - chr, segment start, segment end, depth
    * ``geneRefCoverage_targetAverage.txt`` - one line per processed exon:
      chr, first start of the target, exon end, floor(accumulated
      depth * length / span), span, where span = exon end - first start + 1

    Targets whose chromosome name is longer than 5 characters or ends in "M"
    are skipped.

    NOTE(review): the row cursor is only reset when the chromosome changes, so
    targets are presumably expected to arrive grouped by chromosome and in
    coordinate order - confirm against the caller.
    """
    inputfile2 = bufLocFolder + "chr/"
    outputfile = bufLocFolder + "geneRefCoverage.txt"
    outputfile_avg = bufLocFolder + "geneRefCoverage_targetAverage.txt"

    prevchr = ""
    # Context managers guarantee both outputs are closed even on an exception.
    with open(outputfile, "w") as output:
        with open(outputfile_avg, "w") as output_avg:
            for target in targetList:
                chrom = target.chr
                starts = target.oriStart.split(",")
                ends = target.oriEnd.split(",")
                target_ttl_readdepth = 0
                # Remember the first start before starts[] is rewritten below.
                starts_leftmost = starts[0]

                if len(chrom) > 5 or chrom[-1] == "M":
                    continue

                if prevchr != chrom:
                    print(chrom)  # progress checking
                    prevchr = chrom
                    t = 0
                    # Load the coverage rows and release the handle at once.
                    with open(inputfile2 + chrom + ".txt", "r") as covHandle:
                        covFile = covHandle.readlines()

                for n in range(target.numberExon):
                    if t >= len(covFile):
                        break
                    cov = covFile[t].split()

                    # Move the cursor to the row whose [start, end) holds the
                    # exon start.
                    # NOTE(review): t is not bounds-checked here; stepping
                    # below 0 wraps to the end of the list and stepping past
                    # the last row raises IndexError.
                    exonStart = int(starts[n])
                    while exonStart < int(cov[1]) or exonStart >= int(cov[2]):
                        if int(cov[1]) > exonStart:
                            t -= 1
                        else:
                            t += 1
                        cov = covFile[t].split()

                    # While the exon end lies outside the current row, emit
                    # the part up to the row end and continue from the start
                    # of the next row.
                    exonEnd = int(ends[n])
                    while exonEnd < int(cov[1]) or exonEnd > int(cov[2]):
                        outputwrite2(output, chrom, cov[3], starts[n], cov[2])
                        # NOTE(review): the +1 treats both ends as inclusive;
                        # confirm this matches the coverage file convention.
                        tmprange = int(cov[2]) - int(starts[n]) + 1
                        target_ttl_readdepth += int(cov[3]) * tmprange

                        # get to the next line of inputfile
                        t += 1
                        cov = covFile[t].split()
                        starts[n] = cov[1]

                    # Final (or only) segment of this exon.
                    outputwrite2(output, chrom, cov[3], starts[n], ends[n])
                    tmprange = exonEnd - int(starts[n]) + 1
                    target_ttl_readdepth += int(cov[3]) * tmprange

                    target_length = exonEnd - int(starts_leftmost) + 1
                    # Floor division keeps the integer average the original
                    # Python 2 "/" produced.
                    average = target_ttl_readdepth // target_length
                    output_avg.write("\t".join([chrom, starts_leftmost, ends[n],
                                                str(average),
                                                str(target_length)]) + "\n")
|
|
199
|
|
200
|
|
201
|
|
202
|
|
203
|
|
204
|
|
205
|
|
206
|
|
207
|
|
208
|
|
209
|
|
210
|
|
211
|
|
212
|
|
213
|
|
214
|
|
215
|
|
216
|
|
217
|
|
218
|