comparison Contra/scripts/convert_gene_coordinate.py @ 0:7564f3b1e675

Uploaded
author fcaramia
date Thu, 13 Sep 2012 02:31:43 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:7564f3b1e675
1 # ----------------------------------------------------------------------#
2 # Copyright (c) 2011, Richard Lupat & Jason Li.
3 #
4 # > Source License <
5 # This file is part of CONTRA.
6 #
7 # CONTRA is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # CONTRA is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with CONTRA. If not, see <http://www.gnu.org/licenses/>.
19 #
20 #
21 #-----------------------------------------------------------------------#
22 # Last Updated : 03 Oct 2011 11:00AM
23
24 import sys
25
26 def outputwrite(output, gene,chr,a,b,c,id,n,sOri, eOri):
27 id = str(id)
28 n = str(n)
29 output.write(gene+"\t"+chr+"\t"+a+"\t"+b+"\t"+c+"\t"+id+"\t"+n+"\t"+sOri+"\t"+eOri+"\n")
30
31 def outputwrite2(output2, chr,c,sOri, eOri):
32 output2.write(chr+"\t"+sOri+"\t"+eOri+"\t"+c+"\n")
33
34
35 def convertGeneCoordinate(targetList, bufLocFolder):
36 inputfile2 = bufLocFolder + "chr/"
37 outputfile = bufLocFolder + "geneRefCoverage.txt"
38 outputfile2= bufLocFolder + "geneRefCoverage2.txt"
39
40 output= open(outputfile,"w")
41 output2 = open(outputfile2 , "w")
42
43 rn = 1
44 prevchr = ""
45 for target in targetList:
46 chr = target.chr
47 starts = target.oriStart.split(",")
48 ends = target.oriEnd.split(",")
49
50 if ((len(chr) > 5) or (chr[len(chr)-1] == "M")):
51 continue
52
53 if (prevchr != chr):
54 print chr #progress checking
55 prevchr = chr
56 t = 0
57 covFile = file.readlines(open(inputfile2+chr+".txt","r"))
58
59 for n in range(0,target.numberExon):
60 if t >= len(covFile):
61 break
62 cov = covFile[t].split()
63 while ((int(starts[n]) < int(cov[1])) or (int(starts[n]) >= int(cov[2]))):
64 if (int(cov[1]) > int(starts[n])):
65 t-=1
66 else:
67 t+=1
68 cov = covFile[t].split()
69
70 while ((int(ends[n]) < int(cov[1])) or (int(ends[n]) > int(cov[2]))):
71 # print output #
72 if (rn == 1):
73 prev = target.id
74 nID = target.id
75 if (nID != prev):
76 rn = 1
77 ref1 = str(rn)
78 ref2 = str(int(cov[2]) - int(starts[n]) + rn)
79 outputwrite(output, target.gene,chr,ref1,ref2,cov[3],target.id,n,starts[n],cov[2])
80
81 outputwrite2(output2, chr,cov[3],starts[n],cov[2])
82
83
84 rn = int(ref2)
85 prev = nID
86 # -- #
87
88 # get to the next line of inputfile#
89 t+= 1
90 cov = covFile[t].split()
91 starts[n] = cov[1]
92
93 #print output #
94 if (t == 0) and (t >= len(covFile)):
95 continue
96
97 if (rn == 1):
98 prev = target.id
99 nID = target.id
100 if (nID != prev):
101 rn = 1
102 ref1 = str(rn)
103 ref2 = str(int(ends[n]) - int(starts[n]) + rn)
104 outputwrite(output, target.gene, chr, ref1, ref2, cov[3], target.id, n, starts[n], ends[n])
105
106 outputwrite2(output2, chr, cov[3], starts[n], ends[n])
107
108
109 rn = int(ref2)
110 prev = nID
111 # -- #
112 output.close()
113 output2.close()
114
115
116 def convertGeneCoordinate2(targetList, bufLocFolder):
117 inputfile2 = bufLocFolder + "chr/"
118 outputfile = bufLocFolder + "geneRefCoverage.txt"
119 outputfile_avg = bufLocFolder + "geneRefCoverage_targetAverage.txt"
120
121 output= open(outputfile,"w")
122 output_avg = open(outputfile_avg,"w")
123
124 rn = 1
125 prevchr = ""
126 for target in targetList:
127 chr = target.chr
128 starts = target.oriStart.split(",")
129 ends = target.oriEnd.split(",")
130 target_ttl_readdepth = 0
131 starts_leftmost = starts[0]
132
133
134 if ((len(chr) > 5) or (chr[len(chr)-1] == "M")):
135 continue
136
137 if (prevchr != chr):
138 print chr #progress checking
139 prevchr = chr
140 t = 0
141 covFile = file.readlines(open(inputfile2+chr+".txt","r"))
142
143 for n in range(0,target.numberExon):
144 if t >= len(covFile):
145 break
146 cov = covFile[t].split()
147 while ((int(starts[n]) < int(cov[1])) or (int(starts[n]) >= int(cov[2]))):
148 if (int(cov[1]) > int(starts[n])): t-=1
149 else:
150 t+=1
151 cov = covFile[t].split()
152
153 while ((int(ends[n]) < int(cov[1])) or (int(ends[n]) > int(cov[2]))):
154 # print output #
155 if (rn == 1):
156 prev = target.id
157 nID = target.id
158 if (nID != prev):
159 rn = 1
160 ref1 = str(rn)
161 ref2 = str(int(cov[2]) - int(starts[n]) + rn)
162 outputwrite2(output, chr,cov[3],starts[n],cov[2])
163 tmprange=int(cov[2])-int(starts[n])+1
164 target_ttl_readdepth+=int(cov[3])*tmprange
165 #target_length+=tmprange
166
167 rn = int(ref2)
168 prev = nID
169 # -- #
170
171 # get to the next line of inputfile#
172 t+= 1
173 cov = covFile[t].split()
174 starts[n] = cov[1]
175
176 #print output #
177 if (t == 0) and (t >= len(covFile)):
178 continue
179
180 if (rn == 1):
181 prev = target.id
182 nID = target.id
183 if (nID != prev):
184 rn = 1
185 ref1 = str(rn)
186 ref2 = str(int(ends[n]) - int(starts[n]) + rn)
187 outputwrite2(output, chr, cov[3], starts[n], ends[n])
188 tmprange=int(ends[n])-int(starts[n])+1
189 target_ttl_readdepth+=int(cov[3])*tmprange
190 #target_length+=tmprange
191 target_length = int(ends[n])-int(starts_leftmost)+1
192 output_avg.write("\t".join([chr,starts_leftmost,ends[n],str(target_ttl_readdepth/target_length),str(target_length)])+"\n")
193
194 rn = int(ref2)
195 prev = nID
196 # -- #
197 output.close()
198 output_avg.close()
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218