Mercurial > repos > fcaramia > contra
comparison Contra/scripts/convert_gene_coordinate.py @ 0:7564f3b1e675
Uploaded
author | fcaramia |
---|---|
date | Thu, 13 Sep 2012 02:31:43 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:7564f3b1e675 |
---|---|
1 # ----------------------------------------------------------------------# | |
2 # Copyright (c) 2011, Richard Lupat & Jason Li. | |
3 # | |
4 # > Source License < | |
5 # This file is part of CONTRA. | |
6 # | |
7 # CONTRA is free software: you can redistribute it and/or modify | |
8 # it under the terms of the GNU General Public License as published by | |
9 # the Free Software Foundation, either version 3 of the License, or | |
10 # (at your option) any later version. | |
11 # | |
12 # CONTRA is distributed in the hope that it will be useful, | |
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 # GNU General Public License for more details. | |
16 # | |
17 # You should have received a copy of the GNU General Public License | |
18 # along with CONTRA. If not, see <http://www.gnu.org/licenses/>. | |
19 # | |
20 # | |
21 #-----------------------------------------------------------------------# | |
22 # Last Updated : 03 Oct 2011 11:00AM | |
23 | |
24 import sys | |
25 | |
26 def outputwrite(output, gene,chr,a,b,c,id,n,sOri, eOri): | |
27 id = str(id) | |
28 n = str(n) | |
29 output.write(gene+"\t"+chr+"\t"+a+"\t"+b+"\t"+c+"\t"+id+"\t"+n+"\t"+sOri+"\t"+eOri+"\n") | |
30 | |
31 def outputwrite2(output2, chr,c,sOri, eOri): | |
32 output2.write(chr+"\t"+sOri+"\t"+eOri+"\t"+c+"\n") | |
33 | |
34 | |
35 def convertGeneCoordinate(targetList, bufLocFolder): | |
36 inputfile2 = bufLocFolder + "chr/" | |
37 outputfile = bufLocFolder + "geneRefCoverage.txt" | |
38 outputfile2= bufLocFolder + "geneRefCoverage2.txt" | |
39 | |
40 output= open(outputfile,"w") | |
41 output2 = open(outputfile2 , "w") | |
42 | |
43 rn = 1 | |
44 prevchr = "" | |
45 for target in targetList: | |
46 chr = target.chr | |
47 starts = target.oriStart.split(",") | |
48 ends = target.oriEnd.split(",") | |
49 | |
50 if ((len(chr) > 5) or (chr[len(chr)-1] == "M")): | |
51 continue | |
52 | |
53 if (prevchr != chr): | |
54 print chr #progress checking | |
55 prevchr = chr | |
56 t = 0 | |
57 covFile = file.readlines(open(inputfile2+chr+".txt","r")) | |
58 | |
59 for n in range(0,target.numberExon): | |
60 if t >= len(covFile): | |
61 break | |
62 cov = covFile[t].split() | |
63 while ((int(starts[n]) < int(cov[1])) or (int(starts[n]) >= int(cov[2]))): | |
64 if (int(cov[1]) > int(starts[n])): | |
65 t-=1 | |
66 else: | |
67 t+=1 | |
68 cov = covFile[t].split() | |
69 | |
70 while ((int(ends[n]) < int(cov[1])) or (int(ends[n]) > int(cov[2]))): | |
71 # print output # | |
72 if (rn == 1): | |
73 prev = target.id | |
74 nID = target.id | |
75 if (nID != prev): | |
76 rn = 1 | |
77 ref1 = str(rn) | |
78 ref2 = str(int(cov[2]) - int(starts[n]) + rn) | |
79 outputwrite(output, target.gene,chr,ref1,ref2,cov[3],target.id,n,starts[n],cov[2]) | |
80 | |
81 outputwrite2(output2, chr,cov[3],starts[n],cov[2]) | |
82 | |
83 | |
84 rn = int(ref2) | |
85 prev = nID | |
86 # -- # | |
87 | |
88 # get to the next line of inputfile# | |
89 t+= 1 | |
90 cov = covFile[t].split() | |
91 starts[n] = cov[1] | |
92 | |
93 #print output # | |
94 if (t == 0) and (t >= len(covFile)): | |
95 continue | |
96 | |
97 if (rn == 1): | |
98 prev = target.id | |
99 nID = target.id | |
100 if (nID != prev): | |
101 rn = 1 | |
102 ref1 = str(rn) | |
103 ref2 = str(int(ends[n]) - int(starts[n]) + rn) | |
104 outputwrite(output, target.gene, chr, ref1, ref2, cov[3], target.id, n, starts[n], ends[n]) | |
105 | |
106 outputwrite2(output2, chr, cov[3], starts[n], ends[n]) | |
107 | |
108 | |
109 rn = int(ref2) | |
110 prev = nID | |
111 # -- # | |
112 output.close() | |
113 output2.close() | |
114 | |
115 | |
116 def convertGeneCoordinate2(targetList, bufLocFolder): | |
117 inputfile2 = bufLocFolder + "chr/" | |
118 outputfile = bufLocFolder + "geneRefCoverage.txt" | |
119 outputfile_avg = bufLocFolder + "geneRefCoverage_targetAverage.txt" | |
120 | |
121 output= open(outputfile,"w") | |
122 output_avg = open(outputfile_avg,"w") | |
123 | |
124 rn = 1 | |
125 prevchr = "" | |
126 for target in targetList: | |
127 chr = target.chr | |
128 starts = target.oriStart.split(",") | |
129 ends = target.oriEnd.split(",") | |
130 target_ttl_readdepth = 0 | |
131 starts_leftmost = starts[0] | |
132 | |
133 | |
134 if ((len(chr) > 5) or (chr[len(chr)-1] == "M")): | |
135 continue | |
136 | |
137 if (prevchr != chr): | |
138 print chr #progress checking | |
139 prevchr = chr | |
140 t = 0 | |
141 covFile = file.readlines(open(inputfile2+chr+".txt","r")) | |
142 | |
143 for n in range(0,target.numberExon): | |
144 if t >= len(covFile): | |
145 break | |
146 cov = covFile[t].split() | |
147 while ((int(starts[n]) < int(cov[1])) or (int(starts[n]) >= int(cov[2]))): | |
148 if (int(cov[1]) > int(starts[n])): t-=1 | |
149 else: | |
150 t+=1 | |
151 cov = covFile[t].split() | |
152 | |
153 while ((int(ends[n]) < int(cov[1])) or (int(ends[n]) > int(cov[2]))): | |
154 # print output # | |
155 if (rn == 1): | |
156 prev = target.id | |
157 nID = target.id | |
158 if (nID != prev): | |
159 rn = 1 | |
160 ref1 = str(rn) | |
161 ref2 = str(int(cov[2]) - int(starts[n]) + rn) | |
162 outputwrite2(output, chr,cov[3],starts[n],cov[2]) | |
163 tmprange=int(cov[2])-int(starts[n])+1 | |
164 target_ttl_readdepth+=int(cov[3])*tmprange | |
165 #target_length+=tmprange | |
166 | |
167 rn = int(ref2) | |
168 prev = nID | |
169 # -- # | |
170 | |
171 # get to the next line of inputfile# | |
172 t+= 1 | |
173 cov = covFile[t].split() | |
174 starts[n] = cov[1] | |
175 | |
176 #print output # | |
177 if (t == 0) and (t >= len(covFile)): | |
178 continue | |
179 | |
180 if (rn == 1): | |
181 prev = target.id | |
182 nID = target.id | |
183 if (nID != prev): | |
184 rn = 1 | |
185 ref1 = str(rn) | |
186 ref2 = str(int(ends[n]) - int(starts[n]) + rn) | |
187 outputwrite2(output, chr, cov[3], starts[n], ends[n]) | |
188 tmprange=int(ends[n])-int(starts[n])+1 | |
189 target_ttl_readdepth+=int(cov[3])*tmprange | |
190 #target_length+=tmprange | |
191 target_length = int(ends[n])-int(starts_leftmost)+1 | |
192 output_avg.write("\t".join([chr,starts_leftmost,ends[n],str(target_ttl_readdepth/target_length),str(target_length)])+"\n") | |
193 | |
194 rn = int(ref2) | |
195 prev = nID | |
196 # -- # | |
197 output.close() | |
198 output_avg.close() | |
199 | |
200 | |
201 | |
202 | |
203 | |
204 | |
205 | |
206 | |
207 | |
208 | |
209 | |
210 | |
211 | |
212 | |
213 | |
214 | |
215 | |
216 | |
217 | |
218 |