annotate SMART/Java/Python/structure/Transcript.py @ 36:44d5973c188c

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 15:02:29 -0400
parents
children 169d364ddd91
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
1 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
2 # Copyright INRA-URGI 2009-2010
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
3 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
4 # This software is governed by the CeCILL license under French law and
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
5 # abiding by the rules of distribution of free software. You can use,
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
6 # modify and/ or redistribute the software under the terms of the CeCILL
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
7 # license as circulated by CEA, CNRS and INRIA at the following URL
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
8 # "http://www.cecill.info".
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
9 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
10 # As a counterpart to the access to the source code and rights to copy,
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
11 # modify and redistribute granted by the license, users are provided only
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
12 # with a limited warranty and the software's author, the holder of the
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
13 # economic rights, and the successive licensors have only limited
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
14 # liability.
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
15 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
16 # In this respect, the user's attention is drawn to the risks associated
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
17 # with loading, using, modifying and/or developing or reproducing the
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
18 # software by the user in light of its specific status of free software,
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
19 # that may mean that it is complicated to manipulate, and that also
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
20 # therefore means that it is reserved for developers and experienced
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
21 # professionals having in-depth computer knowledge. Users are therefore
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
22 # encouraged to load and test the software's suitability as regards their
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
23 # requirements in conditions enabling the security of their systems and/or
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
24 # data to be ensured and, more generally, to use and operate it in the
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
25 # same conditions as regards security.
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
26 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
27 # The fact that you are presently reading this means that you have had
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
28 # knowledge of the CeCILL license and that you accept its terms.
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
29 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
30 import sys
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
31 from SMART.Java.Python.structure.Interval import Interval
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
32 from SMART.Java.Python.structure.Sequence import Sequence
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
33
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
34
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
35 class Transcript(Interval):
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
36 """
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
37 A class that models a transcript, considered as a specialized interval (the bounds of the transcript) that contains exons (also represented as intervals)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
38 @ivar exons: a list of exons (intervals)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
39 @type exons: list of L{Interval<Interval>}
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
40 """
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
41
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def __init__(self, transcript = None, verbosity = 0):
    """
    Constructor
    Start with an empty exon list and no cached intron, then optionally copy another transcript
    @param transcript: transcript to be copied (nothing is copied when None)
    @type  transcript: class L{Transcript<Transcript>}
    @param verbosity:  verbosity
    @type  verbosity:  int
    """
    super(Transcript, self).__init__(None, verbosity)
    self.exons = []
    # intron cache filled by getIntrons(); None means "not computed yet"
    self.introns = None
    # identity test: "!= None" would go through the comparison operators of the argument
    if transcript is not None:
        self.copy(transcript)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
55
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
56
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def copy(self, transcript):
    """
    Copy method
    The interval part is copied by the parent class; exons are copied only when the
    source object is exactly a Transcript that reports more than one exon
    @param transcript: transcript to be copied
    @type  transcript: class L{Transcript<Transcript>} or L{Interval<Interval>}
    """
    super(Transcript, self).copy(transcript)
    if transcript.__class__.__name__ != "Transcript":
        return
    sourceExons = transcript.getExons()
    if len(sourceExons) <= 1:
        # a single exon is already described by the bounds copied above
        return
    for sourceExon in sourceExons:
        self.addExon(Interval(sourceExon))
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
70
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
71
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def setDirection(self, direction):
    """
    Set the direction of the transcript and of each of its exons
    The value is handed unchanged to the parent class and to every exon
    @param direction: direction of the transcript (+ / -)
    @type  direction: int or string
    """
    super(Transcript, self).setDirection(direction)
    for storedExon in self.exons:
        storedExon.setDirection(direction)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
83
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
84
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def setChromosome(self, chromosome):
    """
    Set the chromosome of the transcript and of each of its exons
    @param chromosome: chromosome on which the transcript is
    @type  chromosome: string
    """
    super(Transcript, self).setChromosome(chromosome)
    for storedExon in self.exons:
        storedExon.setChromosome(chromosome)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
94
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
95
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def addExon(self, exon):
    """
    Add an exon to the list of exons
    If the transcript has no exon yet and the new exon does not overlap the transcript,
    the current bounds of the transcript are first stored as an exon of their own.
    A copy of the given exon (set to the direction of the transcript) is then stored,
    the bounds of the transcript are extended when the new exon exceeds them, and the
    cached introns are discarded.
    @param exon: a new exon
    @type  exon: class L{Interval<Interval>}
    """
    if not self.exons and not exon.overlapWith(self):
        firstExon = Interval()
        firstExon.setStart(self.getStart())
        firstExon.setEnd(self.getEnd())
        firstExon.setDirection(self.getDirection())
        firstExon.setChromosome(self.getChromosome())
        self.exons.append(firstExon)
    newExon = Interval(exon)
    newExon.setDirection(self.getDirection())
    self.exons.append(newExon)
    # the exon list changed: introns cached by getIntrons() would be stale
    self.introns = None
    if newExon.getStart() < self.getStart():
        self.setStart(newExon.getStart())
    if newExon.getEnd() > self.getEnd():
        self.setEnd(newExon.getEnd())
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
116
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
117
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def setStart(self, start):
    """
    Set the new start, move the first exon accordingly (if exists)
    Exons are re-sorted with sortExonsIncreasing() before the first one is updated
    @param start: the new start
    @type  start: int
    """
    super(Transcript, self).setStart(start)
    if not self.exons:
        return
    self.sortExonsIncreasing()
    firstExon = self.exons[0]
    firstExon.setStart(start)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
128
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
129
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def setEnd(self, end):
    """
    Set the new end, move the last exon accordingly (if exists)
    Exons are re-sorted with sortExonsIncreasing() before the last one is updated
    @param end: the new end
    @type  end: int
    """
    super(Transcript, self).setEnd(end)
    if not self.exons:
        return
    self.sortExonsIncreasing()
    lastExon = self.exons[-1]
    lastExon.setEnd(end)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
140
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
141
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def reverse(self):
    """
    Reverse the strand of the transcript, then the strand of each stored exon
    """
    super(Transcript, self).reverse()
    for storedExon in self.exons:
        storedExon.reverse()
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
149
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
150
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def getUniqueName(self):
    """
    Try to give a unique name by possibly adding occurrence
    @return: the name followed by "-" and the "occurrence" tag when both the
             "nbOccurrences" and "occurrence" tags exist and "nbOccurrences"
             differs from 1; the plain name otherwise
    """
    tags = self.tags
    hasOccurrenceTags = "nbOccurrences" in tags and "occurrence" in tags
    if hasOccurrenceTags and tags["nbOccurrences"] != 1:
        return "%s-%d" % (self.name, tags["occurrence"])
    return self.name
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
158
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
159
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def getNbExons(self):
    """
    Get the number of exons
    A transcript whose exon list is empty counts as one exon
    @return: the number of stored exons, never less than 1
    """
    nbStoredExons = len(self.exons)
    if nbStoredExons < 1:
        return 1
    return nbStoredExons
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
165
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
166
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def getExon(self, i):
    """
    Get a specific exon
    When no exon is stored, rank 0 designates the transcript itself
    @param i: the rank of the exon
    @type  i: int
    @return: the exon of rank i, or the transcript itself (rank 0, no stored exon)
    @raise Exception: if a non-zero rank is asked while no exon is stored
    """
    if self.exons:
        return self.exons[i]
    if i == 0:
        return self
    raise Exception("Cannot get exon #%i while there is no exon in the transcript" % (i))
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
178
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
179
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def getExons(self):
    """
    Get all the exons
    @return: the stored exon list itself when it is not empty, otherwise a new
             one-element list holding an Interval built from this transcript
    """
    if self.exons:
        return self.exons
    return [Interval(self)]
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
187
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
188
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def getIntrons(self):
    """
    Get all the introns
    Introns are computed on the first call and cached in self.introns;
    later calls return the cached list
    @return: the list of introns (intervals between consecutive exons)
    """
    if self.introns is not None:
        return self.introns
    self.sortExons()
    self.introns = []
    previousExon = self.getExon(0)
    for rank, nextExon in enumerate(self.exons[1:], 1):
        intron = Interval()
        intron.setName("%s_intron%d" % (self.getName(), rank))
        intron.setChromosome(self.getChromosome())
        intron.setDirection(self.getDirection())
        if self.getDirection() == 1:
            intron.setEnd(nextExon.getStart() - 1)
            intron.setStart(previousExon.getEnd() + 1)
        else:
            # NOTE(review): presumably sortExons() orders exons along the strand,
            # so the next exon lies before the previous one here -- confirm
            intron.setStart(nextExon.getEnd() + 1)
            intron.setEnd(previousExon.getStart() - 1)
        intron.setDirection(self.getDirection())
        # introns whose size is not positive are not kept
        if intron.getSize() > 0:
            self.introns.append(intron)
        previousExon = nextExon
        # NOTE(review): the size is set after the test above; order kept as found
        intron.setSize(intron.getEnd() - intron.getStart() + 1)
    return self.introns
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
216
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
217
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def getSize(self):
    """
    Get the size of the transcript (i.e. the number of nucleotides)
    Computed on the fly: sum of the exon sizes, or the size of the whole
    interval when no exon is stored
    @return: the size
    """
    if not self.exons:
        return self.getSizeWithIntrons()
    return sum(storedExon.getSize() for storedExon in self.exons)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
229
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
230
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def getSizeWithIntrons(self):
    """
    Get the size of the interval (i.e. distance from start to end)
    Delegates to the getSize() of the parent class, which ignores the exon list
    @return: the size of the whole interval
    """
    asInterval = super(Transcript, self)
    return asInterval.getSize()
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
236
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
237
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def overlapWithExon(self, transcript, nbNucleotides = 1):
    """
    Check if the exons of this transcript overlap with the exons of another transcript
    The transcripts themselves are compared first; exons are only compared when
    the transcripts overlap
    @param transcript:    transcript to be compared to
    @type  transcript:    class L{Transcript<Transcript>}
    @param nbNucleotides: minimum number of nucleotides to declare an overlap
    @type  nbNucleotides: int
    @return: True if at least one pair of exons overlaps, False otherwise
    """
    if not self.overlapWith(transcript, nbNucleotides):
        return False
    return any(thisExon.overlapWith(thatExon, nbNucleotides)
               for thisExon in self.getExons()
               for thatExon in transcript.getExons())
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
253
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
254
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def include(self, transcript):
    """
    Whether this transcript includes the other one
    The bounds must include the other transcript (parent class test), and every
    exon of the other transcript must be included in one exon of this transcript
    @param transcript: object to be compared to
    @type  transcript: class L{Transcript<Transcript>}
    @return: True if this transcript includes the other one, False otherwise
    """
    if not super(Transcript, self).include(transcript):
        return False
    for thatExon in transcript.getExons():
        isCovered = any(thisExon.include(thatExon) for thisExon in self.getExons())
        if not isCovered:
            return False
    return True
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
270
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
271
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def merge(self, transcript, normalization = False):
    """
    Merge with another transcript
    Merge exons if they overlap, otherwise add exons
    NOTE(review): overlapping exons are merged into the exon objects returned by
    transcript.getExons(), i.e. the other transcript's exons may be modified -- confirm
    this is intended
    @param transcript:    transcript to be merged to
    @type  transcript:    class L{Transcript<Transcript>}
    @param normalization: whether the sum of the merge should be normalized wrt the number of mappings of each elements
    @type  normalization: boolean
    @raise Exception: if the transcripts are not on the same chromosome and direction
    """
    if self.getChromosome() != transcript.getChromosome() or self.getDirection() != transcript.getDirection():
        raise Exception("Cannot merge '%s' with '%s'!" % (self, transcript))

    mergedExons = self.getExons()
    incomingExons = transcript.getExons()

    for incomingExon in incomingExons:
        absorbedIndices = []
        for index, candidateExon in enumerate(mergedExons):
            if candidateExon.overlapWith(incomingExon):
                # the incoming exon grows as it absorbs each overlapping exon
                incomingExon.merge(candidateExon)
                absorbedIndices.append(index)
        mergedExons.append(incomingExon)
        # delete from the highest index down so the remaining indices stay valid
        for index in absorbedIndices[::-1]:
            del mergedExons[index]

    self.removeExons()
    self.setStart(min(self.getStart(), transcript.getStart()))
    self.setEnd(max(self.getEnd(), transcript.getEnd()))
    # a single exon is fully described by the bounds: store exons only if several remain
    if len(mergedExons) > 1:
        for mergedExon in mergedExons:
            self.addExon(mergedExon)

    self.setName("%s--%s" % (self.getUniqueName(), transcript.getUniqueName()))
    super(Transcript, self).merge(transcript, normalization)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
305
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
306
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def getDifference(self, transcript, sameStrand = False):
    """
    Get the difference between this transcript and another one
    @param transcript: object to be compared to
    @type  transcript: class L{Transcript<Transcript>}
    @param sameStrand: do the comparison iff the transcripts are on the same strand
    @type  sameStrand: boolean
    @return: a copy of this transcript when nothing has to be removed (other
             chromosome, no overlap, or other strand while sameStrand is set),
             a transcript made of the remaining exon parts otherwise, or None
             when nothing remains
    """
    result = Transcript()
    result.copy(self)
    if (self.getChromosome() != transcript.getChromosome()
            or not self.overlapWith(transcript)
            or (sameStrand and self.getDirection() != transcript.getDirection())):
        return result
    result.removeExons()
    # NOTE(review): provisional bounds; they are overwritten below whenever some
    # exon part remains, and the result is dropped (None) otherwise
    if transcript.getEnd() > result.getStart():
        result.setStart(transcript.getEnd() + 1)
    if transcript.getStart() < result.getEnd():
        result.setEnd(transcript.getStart() + 1)
    # work on copies so that the exons of this transcript are left untouched
    remainingExons = []
    for ownExon in self.getExons():
        duplicate = Interval()
        duplicate.copy(ownExon)
        remainingExons.append(duplicate)
    # subtract each exon of the other transcript from every remaining piece
    for thatExon in transcript.getExons():
        remainingExons = [piece
                          for remainingExon in remainingExons
                          for piece in remainingExon.getDifference(thatExon)]
    if not remainingExons:
        return None
    newStart = min(piece.getStart() for piece in remainingExons)
    newEnd = max(piece.getEnd() for piece in remainingExons)
    result.setEnd(newEnd)
    result.setStart(newStart)
    result.exons = remainingExons
    return result
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
349
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
350
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def getIntersection(self, transcript):
    """
    Get the intersection between this transcript and another one
    Every exon of this transcript is intersected with every exon of the other one;
    the non-empty intersections become the exons of the result, whose bounds are
    set to span them
    @param transcript: object to be compared to
    @type  transcript: class L{Transcript<Transcript>}
    @return: another transcript, or None if the transcripts are not on the same
             chromosome and direction, or if no exon intersects
    """
    if self.getChromosome() != transcript.getChromosome() or self.getDirection() != transcript.getDirection():
        return None
    newTranscript = Transcript()
    newTranscript.setDirection(self.getDirection())
    newTranscript.setChromosome(self.getChromosome())
    newTranscript.setName("%s_intersect_%s" % (self.getName(), transcript.getName()))
    newExons = []
    for thisExon in self.getExons():
        for thatExon in transcript.getExons():
            newExon = thisExon.getIntersection(thatExon)
            # identity test: "!= None" would go through the comparison operators of the exon
            if newExon is not None:
                newExons.append(newExon)
    if not newExons:
        return None
    # give the result its own bounds (as getDifference does); they are set before
    # the exons are attached so that setStart/setEnd do not move any exon
    newTranscript.setEnd(max(newExon.getEnd() for newExon in newExons))
    newTranscript.setStart(min(newExon.getStart() for newExon in newExons))
    newTranscript.exons = newExons
    return newTranscript
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
374
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
375
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
@classmethod
def getSqlVariables(cls):
    """
    Get the names of the properties that should be saved in a database
    @return: the names given by L{Interval<Interval>}, followed by "exons"
    """
    names = Interval.getSqlVariables()
    names.append("exons")
    return names
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
384
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
385
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def setSqlValues(self, array):
    """
    Fill this object with the values found in a results line of a SQL query
    The exons are read from the field at index 8, written as "start-end" pairs separated by commas
    Nothing is added if this field is empty
    @param array: the values to be copied
    @type  array: a list
    """
    super(Transcript, self).setSqlValues(array)
    serializedExons = array[8]
    if serializedExons:
        for index, coordinates in enumerate(serializedExons.split(",")):
            exonStart, exonEnd = coordinates.split("-")
            # each exon takes its chromosome and direction from the transcript
            exon = Interval()
            exon.setChromosome(self.getChromosome())
            exon.setDirection(self.getDirection())
            exon.setName("%s_exon%d" % (self.getName(), index + 1))
            exon.setStart(int(exonStart))
            exon.setEnd(int(exonEnd))
            self.addExon(exon)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
405
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
406
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def getSqlValues(self):
    """
    Get the values of the properties that should be saved in a database
    The exons are written as "start-end" pairs separated by commas,
    or as an empty string when there is exactly one exon
    @return: the values of the parent class, with the "size" and "exons" entries set
    """
    values = super(Transcript, self).getSqlValues()
    values["size"] = self.getSize()
    if self.getNbExons() == 1:
        serializedExons = ""
    else:
        ranges = []
        for exon in self.getExons():
            ranges.append("%d-%d" % (exon.getStart(), exon.getEnd()))
        serializedExons = ",".join(ranges)
    values["exons"] = serializedExons
    return values
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
418
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
419
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
@classmethod
def getSqlTypes(cls):
    """
    Get the types of the properties that should be saved in a database
    @return: the types given by L{Interval<Interval>}, where "exons" is a "varchar"
    """
    sqlTypes = Interval.getSqlTypes()
    sqlTypes["exons"] = "varchar"
    return sqlTypes
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
428
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
429
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
@classmethod
def getSqlSizes(cls):
    """
    Get the sizes of the properties that should be saved in a database
    @return: the sizes given by L{Interval<Interval>}, where "exons" has size 10000
    """
    sqlSizes = Interval.getSqlSizes()
    sqlSizes["exons"] = 10000
    return sqlSizes
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
438
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
439
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def sortExons(self):
    """
    Sort the exons in the order of the transcription
    Increasing positions if the direction is not -1, decreasing positions otherwise
    """
    self.sortExonsIncreasing()
    if self.getDirection() != -1:
        return
    # direction -1: walk the exons from the highest to the lowest positions
    reversedExons = self.getExons()
    reversedExons.reverse()
    self.exons = reversedExons
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
450
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
451
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def sortExonsIncreasing(self):
    """
    Sort the exons
    Increasing order of start position; exons with the same start keep their relative order
    """
    # sorted() is stable and builds a new list: the list given by getExons()
    # is left untouched, instead of being emptied element by element
    self.exons = sorted(self.getExons(), key=lambda exon: exon.getStart())
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
467
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
468
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def extendStart(self, size):
    """
    Extend the transcript by the 5' end
    The first exon (in the order given by sortExons) is extended too;
    in direction 1, its start is not moved below 0
    @param size: the size to be extended
    @type  size: int
    """
    if len(self.exons) > 0:
        self.sortExons()
        if self.getDirection() == 1:
            firstExon = self.exons[0]
            firstExon.setStart(max(0, firstExon.getStart() - size))
        else:
            firstExon = self.exons[0]
            firstExon.setEnd(firstExon.getEnd() + size)
    super(Transcript, self).extendStart(size)
    self.bin = None
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
483
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
484
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def extendEnd(self, size):
    """
    Extend the transcript by the 3' end
    The last exon (in the order given by sortExons) is extended too;
    in the other direction than 1, its start is not moved below 0
    @param size: the size to be extended
    @type  size: int
    """
    if len(self.exons) > 0:
        self.sortExons()
        if self.getDirection() == 1:
            lastExon = self.exons[-1]
            lastExon.setEnd(lastExon.getEnd() + size)
        else:
            lastExon = self.exons[-1]
            lastExon.setStart(max(0, lastExon.getStart() - size))
    super(Transcript, self).extendEnd(size)
    self.bin = None
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
499
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
500
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def extendExons(self, size):
    """
    Extend all the exons, on both sides
    Consecutive exons which overlap once extended are merged
    The transcript itself is extended on both sides too
    @param size: the size to be extended
    @type  size: int
    """
    if len(self.exons) > 0:
        self.sortExons()
        mergedExons = []
        currentExon = None
        for exon in self.exons:
            exon.extendStart(size)
            exon.extendEnd(size)
            exon.setDirection(self.getDirection())
            if currentExon != None and currentExon.overlapWith(exon):
                # the extended exon touches the current one: it is absorbed
                currentExon.merge(exon)
                continue
            if currentExon != None:
                mergedExons.append(currentExon)
            currentExon = exon
        # the exon still being built has not been stored yet
        mergedExons.append(currentExon)
        self.exons = mergedExons
    super(Transcript, self).extendStart(size)
    super(Transcript, self).extendEnd(size)
    self.bin = None
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
526
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
527
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def restrictStart(self, size = 1):
    """
    Restrict the transcript by some nucleotides, start from its start position
    Exons which are out of the restricted region are removed,
    an exon which crosses its border is shortened
    @param size: the size to be restricted to
    @type  size: int
    """
    keptExons = []
    if self.getDirection() == 1:
        for exon in self.exons:
            # in direction 1, the region is made of the lowest positions
            lastPosition = self.getStart() + size - 1
            if exon.getStart() > lastPosition:
                continue
            if exon.getEnd() > lastPosition:
                exon.setEnd(lastPosition)
            keptExons.append(exon)
    else:
        for exon in self.exons:
            # otherwise, the region is made of the highest positions
            firstPosition = self.getEnd() - size + 1
            if exon.getEnd() < firstPosition:
                continue
            if exon.getStart() < firstPosition:
                exon.setStart(firstPosition)
            keptExons.append(exon)
    super(Transcript, self).restrictStart(size)
    self.exons = keptExons
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
550
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
551
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def restrictEnd(self, size = 1):
    """
    Restrict the transcript by some nucleotides, end from its end position
    Exons which are out of the restricted region are removed,
    an exon which crosses its border is shortened
    @param size: the size to be restricted to
    @type  size: int
    """
    newExons = []
    if self.getDirection() == 1:
        # in direction 1, the region is made of the highest positions
        firstPosition = self.getEnd() - size + 1
        for exon in self.exons:
            if exon.getEnd() >= firstPosition:
                if exon.getStart() < firstPosition:
                    exon.setStart(firstPosition)
                newExons.append(exon)
    else:
        # otherwise, the region is made of the lowest positions
        # (mirror of the direction 1 case of restrictStart)
        lastPosition = self.getStart() + size - 1
        for exon in self.exons:
            if exon.getStart() <= lastPosition:
                if exon.getEnd() > lastPosition:
                    exon.setEnd(lastPosition)
                newExons.append(exon)
    # the exons are attached after the boundaries of the transcript have been updated
    super(Transcript, self).restrictEnd(size)
    self.exons = newExons
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
574
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
575
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def removeExons(self):
    """
    Forget all the exons, so that the transcript is a mere interval
    The bin is reset as well
    """
    self.bin = None
    self.exons = []
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
582
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
583
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def printGtf(self, title):
    """
    Export this transcript using GTF2.2 format
    One line is written per exon, in increasing order of start position
    @param title: the title of the transcripts
    @type  title: string
    @return: a string
    """
    transcriptId = self.getUniqueName()
    geneId = "%s_gene" % (transcriptId)
    direction = "+"
    if self.getDirection() == -1:
        direction = "-"
    self.sortExonsIncreasing()
    lines = []
    for i, exon in enumerate(self.getExons()):
        # the tags are removed from a copy of the exon
        # NOTE(review): assumes Interval.copy duplicates the tags -- to be confirmed
        exonCopy = Interval()
        exonCopy.copy(exon)
        # test the tag names: getTagValues() gives the formatted tags,
        # where "ID" may appear inside any other name or value
        if "ID" in exonCopy.getTagNames():
            del exonCopy.tags["ID"]
        feature = "exon"
        if "feature" in exonCopy.getTagNames():
            feature = exonCopy.getTagValue("feature")
            del exonCopy.tags["feature"]
        score = "."
        if "score" in exonCopy.getTagNames():
            score = "%d" % (int(exonCopy.getTagValue("score")))
            del exonCopy.tags["score"]
        if "Parent" in exonCopy.getTagNames():
            del exonCopy.tags["Parent"]
        exonCopy.setName("%s_part%d" % (self.getName(), i+1))
        comment = exonCopy.getTagValues("; ", " ", "\"")
        if comment is None:
            # no tag left: do not write "None" in the attributes
            comment = ""
        lines.append("%s\t%s\t%s\t%d\t%d\t%s\t%s\t.\ttranscript_id \"%s\"; gene_id \"%s\"; %s\n" % (exonCopy.getChromosome(), title, feature, exonCopy.getStart(), exonCopy.getEnd(), score, direction, transcriptId, geneId, comment))
    return "".join(lines)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
617
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
618
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def printGff2(self, title):
    """
    Export this transcript using GFF2 format
    One line is written for the transcript, then one line per exon,
    in increasing order of start position
    @param title: the title of the transcripts
    @type  title: string
    @return: a string
    """
    direction = "+"
    if self.getDirection() == -1:
        direction = "-"
    self.sortExonsIncreasing()
    comment = self.getTagValues()
    if comment is None:
        # no tag: nothing is added after the name (and not the string "None")
        comment = ""
    else:
        comment = ";%s" % (comment)
    transcriptScore = "."
    if "score" in self.getTagNames():
        transcriptScore = "%d" % (int(self.getTagValue("score")))
    feature = "transcript"
    if "feature" in self.getTagNames():
        feature = self.getTagValue("feature")
    lines = ["%s\t%s\t%s\t%d\t%d\t%s\t%s\t.\tGENE %s%s\n" % (self.getChromosome(), title, feature, self.getStart(), self.getEnd(), transcriptScore, direction, self.name, comment)]
    for exon in self.getExons():
        # an exon uses its own score if it has one, the score of the transcript otherwise
        # (and never the score of a previous exon)
        score = transcriptScore
        if "score" in exon.getTagNames():
            score = "%d" % (int(exon.getTagValue("score")))
        lines.append("%s\t%s\t_exon\t%d\t%d\t%s\t%s\t.\tGENE %s\n" % (self.getChromosome(), title, exon.getStart(), exon.getEnd(), score, direction, self.name))
    return "".join(lines)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
645
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
646
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def printGff3(self, title):
    """
    Export this transcript using GFF3 format
    One line is written for the transcript, followed by one line per exon
    if there are several exons, in increasing order of start position
    An "ID" tag is added to the transcript if it has none
    @param title: the title of the transcripts
    @type  title: string
    @return: a string
    """
    direction = "+"
    if self.getDirection() == -1:
        direction = "-"
    self.sortExonsIncreasing()
    # test the tag names: getTagValues() gives the formatted tags,
    # where "ID" may appear inside any other name or value
    if "ID" not in self.getTagNames():
        self.setTagValue("ID", self.getUniqueName())
    transcriptId = self.getTagValue("ID")
    # "feature" and "score" have their own columns: they are removed from a
    # working copy of the tags, so that the tags of the transcript are really
    # the same after the export
    savedTags = self.tags
    self.tags = dict(savedTags)
    try:
        feature = "transcript"
        if "feature" in self.getTagNames():
            feature = self.getTagValue("feature")
            del self.tags["feature"]
        transcriptScore = "."
        if "score" in self.getTagNames():
            transcriptScore = "%d" % (int(self.getTagValue("score")))
            del self.tags["score"]
        comment = self.getTagValues(";", "=")
    finally:
        self.tags = savedTags
    lines = ["%s\t%s\t%s\t%d\t%d\t%s\t%s\t.\t%s\n" % (self.getChromosome(), title, feature, self.getStart(), self.getEnd(), transcriptScore, direction, comment)]
    if len(self.exons) > 1:
        for i, exon in enumerate(self.getExons()):
            # an exon uses its own score if it has one, the score of the transcript otherwise
            # (and never the score of a previous exon)
            score = transcriptScore
            if "score" in exon.getTagNames():
                score = "%d" % (int(exon.getTagValue("score")))
            lines.append("%s\t%s\texon\t%d\t%d\t%s\t%s\t.\tID=%s-exon%d;Name=%s-exon%d;Parent=%s\n" % (self.getChromosome(), title, exon.getStart(), exon.getEnd(), score, direction, transcriptId, i+1, self.name, i+1, transcriptId))
    return "".join(lines)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
678
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
679
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def printEmbl(self):
    """
    Export this transcript using EMBL format
    The location is "start..end" if there is at most one exon, and the
    "join" of the locations of the exons otherwise; it is wrapped into
    "complement" in direction -1
    @return: a string
    """
    if len(self.exons) <= 1:
        position = "%d..%d" % (self.getStart(), self.getEnd())
    else:
        # each segment is the location of one exon (not of the whole transcript)
        positions = ["%d..%d" % (exon.getStart(), exon.getEnd()) for exon in self.getExons()]
        position = "join(%s)" % (",".join(positions))
    if self.getDirection() == -1:
        position = "complement(%s)" % (position)
    feature = "misc_feature"
    if "feature" in self.getTagNames():
        # features named after S-MART are written as "misc_feature"
        if not self.getTagValue("feature").startswith("S-MART"):
            feature = self.getTagValue("feature")
    # NOTE(review): the spacing of the "FT" lines is kept as found; EMBL
    # feature tables are column-aligned -- to be confirmed
    string = "FT %s %s\n" % (feature, position)
    if "Name" in self.getTagNames():
        string += "FT /label=\"%s\"\n" % (self.getTagValue("Name"))
    return string
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
703
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
704
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def printBed(self):
    """
    Export this transcript using BED format
    The exons are written as blocks, in increasing order of start position
    If the "nbOccurrences" tag is not 1 and the "occurrence" tag is set,
    the occurrence is added to the name
    @return: a string
    """
    name = self.name
    # the tag which is tested is the tag which is written ("occurrence")
    if "nbOccurrences" in self.getTagNames() and self.getTagValue("nbOccurrences") != 1 and self.getTagValue("occurrence"):
        name = "%s-%d" % (name, int(self.getTagValue("occurrence")))
    direction = "+"
    if self.getDirection() == -1:
        direction = "-"
    self.sortExonsIncreasing()
    sizes = []
    starts = []
    for exon in self.getExons():
        sizes.append("%d" % (exon.getSize()))
        # the starts of the blocks are relative to the start of the transcript
        starts.append("%d" % (exon.getStart() - self.getStart()))
    return "%s\t%d\t%d\t%s\t1000\t%s\t%d\t%d\t0\t%d\t%s,\t%s,\n" % (self.getChromosome(), self.getStart(), self.getEnd()+1, name, direction, self.getStart(), self.getEnd()+1, self.getNbExons(), ",".join(sizes), ",".join(starts))
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
724
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
725
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def printSam(self):
    """
    Export this transcript using SAM format
    Each exon contributes an "M" operation to the CIGAR string and each gap
    between two consecutive exons contributes an "N" operation.
    @return: a string (one tab-separated line, newline terminated)
    """
    name = self.name
    # Flag 0x10 is set for every direction other than 1
    flag = 0 if self.getDirection() == 1 else 0x10
    chromosome = self.getChromosome()
    genomeStart = self.getStart()
    # Constant columns: no mapping quality, no mate, no sequence, no qualities
    quality = 255
    mate = "*"
    mateGenomeStart = 0
    gapSize = 0
    sequence = "*"
    qualityString = "*"
    tags = "NM:i:0"

    self.sortExonsIncreasing()
    operations = []
    lastExonEnd = None
    for exon in self.getExons():
        if lastExonEnd is not None:
            # Gap between the previous exon and this one
            operations.append("%dN" % (exon.getStart() - lastExonEnd - 1))
        operations.append("%dM" % (exon.getSize()))
        # Must advance for every exon: otherwise each later gap would be
        # measured from the end of the first exon
        lastExonEnd = exon.getEnd()
    # "*" stands for an unavailable CIGAR when there is no exon at all
    cigar = "".join(operations) or "*"

    return "%s\t%d\t%s\t%d\t%d\t%s\t%s\t%d\t%d\t%s\t%s\t%s\n" % (name, flag, chromosome, genomeStart, quality, cigar, mate, mateGenomeStart, gapSize, sequence, qualityString, tags)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
755
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
756
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def printUcsc(self):
    """
    Export this transcript using UCSC BED format
    Transcripts whose chromosome name contains "Het" are not exported, and
    "arm_" is replaced by "chr" in the chromosome name that is written.
    @return: a string (one tab-separated line, newline terminated, or an
             empty string for a skipped transcript)
    """
    if "Het" in self.getChromosome():
        return ""
    name = self.name
    direction = "+"
    if self.getDirection() == -1:
        direction = "-"
    self.sortExonsIncreasing()
    start = self.getStart()
    # The end coordinate is written as getEnd() + 1
    end = self.getEnd() + 1
    sizes = []
    starts = []
    for exon in self.getExons():
        sizes.append("%d" % (exon.getSize()))
        # Block starts are relative to the transcript start
        starts.append("%d" % (exon.getStart() - start))
    # Columns: chromosome, start, end, name, score (always 1000), strand,
    # thick start, thick end, colour (always 0), number of blocks,
    # block sizes, block starts (both lists carry a trailing comma)
    return "%s\t%d\t%d\t%s\t1000\t%s\t%d\t%d\t0\t%d\t%s,\t%s,\n" % (self.getChromosome().replace("arm_", "chr"), start, end, name, direction, start, end, self.getNbExons(), ",".join(sizes), ",".join(starts))
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
776
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
777
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def printGBrowseReference(self):
    """
    Export the first line of the GBrowse description of this transcript,
    i.e. the line which names the chromosome
    @return: a string (newline terminated)
    """
    chromosome = self.getChromosome()
    return "reference = %s\n" % chromosome
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
784
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
785
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def printGBrowseLine(self):
    """
    Export the second line of the GBrowse description of this transcript:
    its name, the comma-separated coordinates of its exons and, when there
    are some, its tags between double quotes
    @return: a string (tab-separated, newline terminated)
    """
    self.sortExons()
    exonCoordinates = ",".join([exon.printCoordinates() for exon in self.getExons()])
    tagString = self.getTagValues(";", "=")
    # The quoted, tab-prefixed column is only built when there are tags
    suffix = "\t\"%s\"" % (tagString) if tagString else tagString
    return "User_data\t%s\t%s%s\n" % (self.name, exonCoordinates, suffix)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
800
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
801
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def printGBrowse(self):
    """
    Export this transcript using GBrowse format: the reference line
    followed by the data line
    @return: a string
    """
    referenceLine = self.printGBrowseReference()
    dataLine = self.printGBrowseLine()
    return "%s%s" % (referenceLine, dataLine)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
808
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
809
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def printCsv(self):
    """
    Export this transcript using CSV format: chromosome, start, end, quoted
    strand, exons, then one "tag=value" field per tag (sorted by tag name)
    @return: a string (newline terminated)
    """
    self.sortExons()
    chromosome = self.getChromosome()
    start = self.getStart()
    end = self.getEnd()
    strand = "+" if self.getDirection() == 1 else "-"
    pieces = ["%s,%d,%d,\"%s\"," % (chromosome, start, end, strand)]
    exons = self.getExons()
    if len(exons) == 1:
        # A single exon is reported as "None"
        pieces.append("None")
    else:
        # Each exon is followed by a blank, including the last one
        pieces.extend(["%d-%d " % (exon.getStart(), exon.getEnd()) for exon in exons])
    for tag in sorted(self.tags.keys()):
        pieces.append(",%s=%s" % (tag, str(self.tags[tag])))
    pieces.append("\n")
    return "".join(pieces)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
826
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
827
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def extractSequence(self, parser):
    """
    Build the sequence of this transcript by concatenating the sequences
    of its exons
    The sequence is named after the transcript; the "ID" tag is appended
    after a colon when it exists and differs from the transcript name.
    @param parser: a parser to a FASTA file
    @type  parser: class L{SequenceListParser<SequenceListParser>}
    @return:       an instance of L{Sequence<Sequence>}
    """
    self.sortExons()
    sequenceName = self.name
    if "ID" in self.getTagNames():
        identifier = self.getTagValue("ID")
        if identifier != self.name:
            sequenceName += ":%s" % (identifier)
    transcriptSequence = Sequence(sequenceName)
    for exon in self.getExons():
        exonSequence = exon.extractSequence(parser)
        transcriptSequence.concatenate(exonSequence)
    return transcriptSequence
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
843
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
844
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def extractWigData(self, parser):
    """
    Collect the wig data covered by the exons of this transcript
    When the parser is stranded, the result maps each strand (-1 and 1) to
    a list of values; otherwise it is a flat list of values.
    @param parser: a parser to a wig file
    @type  parser: class L{WigParser<WigParser>}
    @return:       a sequence of float, or a dict of such sequences
    """
    self.sortExons()
    if not parser.strands:
        # Unstranded data: exon values are appended as they come; no
        # direction-dependent reversal is applied in this branch
        allValues = []
        for exon in self.getExons():
            allValues.extend(exon.extractWigData(parser))
        return allValues
    strands = (-1, 1)
    valuesPerStrand = dict((strand, []) for strand in strands)
    for exon in self.getExons():
        exonValues = exon.extractWigData(parser)
        if self.getDirection() == -1:
            # Reversed in place, per exon, for transcripts of direction -1
            for strand in strands:
                exonValues[strand].reverse()
        for strand in strands:
            valuesPerStrand[strand].extend(exonValues[strand])
    if self.getDirection() == -1:
        # The accumulated lists are reversed once more as a whole
        for strand in strands:
            valuesPerStrand[strand].reverse()
    return valuesPerStrand