| 6 | 1 # | 
|  | 2 # Copyright INRA-URGI 2009-2010 | 
|  | 3 # | 
|  | 4 # This software is governed by the CeCILL license under French law and | 
|  | 5 # abiding by the rules of distribution of free software. You can use, | 
|  | 6 # modify and/ or redistribute the software under the terms of the CeCILL | 
|  | 7 # license as circulated by CEA, CNRS and INRIA at the following URL | 
|  | 8 # "http://www.cecill.info". | 
|  | 9 # | 
|  | 10 # As a counterpart to the access to the source code and rights to copy, | 
|  | 11 # modify and redistribute granted by the license, users are provided only | 
|  | 12 # with a limited warranty and the software's author, the holder of the | 
|  | 13 # economic rights, and the successive licensors have only limited | 
|  | 14 # liability. | 
|  | 15 # | 
|  | 16 # In this respect, the user's attention is drawn to the risks associated | 
|  | 17 # with loading, using, modifying and/or developing or reproducing the | 
|  | 18 # software by the user in light of its specific status of free software, | 
|  | 19 # that may mean that it is complicated to manipulate, and that also | 
|  | 20 # therefore means that it is reserved for developers and experienced | 
|  | 21 # professionals having in-depth computer knowledge. Users are therefore | 
|  | 22 # encouraged to load and test the software's suitability as regards their | 
|  | 23 # requirements in conditions enabling the security of their systems and/or | 
|  | 24 # data to be ensured and, more generally, to use and operate it in the | 
|  | 25 # same conditions as regards security. | 
|  | 26 # | 
|  | 27 # The fact that you are presently reading this means that you have had | 
|  | 28 # knowledge of the CeCILL license and that you accept its terms. | 
|  | 29 # | 
|  | 30 import sys | 
|  | 31 from SMART.Java.Python.structure.Interval import Interval | 
|  | 32 from SMART.Java.Python.structure.Sequence import Sequence | 
|  | 33 | 
|  | 34 | 
|  | 35 class Transcript(Interval): | 
|  | 36 	""" | 
|  | 37 	A class that models an transcript, considered as a specialized interval (the bounds of the transcript) that contains exons (also represented as intervals) | 
|  | 38 	@ivar exons: a list of exons (intervals) | 
|  | 39 	@type exons: list of L{Interval{Interval}} | 
|  | 40 	""" | 
|  | 41 | 
|  | 42 	def __init__(self, transcript = None, verbosity = 0): | 
|  | 43 		""" | 
|  | 44 		Constructor | 
|  | 45 		@param transcript: transcript to be copied | 
|  | 46 		@type  transcript: class L{Transcript<Transcript>} | 
|  | 47 		@param verbosity:  verbosity | 
|  | 48 		@type  verbosity:  int | 
|  | 49 		""" | 
|  | 50 		super(Transcript, self).__init__(None, verbosity) | 
|  | 51 		self.exons   = [] | 
|  | 52 		self.introns = None | 
|  | 53 		if transcript != None: | 
|  | 54 			self.copy(transcript) | 
|  | 55 | 
|  | 56 | 
|  | 57 	def copy(self, transcript): | 
|  | 58 		""" | 
|  | 59 		Copy method | 
|  | 60 		@param transcript: transcript to be copied | 
|  | 61 		@type	transcript: class L{Transcript<Transcript>} or L{Interval<Interval>} | 
|  | 62 		""" | 
|  | 63 		super(Transcript, self).copy(transcript) | 
|  | 64 		if transcript.__class__.__name__ == "Transcript": | 
|  | 65 			exons = transcript.getExons() | 
|  | 66 			if len(exons) > 1: | 
|  | 67 				for exon in exons: | 
|  | 68 					exonCopy = Interval(exon) | 
|  | 69 					self.addExon(exonCopy) | 
|  | 70 | 
|  | 71 | 
|  | 72 	def setDirection(self, direction): | 
|  | 73 		""" | 
|  | 74 		Set the direction of the interval | 
|  | 75 		Possibly parse different formats | 
|  | 76 		Impact all exons | 
|  | 77 		@param direction: direction of the transcript (+ / -) | 
|  | 78 		@type	direction: int or string | 
|  | 79 		""" | 
|  | 80 		super(Transcript, self).setDirection(direction) | 
|  | 81 		for exon in self.exons: | 
|  | 82 			exon.setDirection(direction) | 
|  | 83 | 
|  | 84 | 
|  | 85 	def setChromosome(self, chromosome): | 
|  | 86 		""" | 
|  | 87 		Set the chromosome | 
|  | 88 		@param chromosome: chromosome on which the transcript is | 
|  | 89 		@type  chromosome: string | 
|  | 90 		""" | 
|  | 91 		super(Transcript, self).setChromosome(chromosome) | 
|  | 92 		for exon in self.exons: | 
|  | 93 			exon.setChromosome(chromosome) | 
|  | 94 | 
|  | 95 | 
|  | 96 	def addExon(self, exon): | 
|  | 97 		""" | 
|  | 98 		Add an exon to the list of exons | 
|  | 99 		@param exon: a new exon | 
|  | 100 		@type  exon: class L{Interval<Interval>} | 
|  | 101 		""" | 
|  | 102 		if not self.exons and not exon.overlapWith(self): | 
|  | 103 			firstExon = Interval() | 
|  | 104 			firstExon.setStart(self.getStart()) | 
|  | 105 			firstExon.setEnd(self.getEnd()) | 
|  | 106 			firstExon.setDirection(self.getDirection()) | 
|  | 107 			firstExon.setChromosome(self.getChromosome()) | 
|  | 108 			self.exons.append(firstExon) | 
|  | 109 		newExon = Interval(exon) | 
|  | 110 		newExon.setDirection(self.getDirection()) | 
|  | 111 		self.exons.append(newExon) | 
|  | 112 		if newExon.getStart() < self.getStart(): | 
|  | 113 			self.setStart(newExon.getStart()) | 
|  | 114 		if newExon.getEnd() > self.getEnd(): | 
|  | 115 			self.setEnd(newExon.getEnd()) | 
|  | 116 | 
|  | 117 | 
|  | 118 	def setStart(self, start): | 
|  | 119 		""" | 
|  | 120 		Set the new start, move the first exon accordingly (if exists) | 
|  | 121 		@param start: the new start | 
|  | 122 		@type  start: int | 
|  | 123 		""" | 
|  | 124 		super(Transcript, self).setStart(start) | 
|  | 125 		if self.exons: | 
|  | 126 			self.sortExonsIncreasing() | 
|  | 127 			self.exons[0].setStart(start) | 
|  | 128 | 
|  | 129 | 
|  | 130 	def setEnd(self, end): | 
|  | 131 		""" | 
|  | 132 		Set the new end, move the last exon accordingly (if exists) | 
|  | 133 		@param end: the new end | 
|  | 134 		@type  end: int | 
|  | 135 		""" | 
|  | 136 		super(Transcript, self).setEnd(end) | 
|  | 137 		if self.exons: | 
|  | 138 			self.sortExonsIncreasing() | 
|  | 139 			self.exons[-1].setEnd(end) | 
|  | 140 | 
|  | 141 | 
|  | 142 	def reverse(self): | 
|  | 143 		""" | 
|  | 144 		Reverse the strand of the transcript | 
|  | 145 		""" | 
|  | 146 		super(Transcript, self).reverse() | 
|  | 147 		for exon in self.exons: | 
|  | 148 			exon.reverse() | 
|  | 149 | 
|  | 150 | 
|  | 151 	def getUniqueName(self): | 
|  | 152 		""" | 
|  | 153 		Try to give a unique name by possibly adding occurrence | 
|  | 154 		""" | 
|  | 155 		if "nbOccurrences" in self.tags and "occurrence" in self.tags and self.tags["nbOccurrences"] != 1: | 
|  | 156 			return "%s-%d" % (self.name, self.tags["occurrence"]) | 
|  | 157 		return self.name | 
|  | 158 | 
|  | 159 | 
|  | 160 	def getNbExons(self): | 
|  | 161 		""" | 
|  | 162 		Get the number of exons | 
|  | 163 		""" | 
|  | 164 		return max(1, len(self.exons)) | 
|  | 165 | 
|  | 166 | 
|  | 167 	def getExon(self, i): | 
|  | 168 		""" | 
|  | 169 		Get a specific exon | 
|  | 170 		@param i: the rank of the exon | 
|  | 171 		@type  i: int | 
|  | 172 		""" | 
|  | 173 		if len(self.exons) == 0: | 
|  | 174 			if i != 0: | 
|  | 175 				raise Exception("Cannot get exon #%i while there is no exon in the transcript" % (i)) | 
|  | 176 			return self | 
|  | 177 		return self.exons[i] | 
|  | 178 | 
|  | 179 | 
|  | 180 	def getExons(self): | 
|  | 181 		""" | 
|  | 182 		Get all the exons | 
|  | 183 		""" | 
|  | 184 		if len(self.exons) == 0: | 
|  | 185 			return [Interval(self)] | 
|  | 186 		return self.exons | 
|  | 187 | 
|  | 188 | 
|  | 189 	def getIntrons(self): | 
|  | 190 		""" | 
|  | 191 		Get all the introns | 
|  | 192 		Compute introns on the fly | 
|  | 193 		""" | 
|  | 194 		if self.introns != None: | 
|  | 195 			return self.introns | 
|  | 196 		self.sortExons() | 
|  | 197 		self.introns = [] | 
|  | 198 		exonStart = self.getExon(0) | 
|  | 199 		for cpt, exonEnd in enumerate(self.exons[1:]): | 
|  | 200 			intron = Interval() | 
|  | 201 			intron.setName("%s_intron%d" % (self.getName(), cpt+1)) | 
|  | 202 			intron.setChromosome(self.getChromosome()) | 
|  | 203 			intron.setDirection(self.getDirection()) | 
|  | 204 			if self.getDirection() == 1: | 
|  | 205 				intron.setEnd(exonEnd.getStart() - 1) | 
|  | 206 				intron.setStart(exonStart.getEnd() + 1) | 
|  | 207 			else: | 
|  | 208 				intron.setStart(exonEnd.getEnd() + 1) | 
|  | 209 				intron.setEnd(exonStart.getStart() - 1) | 
|  | 210 			intron.setDirection(self.getDirection()) | 
|  | 211 			if intron.getSize() > 0: | 
|  | 212 				self.introns.append(intron) | 
|  | 213 			exonStart = exonEnd | 
|  | 214 			intron.setSize(intron.getEnd() - intron.getStart() + 1) | 
|  | 215 		return self.introns | 
|  | 216 | 
|  | 217 | 
|  | 218 	def getSize(self): | 
|  | 219 		""" | 
|  | 220 		Get the size of the transcript (i.e. the number of nucleotides) | 
|  | 221 		Compute size on the fly | 
|  | 222 		""" | 
|  | 223 		if len(self.exons) == 0: | 
|  | 224 			return self.getSizeWithIntrons() | 
|  | 225 		size = 0 | 
|  | 226 		for exon in self.exons: | 
|  | 227 			size += exon.getSize() | 
|  | 228 		return size | 
|  | 229 | 
|  | 230 | 
|  | 231 	def getSizeWithIntrons(self): | 
|  | 232 		""" | 
|  | 233 		Get the size of the interval (i.e. distance from start to end) | 
|  | 234 		""" | 
|  | 235 		return super(Transcript, self).getSize() | 
|  | 236 | 
|  | 237 | 
|  | 238 	def overlapWithExon(self, transcript, nbNucleotides = 1): | 
|  | 239 		""" | 
|  | 240 		Check if the exons of this transcript overlap with the exons of another transcript | 
|  | 241 		@param transcript:		transcript to be compared to | 
|  | 242 		@type	transcript:		class L{Transcript<Transcript>} | 
|  | 243 		@param nbNucleotides: minimum number of nucleotides to declare and overlap | 
|  | 244 		@type	nbNucleotides: int | 
|  | 245 		""" | 
|  | 246 		if not self.overlapWith(transcript, nbNucleotides): | 
|  | 247 			return False | 
|  | 248 		for thisExon in self.getExons(): | 
|  | 249 			for thatExon in transcript.getExons(): | 
|  | 250 				if thisExon.overlapWith(thatExon, nbNucleotides): | 
|  | 251 					return True | 
|  | 252 		return False | 
|  | 253 | 
|  | 254 | 
|  | 255 	def include(self, transcript): | 
|  | 256 		""" | 
|  | 257 		Whether this transcript includes the other one | 
|  | 258 		@param transcript: object to be compared to | 
|  | 259 		@type  transcript: class L{Transcript<Transcript>} | 
|  | 260 		""" | 
|  | 261 		if not super(Transcript, self).include(transcript): | 
|  | 262 			return False | 
|  | 263 		for thatExon in transcript.getExons(): | 
|  | 264 			for thisExon in self.getExons(): | 
|  | 265 				if thisExon.include(thatExon): | 
|  | 266 					break | 
|  | 267 			else: | 
|  | 268 				return False | 
|  | 269 		return True | 
|  | 270 | 
|  | 271 | 
|  | 272 	def merge(self, transcript, normalization = False): | 
|  | 273 		""" | 
|  | 274 		Merge with another transcript | 
|  | 275 		Merge exons if they overlap, otherwise add exons | 
|  | 276 		@param transcript:		transcript to be merged to | 
|  | 277 		@type	transcript:		class L{Transcript<Transcript>} | 
|  | 278 		@param normalization: whether the sum of the merge should be normalized wrt the number of mappings of each elements | 
|  | 279 		@type	normalization: boolean | 
|  | 280 		""" | 
|  | 281 		if self.getChromosome() != transcript.getChromosome() or self.getDirection() != transcript.getDirection(): | 
|  | 282 			raise Exception("Cannot merge '%s' with '%s'!" % (self, transcript)) | 
|  | 283 | 
|  | 284 		theseExons = self.getExons() | 
|  | 285 		thoseExons = transcript.getExons() | 
|  | 286 | 
|  | 287 		for thatExon in thoseExons: | 
|  | 288 			toBeRemoved = [] | 
|  | 289 			for thisIndex, thisExon in enumerate(theseExons): | 
|  | 290 				if thisExon.overlapWith(thatExon): | 
|  | 291 					thatExon.merge(thisExon) | 
|  | 292 					toBeRemoved.append(thisIndex) | 
|  | 293 			theseExons.append(thatExon) | 
|  | 294 			for thisIndex in reversed(toBeRemoved): | 
|  | 295 				del theseExons[thisIndex] | 
|  | 296 		self.removeExons() | 
|  | 297 		self.setStart(min(self.getStart(), transcript.getStart())) | 
|  | 298 		self.setEnd(max(self.getEnd(), transcript.getEnd())) | 
|  | 299 		if len(theseExons) > 1: | 
|  | 300 			for thisExon in theseExons: | 
|  | 301 				self.addExon(thisExon) | 
|  | 302 | 
|  | 303 		self.setName("%s--%s" % (self.getUniqueName(), transcript.getUniqueName())) | 
|  | 304 		super(Transcript, self).merge(transcript, normalization) | 
|  | 305 | 
|  | 306 | 
|  | 307 	def getDifference(self, transcript, sameStrand = False): | 
|  | 308 		""" | 
|  | 309 		Get the difference between this cluster and another one | 
|  | 310 		@param transcript: object to be compared to | 
|  | 311 		@type  transcript: class L{Transcript<Transcript>} | 
|  | 312 		@param sameStrand: do the comparison iff the transcripts are on the same strand | 
|  | 313 		@type  sameStrand: boolean | 
|  | 314 		@return:		   a transcript | 
|  | 315 		""" | 
|  | 316 		newTranscript = Transcript() | 
|  | 317 		newTranscript.copy(self) | 
|  | 318 		if self.getChromosome() != transcript.getChromosome(): | 
|  | 319 			return newTranscript | 
|  | 320 		if not self.overlapWith(transcript): | 
|  | 321 			return newTranscript | 
|  | 322 		if sameStrand and self.getDirection() != transcript.getDirection(): | 
|  | 323 			return newTranscript | 
|  | 324 		newTranscript.removeExons() | 
|  | 325 		if transcript.getEnd() > newTranscript.getStart(): | 
|  | 326 			newTranscript.setStart(transcript.getEnd() + 1) | 
|  | 327 		if transcript.getStart() < newTranscript.getEnd(): | 
|  | 328 			newTranscript.setEnd(transcript.getStart() + 1) | 
|  | 329 		theseExons = [] | 
|  | 330 		for exon in self.getExons(): | 
|  | 331 			exonCopy = Interval() | 
|  | 332 			exonCopy.copy(exon) | 
|  | 333 			theseExons.append(exonCopy) | 
|  | 334 		for thatExon in transcript.getExons(): | 
|  | 335 			newExons = [] | 
|  | 336 			for thisExon in theseExons: | 
|  | 337 				newExons.extend(thisExon.getDifference(thatExon)) | 
|  | 338 			theseExons = newExons | 
|  | 339 		if not theseExons: | 
|  | 340 			return None | 
|  | 341 		newStart, newEnd = theseExons[0].getStart(), theseExons[0].getEnd() | 
|  | 342 		for thisExon in theseExons[1:]: | 
|  | 343 			newStart = min(newStart, thisExon.getStart()) | 
|  | 344 			newEnd   = max(newEnd,   thisExon.getEnd()) | 
|  | 345 		newTranscript.setEnd(newEnd) | 
|  | 346 		newTranscript.setStart(newStart) | 
|  | 347 		newTranscript.exons = theseExons | 
|  | 348 		return newTranscript | 
|  | 349 | 
|  | 350 | 
|  | 351 	def getSqlVariables(cls): | 
|  | 352 		""" | 
|  | 353 		Get the properties of the object that should be saved in a database | 
|  | 354 		""" | 
|  | 355 		variables = Interval.getSqlVariables() | 
|  | 356 		variables.append("exons") | 
|  | 357 		return variables | 
|  | 358 	getSqlVariables = classmethod(getSqlVariables) | 
|  | 359 | 
|  | 360 | 
|  | 361 	def setSqlValues(self, array): | 
|  | 362 		""" | 
|  | 363 		Set the values of the properties of this object as given by a results line of a SQL query | 
|  | 364 		@param array: the values to be copied | 
|  | 365 		@type	array: a list | 
|  | 366 		""" | 
|  | 367 		super(Transcript, self).setSqlValues(array) | 
|  | 368 		mergedExons = array[8] | 
|  | 369 		if not mergedExons: | 
|  | 370 			return | 
|  | 371 		for exonCount, splittedExon in enumerate(mergedExons.split(",")): | 
|  | 372 			start, end = splittedExon.split("-") | 
|  | 373 			exon = Interval() | 
|  | 374 			exon.setChromosome(self.getChromosome()) | 
|  | 375 			exon.setDirection(self.getDirection()) | 
|  | 376 			exon.setName("%s_exon%d" % (self.getName(), exonCount+1)) | 
|  | 377 			exon.setStart(int(start)) | 
|  | 378 			exon.setEnd(int(end)) | 
|  | 379 			self.addExon(exon) | 
|  | 380 | 
|  | 381 | 
|  | 382 	def getSqlValues(self): | 
|  | 383 		""" | 
|  | 384 		Get the values of the properties that should be saved in a database | 
|  | 385 		""" | 
|  | 386 		values = super(Transcript, self).getSqlValues() | 
|  | 387 		values["size"] = self.getSize() | 
|  | 388 		if self.getNbExons() == 1: | 
|  | 389 			values["exons"] = "" | 
|  | 390 		else: | 
|  | 391 			values["exons"] = ",".join(["%d-%d" % (exon.getStart(), exon.getEnd()) for exon in self.getExons()]) | 
|  | 392 		return values | 
|  | 393 | 
|  | 394 | 
|  | 395 	def getSqlTypes(cls): | 
|  | 396 		""" | 
|  | 397 		Get the types of the properties that should be saved in a database | 
|  | 398 		""" | 
|  | 399 		types		  = Interval.getSqlTypes() | 
|  | 400 		types["exons"] = "varchar" | 
|  | 401 		return types | 
|  | 402 	getSqlTypes = classmethod(getSqlTypes) | 
|  | 403 | 
|  | 404 | 
|  | 405 	def getSqlSizes(cls): | 
|  | 406 		""" | 
|  | 407 		Get the sizes of the properties that should be saved in a database | 
|  | 408 		""" | 
|  | 409 		sizes		  = Interval.getSqlSizes() | 
|  | 410 		sizes["exons"] = 10000 | 
|  | 411 		return sizes | 
|  | 412 	getSqlSizes = classmethod(getSqlSizes) | 
|  | 413 | 
|  | 414 | 
|  | 415 	def sortExons(self): | 
|  | 416 		""" | 
|  | 417 		Sort the exons | 
|  | 418 		Increasing order if transcript is on strand "+", decreasing otherwise | 
|  | 419 		""" | 
|  | 420 		self.sortExonsIncreasing() | 
|  | 421 		if self.getDirection() == -1: | 
|  | 422 			exons = self.getExons() | 
|  | 423 			exons.reverse() | 
|  | 424 			self.exons = exons | 
|  | 425 | 
|  | 426 | 
|  | 427 	def sortExonsIncreasing(self): | 
|  | 428 		""" | 
|  | 429 		Sort the exons | 
|  | 430 		Increasing order | 
|  | 431 		""" | 
|  | 432 		exons = self.getExons() | 
|  | 433 		sortedExons = [] | 
|  | 434 		while len(exons) > 0: | 
|  | 435 			minExon = exons[0] | 
|  | 436 			for index in range(1, len(exons)): | 
|  | 437 				if minExon.getStart() > exons[index].getStart(): | 
|  | 438 					minExon = exons[index] | 
|  | 439 			sortedExons.append(minExon) | 
|  | 440 			exons.remove(minExon) | 
|  | 441 		self.exons = sortedExons | 
|  | 442 | 
|  | 443 | 
|  | 444 	def extendStart(self, size): | 
|  | 445 		""" | 
|  | 446 		Extend the transcript by the 5' end | 
|  | 447 		@param size: the size to be extended | 
|  | 448 		@type	size: int | 
|  | 449 		""" | 
|  | 450 		if len(self.exons) != 0: | 
|  | 451 			self.sortExons() | 
|  | 452 			if self.getDirection() == 1: | 
|  | 453 				self.exons[0].setStart(max(0, self.exons[0].getStart() - size)) | 
|  | 454 			else: | 
|  | 455 				self.exons[0].setEnd(self.exons[0].getEnd() + size) | 
|  | 456 		super(Transcript, self).extendStart(size) | 
|  | 457 		self.bin  = None | 
|  | 458 | 
|  | 459 | 
|  | 460 	def extendEnd(self, size): | 
|  | 461 		""" | 
|  | 462 		Extend the transcript by the 3' end | 
|  | 463 		@param size: the size to be extended | 
|  | 464 		@type	size: int | 
|  | 465 		""" | 
|  | 466 		if len(self.exons) != 0: | 
|  | 467 			self.sortExons() | 
|  | 468 			if self.getDirection() == 1: | 
|  | 469 				self.exons[-1].setEnd(self.exons[-1].getEnd() + size) | 
|  | 470 			else: | 
|  | 471 				self.exons[-1].setStart(max(0, self.exons[-1].getStart() - size)) | 
|  | 472 		super(Transcript, self).extendEnd(size) | 
|  | 473 		self.bin  = None | 
|  | 474 | 
|  | 475 | 
|  | 476 	def extendExons(self, size): | 
|  | 477 		""" | 
|  | 478 		Extend all the exons | 
|  | 479 		@param size: the size to be extended | 
|  | 480 		@type	size: int | 
|  | 481 		""" | 
|  | 482 		if len(self.exons) != 0: | 
|  | 483 			self.sortExons() | 
|  | 484 			exons = [] | 
|  | 485 			previousExon = None | 
|  | 486 			for exon in self.exons: | 
|  | 487 				exon.extendStart(size) | 
|  | 488 				exon.extendEnd(size) | 
|  | 489 				exon.setDirection(self.getDirection()) | 
|  | 490 				if previousExon != None and previousExon.overlapWith(exon): | 
|  | 491 					previousExon.merge(exon) | 
|  | 492 				else: | 
|  | 493 					if previousExon != None: | 
|  | 494 						exons.append(previousExon) | 
|  | 495 					previousExon = exon | 
|  | 496 			exons.append(previousExon) | 
|  | 497 			self.exons = exons | 
|  | 498 		super(Transcript, self).extendStart(size) | 
|  | 499 		super(Transcript, self).extendEnd(size) | 
|  | 500 		self.bin  = None | 
|  | 501 | 
|  | 502 | 
|  | 503 	def restrictStart(self, size = 1): | 
|  | 504 		""" | 
|  | 505 		Restrict the transcript by some nucleotides, start from its start position | 
|  | 506 		Remove the exons | 
|  | 507 		@param size: the size to be restricted to | 
|  | 508 		@type  size: int | 
|  | 509 		""" | 
|  | 510 		newExons = [] | 
|  | 511 		if self.getDirection() == 1: | 
|  | 512 			for exon in self.exons: | 
|  | 513 				if exon.getStart() <= self.getStart() + size - 1: | 
|  | 514 					if exon.getEnd() > self.getStart() + size - 1: | 
|  | 515 						exon.setEnd(self.getStart() + size - 1) | 
|  | 516 					newExons.append(exon) | 
|  | 517 		else: | 
|  | 518 			for exon in self.exons: | 
|  | 519 				if exon.getEnd() >= self.getEnd() - size + 1: | 
|  | 520 					if exon.getStart() < self.getEnd() - size + 1: | 
|  | 521 						exon.setStart(self.getEnd() - size + 1) | 
|  | 522 					newExons.append(exon) | 
|  | 523 		super(Transcript, self).restrictStart(size) | 
|  | 524 		self.exons = newExons | 
|  | 525 | 
|  | 526 | 
|  | 527 	def restrictEnd(self, size = 1): | 
|  | 528 		""" | 
|  | 529 		Restrict the transcript by some nucleotides, end from its end position | 
|  | 530 		Remove the exons | 
|  | 531 		@param size: the size to be restricted to | 
|  | 532 		@type  size: int | 
|  | 533 		""" | 
|  | 534 		newExons = [] | 
|  | 535 		if self.getDirection() == 1: | 
|  | 536 			for exon in self.exons: | 
|  | 537 				if exon.getEnd() >= self.getEnd() - size + 1: | 
|  | 538 					if exon.getStart() < self.getEnd() - size + 1: | 
|  | 539 						exon.setStart(self.getEnd() - size + 1) | 
|  | 540 					newExons.append(exon) | 
|  | 541 		else: | 
|  | 542 			for exon in self.exons: | 
|  | 543 				if exon.getEnd() >= self.getEnd() - size + 1: | 
|  | 544 					if exon.getStart() < self.getEnd() - size + 1: | 
|  | 545 						exon.setEnd(self.getEnd() - size + 1) | 
|  | 546 					newExons.append(exon) | 
|  | 547 		super(Transcript, self).restrictEnd(size) | 
|  | 548 		self.exons = newExons | 
|  | 549 | 
|  | 550 | 
|  | 551 	def removeExons(self): | 
|  | 552 		""" | 
|  | 553 		Remove the exons and transforms the current transcript into a mere interval | 
|  | 554 		""" | 
|  | 555 		self.exons = [] | 
|  | 556 		self.bin   = None | 
|  | 557 | 
|  | 558 | 
|  | 559 	def printGtf(self, title): | 
|  | 560 		""" | 
|  | 561 		Export this transcript using GTF2.2 format | 
|  | 562 		@param title: the title of the transcripts | 
|  | 563 		@type title: string | 
|  | 564 		@return:	 a string | 
|  | 565 		""" | 
|  | 566 		transcriptId = self.getUniqueName() | 
|  | 567 		geneId	   = "%s_gene" % (transcriptId) | 
|  | 568 		direction	= "+" | 
|  | 569 		if self.getDirection() == -1: | 
|  | 570 			direction = "-" | 
|  | 571 		self.sortExonsIncreasing() | 
|  | 572 		string = "" | 
|  | 573 		for i, exon in enumerate(self.getExons()): | 
|  | 574 			exonCopy = Interval() | 
|  | 575 			exonCopy.copy(exon) | 
|  | 576 			if "ID" in exonCopy.getTagValues(): | 
|  | 577 				del exonCopy.tags["ID"] | 
|  | 578 			feature = "exon" | 
|  | 579 			if "feature" in exonCopy.getTagNames(): | 
|  | 580 				feature = exonCopy.getTagValue("feature") | 
|  | 581 				del exonCopy.tags["feature"] | 
|  | 582 			score = "." | 
|  | 583 			if "score" in exonCopy.getTagNames(): | 
|  | 584 				score = "%d" % (int(exonCopy.getTagValue("score"))) | 
|  | 585 				del exonCopy.tags["score"] | 
|  | 586 			if "Parent" in exonCopy.getTagNames(): | 
|  | 587 				del exonCopy.tags["Parent"] | 
|  | 588 			exonCopy.setName("%s_part%d" % (self.getName(), i+1)) | 
|  | 589 			comment = exonCopy.getTagValues("; ", " ", "\"") | 
|  | 590 			string += "%s\t%s\t%s\t%d\t%d\t%s\t%s\t.\ttranscript_id \"%s\"; gene_id \"%s\"; %s\n" % (exonCopy.getChromosome(), title, feature, exonCopy.getStart(), exonCopy.getEnd(), score, direction, transcriptId, geneId, comment) | 
|  | 591 		return string | 
|  | 592 | 
|  | 593 | 
|  | 594 	def printGff2(self, title): | 
|  | 595 		""" | 
|  | 596 		Export this transcript using GFF2 format | 
|  | 597 		@param title: the title of the transcripts | 
|  | 598 		@type  title: string | 
|  | 599 		@return: a string | 
|  | 600 		""" | 
|  | 601 		direction = "+" | 
|  | 602 		if self.getDirection() == -1: | 
|  | 603 			direction = "-" | 
|  | 604 		self.sortExonsIncreasing() | 
|  | 605 		comment = self.getTagValues() | 
|  | 606 		if comment != None: | 
|  | 607 			comment = ";%s" % (comment) | 
|  | 608 		score = "." | 
|  | 609 		if "score" in self.getTagNames(): | 
|  | 610 			score = "%d" % (int(self.getTagValue("score"))) | 
|  | 611 		feature = "transcript" | 
|  | 612 		if "feature" in self.getTagNames(): | 
|  | 613 			feature = self.getTagValue("feature") | 
|  | 614 		string = "%s\t%s\t%s\t%d\t%d\t%s\t%s\t.\tGENE %s%s\n" % (self.getChromosome(), title, feature, self.getStart(), self.getEnd(), score, direction, self.name, comment) | 
|  | 615 		for exon in self.getExons(): | 
|  | 616 			if "score" in exon.getTagNames(): | 
|  | 617 				score = "%d" % (int(self.getTagValue("score"))) | 
|  | 618 			string += "%s\t%s\t_exon\t%d\t%d\t%s\t%s\t.\tGENE %s\n" % (self.getChromosome(), title, exon.getStart(), exon.getEnd(), score, direction, self.name) | 
|  | 619 		return string | 
|  | 620 | 
|  | 621 | 
|  | 622 	def printGff3(self, title): | 
|  | 623 		""" | 
|  | 624 		Export this transcript using GFF3 format | 
|  | 625 		@param title: the title of the transcripts | 
|  | 626 		@type title: string | 
|  | 627 		@return: a string | 
|  | 628 		""" | 
|  | 629 		direction = "+" | 
|  | 630 		if self.getDirection() == -1: | 
|  | 631 			direction = "-" | 
|  | 632 		self.sortExonsIncreasing() | 
|  | 633 		if "ID" not in self.getTagValues(): | 
|  | 634 			self.setTagValue("ID", self.getUniqueName()) | 
|  | 635 		feature = "transcript" | 
|  | 636 		tags = self.tags | 
|  | 637 		if "feature" in self.getTagNames(): | 
|  | 638 			feature = self.getTagValue("feature") | 
|  | 639 			del self.tags["feature"] | 
|  | 640 		score = "." | 
|  | 641 		if "score" in self.getTagNames(): | 
|  | 642 			score = "%d" % (int(self.getTagValue("score"))) | 
|  | 643 			del self.tags["score"] | 
|  | 644 		comment = self.getTagValues(";", "=") | 
|  | 645 		string = "%s\t%s\t%s\t%d\t%d\t%s\t%s\t.\t%s\n" % (self.getChromosome(), title, feature, self.getStart(), self.getEnd(), score, direction, comment) | 
|  | 646 		if len(self.exons) > 1: | 
|  | 647 			for i, exon in enumerate(self.getExons()): | 
|  | 648 				if "score" in exon.getTagNames(): | 
|  | 649 					score = "%d" % (int(exon.getTagValue("score"))) | 
|  | 650 				string += "%s\t%s\texon\t%d\t%d\t%s\t%s\t.\tID=%s-exon%d;Name=%s-exon%d;Parent=%s\n" % (self.getChromosome(), title, exon.getStart(), exon.getEnd(), score, direction, self.getTagValue("ID"), i+1, self.name, i+1, self.getTagValue("ID")) | 
|  | 651 		self.tags = tags | 
|  | 652 		return string | 
|  | 653 | 
|  | 654 | 
|  | 655 	def printEmbl(self): | 
|  | 656 		""" | 
|  | 657 		Export this transcript using EMBL format | 
|  | 658 		@return: a string | 
|  | 659 		""" | 
|  | 660 		if len(self.exons) <= 1: | 
|  | 661 			position = "%d..%d" % (self.getStart(), self.getEnd()) | 
|  | 662 		else: | 
|  | 663 			positions = [] | 
|  | 664 			for exon in self.getExons(): | 
|  | 665 				positions.append("%d..%d" % (self.getStart(), self.getEnd())) | 
|  | 666 			position = ",".join(positions) | 
|  | 667 			position = "join(%s)" % (position) | 
|  | 668 		if self.getDirection() == -1: | 
|  | 669 			position = "complement(%s)" % (position) | 
|  | 670 		feature = "misc_feature" | 
|  | 671 		if "feature" in self.getTagNames(): | 
|  | 672 			if not self.getTagValue("feature").startswith("S-MART"): | 
|  | 673 				feature = self.getTagValue("feature") | 
|  | 674 		string = "FT %s %s\n" % (feature, position) | 
|  | 675 		if "Name" in self.getTagNames(): | 
|  | 676 			string += "FT /label=\"%s\"\n" % (self.getTagValue("Name")) | 
|  | 677 		return string | 
|  | 678 | 
|  | 679 | 
|  | 680 	def printBed(self): | 
|  | 681 		""" | 
|  | 682 		Export this transcript using BED format | 
|  | 683 		@return: a string | 
|  | 684 		""" | 
|  | 685 		name = self.name | 
|  | 686 		if "nbOccurrences" in self.getTagNames() and self.getTagValue("nbOccurrences") != 1 and self.getTagValue("occurrences"): | 
|  | 687 			name = "%s-%d" % (name, self.getTagValue("occurrence")) | 
|  | 688 		comment = self.getTagValues(";", "=") | 
|  | 689 		sizes   = [] | 
|  | 690 		starts  = [] | 
|  | 691 		direction = "+" | 
|  | 692 		if self.getDirection() == -1: | 
|  | 693 			direction = "-" | 
|  | 694 		self.sortExonsIncreasing() | 
|  | 695 		for exon in self.getExons(): | 
|  | 696 			sizes.append("%d" % (exon.getSize())) | 
|  | 697 			starts.append("%d" % (exon.getStart() - self.getStart())) | 
|  | 698 		return "%s\t%d\t%d\t%s\t1000\t%s\t%d\t%d\t0\t%d\t%s,\t%s,\n" % (self.getChromosome(), self.getStart(), self.getEnd()+1, name, direction, self.getStart(), self.getEnd()+1, self.getNbExons(), ",".join(sizes), ",".join(starts)) | 
|  | 699 | 
|  | 700 | 
|  | 701 	def printSam(self): | 
|  | 702 		""" | 
|  | 703 		Export this transcript using SAM format | 
|  | 704 		@return: a string | 
|  | 705 		""" | 
|  | 706 		name			= self.name | 
|  | 707 		flag			= 0 if self.getDirection() == 1 else 0x10 | 
|  | 708 		chromosome	  = self.getChromosome() | 
|  | 709 		genomeStart	 = self.getStart() | 
|  | 710 		quality		 = 255 | 
|  | 711 		mate			= "*" | 
|  | 712 		mateGenomeStart = 0 | 
|  | 713 		gapSize		 = 0 | 
|  | 714 		sequence		= "*" | 
|  | 715 		qualityString   = "*" | 
|  | 716 		tags			= "NM:i:0" | 
|  | 717 | 
|  | 718 		lastExonEnd = None | 
|  | 719 		self.sortExonsIncreasing() | 
|  | 720 		exon  = self.getExons()[0] | 
|  | 721 		cigar = "%dM" % (self.getExons()[0].getSize()) | 
|  | 722 		lastExonEnd = exon.getEnd() | 
|  | 723 		for i, exon in enumerate(self.getExons()): | 
|  | 724 			if i == 0: | 
|  | 725 				continue | 
|  | 726 			cigar += "%dN" % (exon.getStart() - lastExonEnd - 1) | 
|  | 727 			cigar += "%dM" % (exon.getSize()) | 
|  | 728 | 
|  | 729 		return "%s\t%d\t%s\t%d\t%d\t%s\t%s\t%d\t%d\t%s\t%s\t%s\n" % (name, flag, chromosome, genomeStart, quality, cigar, mate, mateGenomeStart, gapSize, sequence, qualityString, tags) | 
|  | 730 | 
|  | 731 | 
|  | 732 	def printUcsc(self): | 
|  | 733 		""" | 
|  | 734 		Export this transcript using UCSC BED format | 
|  | 735 		@return: a string | 
|  | 736 		""" | 
|  | 737 		if self.getChromosome().find("Het") != -1: | 
|  | 738 			return "" | 
|  | 739 		name	  = self.name | 
|  | 740 		comment   = self.getTagValues(";", "") | 
|  | 741 		sizes	 = [] | 
|  | 742 		starts	= [] | 
|  | 743 		direction = "+" | 
|  | 744 		if self.getDirection() == -1: | 
|  | 745 			direction = "-" | 
|  | 746 		self.sortExonsIncreasing() | 
|  | 747 		for exon in self.getExons(): | 
|  | 748 			sizes.append("%d" % (exon.getSize())) | 
|  | 749 			starts.append("%d" % (exon.getStart() - self.getStart())) | 
|  | 750 		return "%s\t%d\t%d\t%s\t1000\t%s\t%d\t%d\t0\t%d\t%s,\t%s,\n" % (self.getChromosome().replace("arm_", "chr"), self.getStart(), self.getEnd()+1, name, direction, self.getStart(), self.getEnd()+1, self.getNbExons(), ",".join(sizes), ",".join(starts)) | 
|  | 751 | 
|  | 752 | 
|  | 753 	def printGBrowseReference(self): | 
|  | 754 		""" | 
|  | 755 		Export this transcript using GBrowse format (1st line only) | 
|  | 756 		@return: a string | 
|  | 757 		""" | 
|  | 758 		return "reference = %s\n" % (self.getChromosome()) | 
|  | 759 | 
|  | 760 | 
|  | 761 	def printGBrowseLine(self): | 
|  | 762 		""" | 
|  | 763 		Export this transcript using GBrowse format (2nd line only) | 
|  | 764 		@return: a string | 
|  | 765 		""" | 
|  | 766 		self.sortExons() | 
|  | 767 		coordinates = [] | 
|  | 768 		for exon in self.getExons(): | 
|  | 769 			coordinates.append(exon.printCoordinates()) | 
|  | 770 		coordinatesString = ",".join(coordinates) | 
|  | 771 		comment = self.getTagValues(";", "=") | 
|  | 772 		if comment: | 
|  | 773 			comment = "\t\"%s\"" % (comment) | 
|  | 774 		return "User_data\t%s\t%s%s\n" % (self.name, coordinatesString, comment) | 
|  | 775 | 
|  | 776 | 
|  | 777 	def printGBrowse(self): | 
|  | 778 		""" | 
|  | 779 		Export this transcript using GBrowse format | 
|  | 780 		@return: a string | 
|  | 781 		""" | 
|  | 782 		return "%s%s" % (self.printGBrowseReference(), self.printGBrowseLine()) | 
|  | 783 | 
|  | 784 | 
|  | 785 	def printCsv(self): | 
|  | 786 		""" | 
|  | 787 		Export this transcript using CSV format | 
|  | 788 		@return: a string | 
|  | 789 		""" | 
|  | 790 		self.sortExons() | 
|  | 791 		string = "%s,%d,%d,\"%s\"," % (self.getChromosome(), self.getStart(), self.getEnd(), "+" if self.getDirection() == 1 else "-") | 
|  | 792 		if len(self.getExons()) == 1: | 
|  | 793 			string += "None" | 
|  | 794 		else: | 
|  | 795 			for exon in self.getExons(): | 
|  | 796 				string += "%d-%d " % (exon.getStart(), exon.getEnd()) | 
|  | 797 		for tag in sorted(self.tags.keys()): | 
|  | 798 			string += ",%s=%s" % (tag, str(self.tags[tag])) | 
|  | 799 		string += "\n" | 
|  | 800 		return string | 
|  | 801 | 
|  | 802 | 
|  | 803 	def extractSequence(self, parser): | 
|  | 804 		""" | 
|  | 805 		Get the sequence corresponding to this transcript | 
|  | 806 		@param parser: a parser to a FASTA file | 
|  | 807 		@type  parser: class L{SequenceListParser<SequenceListParser>} | 
|  | 808 		@return:	   an instance of L{Sequence<Sequence>} | 
|  | 809 		""" | 
|  | 810 		self.sortExons() | 
|  | 811 		name = self.name | 
|  | 812 		if "ID" in self.getTagNames() and self.getTagValue("ID") != self.name: | 
|  | 813 			name += ":%s" % (self.getTagValue("ID")) | 
|  | 814 		sequence = Sequence(name) | 
|  | 815 		for exon in self.getExons(): | 
|  | 816 			sequence.concatenate(exon.extractSequence(parser)) | 
|  | 817 		return sequence | 
|  | 818 | 
|  | 819 | 
|  | 820 	def extractWigData(self, parser): | 
|  | 821 		""" | 
|  | 822 		Get some wig data corresponding to this transcript | 
|  | 823 		@param parser: a parser to a wig file | 
|  | 824 		@type  parser: class L{WigParser<WigParser>} | 
|  | 825 		@return: a sequence of float | 
|  | 826 		""" | 
|  | 827 		self.sortExons() | 
|  | 828 		if parser.strands: | 
|  | 829 			strands = (-1, 1) | 
|  | 830 			values  = dict([(strand, []) for strand in strands]) | 
|  | 831 			for exon in self.getExons(): | 
|  | 832 				theseValues = exon.extractWigData(parser) | 
|  | 833 				if self.getDirection() == -1: | 
|  | 834 					for strand in strands: | 
|  | 835 						theseValues[strand].reverse() | 
|  | 836 				for strand in strands: | 
|  | 837 					values[strand].extend(theseValues[strand]) | 
|  | 838 			if self.getDirection() == -1: | 
|  | 839 				for strand in strands: | 
|  | 840 					values[strand].reverse() | 
|  | 841 			return values | 
|  | 842 		else: | 
|  | 843 			values = [] | 
|  | 844 			for exon in self.getExons(): | 
|  | 845 				theseValues = exon.extractWigData(parser) | 
|  | 846 				#if self.getDirection() == -1: | 
|  | 847 				#	theseValues.reverse() | 
|  | 848 				values.extend(theseValues) | 
|  | 849 			#if self.getDirection() == -1: | 
|  | 850 			#	values.reverse() | 
|  | 851 			return values |