Mercurial > repos > yufei-luo > s_mart

#
# Copyright INRA-URGI 2009-2010
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
#
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
#
import sys
from SMART.Java.Python.structure.Interval import Interval
from SMART.Java.Python.structure.Sequence import Sequence


class Transcript(Interval):
	"""
	A class that models an transcript, considered as a specialized interval (the bounds of the transcript) that contains exons (also represented as intervals)
	@ivar exons: a list of exons (intervals)
	@type exons: list of L{Interval{Interval}}
	"""

	def __init__(self, transcript = None, verbosity = 0):
		"""
		Constructor
		@param transcript: transcript to be copied
		@type  transcript: class L{Transcript<Transcript>}
		@param verbosity:  verbosity
		@type  verbosity:  int
		"""
		super(Transcript, self).__init__(None, verbosity)
		self.exons   = []
		self.introns = None
		if transcript != None:
			self.copy(transcript)


	def copy(self, transcript):
		"""
		Copy method
		@param transcript: transcript to be copied
		@type	transcript: class L{Transcript<Transcript>} or L{Interval<Interval>}
		"""
		super(Transcript, self).copy(transcript)
		if transcript.__class__.__name__ == "Transcript":
			exons = transcript.getExons()
			if len(exons) > 1:
				for exon in exons:
					exonCopy = Interval(exon)
					self.addExon(exonCopy)


	def setDirection(self, direction):
		"""
		Set the direction of the interval
		Possibly parse different formats
		Impact all exons
		@param direction: direction of the transcript (+ / -)
		@type	direction: int or string
		"""
		super(Transcript, self).setDirection(direction)
		for exon in self.exons:
			exon.setDirection(direction)


	def setChromosome(self, chromosome):
		"""
		Set the chromosome
		@param chromosome: chromosome on which the transcript is
		@type  chromosome: string
		"""
		super(Transcript, self).setChromosome(chromosome)
		for exon in self.exons:
			exon.setChromosome(chromosome)


	def addExon(self, exon):
		"""
		Add an exon to the list of exons
		@param exon: a new exon
		@type  exon: class L{Interval<Interval>}
		"""
		if not self.exons and not exon.overlapWith(self):
			firstExon = Interval()
			firstExon.setStart(self.getStart())
			firstExon.setEnd(self.getEnd())
			firstExon.setDirection(self.getDirection())
			firstExon.setChromosome(self.getChromosome())
			self.exons.append(firstExon)
		newExon = Interval(exon)
		newExon.setDirection(self.getDirection())
		self.exons.append(newExon)
		if newExon.getStart() < self.getStart():
			self.setStart(newExon.getStart())
		if newExon.getEnd() > self.getEnd():
			self.setEnd(newExon.getEnd())


	def setStart(self, start):
		"""
		Set the new start, move the first exon accordingly (if exists)
		@param start: the new start
		@type  start: int
		"""
		super(Transcript, self).setStart(start)
		if self.exons:
			self.sortExonsIncreasing()
			self.exons[0].setStart(start)


	def setEnd(self, end):
		"""
		Set the new end, move the last exon accordingly (if exists)
		@param end: the new end
		@type  end: int
		"""
		super(Transcript, self).setEnd(end)
		if self.exons:
			self.sortExonsIncreasing()
			self.exons[-1].setEnd(end)


	def reverse(self):
		"""
		Reverse the strand of the transcript
		"""
		super(Transcript, self).reverse()
		for exon in self.exons:
			exon.reverse()


	def getUniqueName(self):
		"""
		Try to give a unique name by possibly adding occurrence
		"""
		if "nbOccurrences" in self.tags and "occurrence" in self.tags and self.tags["nbOccurrences"] != 1:
			return "%s-%d" % (self.name, self.tags["occurrence"])
		return self.name


	def getNbExons(self):
		"""
		Get the number of exons
		"""
		return max(1, len(self.exons))


	def getExon(self, i):
		"""
		Get a specific exon
		@param i: the rank of the exon
		@type  i: int
		"""
		if len(self.exons) == 0:
			if i != 0:
				raise Exception("Cannot get exon #%i while there is no exon in the transcript" % (i))
			return self
		return self.exons[i]


	def getExons(self):
		"""
		Get all the exons
		"""
		if len(self.exons) == 0:
			return [Interval(self)]
		return self.exons


	def getIntrons(self):
		"""
		Get all the introns
		Compute introns on the fly
		"""
		if self.introns != None:
			return self.introns
		self.sortExons()
		self.introns = []
		exonStart = self.getExon(0)
		for cpt, exonEnd in enumerate(self.exons[1:]):
			intron = Interval()
			intron.setName("%s_intron%d" % (self.getName(), cpt+1))
			intron.setChromosome(self.getChromosome())
			intron.setDirection(self.getDirection())
			if self.getDirection() == 1:
				intron.setEnd(exonEnd.getStart() - 1)
				intron.setStart(exonStart.getEnd() + 1)
			else:
				intron.setStart(exonEnd.getEnd() + 1)
				intron.setEnd(exonStart.getStart() - 1)
			intron.setDirection(self.getDirection())
			if intron.getSize() > 0:
				self.introns.append(intron)
			exonStart = exonEnd
			intron.setSize(intron.getEnd() - intron.getStart() + 1)
		return self.introns


	def getSize(self):
		"""
		Get the size of the transcript (i.e. the number of nucleotides)
		Compute size on the fly
		"""
		if len(self.exons) == 0:
			return self.getSizeWithIntrons()
		size = 0
		for exon in self.exons:
			size += exon.getSize()
		return size


	def getSizeWithIntrons(self):
		"""
		Get the size of the interval (i.e. distance from start to end)
		"""
		return super(Transcript, self).getSize()


	def overlapWithExon(self, transcript, nbNucleotides = 1):
		"""
		Check if the exons of this transcript overlap with the exons of another transcript
		@param transcript:		transcript to be compared to
		@type	transcript:		class L{Transcript<Transcript>}
		@param nbNucleotides: minimum number of nucleotides to declare and overlap
		@type	nbNucleotides: int
		"""
		if not self.overlapWith(transcript, nbNucleotides):
			return False
		for thisExon in self.getExons():
			for thatExon in transcript.getExons():
				if thisExon.overlapWith(thatExon, nbNucleotides):
					return True
		return False


	def include(self, transcript):
		"""
		Whether this transcript includes the other one
		@param transcript: object to be compared to
		@type  transcript: class L{Transcript<Transcript>}
		"""
		if not super(Transcript, self).include(transcript):
			return False
		for thatExon in transcript.getExons():
			for thisExon in self.getExons():
				if thisExon.include(thatExon):
					break
			else:
				return False
		return True


	def merge(self, transcript, normalization = False):
		"""
		Merge with another transcript
		Merge exons if they overlap, otherwise add exons
		@param transcript:		transcript to be merged to
		@type	transcript:		class L{Transcript<Transcript>}
		@param normalization: whether the sum of the merge should be normalized wrt the number of mappings of each elements
		@type	normalization: boolean
		"""
		if self.getChromosome() != transcript.getChromosome() or self.getDirection() != transcript.getDirection():
			raise Exception("Cannot merge '%s' with '%s'!" % (self, transcript))

		theseExons = self.getExons()
		thoseExons = transcript.getExons()

		for thatExon in thoseExons:
			toBeRemoved = []
			for thisIndex, thisExon in enumerate(theseExons):
				if thisExon.overlapWith(thatExon):
					thatExon.merge(thisExon)
					toBeRemoved.append(thisIndex)
			theseExons.append(thatExon)
			for thisIndex in reversed(toBeRemoved):
				del theseExons[thisIndex]
		self.removeExons()
		self.setStart(min(self.getStart(), transcript.getStart()))
		self.setEnd(max(self.getEnd(), transcript.getEnd()))
		if len(theseExons) > 1:
			for thisExon in theseExons:
				self.addExon(thisExon)

		self.setName("%s--%s" % (self.getUniqueName(), transcript.getUniqueName()))
		super(Transcript, self).merge(transcript, normalization)


	def getDifference(self, transcript, sameStrand = False):
		"""
		Get the difference between this cluster and another one
		@param transcript: object to be compared to
		@type  transcript: class L{Transcript<Transcript>}
		@param sameStrand: do the comparison iff the transcripts are on the same strand
		@type  sameStrand: boolean
		@return:		   a transcript
		"""
		newTranscript = Transcript()
		newTranscript.copy(self)
		if self.getChromosome() != transcript.getChromosome():
			return newTranscript
		if not self.overlapWith(transcript):
			return newTranscript
		if sameStrand and self.getDirection() != transcript.getDirection():
			return newTranscript
		newTranscript.removeExons()
		if transcript.getEnd() > newTranscript.getStart():
			newTranscript.setStart(transcript.getEnd() + 1)
		if transcript.getStart() < newTranscript.getEnd():
			newTranscript.setEnd(transcript.getStart() + 1)
		theseExons = []
		for exon in self.getExons():
			exonCopy = Interval()
			exonCopy.copy(exon)
			theseExons.append(exonCopy)
		for thatExon in transcript.getExons():
			newExons = []
			for thisExon in theseExons:
				newExons.extend(thisExon.getDifference(thatExon))
			theseExons = newExons
		if not theseExons:
			return None
		newStart, newEnd = theseExons[0].getStart(), theseExons[0].getEnd()
		for thisExon in theseExons[1:]:
			newStart = min(newStart, thisExon.getStart())
			newEnd   = max(newEnd,   thisExon.getEnd())
		newTranscript.setEnd(newEnd)
		newTranscript.setStart(newStart)
		newTranscript.exons = theseExons
		return newTranscript


	def getIntersection(self, transcript):
		"""
		Get the intersection between this transcript and another one
		@param transcript: object to be compared to
		@type  transcript: class L{Transcript<Transcript>}
		@return:           an other transcript
		"""
		if self.getChromosome() != transcript.getChromosome() or self.getDirection() != transcript.getDirection():
			return None
		newTranscript = Transcript()
		newTranscript.setDirection(self.getDirection())
		newTranscript.setChromosome(self.getChromosome())
		newTranscript.setName("%s_intersect_%s" % (self.getName(), transcript.getName()))
		newExons = []
		for thisExon in self.getExons():
			for thatExon in transcript.getExons():
				newExon = thisExon.getIntersection(thatExon)
				if newExon != None:
					newExons.append(newExon)
		if not newExons:
			return None
		newTranscript.exons = newExons
		return newTranscript


	def getSqlVariables(cls):
		"""
		Get the properties of the object that should be saved in a database
		"""
		variables = Interval.getSqlVariables()
		variables.append("exons")
		return variables
	getSqlVariables = classmethod(getSqlVariables)


	def setSqlValues(self, array):
		"""
		Set the values of the properties of this object as given by a results line of a SQL query
		@param array: the values to be copied
		@type	array: a list
		"""
		super(Transcript, self).setSqlValues(array)
		mergedExons = array[8]
		if not mergedExons:
			return
		for exonCount, splittedExon in enumerate(mergedExons.split(",")):
			start, end = splittedExon.split("-")
			exon = Interval()
			exon.setChromosome(self.getChromosome())
			exon.setDirection(self.getDirection())
			exon.setName("%s_exon%d" % (self.getName(), exonCount+1))
			exon.setStart(int(start))
			exon.setEnd(int(end))
			self.addExon(exon)


	def getSqlValues(self):
		"""
		Get the values of the properties that should be saved in a database
		"""
		values = super(Transcript, self).getSqlValues()
		values["size"] = self.getSize()
		if self.getNbExons() == 1:
			values["exons"] = ""
		else:
			values["exons"] = ",".join(["%d-%d" % (exon.getStart(), exon.getEnd()) for exon in self.getExons()])
		return values


	def getSqlTypes(cls):
		"""
		Get the types of the properties that should be saved in a database
		"""
		types		  = Interval.getSqlTypes()
		types["exons"] = "varchar"
		return types
	getSqlTypes = classmethod(getSqlTypes)


	def getSqlSizes(cls):
		"""
		Get the sizes of the properties that should be saved in a database
		"""
		sizes		  = Interval.getSqlSizes()
		sizes["exons"] = 10000
		return sizes
	getSqlSizes = classmethod(getSqlSizes)


	def sortExons(self):
		"""
		Sort the exons
		Increasing order if transcript is on strand "+", decreasing otherwise
		"""
		self.sortExonsIncreasing()
		if self.getDirection() == -1:
			exons = self.getExons()
			exons.reverse()
			self.exons = exons


	def sortExonsIncreasing(self):
		"""
		Sort the exons
		Increasing order
		"""
		exons = self.getExons()
		sortedExons = []
		while len(exons) > 0:
			minExon = exons[0]
			for index in range(1, len(exons)):
				if minExon.getStart() > exons[index].getStart():
					minExon = exons[index]
			sortedExons.append(minExon)
			exons.remove(minExon)
		self.exons = sortedExons


	def extendStart(self, size):
		"""
		Extend the transcript by the 5' end
		@param size: the size to be extended
		@type	size: int
		"""
		if len(self.exons) != 0:
			self.sortExons()
			if self.getDirection() == 1:
				self.exons[0].setStart(max(0, self.exons[0].getStart() - size))
			else:
				self.exons[0].setEnd(self.exons[0].getEnd() + size)
		super(Transcript, self).extendStart(size)
		self.bin  = None


	def extendEnd(self, size):
		"""
		Extend the transcript by the 3' end
		@param size: the size to be extended
		@type	size: int
		"""
		if len(self.exons) != 0:
			self.sortExons()
			if self.getDirection() == 1:
				self.exons[-1].setEnd(self.exons[-1].getEnd() + size)
			else:
				self.exons[-1].setStart(max(0, self.exons[-1].getStart() - size))
		super(Transcript, self).extendEnd(size)
		self.bin  = None


	def extendExons(self, size):
		"""
		Extend all the exons
		@param size: the size to be extended
		@type	size: int
		"""
		if len(self.exons) != 0:
			self.sortExons()
			exons = []
			previousExon = None
			for exon in self.exons:
				exon.extendStart(size)
				exon.extendEnd(size)
				exon.setDirection(self.getDirection())
				if previousExon != None and previousExon.overlapWith(exon):
					previousExon.merge(exon)
				else:
					if previousExon != None:
						exons.append(previousExon)
					previousExon = exon
			exons.append(previousExon)
			self.exons = exons
		super(Transcript, self).extendStart(size)
		super(Transcript, self).extendEnd(size)
		self.bin  = None


	def restrictStart(self, size = 1):
		"""
		Restrict the transcript by some nucleotides, start from its start position
		Remove the exons
		@param size: the size to be restricted to
		@type  size: int
		"""
		newExons = []
		if self.getDirection() == 1:
			for exon in self.exons:
				if exon.getStart() <= self.getStart() + size - 1:
					if exon.getEnd() > self.getStart() + size - 1:
						exon.setEnd(self.getStart() + size - 1)
					newExons.append(exon)
		else:
			for exon in self.exons:
				if exon.getEnd() >= self.getEnd() - size + 1:
					if exon.getStart() < self.getEnd() - size + 1:
						exon.setStart(self.getEnd() - size + 1)
					newExons.append(exon)
		super(Transcript, self).restrictStart(size)
		self.exons = newExons


	def restrictEnd(self, size = 1):
		"""
		Restrict the transcript by some nucleotides, end from its end position
		Remove the exons
		@param size: the size to be restricted to
		@type  size: int
		"""
		newExons = []
		if self.getDirection() == 1:
			for exon in self.exons:
				if exon.getEnd() >= self.getEnd() - size + 1:
					if exon.getStart() < self.getEnd() - size + 1:
						exon.setStart(self.getEnd() - size + 1)
					newExons.append(exon)
		else:
			for exon in self.exons:
				if exon.getEnd() >= self.getEnd() - size + 1:
					if exon.getStart() < self.getEnd() - size + 1:
						exon.setEnd(self.getEnd() - size + 1)
					newExons.append(exon)
		super(Transcript, self).restrictEnd(size)
		self.exons = newExons


	def removeExons(self):
		"""
		Remove the exons and transforms the current transcript into a mere interval
		"""
		self.exons = []
		self.bin   = None


	def printGtf(self, title):
		"""
		Export this transcript using GTF2.2 format
		@param title: the title of the transcripts
		@type title: string
		@return:	 a string
		"""
		transcriptId = self.getUniqueName()
		geneId	   = "%s_gene" % (transcriptId)
		direction	= "+"
		if self.getDirection() == -1:
			direction = "-"
		self.sortExonsIncreasing()
		string = ""
		for i, exon in enumerate(self.getExons()):
			exonCopy = Interval()
			exonCopy.copy(exon)
			if "ID" in exonCopy.getTagValues():
				del exonCopy.tags["ID"]
			feature = "exon"
			if "feature" in exonCopy.getTagNames():
				feature = exonCopy.getTagValue("feature")
				del exonCopy.tags["feature"]
			score = "."
			if "score" in exonCopy.getTagNames():
				score = "%d" % (int(exonCopy.getTagValue("score")))
				del exonCopy.tags["score"]
			if "Parent" in exonCopy.getTagNames():
				del exonCopy.tags["Parent"]
			exonCopy.setName("%s_part%d" % (self.getName(), i+1))
			comment = exonCopy.getTagValues("; ", " ", "\"")
			string += "%s\t%s\t%s\t%d\t%d\t%s\t%s\t.\ttranscript_id \"%s\"; gene_id \"%s\"; %s\n" % (exonCopy.getChromosome(), title, feature, exonCopy.getStart(), exonCopy.getEnd(), score, direction, transcriptId, geneId, comment)
		return string


	def printGff2(self, title):
		"""
		Export this transcript using GFF2 format
		@param title: the title of the transcripts
		@type  title: string
		@return: a string
		"""
		direction = "+"
		if self.getDirection() == -1:
			direction = "-"
		self.sortExonsIncreasing()
		comment = self.getTagValues()
		if comment != None:
			comment = ";%s" % (comment)
		score = "."
		if "score" in self.getTagNames():
			score = "%d" % (int(self.getTagValue("score")))
		feature = "transcript"
		if "feature" in self.getTagNames():
			feature = self.getTagValue("feature")
		string = "%s\t%s\t%s\t%d\t%d\t%s\t%s\t.\tGENE %s%s\n" % (self.getChromosome(), title, feature, self.getStart(), self.getEnd(), score, direction, self.name, comment)
		for exon in self.getExons():
			if "score" in exon.getTagNames():
				score = "%d" % (int(self.getTagValue("score")))
			string += "%s\t%s\t_exon\t%d\t%d\t%s\t%s\t.\tGENE %s\n" % (self.getChromosome(), title, exon.getStart(), exon.getEnd(), score, direction, self.name)
		return string


	def printGff3(self, title):
		"""
		Export this transcript using GFF3 format
		@param title: the title of the transcripts
		@type title: string
		@return: a string
		"""
		direction = "+"
		if self.getDirection() == -1:
			direction = "-"
		self.sortExonsIncreasing()
		if "ID" not in self.getTagValues():
			self.setTagValue("ID", self.getUniqueName())
		feature = "transcript"
		tags = self.tags
		if "feature" in self.getTagNames():
			feature = self.getTagValue("feature")
			del self.tags["feature"]
		score = "."
		if "score" in self.getTagNames():
			score = "%d" % (int(self.getTagValue("score")))
			del self.tags["score"]
		comment = self.getTagValues(";", "=")
		string = "%s\t%s\t%s\t%d\t%d\t%s\t%s\t.\t%s\n" % (self.getChromosome(), title, feature, self.getStart(), self.getEnd(), score, direction, comment)
		if len(self.exons) > 1:
			for i, exon in enumerate(self.getExons()):
				if "score" in exon.getTagNames():
					score = "%d" % (int(exon.getTagValue("score")))
				string += "%s\t%s\texon\t%d\t%d\t%s\t%s\t.\tID=%s-exon%d;Name=%s-exon%d;Parent=%s\n" % (self.getChromosome(), title, exon.getStart(), exon.getEnd(), score, direction, self.getTagValue("ID"), i+1, self.name, i+1, self.getTagValue("ID"))
		self.tags = tags
		return string


	def printEmbl(self):
		"""
		Export this transcript using EMBL format
		@return: a string
		"""
		if len(self.exons) <= 1:
			position = "%d..%d" % (self.getStart(), self.getEnd())
		else:
			positions = []
			for exon in self.getExons():
				positions.append("%d..%d" % (self.getStart(), self.getEnd()))
			position = ",".join(positions)
			position = "join(%s)" % (position)
		if self.getDirection() == -1:
			position = "complement(%s)" % (position)
		feature = "misc_feature"
		if "feature" in self.getTagNames():
			if not self.getTagValue("feature").startswith("S-MART"):
				feature = self.getTagValue("feature")
		string = "FT %s %s\n" % (feature, position)
		if "Name" in self.getTagNames():
			string += "FT /label=\"%s\"\n" % (self.getTagValue("Name"))
		return string


	def printBed(self):
		"""
		Export this transcript using BED format
		@return: a string
		"""
		name = self.name
		if "nbOccurrences" in self.getTagNames() and self.getTagValue("nbOccurrences") != 1 and self.getTagValue("occurrences"):
			name = "%s-%d" % (name, self.getTagValue("occurrence"))
		comment = self.getTagValues(";", "=")
		sizes   = []
		starts  = []
		direction = "+"
		if self.getDirection() == -1:
			direction = "-"
		self.sortExonsIncreasing()
		for exon in self.getExons():
			sizes.append("%d" % (exon.getSize()))
			starts.append("%d" % (exon.getStart() - self.getStart()))
		return "%s\t%d\t%d\t%s\t1000\t%s\t%d\t%d\t0\t%d\t%s,\t%s,\n" % (self.getChromosome(), self.getStart(), self.getEnd()+1, name, direction, self.getStart(), self.getEnd()+1, self.getNbExons(), ",".join(sizes), ",".join(starts))


	def printSam(self):
		"""
		Export this transcript using SAM format
		@return: a string
		"""
		name			= self.name
		flag			= 0 if self.getDirection() == 1 else 0x10
		chromosome	  = self.getChromosome()
		genomeStart	 = self.getStart()
		quality		 = 255
		mate			= "*"
		mateGenomeStart = 0
		gapSize		 = 0
		sequence		= "*"
		qualityString   = "*"
		tags			= "NM:i:0"

		lastExonEnd = None
		self.sortExonsIncreasing()
		exon  = self.getExons()[0]
		cigar = "%dM" % (self.getExons()[0].getSize())
		lastExonEnd = exon.getEnd()
		for i, exon in enumerate(self.getExons()):
			if i == 0:
				continue
			cigar += "%dN" % (exon.getStart() - lastExonEnd - 1)
			cigar += "%dM" % (exon.getSize())

		return "%s\t%d\t%s\t%d\t%d\t%s\t%s\t%d\t%d\t%s\t%s\t%s\n" % (name, flag, chromosome, genomeStart, quality, cigar, mate, mateGenomeStart, gapSize, sequence, qualityString, tags)


	def printUcsc(self):
		"""
		Export this transcript using UCSC BED format
		@return: a string
		"""
		if self.getChromosome().find("Het") != -1:
			return ""
		name	  = self.name
		comment   = self.getTagValues(";", "")
		sizes	 = []
		starts	= []
		direction = "+"
		if self.getDirection() == -1:
			direction = "-"
		self.sortExonsIncreasing()
		for exon in self.getExons():
			sizes.append("%d" % (exon.getSize()))
			starts.append("%d" % (exon.getStart() - self.getStart()))
		return "%s\t%d\t%d\t%s\t1000\t%s\t%d\t%d\t0\t%d\t%s,\t%s,\n" % (self.getChromosome().replace("arm_", "chr"), self.getStart(), self.getEnd()+1, name, direction, self.getStart(), self.getEnd()+1, self.getNbExons(), ",".join(sizes), ",".join(starts))


	def printGBrowseReference(self):
		"""
		Export this transcript using GBrowse format (1st line only)
		@return: a string
		"""
		return "reference = %s\n" % (self.getChromosome())


	def printGBrowseLine(self):
		"""
		Export this transcript using GBrowse format (2nd line only)
		@return: a string
		"""
		self.sortExons()
		coordinates = []
		for exon in self.getExons():
			coordinates.append(exon.printCoordinates())
		coordinatesString = ",".join(coordinates)
		comment = self.getTagValues(";", "=")
		if comment:
			comment = "\t\"%s\"" % (comment)
		return "User_data\t%s\t%s%s\n" % (self.name, coordinatesString, comment)


	def printGBrowse(self):
		"""
		Export this transcript using GBrowse format
		@return: a string
		"""
		return "%s%s" % (self.printGBrowseReference(), self.printGBrowseLine())


	def printCsv(self):
		"""
		Export this transcript using CSV format
		@return: a string
		"""
		self.sortExons()
		string = "%s,%d,%d,\"%s\"," % (self.getChromosome(), self.getStart(), self.getEnd(), "+" if self.getDirection() == 1 else "-")
		if len(self.getExons()) == 1:
			string += "None"
		else:
			for exon in self.getExons():
				string += "%d-%d " % (exon.getStart(), exon.getEnd())
		for tag in sorted(self.tags.keys()):
			string += ",%s=%s" % (tag, str(self.tags[tag]))
		string += "\n"
		return string


	def extractSequence(self, parser):
		"""
		Get the sequence corresponding to this transcript
		@param parser: a parser to a FASTA file
		@type  parser: class L{SequenceListParser<SequenceListParser>}
		@return:	   an instance of L{Sequence<Sequence>}
		"""
		self.sortExons()
		name = self.name
		if "ID" in self.getTagNames() and self.getTagValue("ID") != self.name:
			name += ":%s" % (self.getTagValue("ID"))
		sequence = Sequence(name)
		for exon in self.getExons():
			sequence.concatenate(exon.extractSequence(parser))
		return sequence


	def extractWigData(self, parser):
		"""
		Get some wig data corresponding to this transcript
		@param parser: a parser to a wig file
		@type  parser: class L{WigParser<WigParser>}
		@return: a sequence of float
		"""
		self.sortExons()
		if parser.strands:
			strands = (-1, 1)
			values  = dict([(strand, []) for strand in strands])
			for exon in self.getExons():
				theseValues = exon.extractWigData(parser)
				if self.getDirection() == -1:
					for strand in strands:
						theseValues[strand].reverse()
				for strand in strands:
					values[strand].extend(theseValues[strand])
			if self.getDirection() == -1:
				for strand in strands:
					values[strand].reverse()
			return values
		else:
			values = []
			for exon in self.getExons():
				theseValues = exon.extractWigData(parser)
				#if self.getDirection() == -1:
				#	theseValues.reverse()
				values.extend(theseValues)
			#if self.getDirection() == -1:
			#	values.reverse()
			return values
author	m-zytnicki
date	Mon, 29 Apr 2013 03:26:07 -0400
parents	94ab73e8a190
children