Mercurial > repos > yufei-luo > s_mart
diff SMART/Java/Python/structure/Transcript.py @ 36:44d5973c188c
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 15:02:29 -0400 |
parents | |
children | 169d364ddd91 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SMART/Java/Python/structure/Transcript.py Tue Apr 30 15:02:29 2013 -0400 @@ -0,0 +1,876 @@ +# +# Copyright INRA-URGI 2009-2010 +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. +# +import sys +from SMART.Java.Python.structure.Interval import Interval +from SMART.Java.Python.structure.Sequence import Sequence + + +class Transcript(Interval): + """ + A class that models an transcript, considered as a specialized interval (the bounds of the transcript) that contains exons (also represented as intervals) + @ivar exons: a list of exons (intervals) + @type exons: list of L{Interval{Interval}} + """ + + def __init__(self, transcript = None, verbosity = 0): + """ + Constructor + @param transcript: transcript to be copied + @type transcript: class L{Transcript<Transcript>} + @param verbosity: verbosity + @type verbosity: int + """ + super(Transcript, self).__init__(None, verbosity) + self.exons = [] + self.introns = None + if transcript != None: + self.copy(transcript) + + + def copy(self, transcript): + """ + Copy method + @param transcript: transcript to be copied + @type transcript: class L{Transcript<Transcript>} or L{Interval<Interval>} + """ + super(Transcript, self).copy(transcript) + if transcript.__class__.__name__ == "Transcript": + exons = transcript.getExons() + if len(exons) > 1: + for exon in exons: + exonCopy = Interval(exon) + self.addExon(exonCopy) + + + def setDirection(self, direction): + """ + Set the direction of the interval + Possibly parse different formats + Impact all exons + @param direction: direction of the transcript (+ / -) + @type direction: int or string + """ + super(Transcript, self).setDirection(direction) + for exon in self.exons: + exon.setDirection(direction) + + + def setChromosome(self, chromosome): + """ + Set the chromosome + @param chromosome: chromosome on which the transcript is + @type chromosome: string + """ + super(Transcript, self).setChromosome(chromosome) + for exon in self.exons: + exon.setChromosome(chromosome) + + + def addExon(self, exon): + """ + Add an exon to the list of exons + @param exon: a new exon + @type exon: class L{Interval<Interval>} + """ + if not self.exons and not exon.overlapWith(self): + firstExon = Interval() + firstExon.setStart(self.getStart()) + firstExon.setEnd(self.getEnd()) + firstExon.setDirection(self.getDirection()) + firstExon.setChromosome(self.getChromosome()) + self.exons.append(firstExon) + newExon = Interval(exon) + newExon.setDirection(self.getDirection()) + self.exons.append(newExon) + if newExon.getStart() < self.getStart(): + self.setStart(newExon.getStart()) + if newExon.getEnd() > self.getEnd(): + self.setEnd(newExon.getEnd()) + + + def setStart(self, start): + """ + Set the new start, move the first exon accordingly (if exists) + @param start: the new start + @type start: int + """ + super(Transcript, self).setStart(start) + if self.exons: + self.sortExonsIncreasing() + self.exons[0].setStart(start) + + + def setEnd(self, end): + """ + Set the new end, move the last exon accordingly (if exists) + @param end: the new end + @type end: int + """ + super(Transcript, self).setEnd(end) + if self.exons: + self.sortExonsIncreasing() + self.exons[-1].setEnd(end) + + + def reverse(self): + """ + Reverse the strand of the transcript + """ + super(Transcript, self).reverse() + for exon in self.exons: + exon.reverse() + + + def getUniqueName(self): + """ + Try to give a unique name by possibly adding occurrence + """ + if "nbOccurrences" in self.tags and "occurrence" in self.tags and self.tags["nbOccurrences"] != 1: + return "%s-%d" % (self.name, self.tags["occurrence"]) + return self.name + + + def getNbExons(self): + """ + Get the number of exons + """ + return max(1, len(self.exons)) + + + def getExon(self, i): + """ + Get a specific exon + @param i: the rank of the exon + @type i: int + """ + if len(self.exons) == 0: + if i != 0: + raise Exception("Cannot get exon #%i while there is no exon in the transcript" % (i)) + return self + return self.exons[i] + + + def getExons(self): + """ + Get all the exons + """ + if len(self.exons) == 0: + return [Interval(self)] + return self.exons + + + def getIntrons(self): + """ + Get all the introns + Compute introns on the fly + """ + if self.introns != None: + return self.introns + self.sortExons() + self.introns = [] + exonStart = self.getExon(0) + for cpt, exonEnd in enumerate(self.exons[1:]): + intron = Interval() + intron.setName("%s_intron%d" % (self.getName(), cpt+1)) + intron.setChromosome(self.getChromosome()) + intron.setDirection(self.getDirection()) + if self.getDirection() == 1: + intron.setEnd(exonEnd.getStart() - 1) + intron.setStart(exonStart.getEnd() + 1) + else: + intron.setStart(exonEnd.getEnd() + 1) + intron.setEnd(exonStart.getStart() - 1) + intron.setDirection(self.getDirection()) + if intron.getSize() > 0: + self.introns.append(intron) + exonStart = exonEnd + intron.setSize(intron.getEnd() - intron.getStart() + 1) + return self.introns + + + def getSize(self): + """ + Get the size of the transcript (i.e. the number of nucleotides) + Compute size on the fly + """ + if len(self.exons) == 0: + return self.getSizeWithIntrons() + size = 0 + for exon in self.exons: + size += exon.getSize() + return size + + + def getSizeWithIntrons(self): + """ + Get the size of the interval (i.e. distance from start to end) + """ + return super(Transcript, self).getSize() + + + def overlapWithExon(self, transcript, nbNucleotides = 1): + """ + Check if the exons of this transcript overlap with the exons of another transcript + @param transcript: transcript to be compared to + @type transcript: class L{Transcript<Transcript>} + @param nbNucleotides: minimum number of nucleotides to declare and overlap + @type nbNucleotides: int + """ + if not self.overlapWith(transcript, nbNucleotides): + return False + for thisExon in self.getExons(): + for thatExon in transcript.getExons(): + if thisExon.overlapWith(thatExon, nbNucleotides): + return True + return False + + + def include(self, transcript): + """ + Whether this transcript includes the other one + @param transcript: object to be compared to + @type transcript: class L{Transcript<Transcript>} + """ + if not super(Transcript, self).include(transcript): + return False + for thatExon in transcript.getExons(): + for thisExon in self.getExons(): + if thisExon.include(thatExon): + break + else: + return False + return True + + + def merge(self, transcript, normalization = False): + """ + Merge with another transcript + Merge exons if they overlap, otherwise add exons + @param transcript: transcript to be merged to + @type transcript: class L{Transcript<Transcript>} + @param normalization: whether the sum of the merge should be normalized wrt the number of mappings of each elements + @type normalization: boolean + """ + if self.getChromosome() != transcript.getChromosome() or self.getDirection() != transcript.getDirection(): + raise Exception("Cannot merge '%s' with '%s'!" % (self, transcript)) + + theseExons = self.getExons() + thoseExons = transcript.getExons() + + for thatExon in thoseExons: + toBeRemoved = [] + for thisIndex, thisExon in enumerate(theseExons): + if thisExon.overlapWith(thatExon): + thatExon.merge(thisExon) + toBeRemoved.append(thisIndex) + theseExons.append(thatExon) + for thisIndex in reversed(toBeRemoved): + del theseExons[thisIndex] + self.removeExons() + self.setStart(min(self.getStart(), transcript.getStart())) + self.setEnd(max(self.getEnd(), transcript.getEnd())) + if len(theseExons) > 1: + for thisExon in theseExons: + self.addExon(thisExon) + + self.setName("%s--%s" % (self.getUniqueName(), transcript.getUniqueName())) + super(Transcript, self).merge(transcript, normalization) + + + def getDifference(self, transcript, sameStrand = False): + """ + Get the difference between this cluster and another one + @param transcript: object to be compared to + @type transcript: class L{Transcript<Transcript>} + @param sameStrand: do the comparison iff the transcripts are on the same strand + @type sameStrand: boolean + @return: a transcript + """ + newTranscript = Transcript() + newTranscript.copy(self) + if self.getChromosome() != transcript.getChromosome(): + return newTranscript + if not self.overlapWith(transcript): + return newTranscript + if sameStrand and self.getDirection() != transcript.getDirection(): + return newTranscript + newTranscript.removeExons() + if transcript.getEnd() > newTranscript.getStart(): + newTranscript.setStart(transcript.getEnd() + 1) + if transcript.getStart() < newTranscript.getEnd(): + newTranscript.setEnd(transcript.getStart() + 1) + theseExons = [] + for exon in self.getExons(): + exonCopy = Interval() + exonCopy.copy(exon) + theseExons.append(exonCopy) + for thatExon in transcript.getExons(): + newExons = [] + for thisExon in theseExons: + newExons.extend(thisExon.getDifference(thatExon)) + theseExons = newExons + if not theseExons: + return None + newStart, newEnd = theseExons[0].getStart(), theseExons[0].getEnd() + for thisExon in theseExons[1:]: + newStart = min(newStart, thisExon.getStart()) + newEnd = max(newEnd, thisExon.getEnd()) + newTranscript.setEnd(newEnd) + newTranscript.setStart(newStart) + newTranscript.exons = theseExons + return newTranscript + + + def getIntersection(self, transcript): + """ + Get the intersection between this transcript and another one + @param transcript: object to be compared to + @type transcript: class L{Transcript<Transcript>} + @return: an other transcript + """ + if self.getChromosome() != transcript.getChromosome() or self.getDirection() != transcript.getDirection(): + return None + newTranscript = Transcript() + newTranscript.setDirection(self.getDirection()) + newTranscript.setChromosome(self.getChromosome()) + newTranscript.setName("%s_intersect_%s" % (self.getName(), transcript.getName())) + newExons = [] + for thisExon in self.getExons(): + for thatExon in transcript.getExons(): + newExon = thisExon.getIntersection(thatExon) + if newExon != None: + newExons.append(newExon) + if not newExons: + return None + newTranscript.exons = newExons + return newTranscript + + + def getSqlVariables(cls): + """ + Get the properties of the object that should be saved in a database + """ + variables = Interval.getSqlVariables() + variables.append("exons") + return variables + getSqlVariables = classmethod(getSqlVariables) + + + def setSqlValues(self, array): + """ + Set the values of the properties of this object as given by a results line of a SQL query + @param array: the values to be copied + @type array: a list + """ + super(Transcript, self).setSqlValues(array) + mergedExons = array[8] + if not mergedExons: + return + for exonCount, splittedExon in enumerate(mergedExons.split(",")): + start, end = splittedExon.split("-") + exon = Interval() + exon.setChromosome(self.getChromosome()) + exon.setDirection(self.getDirection()) + exon.setName("%s_exon%d" % (self.getName(), exonCount+1)) + exon.setStart(int(start)) + exon.setEnd(int(end)) + self.addExon(exon) + + + def getSqlValues(self): + """ + Get the values of the properties that should be saved in a database + """ + values = super(Transcript, self).getSqlValues() + values["size"] = self.getSize() + if self.getNbExons() == 1: + values["exons"] = "" + else: + values["exons"] = ",".join(["%d-%d" % (exon.getStart(), exon.getEnd()) for exon in self.getExons()]) + return values + + + def getSqlTypes(cls): + """ + Get the types of the properties that should be saved in a database + """ + types = Interval.getSqlTypes() + types["exons"] = "varchar" + return types + getSqlTypes = classmethod(getSqlTypes) + + + def getSqlSizes(cls): + """ + Get the sizes of the properties that should be saved in a database + """ + sizes = Interval.getSqlSizes() + sizes["exons"] = 10000 + return sizes + getSqlSizes = classmethod(getSqlSizes) + + + def sortExons(self): + """ + Sort the exons + Increasing order if transcript is on strand "+", decreasing otherwise + """ + self.sortExonsIncreasing() + if self.getDirection() == -1: + exons = self.getExons() + exons.reverse() + self.exons = exons + + + def sortExonsIncreasing(self): + """ + Sort the exons + Increasing order + """ + exons = self.getExons() + sortedExons = [] + while len(exons) > 0: + minExon = exons[0] + for index in range(1, len(exons)): + if minExon.getStart() > exons[index].getStart(): + minExon = exons[index] + sortedExons.append(minExon) + exons.remove(minExon) + self.exons = sortedExons + + + def extendStart(self, size): + """ + Extend the transcript by the 5' end + @param size: the size to be extended + @type size: int + """ + if len(self.exons) != 0: + self.sortExons() + if self.getDirection() == 1: + self.exons[0].setStart(max(0, self.exons[0].getStart() - size)) + else: + self.exons[0].setEnd(self.exons[0].getEnd() + size) + super(Transcript, self).extendStart(size) + self.bin = None + + + def extendEnd(self, size): + """ + Extend the transcript by the 3' end + @param size: the size to be extended + @type size: int + """ + if len(self.exons) != 0: + self.sortExons() + if self.getDirection() == 1: + self.exons[-1].setEnd(self.exons[-1].getEnd() + size) + else: + self.exons[-1].setStart(max(0, self.exons[-1].getStart() - size)) + super(Transcript, self).extendEnd(size) + self.bin = None + + + def extendExons(self, size): + """ + Extend all the exons + @param size: the size to be extended + @type size: int + """ + if len(self.exons) != 0: + self.sortExons() + exons = [] + previousExon = None + for exon in self.exons: + exon.extendStart(size) + exon.extendEnd(size) + exon.setDirection(self.getDirection()) + if previousExon != None and previousExon.overlapWith(exon): + previousExon.merge(exon) + else: + if previousExon != None: + exons.append(previousExon) + previousExon = exon + exons.append(previousExon) + self.exons = exons + super(Transcript, self).extendStart(size) + super(Transcript, self).extendEnd(size) + self.bin = None + + + def restrictStart(self, size = 1): + """ + Restrict the transcript by some nucleotides, start from its start position + Remove the exons + @param size: the size to be restricted to + @type size: int + """ + newExons = [] + if self.getDirection() == 1: + for exon in self.exons: + if exon.getStart() <= self.getStart() + size - 1: + if exon.getEnd() > self.getStart() + size - 1: + exon.setEnd(self.getStart() + size - 1) + newExons.append(exon) + else: + for exon in self.exons: + if exon.getEnd() >= self.getEnd() - size + 1: + if exon.getStart() < self.getEnd() - size + 1: + exon.setStart(self.getEnd() - size + 1) + newExons.append(exon) + super(Transcript, self).restrictStart(size) + self.exons = newExons + + + def restrictEnd(self, size = 1): + """ + Restrict the transcript by some nucleotides, end from its end position + Remove the exons + @param size: the size to be restricted to + @type size: int + """ + newExons = [] + if self.getDirection() == 1: + for exon in self.exons: + if exon.getEnd() >= self.getEnd() - size + 1: + if exon.getStart() < self.getEnd() - size + 1: + exon.setStart(self.getEnd() - size + 1) + newExons.append(exon) + else: + for exon in self.exons: + if exon.getEnd() >= self.getEnd() - size + 1: + if exon.getStart() < self.getEnd() - size + 1: + exon.setEnd(self.getEnd() - size + 1) + newExons.append(exon) + super(Transcript, self).restrictEnd(size) + self.exons = newExons + + + def removeExons(self): + """ + Remove the exons and transforms the current transcript into a mere interval + """ + self.exons = [] + self.bin = None + + + def printGtf(self, title): + """ + Export this transcript using GTF2.2 format + @param title: the title of the transcripts + @type title: string + @return: a string + """ + transcriptId = self.getUniqueName() + geneId = "%s_gene" % (transcriptId) + direction = "+" + if self.getDirection() == -1: + direction = "-" + self.sortExonsIncreasing() + string = "" + for i, exon in enumerate(self.getExons()): + exonCopy = Interval() + exonCopy.copy(exon) + if "ID" in exonCopy.getTagValues(): + del exonCopy.tags["ID"] + feature = "exon" + if "feature" in exonCopy.getTagNames(): + feature = exonCopy.getTagValue("feature") + del exonCopy.tags["feature"] + score = "." + if "score" in exonCopy.getTagNames(): + score = "%d" % (int(exonCopy.getTagValue("score"))) + del exonCopy.tags["score"] + if "Parent" in exonCopy.getTagNames(): + del exonCopy.tags["Parent"] + exonCopy.setName("%s_part%d" % (self.getName(), i+1)) + comment = exonCopy.getTagValues("; ", " ", "\"") + string += "%s\t%s\t%s\t%d\t%d\t%s\t%s\t.\ttranscript_id \"%s\"; gene_id \"%s\"; %s\n" % (exonCopy.getChromosome(), title, feature, exonCopy.getStart(), exonCopy.getEnd(), score, direction, transcriptId, geneId, comment) + return string + + + def printGff2(self, title): + """ + Export this transcript using GFF2 format + @param title: the title of the transcripts + @type title: string + @return: a string + """ + direction = "+" + if self.getDirection() == -1: + direction = "-" + self.sortExonsIncreasing() + comment = self.getTagValues() + if comment != None: + comment = ";%s" % (comment) + score = "." + if "score" in self.getTagNames(): + score = "%d" % (int(self.getTagValue("score"))) + feature = "transcript" + if "feature" in self.getTagNames(): + feature = self.getTagValue("feature") + string = "%s\t%s\t%s\t%d\t%d\t%s\t%s\t.\tGENE %s%s\n" % (self.getChromosome(), title, feature, self.getStart(), self.getEnd(), score, direction, self.name, comment) + for exon in self.getExons(): + if "score" in exon.getTagNames(): + score = "%d" % (int(self.getTagValue("score"))) + string += "%s\t%s\t_exon\t%d\t%d\t%s\t%s\t.\tGENE %s\n" % (self.getChromosome(), title, exon.getStart(), exon.getEnd(), score, direction, self.name) + return string + + + def printGff3(self, title): + """ + Export this transcript using GFF3 format + @param title: the title of the transcripts + @type title: string + @return: a string + """ + direction = "+" + if self.getDirection() == -1: + direction = "-" + self.sortExonsIncreasing() + if "ID" not in self.getTagValues(): + self.setTagValue("ID", self.getUniqueName()) + feature = "transcript" + tags = self.tags + if "feature" in self.getTagNames(): + feature = self.getTagValue("feature") + del self.tags["feature"] + score = "." + if "score" in self.getTagNames(): + score = "%d" % (int(self.getTagValue("score"))) + del self.tags["score"] + comment = self.getTagValues(";", "=") + string = "%s\t%s\t%s\t%d\t%d\t%s\t%s\t.\t%s\n" % (self.getChromosome(), title, feature, self.getStart(), self.getEnd(), score, direction, comment) + if len(self.exons) > 1: + for i, exon in enumerate(self.getExons()): + if "score" in exon.getTagNames(): + score = "%d" % (int(exon.getTagValue("score"))) + string += "%s\t%s\texon\t%d\t%d\t%s\t%s\t.\tID=%s-exon%d;Name=%s-exon%d;Parent=%s\n" % (self.getChromosome(), title, exon.getStart(), exon.getEnd(), score, direction, self.getTagValue("ID"), i+1, self.name, i+1, self.getTagValue("ID")) + self.tags = tags + return string + + + def printEmbl(self): + """ + Export this transcript using EMBL format + @return: a string + """ + if len(self.exons) <= 1: + position = "%d..%d" % (self.getStart(), self.getEnd()) + else: + positions = [] + for exon in self.getExons(): + positions.append("%d..%d" % (self.getStart(), self.getEnd())) + position = ",".join(positions) + position = "join(%s)" % (position) + if self.getDirection() == -1: + position = "complement(%s)" % (position) + feature = "misc_feature" + if "feature" in self.getTagNames(): + if not self.getTagValue("feature").startswith("S-MART"): + feature = self.getTagValue("feature") + string = "FT %s %s\n" % (feature, position) + if "Name" in self.getTagNames(): + string += "FT /label=\"%s\"\n" % (self.getTagValue("Name")) + return string + + + def printBed(self): + """ + Export this transcript using BED format + @return: a string + """ + name = self.name + if "nbOccurrences" in self.getTagNames() and self.getTagValue("nbOccurrences") != 1 and self.getTagValue("occurrences"): + name = "%s-%d" % (name, self.getTagValue("occurrence")) + comment = self.getTagValues(";", "=") + sizes = [] + starts = [] + direction = "+" + if self.getDirection() == -1: + direction = "-" + self.sortExonsIncreasing() + for exon in self.getExons(): + sizes.append("%d" % (exon.getSize())) + starts.append("%d" % (exon.getStart() - self.getStart())) + return "%s\t%d\t%d\t%s\t1000\t%s\t%d\t%d\t0\t%d\t%s,\t%s,\n" % (self.getChromosome(), self.getStart(), self.getEnd()+1, name, direction, self.getStart(), self.getEnd()+1, self.getNbExons(), ",".join(sizes), ",".join(starts)) + + + def printSam(self): + """ + Export this transcript using SAM format + @return: a string + """ + name = self.name + flag = 0 if self.getDirection() == 1 else 0x10 + chromosome = self.getChromosome() + genomeStart = self.getStart() + quality = 255 + mate = "*" + mateGenomeStart = 0 + gapSize = 0 + sequence = "*" + qualityString = "*" + tags = "NM:i:0" + + lastExonEnd = None + self.sortExonsIncreasing() + exon = self.getExons()[0] + cigar = "%dM" % (self.getExons()[0].getSize()) + lastExonEnd = exon.getEnd() + for i, exon in enumerate(self.getExons()): + if i == 0: + continue + cigar += "%dN" % (exon.getStart() - lastExonEnd - 1) + cigar += "%dM" % (exon.getSize()) + + return "%s\t%d\t%s\t%d\t%d\t%s\t%s\t%d\t%d\t%s\t%s\t%s\n" % (name, flag, chromosome, genomeStart, quality, cigar, mate, mateGenomeStart, gapSize, sequence, qualityString, tags) + + + def printUcsc(self): + """ + Export this transcript using UCSC BED format + @return: a string + """ + if self.getChromosome().find("Het") != -1: + return "" + name = self.name + comment = self.getTagValues(";", "") + sizes = [] + starts = [] + direction = "+" + if self.getDirection() == -1: + direction = "-" + self.sortExonsIncreasing() + for exon in self.getExons(): + sizes.append("%d" % (exon.getSize())) + starts.append("%d" % (exon.getStart() - self.getStart())) + return "%s\t%d\t%d\t%s\t1000\t%s\t%d\t%d\t0\t%d\t%s,\t%s,\n" % (self.getChromosome().replace("arm_", "chr"), self.getStart(), self.getEnd()+1, name, direction, self.getStart(), self.getEnd()+1, self.getNbExons(), ",".join(sizes), ",".join(starts)) + + + def printGBrowseReference(self): + """ + Export this transcript using GBrowse format (1st line only) + @return: a string + """ + return "reference = %s\n" % (self.getChromosome()) + + + def printGBrowseLine(self): + """ + Export this transcript using GBrowse format (2nd line only) + @return: a string + """ + self.sortExons() + coordinates = [] + for exon in self.getExons(): + coordinates.append(exon.printCoordinates()) + coordinatesString = ",".join(coordinates) + comment = self.getTagValues(";", "=") + if comment: + comment = "\t\"%s\"" % (comment) + return "User_data\t%s\t%s%s\n" % (self.name, coordinatesString, comment) + + + def printGBrowse(self): + """ + Export this transcript using GBrowse format + @return: a string + """ + return "%s%s" % (self.printGBrowseReference(), self.printGBrowseLine()) + + + def printCsv(self): + """ + Export this transcript using CSV format + @return: a string + """ + self.sortExons() + string = "%s,%d,%d,\"%s\"," % (self.getChromosome(), self.getStart(), self.getEnd(), "+" if self.getDirection() == 1 else "-") + if len(self.getExons()) == 1: + string += "None" + else: + for exon in self.getExons(): + string += "%d-%d " % (exon.getStart(), exon.getEnd()) + for tag in sorted(self.tags.keys()): + string += ",%s=%s" % (tag, str(self.tags[tag])) + string += "\n" + return string + + + def extractSequence(self, parser): + """ + Get the sequence corresponding to this transcript + @param parser: a parser to a FASTA file + @type parser: class L{SequenceListParser<SequenceListParser>} + @return: an instance of L{Sequence<Sequence>} + """ + self.sortExons() + name = self.name + if "ID" in self.getTagNames() and self.getTagValue("ID") != self.name: + name += ":%s" % (self.getTagValue("ID")) + sequence = Sequence(name) + for exon in self.getExons(): + sequence.concatenate(exon.extractSequence(parser)) + return sequence + + + def extractWigData(self, parser): + """ + Get some wig data corresponding to this transcript + @param parser: a parser to a wig file + @type parser: class L{WigParser<WigParser>} + @return: a sequence of float + """ + self.sortExons() + if parser.strands: + strands = (-1, 1) + values = dict([(strand, []) for strand in strands]) + for exon in self.getExons(): + theseValues = exon.extractWigData(parser) + if self.getDirection() == -1: + for strand in strands: + theseValues[strand].reverse() + for strand in strands: + values[strand].extend(theseValues[strand]) + if self.getDirection() == -1: + for strand in strands: + values[strand].reverse() + return values + else: + values = [] + for exon in self.getExons(): + theseValues = exon.extractWigData(parser) + #if self.getDirection() == -1: + # theseValues.reverse() + values.extend(theseValues) + #if self.getDirection() == -1: + # values.reverse() + return values