# Source: commons/core/writer/Gff3Writer.py @ 60:90f4b29d884f
# Uploaded by m-zytnicki on Fri, 21 Feb 2014 08:32:36 -0500
# Parent revision: 769e306b7933

#
# Copyright INRA-URGI 2009-2010
# 
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
# 
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
# 
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
# 
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
#
from commons.core.writer.TranscriptListWriter import TranscriptListWriter


class Gff3Writer(TranscriptListWriter):
    """
    A class that writes a transcript list into a file with GFF3 format
    @ivar fileName: name of the file
    @type fileName: string
    @ivar handle: handle to the file
    @type handle: file handle
    @ivar header: header text (empty for this writer)
    @type header: string
    @ivar title: value written in the source column (2nd column) of each line
    @type title: string
    @ivar feature: default type (3rd column) of the transcript lines
    @type feature: string
    @ivar featurePart: type (3rd column) of the exon lines
    @type featurePart: string
    """


    def __init__(self, fileName, verbosity=0, title="S-MART", feature="transcript", featurePart="exon"):
        """
        Constructor
        @param fileName: name of the file
        @type fileName: string
        @param verbosity: verbosity
        @type verbosity: int
        @param title: value written in the source column of each line
        @type title: string
        @param feature: default type of the transcript lines
        @type feature: string
        @param featurePart: type of the exon lines
        @type featurePart: string
        """
        # The attributes are set before the parent constructor is called,
        # exactly as before; presumably the parent may already read some of
        # them (e.g. the header) -- keep this order.
        self.header = ""
        self.title = title
        self.feature = feature
        self.featurePart = featurePart
        super(Gff3Writer, self).__init__(fileName, verbosity)


    @staticmethod
    def getFileFormats():
        """
        Get the names of the formats handled by this writer
        @return: a list of strings
        """
        return ["gff3", "gff"]


    @staticmethod
    def getExtension():
        """
        Get the usual extension for the file
        @return: a string
        """
        return "gff3"


    def setTitle(self, title):
        """
        Set the title of the transcripts
        @param title: the title of the transcripts
        @type title: string
        """
        self.title = title

    def setFeature(self, feature):
        """
        Set the name of the feature
        @param feature: the name of the feature
        @type feature: string
        """
        self.feature = feature

    def setFeaturePart(self, featurePart):
        """
        Set the name of the feature part
        @param featurePart: the name of the feature part
        @type featurePart: string
        """
        self.featurePart = featurePart


    def printTranscript(self, transcript):
        """
        Export the given transcript with GFF3 format

        One line is produced for the transcript itself and, when the
        transcript has more than one exon, one extra line per exon.
        The exons of the transcript are sorted, and an "ID" tag is added to
        the transcript if it has none.  The "feature" and "score" tags are
        only hidden while the attribute column is built; they are put back
        on the transcript afterwards.
        @param transcript: transcript to be printed
        @type transcript: class L{Transcript<Transcript>}
        @return: a string
        """
        direction = "+"
        if transcript.getDirection() == -1:
            direction = "-"
        transcript.sortExonsIncreasing()
        # Test the tag *names*, like every other tag lookup in this method.
        # getTagValues() is used below as the formatted attribute text, so a
        # membership test on it would also match "ID" inside unrelated text.
        if "ID" not in transcript.getTagNames():
            transcript.setTagValue("ID", transcript.getUniqueName())
        transcriptId = transcript.getTagValue("ID")
        chromosome = transcript.getChromosome()

        # "feature" and "score" have their own GFF3 columns: take them out of
        # the tags so that they are not repeated in the attribute column, and
        # remember them so that the transcript can be left as it was.
        hiddenTags = {}
        feature = self.feature
        if "feature" in transcript.getTagNames():
            feature = transcript.getTagValue("feature")
            hiddenTags["feature"] = transcript.tags["feature"]
            del transcript.tags["feature"]
        score = "."
        if "score" in transcript.getTagNames():
            score = "%d" % (int(transcript.getTagValue("score")))
            hiddenTags["score"] = transcript.tags["score"]
            del transcript.tags["score"]
        comment = transcript.getTagValues(";", "=")
        for tagName, tagValue in hiddenTags.items():
            transcript.tags[tagName] = tagValue

        lines = ["%s\t%s\t%s\t%d\t%d\t%s\t%s\t.\t%s\n" % (
            chromosome, self.title, feature,
            transcript.getStart(), transcript.getEnd(),
            score, direction, comment)]
        if len(transcript.exons) > 1:
            for i, exon in enumerate(transcript.getExons()):
                # An exon without its own score uses the transcript score,
                # never the score of the previously printed exon.
                exonScore = score
                if "score" in exon.getTagNames():
                    exonScore = "%d" % (int(exon.getTagValue("score")))
                lines.append("%s\t%s\t%s\t%d\t%d\t%s\t%s\t.\tID=%s-%s%d;Name=%s-%s%d;Parent=%s\n" % (
                    chromosome, self.title, self.featurePart,
                    exon.getStart(), exon.getEnd(),
                    exonScore, direction,
                    transcriptId, self.featurePart, i + 1,
                    transcript.name, self.featurePart, i + 1,
                    transcriptId))
        return "".join(lines)