view smart_toolShed/SMART/Java/Python/ncList/test/Test_F_FileSorter.py @ 0:e0f8dcca02ed

Uploaded S-MART tool. A toolbox that manages RNA-Seq and ChIP-Seq data.
author yufei-luo
date Thu, 17 Jan 2013 10:52:14 -0500
parents
children
line wrap: on
line source

import os
import unittest
import struct
from SMART.Java.Python.misc import Utils
from SMART.Java.Python.ncList.FileSorter import FileSorter
from SMART.Java.Python.structure.Transcript import Transcript
from commons.core.writer.Gff3Writer import Gff3Writer
from commons.core.parsing.GffParser import GffParser
from SMART.Java.Python.ncList.NCListFilePickle import NCListFileUnpickle


class Test_F_FileSorter(unittest.TestCase):

    def setUp(self):
        self._inputGff3FileName = 'inputFile.gff3'
        self._outputFileName    = 'outputFile.pkl'
        
    def tearDown(self):
        return
        for fileName in (self._inputGff3FileName, self._sortedFileName, self._expHFileName, self._expLFileName, self._obsHFileName, self._obsLFileName, self._addressFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
        
    def test_unique(self):
        transcript = self._createTranscript("chr1", 100, 200, "test1.1")
        parser     = self._writeAndSortAndParse([transcript])
        self.assertEquals(parser.getNbTranscripts(), 1)
        for transcript in parser.getIterator():
            self._checkTranscript(transcript, "chr1", 100, 200, "test1.1")
            
    def test_simple(self):
        transcript1 = self._createTranscript("chr1", 300, 400, "test1.1")
        transcript2 = self._createTranscript("chr1", 100, 200, "test1.2")
        parser = self._writeAndSortAndParse([transcript1, transcript2])
        self.assertEquals(parser.getNbTranscripts(), 2)
        for cpt, transcript in enumerate(parser.getIterator()):
            if cpt == 0:
                self._checkTranscript(transcript, "chr1", 100, 200, "test1.2")
            else:
                self._checkTranscript(transcript, "chr1", 300, 400, "test1.1")

    def test_same_start(self):
        transcript1 = self._createTranscript("chr1", 100, 200, "test1.1")
        transcript2 = self._createTranscript("chr1", 100, 300, "test1.2")
        parser = self._writeAndSortAndParse([transcript1, transcript2])
        self.assertEquals(parser.getNbTranscripts(), 2)
        for cpt, transcript in enumerate(parser.getIterator()):
            if cpt == 0:
                self._checkTranscript(transcript, "chr1", 100, 300, "test1.2")
            else:
                self._checkTranscript(transcript, "chr1", 100, 200, "test1.1")

    def _writeAndSortAndParse(self, transcripts):
        writer = Gff3Writer(self._inputGff3FileName, 0)
        for transcript in transcripts:
            writer.addTranscript(transcript)
        writer.close()
        parser = GffParser(self._inputGff3FileName, 0)
        fs = FileSorter(parser, 0)
        fs.setOutputFileName(self._outputFileName)
        fs.sort()
        parser = NCListFileUnpickle(self._outputFileName, 0)
        return parser

    def _createTranscript(self, chromosome, start, end, name):
        transcript = Transcript()
        transcript.setChromosome(chromosome)
        transcript.setStart(start)
        transcript.setEnd(end)
        transcript.setDirection("+")
        transcript.setName(name)
        return transcript

    def _checkTranscript(self, transcript, chromosome, start, end, name):
        self.assertEquals(transcript.getChromosome(), chromosome)
        self.assertEquals(transcript.getStart(),      start)
        self.assertEquals(transcript.getEnd(),        end)
        self.assertEquals(transcript.getDirection(),  1)
        self.assertEquals(transcript.getName(),       name)
        
            
# Run the tests of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()