view commons/tools/tests/Test_RmvPairAlignInChunkOverlaps.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
line wrap: on
line source

import unittest
import os
import time
from commons.core.coord.Align import Align
from commons.core.utils.FileUtils import FileUtils
from commons.tools.RmvPairAlignInChunkOverlaps import RmvPairAlignInChunkOverlaps


class Test_RmvPairAlignInChunkOverlaps( unittest.TestCase ):
    """
    Tests for RmvPairAlignInChunkOverlaps: the two predicates deciding whether
    a pairwise alignment corresponds to a chunk overlap, the filtering of an
    align file through removeChunkOverlaps(), and the command-line script.

    Temporary files are written in the current working directory, named with
    the identifier built in setUp(), and are deleted even when an assertion
    fails.
    """

    def setUp( self ):
        """Create a fresh instance under test and a timestamp used in temporary file names."""
        self._i = RmvPairAlignInChunkOverlaps()
        self._uniqId = "%s" % ( time.strftime("%Y%m%d%H%M%S") )


    def tearDown( self ):
        """Drop the references created in setUp()."""
        self._i = None
        self._uniqId = None


    @staticmethod
    def _writeLines( fileName, lines ):
        """Write the given lines to 'fileName'; an empty list yields an empty file."""
        with open( fileName, "w" ) as handle:
            handle.writelines( lines )


    @staticmethod
    def _removeFiles( fileNames ):
        """Delete each file that exists, so a missing file cannot mask the real test failure."""
        for fileName in fileNames:
            if os.path.exists( fileName ):
                os.remove( fileName )


    def _runScriptAndCheck( self, alignLines, expLines, chunkLength, chunkOverlap ):
        """
        Run 'RmvPairAlignInChunkOverlaps.py' on an align file made of
        'alignLines' and assert that its output file is identical to a file
        made of 'expLines'.

        @param alignLines: lines (newline-terminated) of the input align file
        @param expLines: lines (newline-terminated) of the expected output file
        @param chunkLength: value given to the '-l' option
        @param chunkOverlap: value given to the '-o' option
        """
        cDir = os.getcwd()
        alignFileName = "dummyInFile_%s" % ( self._uniqId )
        expFileName = "dummyExpFile_%s" % ( self._uniqId )
        obsFileName = "dummyObsFile_%s" % ( self._uniqId )
        try:
            self._writeLines( alignFileName, alignLines )
            self._writeLines( expFileName, expLines )

            cmd = "RmvPairAlignInChunkOverlaps.py"
            cmd += " -i %s" % ( alignFileName )
            cmd += " -l %i" % ( chunkLength )
            cmd += " -o %i" % ( chunkOverlap )
            cmd += " -O %s" % ( obsFileName )
            cmd += " -v 0"
            exitStatus = os.system( cmd )

            # fail with an explicit message when the script did not run (e.g. not in the PATH)
            self.assertTrue( os.path.exists( obsFileName ),
                             "no output file produced by '%s' (exit status: %s)" % ( cmd, exitStatus ) )
            self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )
        finally:
            # clean up whatever happened above, then restore the starting directory
            # (nothing here changes it; this is kept as a safeguard)
            self._removeFiles( [ alignFileName, expFileName, obsFileName ] )
            os.chdir( cDir )


    def test_isPairAlignAChunkOverlap( self ):
        """Check isPairAlignAChunkOverlap() with chunks of length 500 overlapping by 100."""
        a = Align()
        a.setFromTuple( ( "chunk1", "401", "500", "chunk2", "1", "100", "0.0", "125", "97.6" ) )
        self._i.setChunkLength( 500 )
        self._i.setChunkOverlap( 100 )
        obs = self._i.isPairAlignAChunkOverlap( a, 1, 2 )
        self.assertTrue( obs )
        # NOTE(review): this case was labelled "reverse subject", but its coordinates
        # are identical to the first case above -- confirm the intended input
        a.setFromTuple( ( "chunk1", "401", "500", "chunk2", "1", "100", "0.0", "125", "97.6" ) )
        obs = self._i.isPairAlignAChunkOverlap( a, 1, 2 )
        self.assertTrue( obs )
        a.setFromTuple( ( "chunk1", "401", "500", "chunk3", "1", "100", "0.0", "125", "97.6" ) )   # chunk subject not contiguous
        obs = self._i.isPairAlignAChunkOverlap( a, 1, 3 )
        self.assertFalse( obs )
        a.setFromTuple( ( "chunk1", "390", "500", "chunk2", "1", "110", "0.0", "125", "97.6" ) )   # hit longer than chunk overlap
        obs = self._i.isPairAlignAChunkOverlap( a, 1, 2 )
        self.assertFalse( obs )
        a.setFromTuple( ( "chunk2", "1", "101", "chunk1", "401", "500", "0.0", "500", "100.0" ) )   # chunk overlap returned by PALS (+1), query > subject
        obs = self._i.isPairAlignAChunkOverlap( a, 2, 1 )
        self.assertTrue( obs )
        a.setFromTuple( ( "chunk1", "401", "500", "chunk2", "1", "101", "0.0", "500", "100.0" ) )   # chunk overlap returned by PALS (+1), query < subject
        obs = self._i.isPairAlignAChunkOverlap( a, 1, 2 )
        self.assertTrue( obs )


    def test_isPairAlignWithinAndDueToAChunkOverlap( self ):
        """Check isPairAlignWithinAndDueToAChunkOverlap(), including the effect of the '_margin' attribute."""
        a = Align()
        a.setFromTuple( ( "chunk1", "411", "450", "chunk2", "11", "50", "0.0", "73", "97.6" ) )  # included, due to overlap
        self._i.setChunkLength( 500 )
        self._i.setChunkOverlap( 100 )
        self._i._verbose = 0
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
        self.assertTrue( obs )
        a.setFromTuple( ( "chunk1", "411", "450", "chunk2", "50", "11", "0.0", "73", "97.6" ) )  # reverse subject
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
        self.assertFalse( obs )
        a.setFromTuple( ( "chunk1", "401", "500", "chunk3", "1", "100", "0.0", "125", "97.6" ) )   # chunk subject not contiguous
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 3 )
        self.assertFalse( obs )
        a.setFromTuple( ( "chunk1", "390", "500", "chunk2", "1", "110", "0.0", "125", "97.6" ) )   # hit longer than chunk overlap
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
        self.assertFalse( obs )
        a.setFromTuple( ( "chunk1", "411", "430", "chunk2", "16", "35", "0.0", "73", "97.6" ) )   # repeat within overlap...
        self._i._margin = 2   # ... but not due to it
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
        self.assertFalse( obs )
        self._i._margin = 10   # ... and due to it
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
        self.assertTrue( obs )


    def test_removeChunkOverlaps( self ):
        """Both input alignments are expected to be filtered out, leaving an empty output file."""
        inFileName = "dummyInFile_%s" % ( self._uniqId )
        obsFileName = "dummyObsFile_%s"  %( self._uniqId )
        expFileName = "dummyExpFile_%s" % ( self._uniqId )
        try:
            a = Align()
            with open( inFileName, "w" ) as inF:
                a.setFromTuple( ( "chunk1", "411", "490", "chunk2", "11", "90", "0.0", "73", "97.6" ) )
                a.write( inF )
                a.setFromTuple( ( "chunk1", "1", "500", "chunk1", "1", "500", "0.0", "500", "100.0" ) )
                a.write( inF )
            self._i.setInputFileName( inFileName )
            self._i.setChunkLength( 500 )
            self._i.setChunkOverlap( 100 )
            self._i.setOutputFileName( obsFileName )
            self._i.removeChunkOverlaps()
            self._writeLines( expFileName, [] )   # expected output: empty file
            self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )
        finally:
            self._removeFiles( [ inFileName, obsFileName, expFileName ] )


    def test_zRunAsScript( self ):
        """Run the script on chunks of length 500 overlapping by 100 and check which alignments are kept."""
        alignLines = [
            "chunk1\t401\t500\tchunk2\t1\t100\t0.0\t131\t100.0\n",   # remove
            "chunk2\t1\t100\tchunk1\t401\t500\t0.0\t132\t100.0\n",   # remove
            "chunk1\t401\t500\tchunk3\t1\t100\t0.0\t132\t100.0\n",   # keep because non-contiguous chunks
            "chunk3\t401\t500\tchunk1\t1\t100\t0.0\t132\t100.0\n",   # keep because non-contiguous chunks
            "chunk1\t401\t500\tchunk2\t100\t1\t0.0\t132\t100.0\n",   # keep because within overlap but reverse
            "chunk1\t431\t490\tchunk2\t31\t90\t0.0\t132\t100.0\n",   # remove because within and due to overlap
            "chunk1\t411\t430\tchunk2\t61\t90\t0.0\t132\t100.0\n",   # keep because within but not due to overlap
            "chunk1\t390\t500\tchunk2\t1\t100\t0.0\t132\t100.0\n",   # keep because longer HSP on query
        ]
        expLines = [
            "chunk1\t401\t500\tchunk3\t1\t100\t0\t132\t100.000000\n",
            "chunk3\t401\t500\tchunk1\t1\t100\t0\t132\t100.000000\n",
            "chunk1\t401\t500\tchunk2\t100\t1\t0\t132\t100.000000\n",
            "chunk1\t411\t430\tchunk2\t61\t90\t0\t132\t100.000000\n",
            "chunk1\t390\t500\tchunk2\t1\t100\t0\t132\t100.000000\n",
        ]
        self._runScriptAndCheck( alignLines, expLines, 500, 100 )


    def test_zRunAsScript_bug_to_fix(self):
        """
        Run the script on chunks of length 200000 overlapping by 10000; only
        the first two of the three input alignments are expected in the output.
        NOTE(review): the name suggests a known, not yet fixed bug -- confirm.
        """
        alignLines = [
            "chunk4\t63217\t63680\tchunk5\t195316\t194837\t0\t676\t92.71\n",
            "chunk4\t63217\t63680\tchunk6\t2618\t3101\t0\t714\t93.6\n",
            "chunk4\t63217\t63680\tchunk6\t5316\t4837\t0\t676\t92.71\n",
        ]
        expLines = [
            "chunk4\t63217\t63680\tchunk5\t195316\t194837\t0\t676\t92.71\n",
            "chunk4\t63217\t63680\tchunk6\t2618\t3101\t0\t714\t93.6\n",
        ]
        self._runScriptAndCheck( alignLines, expLines, 200000, 10000 )
        
# Run every test of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()