diff commons/tools/tests/Test_RmvPairAlignInChunkOverlaps.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/tests/Test_RmvPairAlignInChunkOverlaps.py	Mon Apr 29 03:20:15 2013 -0400
@@ -0,0 +1,161 @@
+import unittest
+import os
+import time
+from commons.core.coord.Align import Align
+from commons.core.utils.FileUtils import FileUtils
+from commons.tools.RmvPairAlignInChunkOverlaps import RmvPairAlignInChunkOverlaps
+
+
+class Test_RmvPairAlignInChunkOverlaps( unittest.TestCase ):
+    """Unit tests of RmvPairAlignInChunkOverlaps.
+
+    Dummy files are written in the current directory and removed in tearDown().
+    """
+
+    def setUp( self ):
+        self._i = RmvPairAlignInChunkOverlaps()
+        self._uniqId = "%s" % ( time.strftime("%Y%m%d%H%M%S") )
+        self._tmpFiles = []   # names handed out by _dummyFileName()
+
+
+    def tearDown( self ):
+        # Clean up here rather than at the end of each test, so that the dummy
+        # files are removed even when an assertion fails.
+        for f in self._tmpFiles:
+            if os.path.exists( f ):
+                os.remove( f )
+        self._tmpFiles = None
+        self._i = None
+        self._uniqId = None
+
+
+    def _dummyFileName( self, prefix ):
+        """Return '<prefix>_<uniqId>' and register it for removal in tearDown()."""
+        name = "%s_%s" % ( prefix, self._uniqId )
+        self._tmpFiles.append( name )
+        return name
+
+
+    def _writeDummyFile( self, prefix, lines ):
+        """Write 'lines' (each with its own newline) to a new dummy file; return its name."""
+        name = self._dummyFileName( prefix )
+        with open( name, "w" ) as handle:
+            handle.writelines( lines )
+        return name
+
+
+    def _runAsScript( self, alignFileName, chunkLength, chunkOverlap ):
+        """Launch RmvPairAlignInChunkOverlaps.py through the shell; return the output file name."""
+        obsFileName = self._dummyFileName( "dummyObsFile" )
+        cmd = "RmvPairAlignInChunkOverlaps.py"
+        cmd += " -i %s" % ( alignFileName )
+        cmd += " -l %i" % ( chunkLength )
+        cmd += " -o %i" % ( chunkOverlap )
+        cmd += " -O %s" % ( obsFileName )
+        cmd += " -v 0"
+        os.system( cmd )   # exit status ignored: callers judge the run by the output file
+        return obsFileName
+
+
+    def test_isPairAlignAChunkOverlap( self ):
+        # Chunks of length 500 overlapping by 100; each case is described by its trailing comment.
+        a = Align()
+        self._i.setChunkLength( 500 )
+        self._i.setChunkOverlap( 100 )
+        a.setFromTuple( ( "chunk1", "401", "500", "chunk2", "1", "100", "0.0", "125", "97.6" ) )
+        self.assertTrue( self._i.isPairAlignAChunkOverlap( a, 1, 2 ) )
+        # NOTE(review): this case was labelled "reverse subject" although its coordinates are
+        # identical to the previous one, so the reverse strand is not exercised -- confirm intent.
+        a.setFromTuple( ( "chunk1", "401", "500", "chunk2", "1", "100", "0.0", "125", "97.6" ) )
+        self.assertTrue( self._i.isPairAlignAChunkOverlap( a, 1, 2 ) )
+        a.setFromTuple( ( "chunk1", "401", "500", "chunk3", "1", "100", "0.0", "125", "97.6" ) )   # chunk subject not contiguous
+        self.assertFalse( self._i.isPairAlignAChunkOverlap( a, 1, 3 ) )
+        a.setFromTuple( ( "chunk1", "390", "500", "chunk2", "1", "110", "0.0", "125", "97.6" ) )   # hit longer than chunk overlap
+        self.assertFalse( self._i.isPairAlignAChunkOverlap( a, 1, 2 ) )
+        a.setFromTuple( ( "chunk2", "1", "101", "chunk1", "401", "500", "0.0", "500", "100.0" ) )   # chunk overlap returned by PALS (+1), query > subject
+        self.assertTrue( self._i.isPairAlignAChunkOverlap( a, 2, 1 ) )
+        a.setFromTuple( ( "chunk1", "401", "500", "chunk2", "1", "101", "0.0", "500", "100.0" ) )   # chunk overlap returned by PALS (+1), query < subject
+        self.assertTrue( self._i.isPairAlignAChunkOverlap( a, 1, 2 ) )
+
+
+    def test_isPairAlignWithinAndDueToAChunkOverlap( self ):
+        # Same chunk settings as above; the last case is checked with two values of the margin.
+        a = Align()
+        self._i.setChunkLength( 500 )
+        self._i.setChunkOverlap( 100 )
+        self._i._verbose = 0
+        a.setFromTuple( ( "chunk1", "411", "450", "chunk2", "11", "50", "0.0", "73", "97.6" ) )  # included, due to overlap
+        self.assertTrue( self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 ) )
+        a.setFromTuple( ( "chunk1", "411", "450", "chunk2", "50", "11", "0.0", "73", "97.6" ) )  # reverse subject
+        self.assertFalse( self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 ) )
+        a.setFromTuple( ( "chunk1", "401", "500", "chunk3", "1", "100", "0.0", "125", "97.6" ) )   # chunk subject not contiguous
+        self.assertFalse( self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 3 ) )
+        a.setFromTuple( ( "chunk1", "390", "500", "chunk2", "1", "110", "0.0", "125", "97.6" ) )   # hit longer than chunk overlap
+        self.assertFalse( self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 ) )
+        a.setFromTuple( ( "chunk1", "411", "430", "chunk2", "16", "35", "0.0", "73", "97.6" ) )   # repeat within overlap...
+        self._i._margin = 2   # ... but not due to it
+        self.assertFalse( self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 ) )
+        self._i._margin = 10   # ... and due to it
+        self.assertTrue( self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 ) )
+
+
+    def test_removeChunkOverlaps( self ):
+        # The expected output is an empty file: neither input alignment should be kept.
+        inFileName = self._dummyFileName( "dummyInFile" )
+        a = Align()
+        with open( inFileName, "w" ) as inF:
+            a.setFromTuple( ( "chunk1", "411", "490", "chunk2", "11", "90", "0.0", "73", "97.6" ) )
+            a.write( inF )
+            a.setFromTuple( ( "chunk1", "1", "500", "chunk1", "1", "500", "0.0", "500", "100.0" ) )
+            a.write( inF )
+        self._i.setInputFileName( inFileName )
+        self._i.setChunkLength( 500 )
+        self._i.setChunkOverlap( 100 )
+        obsFileName = self._dummyFileName( "dummyObsFile" )
+        self._i.setOutputFileName( obsFileName )
+        self._i.removeChunkOverlaps()   # its return value is not used by this test
+        expFileName = self._writeDummyFile( "dummyExpFile", [] )   # empty file
+        self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )
+
+
+    def test_zRunAsScript( self ):
+        # The trailing comment of each input line tells what the script should do with it.
+        alignFileName = self._writeDummyFile( "dummyInFile", [
+            "chunk1\t401\t500\tchunk2\t1\t100\t0.0\t131\t100.0\n",   # remove
+            "chunk2\t1\t100\tchunk1\t401\t500\t0.0\t132\t100.0\n",   # remove
+            "chunk1\t401\t500\tchunk3\t1\t100\t0.0\t132\t100.0\n",   # keep because non-contiguous chunks
+            "chunk3\t401\t500\tchunk1\t1\t100\t0.0\t132\t100.0\n",   # keep because non-contiguous chunks
+            "chunk1\t401\t500\tchunk2\t100\t1\t0.0\t132\t100.0\n",   # keep because within overlap but reverse
+            "chunk1\t431\t490\tchunk2\t31\t90\t0.0\t132\t100.0\n",   # remove because within and due to overlap
+            "chunk1\t411\t430\tchunk2\t61\t90\t0.0\t132\t100.0\n",   # keep because within but not due to overlap
+            "chunk1\t390\t500\tchunk2\t1\t100\t0.0\t132\t100.0\n",   # keep because longer HSP on query
+            ] )
+        expFileName = self._writeDummyFile( "dummyExpFile", [
+            "chunk1\t401\t500\tchunk3\t1\t100\t0\t132\t100.000000\n",
+            "chunk3\t401\t500\tchunk1\t1\t100\t0\t132\t100.000000\n",
+            "chunk1\t401\t500\tchunk2\t100\t1\t0\t132\t100.000000\n",
+            "chunk1\t411\t430\tchunk2\t61\t90\t0\t132\t100.000000\n",
+            "chunk1\t390\t500\tchunk2\t1\t100\t0\t132\t100.000000\n",
+            ] )
+
+        obsFileName = self._runAsScript( alignFileName, 500, 100 )
+        self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )
+
+
+    def test_zRunAsScript_bug_to_fix( self ):
+        # NOTE(review): judging by its name this case documents a known bug -- presumably still failing.
+        alignFileName = self._writeDummyFile( "dummyInFile", [
+            "chunk4\t63217\t63680\tchunk5\t195316\t194837\t0\t676\t92.71\n",
+            "chunk4\t63217\t63680\tchunk6\t2618\t3101\t0\t714\t93.6\n",
+            "chunk4\t63217\t63680\tchunk6\t5316\t4837\t0\t676\t92.71\n",
+            ] )
+        expFileName = self._writeDummyFile( "dummyExpFile", [
+            "chunk4\t63217\t63680\tchunk5\t195316\t194837\t0\t676\t92.71\n",
+            "chunk4\t63217\t63680\tchunk6\t2618\t3101\t0\t714\t93.6\n",
+            ] )
+
+        obsFileName = self._runAsScript( alignFileName, 200000, 10000 )
+        self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )
+        
+if __name__ == "__main__":   # run as a script: execute the tests of this module
+    unittest.main()
\ No newline at end of file