Mercurial > repos > yufei-luo > s_mart
diff commons/tools/tests/Test_RmvPairAlignInChunkOverlaps.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
line wrap: on
line diff
# Unified-diff header of the changeset view this file was recovered from
# (the whole file is an addition):
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/commons/tools/tests/Test_RmvPairAlignInChunkOverlaps.py Mon Apr 29 03:20:15 2013 -0400
#   @@ -0,0 +1,161 @@
import unittest
import os
import time
from commons.core.coord.Align import Align
from commons.core.utils.FileUtils import FileUtils
from commons.tools.RmvPairAlignInChunkOverlaps import RmvPairAlignInChunkOverlaps


class Test_RmvPairAlignInChunkOverlaps( unittest.TestCase ):
    """Tests for RmvPairAlignInChunkOverlaps.

    Covers the two predicates (isPairAlignAChunkOverlap and
    isPairAlignWithinAndDueToAChunkOverlap), the removeChunkOverlaps() method
    and the command-line script "RmvPairAlignInChunkOverlaps.py".

    The tests create temporary files named "dummy*File_<timestamp>" in the
    current working directory and delete them afterwards, even on failure.
    """

    def setUp( self ):
        """Create a fresh instance under test and a timestamp-based file suffix."""
        self._i = RmvPairAlignInChunkOverlaps()
        self._uniqId = "%s" % ( time.strftime("%Y%m%d%H%M%S") )


    def tearDown( self ):
        """Drop the references created by setUp()."""
        self._i = None
        self._uniqId = None


    def _removeFiles( self, lFileNames ):
        """Delete each listed file if it exists.

        Missing files are skipped so that cleanup after a failed test does not
        raise and hide the original failure.
        """
        for fileName in lFileNames:
            if os.path.exists( fileName ):
                os.remove( fileName )


    def _writeLines( self, fileName, lLines ):
        """Write the given lines (already newline-terminated) into 'fileName'."""
        with open( fileName, "w" ) as handle:
            for line in lLines:
                handle.write( line )


    def _checkScript( self, lInputLines, lExpLines, chunkLength, chunkOverlap ):
        """Run the script on 'lInputLines' and compare its output to 'lExpLines'.

        The script is launched through os.system() with options -i (input
        file), -l (chunk length), -o (chunk overlap), -O (output file) and
        -v 0. Temporary files are removed and the working directory restored
        whether or not the comparison succeeds.
        """
        cDir = os.getcwd()
        alignFileName = "dummyInFile_%s" % ( self._uniqId )
        expFileName = "dummyExpFile_%s" % ( self._uniqId )
        obsFileName = "dummyObsFile_%s" % ( self._uniqId )
        try:
            self._writeLines( alignFileName, lInputLines )
            self._writeLines( expFileName, lExpLines )

            cmd = "RmvPairAlignInChunkOverlaps.py"
            cmd += " -i %s" % ( alignFileName )
            cmd += " -l %i" % ( chunkLength )
            cmd += " -o %i" % ( chunkOverlap )
            cmd += " -O %s" % ( obsFileName )
            cmd += " -v 0"
            os.system( cmd )

            self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )
        finally:
            self._removeFiles( [ alignFileName, expFileName, obsFileName ] )
            os.chdir( cDir )


    def test_isPairAlignAChunkOverlap( self ):
        """Check isPairAlignAChunkOverlap() with chunk length 500 and overlap 100."""
        self._i.setChunkLength( 500 )
        self._i.setChunkOverlap( 100 )
        # each case: ( alignment tuple, query chunk id, subject chunk id, expected, description )
        lCases = [
            ( ( "chunk1", "401", "500", "chunk2", "1", "100", "0.0", "125", "97.6" ),
              1, 2, True, "exact chunk overlap" ),
            # NOTE(review): this tuple is identical to the previous one although
            # it is labelled "reverse subject" -- confirm the intended coordinates.
            ( ( "chunk1", "401", "500", "chunk2", "1", "100", "0.0", "125", "97.6" ),
              1, 2, True, "reverse subject" ),
            ( ( "chunk1", "401", "500", "chunk3", "1", "100", "0.0", "125", "97.6" ),
              1, 3, False, "chunk subject not contiguous" ),
            ( ( "chunk1", "390", "500", "chunk2", "1", "110", "0.0", "125", "97.6" ),
              1, 2, False, "hit longer than chunk overlap" ),
            ( ( "chunk2", "1", "101", "chunk1", "401", "500", "0.0", "500", "100.0" ),
              2, 1, True, "chunk overlap returned by PALS (+1), query > subject" ),
            ( ( "chunk1", "401", "500", "chunk2", "1", "101", "0.0", "500", "100.0" ),
              1, 2, True, "chunk overlap returned by PALS (+1), query < subject" ),
            ]
        a = Align()
        for tAlign, idQuery, idSubject, isExpected, description in lCases:
            a.setFromTuple( tAlign )
            obs = self._i.isPairAlignAChunkOverlap( a, idQuery, idSubject )
            if isExpected:
                self.assertTrue( obs, description )
            else:
                self.assertFalse( obs, description )


    def test_isPairAlignWithinAndDueToAChunkOverlap( self ):
        """Check isPairAlignWithinAndDueToAChunkOverlap(), including the margin."""
        a = Align()
        self._i.setChunkLength( 500 )
        self._i.setChunkOverlap( 100 )
        self._i._verbose = 0

        # included, due to overlap
        a.setFromTuple( ( "chunk1", "411", "450", "chunk2", "11", "50", "0.0", "73", "97.6" ) )
        self.assertTrue( self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 ) )

        # reverse subject
        a.setFromTuple( ( "chunk1", "411", "450", "chunk2", "50", "11", "0.0", "73", "97.6" ) )
        self.assertFalse( self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 ) )

        # chunk subject not contiguous
        a.setFromTuple( ( "chunk1", "401", "500", "chunk3", "1", "100", "0.0", "125", "97.6" ) )
        self.assertFalse( self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 3 ) )

        # hit longer than chunk overlap
        a.setFromTuple( ( "chunk1", "390", "500", "chunk2", "1", "110", "0.0", "125", "97.6" ) )
        self.assertFalse( self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 ) )

        # repeat within overlap...
        a.setFromTuple( ( "chunk1", "411", "430", "chunk2", "16", "35", "0.0", "73", "97.6" ) )
        self._i._margin = 2    # ... but not due to it
        self.assertFalse( self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 ) )
        self._i._margin = 10   # ... and due to it
        self.assertTrue( self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 ) )


    def test_removeChunkOverlaps( self ):
        """removeChunkOverlaps() is expected to write an empty output file here."""
        inFileName = "dummyInFile_%s" % ( self._uniqId )
        obsFileName = "dummyObsFile_%s" % ( self._uniqId )
        expFileName = "dummyExpFile_%s" % ( self._uniqId )
        try:
            a = Align()
            with open( inFileName, "w" ) as inF:
                a.setFromTuple( ( "chunk1", "411", "490", "chunk2", "11", "90", "0.0", "73", "97.6" ) )
                a.write( inF )
                a.setFromTuple( ( "chunk1", "1", "500", "chunk1", "1", "500", "0.0", "500", "100.0" ) )
                a.write( inF )

            self._i.setInputFileName( inFileName )
            self._i.setChunkLength( 500 )
            self._i.setChunkOverlap( 100 )
            self._i.setOutputFileName( obsFileName )
            self._i.removeChunkOverlaps()

            # the expected output is an empty file
            with open( expFileName, "w" ) as expF:
                expF.write( "" )

            self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )
        finally:
            self._removeFiles( [ inFileName, obsFileName, expFileName ] )


    def test_zRunAsScript( self ):
        """Run the script with chunk length 500 and overlap 100."""
        lInputLines = [
            "chunk1\t401\t500\tchunk2\t1\t100\t0.0\t131\t100.0\n",   # remove
            "chunk2\t1\t100\tchunk1\t401\t500\t0.0\t132\t100.0\n",   # remove
            "chunk1\t401\t500\tchunk3\t1\t100\t0.0\t132\t100.0\n",   # keep because non-contiguous chunks
            "chunk3\t401\t500\tchunk1\t1\t100\t0.0\t132\t100.0\n",   # keep because non-contiguous chunks
            "chunk1\t401\t500\tchunk2\t100\t1\t0.0\t132\t100.0\n",   # keep because within overlap but reverse
            "chunk1\t431\t490\tchunk2\t31\t90\t0.0\t132\t100.0\n",   # remove because within and due to overlap
            "chunk1\t411\t430\tchunk2\t61\t90\t0.0\t132\t100.0\n",   # keep because within but not due to overlap
            "chunk1\t390\t500\tchunk2\t1\t100\t0.0\t132\t100.0\n",   # keep because longer HSP on query
            ]
        lExpLines = [
            "chunk1\t401\t500\tchunk3\t1\t100\t0\t132\t100.000000\n",
            "chunk3\t401\t500\tchunk1\t1\t100\t0\t132\t100.000000\n",
            "chunk1\t401\t500\tchunk2\t100\t1\t0\t132\t100.000000\n",
            "chunk1\t411\t430\tchunk2\t61\t90\t0\t132\t100.000000\n",
            "chunk1\t390\t500\tchunk2\t1\t100\t0\t132\t100.000000\n",
            ]
        self._checkScript( lInputLines, lExpLines, 500, 100 )


    def test_zRunAsScript_bug_to_fix( self ):
        """Run the script with chunk length 200000 and overlap 10000.

        Only the first two input lines are expected in the output.

        NOTE(review): the expected lines keep the raw values ("92.71", "93.6"),
        whereas test_zRunAsScript expects reformatted output ("100.000000") --
        confirm which format the script actually writes.
        """
        lInputLines = [
            "chunk4\t63217\t63680\tchunk5\t195316\t194837\t0\t676\t92.71\n",
            "chunk4\t63217\t63680\tchunk6\t2618\t3101\t0\t714\t93.6\n",
            "chunk4\t63217\t63680\tchunk6\t5316\t4837\t0\t676\t92.71\n",
            ]
        lExpLines = [
            "chunk4\t63217\t63680\tchunk5\t195316\t194837\t0\t676\t92.71\n",
            "chunk4\t63217\t63680\tchunk6\t2618\t3101\t0\t714\t93.6\n",
            ]
        self._checkScript( lInputLines, lExpLines, 200000, 10000 )


if __name__ == "__main__":
    unittest.main()