Mercurial > repos > yufei-luo > s_mart
view commons/tools/tests/Test_RmvPairAlignInChunkOverlaps.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
line wrap: on
line source
import os
import time
import unittest

from commons.core.coord.Align import Align
from commons.core.utils.FileUtils import FileUtils
from commons.tools.RmvPairAlignInChunkOverlaps import RmvPairAlignInChunkOverlaps


class Test_RmvPairAlignInChunkOverlaps( unittest.TestCase ):
    """
    Tests for RmvPairAlignInChunkOverlaps: the two predicates deciding whether
    a pairwise alignment comes from a chunk overlap, the file-level filtering
    method, and the command-line script.

    Temporary files are registered via _tmpFileName() and deleted in
    tearDown(), so that they are removed even when an assertion fails.
    """

    def setUp( self ):
        self._i = RmvPairAlignInChunkOverlaps()
        # The pid is appended because the timestamp alone has a one-second
        # resolution, hence would collide between simultaneous runs.
        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S"), os.getpid() )
        self._tmpFiles = []

    def tearDown( self ):
        # A file may be missing if the test failed before creating it.
        for f in self._tmpFiles:
            if os.path.exists( f ):
                os.remove( f )
        self._tmpFiles = None
        self._i = None
        self._uniqId = None

    def _tmpFileName( self, prefix ):
        """
        Return '<prefix>_<uniqId>' and register it for removal in tearDown().
        """
        fileName = "%s_%s" % ( prefix, self._uniqId )
        self._tmpFiles.append( fileName )
        return fileName

    def _writeLines( self, fileName, lines ):
        """
        Create (or truncate) 'fileName' and write the given lines into it.
        An empty list produces an empty file.
        """
        with open( fileName, "w" ) as handle:
            handle.writelines( lines )

    def _checkCases( self, method, align, lCases ):
        """
        Load each tuple into 'align', call 'method' on it and check the result.

        @param method: bound predicate taking (align, chunkQuery, chunkSubject)
        @param align: Align instance, re-used for every case
        @param lCases: list of (fields, chunkQuery, chunkSubject, expected, label)
        """
        for fields, chunkQuery, chunkSubject, expected, label in lCases:
            align.setFromTuple( fields )
            obs = method( align, chunkQuery, chunkSubject )
            if expected:
                self.assertTrue( obs, label )
            else:
                self.assertFalse( obs, label )

    def _runScript( self, inFileName, outFileName, chunkLength, chunkOverlap ):
        """
        Launch 'RmvPairAlignInChunkOverlaps.py' through the shell and return
        the status given by os.system().
        """
        cmd = "RmvPairAlignInChunkOverlaps.py"
        cmd += " -i %s" % ( inFileName )
        cmd += " -l %i" % ( chunkLength )
        cmd += " -o %i" % ( chunkOverlap )
        cmd += " -O %s" % ( outFileName )
        cmd += " -v 0"
        return os.system( cmd )

    def _checkScript( self, lInputLines, lExpLines, chunkLength, chunkOverlap ):
        """
        Write the input and expected files, run the script, and check that the
        file it produces is identical to the expected one.
        """
        alignFileName = self._tmpFileName( "dummyInFile" )
        expFileName = self._tmpFileName( "dummyExpFile" )
        obsFileName = self._tmpFileName( "dummyObsFile" )
        self._writeLines( alignFileName, lInputLines )
        self._writeLines( expFileName, lExpLines )
        status = self._runScript( alignFileName, obsFileName, chunkLength, chunkOverlap )
        # The status is only reported, not asserted, to ease diagnosis.
        self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ),
                         "observed file differs from expected file (script status: %s)" % ( status ) )

    def test_isPairAlignAChunkOverlap( self ):
        self._i.setChunkLength( 500 )
        self._i.setChunkOverlap( 100 )
        lCases = [
            ( ( "chunk1", "401", "500", "chunk2", "1", "100", "0.0", "125", "97.6" ),
              1, 2, True, "exact chunk overlap" ),
            # NOTE(review): this case was labelled "reverse subject" but its
            # tuple is identical to the previous one (subject is 1..100, not
            # 100..1); confirm the intended coordinates and expectation.
            ( ( "chunk1", "401", "500", "chunk2", "1", "100", "0.0", "125", "97.6" ),
              1, 2, True, "reverse subject" ),
            ( ( "chunk1", "401", "500", "chunk3", "1", "100", "0.0", "125", "97.6" ),
              1, 3, False, "chunk subject not contiguous" ),
            ( ( "chunk1", "390", "500", "chunk2", "1", "110", "0.0", "125", "97.6" ),
              1, 2, False, "hit longer than chunk overlap" ),
            ( ( "chunk2", "1", "101", "chunk1", "401", "500", "0.0", "500", "100.0" ),
              2, 1, True, "chunk overlap returned by PALS (+1), query > subject" ),
            ( ( "chunk1", "401", "500", "chunk2", "1", "101", "0.0", "500", "100.0" ),
              1, 2, True, "chunk overlap returned by PALS (+1), query < subject" ),
            ]
        self._checkCases( self._i.isPairAlignAChunkOverlap, Align(), lCases )

    def test_isPairAlignWithinAndDueToAChunkOverlap( self ):
        a = Align()
        method = self._i.isPairAlignWithinAndDueToAChunkOverlap
        self._i.setChunkLength( 500 )
        self._i.setChunkOverlap( 100 )
        self._i._verbose = 0
        lCases = [
            ( ( "chunk1", "411", "450", "chunk2", "11", "50", "0.0", "73", "97.6" ),
              1, 2, True, "included, due to overlap" ),
            ( ( "chunk1", "411", "450", "chunk2", "50", "11", "0.0", "73", "97.6" ),
              1, 2, False, "reverse subject" ),
            ( ( "chunk1", "401", "500", "chunk3", "1", "100", "0.0", "125", "97.6" ),
              1, 3, False, "chunk subject not contiguous" ),
            ( ( "chunk1", "390", "500", "chunk2", "1", "110", "0.0", "125", "97.6" ),
              1, 2, False, "hit longer than chunk overlap" ),
            ]
        self._checkCases( method, a, lCases )

        # Same alignment checked with two margins: the verdict depends on it.
        a.setFromTuple( ( "chunk1", "411", "430", "chunk2", "16", "35", "0.0", "73", "97.6" ) )
        self._i._margin = 2
        self.assertFalse( method( a, 1, 2 ), "repeat within overlap but not due to it" )
        self._i._margin = 10
        self.assertTrue( method( a, 1, 2 ), "repeat within overlap and due to it" )

    def test_removeChunkOverlaps( self ):
        inFileName = self._tmpFileName( "dummyInFile" )
        obsFileName = self._tmpFileName( "dummyObsFile" )
        expFileName = self._tmpFileName( "dummyExpFile" )

        a = Align()
        with open( inFileName, "w" ) as inF:
            a.setFromTuple( ( "chunk1", "411", "490", "chunk2", "11", "90", "0.0", "73", "97.6" ) )
            a.write( inF )
            a.setFromTuple( ( "chunk1", "1", "500", "chunk1", "1", "500", "0.0", "500", "100.0" ) )
            a.write( inF )

        self._i.setInputFileName( inFileName )
        self._i.setChunkLength( 500 )
        self._i.setChunkOverlap( 100 )
        self._i.setOutputFileName( obsFileName )
        self._i.removeChunkOverlaps()

        # Both alignments are expected to be discarded: empty output.
        self._writeLines( expFileName, [] )
        self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )

    def test_zRunAsScript( self ):
        lInputLines = [
            "chunk1\t401\t500\tchunk2\t1\t100\t0.0\t131\t100.0\n",   # remove
            "chunk2\t1\t100\tchunk1\t401\t500\t0.0\t132\t100.0\n",   # remove
            "chunk1\t401\t500\tchunk3\t1\t100\t0.0\t132\t100.0\n",   # keep because non-contiguous chunks
            "chunk3\t401\t500\tchunk1\t1\t100\t0.0\t132\t100.0\n",   # keep because non-contiguous chunks
            "chunk1\t401\t500\tchunk2\t100\t1\t0.0\t132\t100.0\n",   # keep because within overlap but reverse
            "chunk1\t431\t490\tchunk2\t31\t90\t0.0\t132\t100.0\n",   # remove because within and due to overlap
            "chunk1\t411\t430\tchunk2\t61\t90\t0.0\t132\t100.0\n",   # keep because within but not due to overlap
            "chunk1\t390\t500\tchunk2\t1\t100\t0.0\t132\t100.0\n",   # keep because longer HSP on query
            ]
        lExpLines = [
            "chunk1\t401\t500\tchunk3\t1\t100\t0\t132\t100.000000\n",
            "chunk3\t401\t500\tchunk1\t1\t100\t0\t132\t100.000000\n",
            "chunk1\t401\t500\tchunk2\t100\t1\t0\t132\t100.000000\n",
            "chunk1\t411\t430\tchunk2\t61\t90\t0\t132\t100.000000\n",
            "chunk1\t390\t500\tchunk2\t1\t100\t0\t132\t100.000000\n",
            ]
        self._checkScript( lInputLines, lExpLines, 500, 100 )

    def test_zRunAsScript_bug_to_fix( self ):
        # NOTE(review): identities are expected here as "92.71" / "93.6"
        # whereas test_zRunAsScript expects six decimals ("100.000000");
        # confirm which formatting the script is supposed to produce.
        lInputLines = [
            "chunk4\t63217\t63680\tchunk5\t195316\t194837\t0\t676\t92.71\n",
            "chunk4\t63217\t63680\tchunk6\t2618\t3101\t0\t714\t93.6\n",
            "chunk4\t63217\t63680\tchunk6\t5316\t4837\t0\t676\t92.71\n",
            ]
        lExpLines = [
            "chunk4\t63217\t63680\tchunk5\t195316\t194837\t0\t676\t92.71\n",
            "chunk4\t63217\t63680\tchunk6\t2618\t3101\t0\t714\t93.6\n",
            ]
        self._checkScript( lInputLines, lExpLines, 200000, 10000 )


if __name__ == "__main__":
    unittest.main()