18
|
1 import unittest
|
|
2 import os
|
|
3 import time
|
|
4 from commons.core.coord.Align import Align
|
|
5 from commons.core.utils.FileUtils import FileUtils
|
|
6 from commons.tools.RmvPairAlignInChunkOverlaps import RmvPairAlignInChunkOverlaps
|
|
7
|
|
8
|
|
class Test_RmvPairAlignInChunkOverlaps( unittest.TestCase ):
    """
    Unit tests for RmvPairAlignInChunkOverlaps.

    Covers the two predicates (isPairAlignAChunkOverlap and
    isPairAlignWithinAndDueToAChunkOverlap), the removeChunkOverlaps() method
    and the RmvPairAlignInChunkOverlaps.py command-line script.

    Temporary files are created in the current working directory. Each one is
    registered in self._tmpFiles and deleted in tearDown(), so nothing is left
    behind when an assertion fails.

    NOTE(review): the 9-field tuples given to Align.setFromTuple() presumably
    are (query, qStart, qEnd, subject, sStart, sEnd, e-value, score, identity)
    -- confirm against Align.
    """

    def setUp( self ):
        """Create the object under test and an empty temporary-file registry."""
        self._i = RmvPairAlignInChunkOverlaps()
        # timestamp + PID, so that two runs started within the same second in
        # the same directory do not share file names
        self._uniqId = "%s_%i" % ( time.strftime("%Y%m%d%H%M%S"), os.getpid() )
        self._tmpFiles = []


    def tearDown( self ):
        """Delete every registered temporary file, then drop the fixtures."""
        for f in self._tmpFiles:
            # a file may be missing if the test failed before creating it
            if os.path.exists( f ):
                os.remove( f )
        self._tmpFiles = []
        self._i = None
        self._uniqId = None


    def _tmpFileName( self, prefix ):
        """Return '<prefix>_<uniqId>' after registering it for removal in tearDown()."""
        fileName = "%s_%s" % ( prefix, self._uniqId )
        self._tmpFiles.append( fileName )
        return fileName


    def _writeTmpFile( self, prefix, lines=() ):
        """
        Create a registered temporary file holding the given lines.

        @param prefix: first part of the file name (the unique id is appended)
        @param lines: iterable of strings written as is (newlines included);
                      empty by default, which yields an empty file
        @return: name of the file
        """
        fileName = self._tmpFileName( prefix )
        with open( fileName, "w" ) as handle:
            handle.writelines( lines )
        return fileName


    def _runScript( self, inFileName, chunkLength, chunkOverlap, outFileName ):
        """
        Launch RmvPairAlignInChunkOverlaps.py through the shell (it has to be
        reachable via the PATH) and fail with an explicit message when it did
        not produce its output file.
        """
        cmd = "RmvPairAlignInChunkOverlaps.py"
        cmd += " -i %s" % ( inFileName )
        cmd += " -l %i" % ( chunkLength )
        cmd += " -o %i" % ( chunkOverlap )
        cmd += " -O %s" % ( outFileName )
        cmd += " -v 0"
        exitStatus = os.system( cmd )
        self.assertTrue( os.path.exists( outFileName ),
                         "'%s' produced no output file (exit status: %s)" % ( cmd, exitStatus ) )


    def test_isPairAlignAChunkOverlap( self ):
        self._i.setChunkLength( 500 )
        self._i.setChunkOverlap( 100 )
        a = Align()

        a.setFromTuple( ( "chunk1", "401", "500", "chunk2", "1", "100", "0.0", "125", "97.6" ) )
        obs = self._i.isPairAlignAChunkOverlap( a, 1, 2 )
        self.assertTrue( obs )

        # NOTE(review): labelled "reverse subject", yet the coordinates are the
        # same as in the case above (subject "1".."100", not "100".."1"), so
        # this only repeats the previous check -- confirm the intended data
        # and the expected result before changing it.
        a.setFromTuple( ( "chunk1", "401", "500", "chunk2", "1", "100", "0.0", "125", "97.6" ) ) # reverse subject
        obs = self._i.isPairAlignAChunkOverlap( a, 1, 2 )
        self.assertTrue( obs )

        a.setFromTuple( ( "chunk1", "401", "500", "chunk3", "1", "100", "0.0", "125", "97.6" ) ) # chunk subject not contiguous
        obs = self._i.isPairAlignAChunkOverlap( a, 1, 3 )
        self.assertFalse( obs )

        a.setFromTuple( ( "chunk1", "390", "500", "chunk2", "1", "110", "0.0", "125", "97.6" ) ) # hit longer than chunk overlap
        obs = self._i.isPairAlignAChunkOverlap( a, 1, 2 )
        self.assertFalse( obs )

        a.setFromTuple( ( "chunk2", "1", "101", "chunk1", "401", "500", "0.0", "500", "100.0" ) ) # chunk overlap returned by PALS (+1), query > subject
        obs = self._i.isPairAlignAChunkOverlap( a, 2, 1 )
        self.assertTrue( obs )

        a.setFromTuple( ( "chunk1", "401", "500", "chunk2", "1", "101", "0.0", "500", "100.0" ) ) # chunk overlap returned by PALS (+1), query < subject
        obs = self._i.isPairAlignAChunkOverlap( a, 1, 2 )
        self.assertTrue( obs )


    def test_isPairAlignWithinAndDueToAChunkOverlap( self ):
        self._i.setChunkLength( 500 )
        self._i.setChunkOverlap( 100 )
        self._i._verbose = 0
        a = Align()

        a.setFromTuple( ( "chunk1", "411", "450", "chunk2", "11", "50", "0.0", "73", "97.6" ) ) # included, due to overlap
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
        self.assertTrue( obs )

        a.setFromTuple( ( "chunk1", "411", "450", "chunk2", "50", "11", "0.0", "73", "97.6" ) ) # reverse subject
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
        self.assertFalse( obs )

        a.setFromTuple( ( "chunk1", "401", "500", "chunk3", "1", "100", "0.0", "125", "97.6" ) ) # chunk subject not contiguous
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 3 )
        self.assertFalse( obs )

        a.setFromTuple( ( "chunk1", "390", "500", "chunk2", "1", "110", "0.0", "125", "97.6" ) ) # hit longer than chunk overlap
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
        self.assertFalse( obs )

        # same alignment checked twice, only the margin differs
        a.setFromTuple( ( "chunk1", "411", "430", "chunk2", "16", "35", "0.0", "73", "97.6" ) ) # repeat within overlap...
        self._i._margin = 2 # ... but not due to it
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
        self.assertFalse( obs )
        self._i._margin = 10 # ... and due to it
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
        self.assertTrue( obs )


    def test_removeChunkOverlaps( self ):
        inFileName = self._tmpFileName( "dummyInFile" )
        a = Align()
        # 'with' closes the handle even if writing an alignment raises
        with open( inFileName, "w" ) as inF:
            a.setFromTuple( ( "chunk1", "411", "490", "chunk2", "11", "90", "0.0", "73", "97.6" ) )
            a.write( inF )
            a.setFromTuple( ( "chunk1", "1", "500", "chunk1", "1", "500", "0.0", "500", "100.0" ) )
            a.write( inF )

        self._i.setInputFileName( inFileName )
        self._i.setChunkLength( 500 )
        self._i.setChunkOverlap( 100 )
        obsFileName = self._tmpFileName( "dummyObsFile" )
        self._i.setOutputFileName( obsFileName )
        self._i.removeChunkOverlaps()

        # the expected output is an empty file
        expFileName = self._writeTmpFile( "dummyExpFile" )

        self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )


    def test_zRunAsScript( self ):
        cDir = os.getcwd()
        try:
            alignFileName = self._writeTmpFile( "dummyInFile", [
                "chunk1\t401\t500\tchunk2\t1\t100\t0.0\t131\t100.0\n", # remove
                "chunk2\t1\t100\tchunk1\t401\t500\t0.0\t132\t100.0\n", # remove
                "chunk1\t401\t500\tchunk3\t1\t100\t0.0\t132\t100.0\n", # keep because non-contiguous chunks
                "chunk3\t401\t500\tchunk1\t1\t100\t0.0\t132\t100.0\n", # keep because non-contiguous chunks
                "chunk1\t401\t500\tchunk2\t100\t1\t0.0\t132\t100.0\n", # keep because within overlap but reverse
                "chunk1\t431\t490\tchunk2\t31\t90\t0.0\t132\t100.0\n", # remove because within and due to overlap
                "chunk1\t411\t430\tchunk2\t61\t90\t0.0\t132\t100.0\n", # keep because within but not due to overlap
                "chunk1\t390\t500\tchunk2\t1\t100\t0.0\t132\t100.0\n", # keep because longer HSP on query
                ] )

            # the kept lines, with e-value and identity as the script is
            # expected to print them ("0" and "100.000000")
            expFileName = self._writeTmpFile( "dummyExpFile", [
                "chunk1\t401\t500\tchunk3\t1\t100\t0\t132\t100.000000\n",
                "chunk3\t401\t500\tchunk1\t1\t100\t0\t132\t100.000000\n",
                "chunk1\t401\t500\tchunk2\t100\t1\t0\t132\t100.000000\n",
                "chunk1\t411\t430\tchunk2\t61\t90\t0\t132\t100.000000\n",
                "chunk1\t390\t500\tchunk2\t1\t100\t0\t132\t100.000000\n",
                ] )

            obsFileName = self._tmpFileName( "dummyObsFile" )
            self._runScript( alignFileName, 500, 100, obsFileName )

            self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )
        finally:
            # restore the working directory whatever the outcome
            os.chdir( cDir )


    def test_zRunAsScript_bug_to_fix(self):
        cDir = os.getcwd()
        try:
            alignFileName = self._writeTmpFile( "dummyInFile", [
                "chunk4\t63217\t63680\tchunk5\t195316\t194837\t0\t676\t92.71\n",
                "chunk4\t63217\t63680\tchunk6\t2618\t3101\t0\t714\t93.6\n",
                "chunk4\t63217\t63680\tchunk6\t5316\t4837\t0\t676\t92.71\n",
                ] )

            # only the first two alignments are expected in the output
            expFileName = self._writeTmpFile( "dummyExpFile", [
                "chunk4\t63217\t63680\tchunk5\t195316\t194837\t0\t676\t92.71\n",
                "chunk4\t63217\t63680\tchunk6\t2618\t3101\t0\t714\t93.6\n",
                ] )

            obsFileName = self._tmpFileName( "dummyObsFile" )
            self._runScript( alignFileName, 200000, 10000, obsFileName )

            self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )
        finally:
            # restore the working directory whatever the outcome
            os.chdir( cDir )
|
|
159
|
|
# Run the whole test suite when this module is executed as a script.
if __name__ == "__main__":
    unittest.main()