comparison commons/tools/tests/Test_RmvPairAlignInChunkOverlaps.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
comparison
equal deleted inserted replaced
17:b0e8584489e6 18:94ab73e8a190
1 import unittest
2 import os
3 import time
4 from commons.core.coord.Align import Align
5 from commons.core.utils.FileUtils import FileUtils
6 from commons.tools.RmvPairAlignInChunkOverlaps import RmvPairAlignInChunkOverlaps
7
8
class Test_RmvPairAlignInChunkOverlaps( unittest.TestCase ):
    """
    Tests for RmvPairAlignInChunkOverlaps.

    The first tests call methods of the class directly; the 'test_zRunAsScript*'
    tests launch 'RmvPairAlignInChunkOverlaps.py' through the shell, so the
    script has to be reachable via PATH for them to succeed.

    Every test that writes files puts them in the current working directory and
    removes them in a 'finally' clause, so nothing is left behind when an
    assertion fails or when the program under test raises.
    """

    def setUp( self ):
        """
        Build a fresh instance of the class under test and a timestamp
        (one-second resolution) used as suffix of the temporary file names.
        """
        self._i = RmvPairAlignInChunkOverlaps()
        self._uniqId = "%s" % ( time.strftime("%Y%m%d%H%M%S") )


    def tearDown( self ):
        """
        Drop the references created in setUp().
        """
        self._i = None
        self._uniqId = None


    @staticmethod
    def _writeLines( fileName, lines ):
        """
        Create (or truncate) 'fileName' and write the strings of 'lines' verbatim.
        An empty list produces an empty file. The handle is closed even if a
        write fails.
        """
        with open( fileName, "w" ) as handle:
            handle.writelines( lines )


    @staticmethod
    def _removeFiles( fileNames ):
        """
        Remove each file of 'fileNames' that exists.
        Missing files are skipped so that cleaning up after a failure does not
        raise a second error hiding the original one.
        """
        for fileName in fileNames:
            if os.path.exists( fileName ):
                os.remove( fileName )


    def _runScript( self, alignFileName, chunkLength, chunkOverlap, obsFileName ):
        """
        Launch 'RmvPairAlignInChunkOverlaps.py' via the shell with verbosity 0.

        @param alignFileName: value given to option '-i'
        @param chunkLength: value given to option '-l'
        @param chunkOverlap: value given to option '-o'
        @param obsFileName: value given to option '-O'
        """
        cmd = "RmvPairAlignInChunkOverlaps.py"
        cmd += " -i %s" % ( alignFileName )
        cmd += " -l %s" % ( chunkLength )
        cmd += " -o %s" % ( chunkOverlap )
        cmd += " -O %s" % ( obsFileName )
        cmd += " -v 0"
        # the exit status is deliberately not asserted: the verdict of the test
        # is the comparison of the output file with the expected one
        os.system( cmd )


    def test_isPairAlignAChunkOverlap( self ):
        """
        Check isPairAlignAChunkOverlap() with chunks of 500 bp overlapping by 100 bp.
        """
        a = Align()
        a.setFromTuple( ( "chunk1", "401", "500", "chunk2", "1", "100", "0.0", "125", "97.6" ) )
        self._i.setChunkLength( 500 )
        self._i.setChunkOverlap( 100 )
        obs = self._i.isPairAlignAChunkOverlap( a, 1, 2 )
        self.assertTrue( obs )

        # NOTE(review): this case was labelled "reverse subject", but the tuple is
        # identical to the previous one (subject 1..100 is not reversed), so it
        # currently repeats the first check -- confirm the intended input and
        # expected result
        a.setFromTuple( ( "chunk1", "401", "500", "chunk2", "1", "100", "0.0", "125", "97.6" ) )
        obs = self._i.isPairAlignAChunkOverlap( a, 1, 2 )
        self.assertTrue( obs )

        # chunk subject not contiguous
        a.setFromTuple( ( "chunk1", "401", "500", "chunk3", "1", "100", "0.0", "125", "97.6" ) )
        obs = self._i.isPairAlignAChunkOverlap( a, 1, 3 )
        self.assertFalse( obs )

        # hit longer than chunk overlap
        a.setFromTuple( ( "chunk1", "390", "500", "chunk2", "1", "110", "0.0", "125", "97.6" ) )
        obs = self._i.isPairAlignAChunkOverlap( a, 1, 2 )
        self.assertFalse( obs )

        # chunk overlap returned by PALS (+1), query > subject
        a.setFromTuple( ( "chunk2", "1", "101", "chunk1", "401", "500", "0.0", "500", "100.0" ) )
        obs = self._i.isPairAlignAChunkOverlap( a, 2, 1 )
        self.assertTrue( obs )

        # chunk overlap returned by PALS (+1), query < subject
        a.setFromTuple( ( "chunk1", "401", "500", "chunk2", "1", "101", "0.0", "500", "100.0" ) )
        obs = self._i.isPairAlignAChunkOverlap( a, 1, 2 )
        self.assertTrue( obs )


    def test_isPairAlignWithinAndDueToAChunkOverlap( self ):
        """
        Check isPairAlignWithinAndDueToAChunkOverlap() with chunks of 500 bp
        overlapping by 100 bp, including the effect of the '_margin' attribute.
        """
        a = Align()
        # included, due to overlap
        a.setFromTuple( ( "chunk1", "411", "450", "chunk2", "11", "50", "0.0", "73", "97.6" ) )
        self._i.setChunkLength( 500 )
        self._i.setChunkOverlap( 100 )
        self._i._verbose = 0
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
        self.assertTrue( obs )

        # reverse subject
        a.setFromTuple( ( "chunk1", "411", "450", "chunk2", "50", "11", "0.0", "73", "97.6" ) )
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
        self.assertFalse( obs )

        # chunk subject not contiguous
        a.setFromTuple( ( "chunk1", "401", "500", "chunk3", "1", "100", "0.0", "125", "97.6" ) )
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 3 )
        self.assertFalse( obs )

        # hit longer than chunk overlap
        a.setFromTuple( ( "chunk1", "390", "500", "chunk2", "1", "110", "0.0", "125", "97.6" ) )
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
        self.assertFalse( obs )

        # repeat within overlap...
        a.setFromTuple( ( "chunk1", "411", "430", "chunk2", "16", "35", "0.0", "73", "97.6" ) )
        self._i._margin = 2   # ... but not due to it
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
        self.assertFalse( obs )
        self._i._margin = 10   # ... and due to it
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
        self.assertTrue( obs )


    def test_removeChunkOverlaps( self ):
        """
        Check that removeChunkOverlaps() writes an empty output file when given
        the two alignments below (chunks of 500 bp overlapping by 100 bp).
        """
        inFileName = "dummyInFile_%s" % ( self._uniqId )
        obsFileName = "dummyObsFile_%s" % ( self._uniqId )
        expFileName = "dummyExpFile_%s" % ( self._uniqId )
        try:
            a = Align()
            with open( inFileName, "w" ) as inF:
                a.setFromTuple( ( "chunk1", "411", "490", "chunk2", "11", "90", "0.0", "73", "97.6" ) )
                a.write( inF )
                a.setFromTuple( ( "chunk1", "1", "500", "chunk1", "1", "500", "0.0", "500", "100.0" ) )
                a.write( inF )

            self._i.setInputFileName( inFileName )
            self._i.setChunkLength( 500 )
            self._i.setChunkOverlap( 100 )
            self._i.setOutputFileName( obsFileName )
            self._i.removeChunkOverlaps()

            # the expected output is an empty file
            self._writeLines( expFileName, [] )

            self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )
        finally:
            self._removeFiles( [ inFileName, obsFileName, expFileName ] )


    def test_zRunAsScript( self ):
        """
        Run the program as a script (chunks of 500 bp overlapping by 100 bp) and
        compare its output file with the expected one.
        """
        alignFileName = "dummyInFile_%s" % ( self._uniqId )
        expFileName = "dummyExpFile_%s" % ( self._uniqId )
        obsFileName = "dummyObsFile_%s" % ( self._uniqId )
        try:
            self._writeLines( alignFileName, [
                "chunk1\t401\t500\tchunk2\t1\t100\t0.0\t131\t100.0\n",   # remove
                "chunk2\t1\t100\tchunk1\t401\t500\t0.0\t132\t100.0\n",   # remove
                "chunk1\t401\t500\tchunk3\t1\t100\t0.0\t132\t100.0\n",   # keep because non-contiguous chunks
                "chunk3\t401\t500\tchunk1\t1\t100\t0.0\t132\t100.0\n",   # keep because non-contiguous chunks
                "chunk1\t401\t500\tchunk2\t100\t1\t0.0\t132\t100.0\n",   # keep because within overlap but reverse
                "chunk1\t431\t490\tchunk2\t31\t90\t0.0\t132\t100.0\n",   # remove because within and due to overlap
                "chunk1\t411\t430\tchunk2\t61\t90\t0.0\t132\t100.0\n",   # keep because within but not due to overlap
                "chunk1\t390\t500\tchunk2\t1\t100\t0.0\t132\t100.0\n",   # keep because longer HSP on query
            ] )

            # kept alignments, with the E-value and identity formatting expected
            # in the output file
            self._writeLines( expFileName, [
                "chunk1\t401\t500\tchunk3\t1\t100\t0\t132\t100.000000\n",
                "chunk3\t401\t500\tchunk1\t1\t100\t0\t132\t100.000000\n",
                "chunk1\t401\t500\tchunk2\t100\t1\t0\t132\t100.000000\n",
                "chunk1\t411\t430\tchunk2\t61\t90\t0\t132\t100.000000\n",
                "chunk1\t390\t500\tchunk2\t1\t100\t0\t132\t100.000000\n",
            ] )

            self._runScript( alignFileName, 500, 100, obsFileName )

            self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )
        finally:
            self._removeFiles( [ alignFileName, expFileName, obsFileName ] )


    def test_zRunAsScript_bug_to_fix( self ):
        """
        Run the program as a script with chunks of 200000 bp overlapping by
        10000 bp; the expected file keeps the first two alignments and drops
        the third.

        The subject coordinates of the first alignment (on chunk5) and of the
        third one (on chunk6) differ by exactly 190000, i.e. chunk length minus
        chunk overlap.
        NOTE(review): the method name suggests a known, possibly still open,
        bug -- confirm the current status before relying on this test.
        """
        alignFileName = "dummyInFile_%s" % ( self._uniqId )
        expFileName = "dummyExpFile_%s" % ( self._uniqId )
        obsFileName = "dummyObsFile_%s" % ( self._uniqId )
        try:
            self._writeLines( alignFileName, [
                "chunk4\t63217\t63680\tchunk5\t195316\t194837\t0\t676\t92.71\n",
                "chunk4\t63217\t63680\tchunk6\t2618\t3101\t0\t714\t93.6\n",
                "chunk4\t63217\t63680\tchunk6\t5316\t4837\t0\t676\t92.71\n",
            ] )

            self._writeLines( expFileName, [
                "chunk4\t63217\t63680\tchunk5\t195316\t194837\t0\t676\t92.71\n",
                "chunk4\t63217\t63680\tchunk6\t2618\t3101\t0\t714\t93.6\n",
            ] )

            self._runScript( alignFileName, 200000, 10000, obsFileName )

            self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )
        finally:
            self._removeFiles( [ alignFileName, expFileName, obsFileName ] )
159
# Run every test of this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()