Mercurial > repos > yufei-luo > s_mart
comparison commons/tools/tests/Test_RmvPairAlignInChunkOverlaps.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
17:b0e8584489e6 | 18:94ab73e8a190 |
---|---|
1 import unittest | |
2 import os | |
3 import time | |
4 from commons.core.coord.Align import Align | |
5 from commons.core.utils.FileUtils import FileUtils | |
6 from commons.tools.RmvPairAlignInChunkOverlaps import RmvPairAlignInChunkOverlaps | |
7 | |
8 | |
class Test_RmvPairAlignInChunkOverlaps( unittest.TestCase ):
    """Tests for RmvPairAlignInChunkOverlaps.

    The first tests call methods of a RmvPairAlignInChunkOverlaps instance
    directly; the 'test_zRunAsScript*' tests launch the
    'RmvPairAlignInChunkOverlaps.py' command through the shell and compare
    the file it produces with an expected file.

    All temporary files are created in the current working directory and
    are removed even when a test fails.
    """

    def setUp( self ):
        """Create a fresh instance under test and a suffix for temporary file names."""
        self._i = RmvPairAlignInChunkOverlaps()
        # The timestamp only has a one-second resolution, hence the PID:
        # it keeps two test runs started in the same directory during the
        # same second from overwriting each other's temporary files.
        self._uniqId = "%s_%d" % ( time.strftime("%Y%m%d%H%M%S"), os.getpid() )


    def tearDown( self ):
        """Drop the references created in setUp()."""
        self._i = None
        self._uniqId = None


    def _writeFile( self, fileName, content ):
        """Write the string 'content' into 'fileName', closing the handle even on error."""
        with open( fileName, "w" ) as handle:
            handle.write( content )


    def _removeFiles( self, lFileNames ):
        """Remove each file of 'lFileNames' that exists (a failed test may not have created all of them)."""
        for fileName in lFileNames:
            if os.path.exists( fileName ):
                os.remove( fileName )


    def _runScriptAndCompare( self, lInLines, lExpLines, chunkLength, chunkOverlap ):
        """Run 'RmvPairAlignInChunkOverlaps.py' on 'lInLines' and check its output.

        @param lInLines: lines (with trailing newline) written to the input file given via '-i'
        @param lExpLines: lines (with trailing newline) expected in the file given via '-O'
        @param chunkLength: value given to '-l' (presumably the chunk length, as in setChunkLength())
        @param chunkOverlap: value given to '-o' (presumably the chunk overlap, as in setChunkOverlap())
        """
        cDir = os.getcwd()
        alignFileName = "dummyInFile_%s" % ( self._uniqId )
        expFileName = "dummyExpFile_%s" % ( self._uniqId )
        obsFileName = "dummyObsFile_%s" % ( self._uniqId )
        try:
            self._writeFile( alignFileName, "".join( lInLines ) )
            self._writeFile( expFileName, "".join( lExpLines ) )

            cmd = "RmvPairAlignInChunkOverlaps.py"
            cmd += " -i %s" % ( alignFileName )
            cmd += " -l %i" % ( chunkLength )
            cmd += " -o %i" % ( chunkOverlap )
            cmd += " -O %s" % ( obsFileName )
            cmd += " -v 0"
            # Check the exit status so that a script which cannot be
            # launched is reported as such, rather than as a missing or
            # different output file.
            exitStatus = os.system( cmd )
            self.assertEqual( 0, exitStatus, "exit status %s for command: %s" % ( exitStatus, cmd ) )

            self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )
        finally:
            self._removeFiles( [ alignFileName, expFileName, obsFileName ] )
            os.chdir( cDir )


    def test_isPairAlignAChunkOverlap( self ):
        """Check isPairAlignAChunkOverlap() with chunk length 500 and chunk overlap 100."""
        a = Align()
        a.setFromTuple( ( "chunk1", "401", "500", "chunk2", "1", "100", "0.0", "125", "97.6" ) )
        self._i.setChunkLength( 500 )
        self._i.setChunkOverlap( 100 )
        obs = self._i.isPairAlignAChunkOverlap( a, 1, 2 )
        self.assertTrue( obs, "hit spanning the overlap of contiguous chunks" )

        # NOTE(review): this case is labelled "reverse subject" but its
        # coordinates are identical to the previous case (subject "1" to
        # "100", not "100" to "1") -- confirm which data and which
        # expected value were intended.
        a.setFromTuple( ( "chunk1", "401", "500", "chunk2", "1", "100", "0.0", "125", "97.6" ) )
        obs = self._i.isPairAlignAChunkOverlap( a, 1, 2 )
        self.assertTrue( obs, "reverse subject" )

        a.setFromTuple( ( "chunk1", "401", "500", "chunk3", "1", "100", "0.0", "125", "97.6" ) )
        obs = self._i.isPairAlignAChunkOverlap( a, 1, 3 )
        self.assertFalse( obs, "chunk subject not contiguous" )

        a.setFromTuple( ( "chunk1", "390", "500", "chunk2", "1", "110", "0.0", "125", "97.6" ) )
        obs = self._i.isPairAlignAChunkOverlap( a, 1, 2 )
        self.assertFalse( obs, "hit longer than chunk overlap" )

        a.setFromTuple( ( "chunk2", "1", "101", "chunk1", "401", "500", "0.0", "500", "100.0" ) )
        obs = self._i.isPairAlignAChunkOverlap( a, 2, 1 )
        self.assertTrue( obs, "chunk overlap returned by PALS (+1), query > subject" )

        a.setFromTuple( ( "chunk1", "401", "500", "chunk2", "1", "101", "0.0", "500", "100.0" ) )
        obs = self._i.isPairAlignAChunkOverlap( a, 1, 2 )
        self.assertTrue( obs, "chunk overlap returned by PALS (+1), query < subject" )


    def test_isPairAlignWithinAndDueToAChunkOverlap( self ):
        """Check isPairAlignWithinAndDueToAChunkOverlap() with chunk length 500 and chunk overlap 100."""
        a = Align()
        a.setFromTuple( ( "chunk1", "411", "450", "chunk2", "11", "50", "0.0", "73", "97.6" ) )
        self._i.setChunkLength( 500 )
        self._i.setChunkOverlap( 100 )
        self._i._verbose = 0
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
        self.assertTrue( obs, "included, due to overlap" )

        a.setFromTuple( ( "chunk1", "411", "450", "chunk2", "50", "11", "0.0", "73", "97.6" ) )
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
        self.assertFalse( obs, "reverse subject" )

        a.setFromTuple( ( "chunk1", "401", "500", "chunk3", "1", "100", "0.0", "125", "97.6" ) )
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 3 )
        self.assertFalse( obs, "chunk subject not contiguous" )

        a.setFromTuple( ( "chunk1", "390", "500", "chunk2", "1", "110", "0.0", "125", "97.6" ) )
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
        self.assertFalse( obs, "hit longer than chunk overlap" )

        # Same alignment evaluated with two margins: the subject
        # coordinates are shifted by 5 compared with the query ones
        # (411 -> 16 instead of 411 -> 11).
        a.setFromTuple( ( "chunk1", "411", "430", "chunk2", "16", "35", "0.0", "73", "97.6" ) )
        self._i._margin = 2
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
        self.assertFalse( obs, "repeat within overlap but not due to it (margin 2)" )
        self._i._margin = 10
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
        self.assertTrue( obs, "repeat within overlap and due to it (margin 10)" )


    def test_removeChunkOverlaps( self ):
        """Check that removeChunkOverlaps() writes an empty output file for the two alignments given."""
        inFileName = "dummyInFile_%s" % ( self._uniqId )
        obsFileName = "dummyObsFile_%s" % ( self._uniqId )
        expFileName = "dummyExpFile_%s" % ( self._uniqId )
        try:
            a = Align()
            with open( inFileName, "w" ) as inF:
                a.setFromTuple( ( "chunk1", "411", "490", "chunk2", "11", "90", "0.0", "73", "97.6" ) )
                a.write( inF )
                a.setFromTuple( ( "chunk1", "1", "500", "chunk1", "1", "500", "0.0", "500", "100.0" ) )
                a.write( inF )
            self._i.setInputFileName( inFileName )
            self._i.setChunkLength( 500 )
            self._i.setChunkOverlap( 100 )
            self._i.setOutputFileName( obsFileName )
            self._i.removeChunkOverlaps()

            # Both alignments are expected to be discarded.
            self._writeFile( expFileName, "" )
            self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )
        finally:
            self._removeFiles( [ inFileName, obsFileName, expFileName ] )


    def test_zRunAsScript( self ):
        """Run the script with '-l 500 -o 100' on a mix of alignments to keep and to remove."""
        lInLines = [
            "chunk1\t401\t500\tchunk2\t1\t100\t0.0\t131\t100.0\n",   # remove
            "chunk2\t1\t100\tchunk1\t401\t500\t0.0\t132\t100.0\n",   # remove
            "chunk1\t401\t500\tchunk3\t1\t100\t0.0\t132\t100.0\n",   # keep because non-contiguous chunks
            "chunk3\t401\t500\tchunk1\t1\t100\t0.0\t132\t100.0\n",   # keep because non-contiguous chunks
            "chunk1\t401\t500\tchunk2\t100\t1\t0.0\t132\t100.0\n",   # keep because within overlap but reverse
            "chunk1\t431\t490\tchunk2\t31\t90\t0.0\t132\t100.0\n",   # remove because within and due to overlap
            "chunk1\t411\t430\tchunk2\t61\t90\t0.0\t132\t100.0\n",   # keep because within but not due to overlap
            "chunk1\t390\t500\tchunk2\t1\t100\t0.0\t132\t100.0\n",   # keep because longer HSP on query
        ]
        lExpLines = [
            "chunk1\t401\t500\tchunk3\t1\t100\t0\t132\t100.000000\n",
            "chunk3\t401\t500\tchunk1\t1\t100\t0\t132\t100.000000\n",
            "chunk1\t401\t500\tchunk2\t100\t1\t0\t132\t100.000000\n",
            "chunk1\t411\t430\tchunk2\t61\t90\t0\t132\t100.000000\n",
            "chunk1\t390\t500\tchunk2\t1\t100\t0\t132\t100.000000\n",
        ]
        self._runScriptAndCompare( lInLines, lExpLines, 500, 100 )


    def test_zRunAsScript_bug_to_fix(self):
        """Run the script with '-l 200000 -o 10000'; the third input alignment is expected to be removed.

        NOTE(review): the expected identities are written "92.71" and "93.6"
        whereas test_zRunAsScript expects six decimals ("100.000000") --
        confirm the output format intended here.
        """
        lInLines = [
            "chunk4\t63217\t63680\tchunk5\t195316\t194837\t0\t676\t92.71\n",
            "chunk4\t63217\t63680\tchunk6\t2618\t3101\t0\t714\t93.6\n",
            "chunk4\t63217\t63680\tchunk6\t5316\t4837\t0\t676\t92.71\n",
        ]
        lExpLines = [
            "chunk4\t63217\t63680\tchunk5\t195316\t194837\t0\t676\t92.71\n",
            "chunk4\t63217\t63680\tchunk6\t2618\t3101\t0\t714\t93.6\n",
        ]
        self._runScriptAndCompare( lInLines, lExpLines, 200000, 10000 )
159 | |
if __name__ == "__main__":
    # Executed as a script: let unittest discover and run the tests of this module.
    unittest.main()