annotate commons/tools/tests/Test_RmvPairAlignInChunkOverlaps.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 import unittest
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2 import os
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 import time
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 from commons.core.coord.Align import Align
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5 from commons.core.utils.FileUtils import FileUtils
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6 from commons.tools.RmvPairAlignInChunkOverlaps import RmvPairAlignInChunkOverlaps
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
7
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
8
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
class Test_RmvPairAlignInChunkOverlaps( unittest.TestCase ):
    """
    Tests for RmvPairAlignInChunkOverlaps.

    The first tests call the predicates and removeChunkOverlaps() on an
    instance; the 'zRunAsScript' tests launch RmvPairAlignInChunkOverlaps.py
    through the shell and compare the file it writes with an expected file.

    Every temporary file is registered via _tmpFileName() and deleted in
    tearDown(), so nothing is left behind when an assertion fails.
    """

    def setUp( self ):
        self._i = RmvPairAlignInChunkOverlaps()
        # The timestamp alone has a one-second resolution; the pid keeps the
        # file names distinct when two runs start in the same second.
        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S"), os.getpid() )
        self._tmpFiles = []


    def tearDown( self ):
        # Runs whether the test passed or failed, hence the existence check:
        # a failing test may not have produced all of its files.
        for fileName in self._tmpFiles:
            if os.path.exists( fileName ):
                os.remove( fileName )
        self._tmpFiles = None
        self._i = None
        self._uniqId = None


    def _tmpFileName( self, prefix ):
        """
        Return '<prefix>_<uniqId>' and register it for removal in tearDown().
        """
        fileName = "%s_%s" % ( prefix, self._uniqId )
        self._tmpFiles.append( fileName )
        return fileName


    def _writeTmpFile( self, prefix, lines ):
        """
        Create a registered temporary file containing the given lines
        (each one already ends with its newline) and return its name.
        """
        fileName = self._tmpFileName( prefix )
        with open( fileName, "w" ) as handle:
            handle.write( "".join( lines ) )
        return fileName


    def _runScript( self, inFileName, chunkLength, chunkOverlap, outFileName ):
        """
        Launch RmvPairAlignInChunkOverlaps.py through the shell.
        The program is called by its bare name, so the shell has to be able
        to resolve it.
        """
        cmd = "RmvPairAlignInChunkOverlaps.py"
        cmd += " -i %s" % ( inFileName )
        cmd += " -l %d" % ( chunkLength )
        cmd += " -o %d" % ( chunkOverlap )
        cmd += " -O %s" % ( outFileName )
        cmd += " -v 0"
        os.system( cmd )


    def _assertVerdict( self, expected, observed, label ):
        """
        Assert that 'observed' is truthy or falsy as 'expected' says,
        reporting 'label' on failure.
        """
        if expected:
            self.assertTrue( observed, label )
        else:
            self.assertFalse( observed, label )


    def test_isPairAlignAChunkOverlap( self ):
        self._i.setChunkLength( 500 )
        self._i.setChunkOverlap( 100 )
        # ( alignment fields, query chunk, subject chunk, expected, label )
        cases = [
            ( ( "chunk1", "401", "500", "chunk2", "1", "100", "0.0", "125", "97.6" ),
              1, 2, True, "exact overlap between contiguous chunks" ),
            # This case was labelled "reverse subject" but its coordinates are
            # the same as in the case above, i.e. still on the forward strand.
            # NOTE(review): test_zRunAsScript keeps the hit whose subject is
            # "100", "1", so a truly reversed subject presumably has to be
            # rejected here -- confirm before changing the data.
            ( ( "chunk1", "401", "500", "chunk2", "1", "100", "0.0", "125", "97.6" ),
              1, 2, True, "exact overlap between contiguous chunks (repeated)" ),
            ( ( "chunk1", "401", "500", "chunk3", "1", "100", "0.0", "125", "97.6" ),
              1, 3, False, "chunk subject not contiguous" ),
            ( ( "chunk1", "390", "500", "chunk2", "1", "110", "0.0", "125", "97.6" ),
              1, 2, False, "hit longer than chunk overlap" ),
            ( ( "chunk2", "1", "101", "chunk1", "401", "500", "0.0", "500", "100.0" ),
              2, 1, True, "chunk overlap returned by PALS (+1), query > subject" ),
            ( ( "chunk1", "401", "500", "chunk2", "1", "101", "0.0", "500", "100.0" ),
              1, 2, True, "chunk overlap returned by PALS (+1), query < subject" ),
        ]
        a = Align()
        for fields, chunkQuery, chunkSubject, exp, label in cases:
            a.setFromTuple( fields )
            obs = self._i.isPairAlignAChunkOverlap( a, chunkQuery, chunkSubject )
            self._assertVerdict( exp, obs, label )


    def test_isPairAlignWithinAndDueToAChunkOverlap( self ):
        self._i.setChunkLength( 500 )
        self._i.setChunkOverlap( 100 )
        self._i._verbose = 0
        # ( alignment fields, query chunk, subject chunk, expected, label )
        cases = [
            ( ( "chunk1", "411", "450", "chunk2", "11", "50", "0.0", "73", "97.6" ),
              1, 2, True, "included, due to overlap" ),
            ( ( "chunk1", "411", "450", "chunk2", "50", "11", "0.0", "73", "97.6" ),
              1, 2, False, "reverse subject" ),
            ( ( "chunk1", "401", "500", "chunk3", "1", "100", "0.0", "125", "97.6" ),
              1, 3, False, "chunk subject not contiguous" ),
            ( ( "chunk1", "390", "500", "chunk2", "1", "110", "0.0", "125", "97.6" ),
              1, 2, False, "hit longer than chunk overlap" ),
        ]
        a = Align()
        for fields, chunkQuery, chunkSubject, exp, label in cases:
            a.setFromTuple( fields )
            obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, chunkQuery, chunkSubject )
            self._assertVerdict( exp, obs, label )

        # Same alignment judged twice: only the margin differs.
        a.setFromTuple( ( "chunk1", "411", "430", "chunk2", "16", "35", "0.0", "73", "97.6" ) )
        self._i._margin = 2
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
        self.assertFalse( obs, "repeat within overlap but not due to it (margin=2)" )
        self._i._margin = 10
        obs = self._i.isPairAlignWithinAndDueToAChunkOverlap( a, 1, 2 )
        self.assertTrue( obs, "repeat within overlap and due to it (margin=10)" )


    def test_removeChunkOverlaps( self ):
        inFileName = self._tmpFileName( "dummyInFile" )
        a = Align()
        with open( inFileName, "w" ) as inF:
            a.setFromTuple( ( "chunk1", "411", "490", "chunk2", "11", "90", "0.0", "73", "97.6" ) )
            a.write( inF )
            a.setFromTuple( ( "chunk1", "1", "500", "chunk1", "1", "500", "0.0", "500", "100.0" ) )
            a.write( inF )

        obsFileName = self._tmpFileName( "dummyObsFile" )
        self._i.setInputFileName( inFileName )
        self._i.setChunkLength( 500 )
        self._i.setChunkOverlap( 100 )
        self._i.setOutputFileName( obsFileName )
        self._i.removeChunkOverlaps()

        # Both input alignments are expected to be discarded: empty output.
        expFileName = self._writeTmpFile( "dummyExpFile", [] )
        self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )


    def test_zRunAsScript( self ):
        alignFileName = self._writeTmpFile( "dummyInFile", [
            "chunk1\t401\t500\tchunk2\t1\t100\t0.0\t131\t100.0\n",   # remove
            "chunk2\t1\t100\tchunk1\t401\t500\t0.0\t132\t100.0\n",   # remove
            "chunk1\t401\t500\tchunk3\t1\t100\t0.0\t132\t100.0\n",   # keep because non-contiguous chunks
            "chunk3\t401\t500\tchunk1\t1\t100\t0.0\t132\t100.0\n",   # keep because non-contiguous chunks
            "chunk1\t401\t500\tchunk2\t100\t1\t0.0\t132\t100.0\n",   # keep because within overlap but reverse
            "chunk1\t431\t490\tchunk2\t31\t90\t0.0\t132\t100.0\n",   # remove because within and due to overlap
            "chunk1\t411\t430\tchunk2\t61\t90\t0.0\t132\t100.0\n",   # keep because within but not due to overlap
            "chunk1\t390\t500\tchunk2\t1\t100\t0.0\t132\t100.0\n",   # keep because longer HSP on query
        ] )

        expFileName = self._writeTmpFile( "dummyExpFile", [
            "chunk1\t401\t500\tchunk3\t1\t100\t0\t132\t100.000000\n",
            "chunk3\t401\t500\tchunk1\t1\t100\t0\t132\t100.000000\n",
            "chunk1\t401\t500\tchunk2\t100\t1\t0\t132\t100.000000\n",
            "chunk1\t411\t430\tchunk2\t61\t90\t0\t132\t100.000000\n",
            "chunk1\t390\t500\tchunk2\t1\t100\t0\t132\t100.000000\n",
        ] )

        obsFileName = self._tmpFileName( "dummyObsFile" )
        self._runScript( alignFileName, 500, 100, obsFileName )

        self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )


    def test_zRunAsScript_bug_to_fix(self):
        alignFileName = self._writeTmpFile( "dummyInFile", [
            "chunk4\t63217\t63680\tchunk5\t195316\t194837\t0\t676\t92.71\n",
            "chunk4\t63217\t63680\tchunk6\t2618\t3101\t0\t714\t93.6\n",
            "chunk4\t63217\t63680\tchunk6\t5316\t4837\t0\t676\t92.71\n",
        ] )

        # NOTE(review): the identities below are written as "92.71" / "93.6"
        # whereas test_zRunAsScript expects six decimals ("100.000000") in the
        # script output -- confirm which format this test should expect.
        expFileName = self._writeTmpFile( "dummyExpFile", [
            "chunk4\t63217\t63680\tchunk5\t195316\t194837\t0\t676\t92.71\n",
            "chunk4\t63217\t63680\tchunk6\t2618\t3101\t0\t714\t93.6\n",
        ] )

        obsFileName = self._tmpFileName( "dummyObsFile" )
        self._runScript( alignFileName, 200000, 10000, obsFileName )

        self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
159
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
# Executed directly (not imported): hand control to the unittest runner,
# which discovers and runs the test cases defined in this module.
if __name__ == "__main__":
    unittest.main()