6
|
1 import unittest
|
|
2 import os
|
|
3 import time
|
|
4 from commons.core.coord.ConvCoord import ConvCoord
|
|
5 from commons.core.utils.FileUtils import FileUtils
|
|
6 from commons.core.sql.DbFactory import DbFactory
|
|
7 from commons.core.coord.Map import Map
|
|
8
|
|
class Test_ConvCoord( unittest.TestCase ):
    """Database-backed tests for ConvCoord.mergeCoordsOnChunkOverlaps().

    Each test loads a small "path" table, runs the merge on it, exports the
    table to a file and compares that file with the expected content.
    Two further tests (conversion from chunk to chromosome coordinates) are
    kept below as commented-out TODOs, together with the fixture writers
    they rely on.
    """

    def setUp( self ):
        """Build the ConvCoord instance under test and give it the test's DB handle."""
        self._i = ConvCoord()
        # Timestamp + PID suffix used to build per-run fixture names.
        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S"), os.getpid() )
        self._inData = "dummyInData_%s" % ( self._uniqId )
        self._mapData = "dummyMapData_%s" % ( self._uniqId )
        self._expData = "dummyExpData_%s" % ( self._uniqId )
        self._obsData = "dummyObsData_%s" % ( self._uniqId )
        self._iDb = DbFactory.createInstance()
        # The object under test works on the same connection as the test.
        self._i._iDb = self._iDb

    def tearDown( self ):
        """Close the database handle opened in setUp()."""
        self._iDb.close()

    # TODO: handle duplicated matches for path
    # def test_convCoordsChkToChrFromFile_duplicated_matchs( self ):
    #     dChunks2CoordMaps = {"chunk1": Map( "chunk1", "dmel_chr4", 760001, 960000 ),
    #                          "chunk2": Map( "chunk2", "dmel_chr4", 950001, 1150000 ) }
    #     tmpPathFileName = "dummyPathCoordOnChr_%s" % self._uniqId
    #     self._writePathFileCoordOnChunk(tmpPathFileName)
    #
    #     expPathFile = "dummyExpPathFile_%s" % self._uniqId
    #     self._writePathFileCoordOnChrWithOutDoublons(expPathFile)
    #
    #     outTableName = self._i.convCoordsChkToChrFromFile(tmpPathFileName, "path", dChunks2CoordMaps)
    #
    #     obsPathFile = "dummyObsPathFile_%s" % self._uniqId
    #     self._iDb.exportDataToFile(outTableName, obsPathFile)
    #
    #     self.assertTrue(FileUtils.are2FilesIdentical(expPathFile, obsPathFile))
    #
    #     for f in [ expPathFile, obsPathFile, tmpPathFileName ]:
    #         os.remove( f )
    #     self._iDb.dropTable(outTableName)

    # TODO: handle matches out of chunk overlap ? For one side (=> path 128, remove path 152) ? For two sides (path 129, fusion with path 154) ?
    # def test_convCoordsChkToChrFromFile_matchs_out_of_overlap( self ):
    #     dChunks2CoordMaps = {"chunk1": Map( "chunk1", "dmel_chr4", 760001, 960000 ),
    #                          "chunk2": Map( "chunk2", "dmel_chr4", 950001, 1150000 ) }
    #     tmpPathFileName = "dummyPathCoordOnChr_%s" % self._uniqId
    #     self._writePathFileCoordOnChunk_outOfOverlap(tmpPathFileName)
    #
    #     expPathFile = "dummyExpPathFile_%s" % self._uniqId
    #     self._writePathFileCoordOnChrWithOutDoublons_outOfOverlap(expPathFile)
    #
    #     outTableName = self._i.convCoordsChkToChrFromFile(tmpPathFileName, "path", dChunks2CoordMaps)
    #
    #     obsPathFile = "dummyObsPathFile_%s" % self._uniqId
    #     self._iDb.exportDataToFile(outTableName, obsPathFile)
    #
    #     self.assertTrue(FileUtils.are2FilesIdentical(expPathFile, obsPathFile))
    #
    #     for f in [ expPathFile, obsPathFile, tmpPathFileName ]:
    #         os.remove( f )
    #     self._iDb.dropTable(outTableName)

    def test_mergeCoordsOnChunkOverlaps( self ):
        """A hit reported twice inside the chunk overlap is kept only once."""
        inputLines = [
            # hit within the 1st chunk
            self._pathLine( "1", "chromosome1", "21", "37", "TE1", "1", "27", "8e-58", "30", "97.8" ),
            # hit included within the chunk overlap, on the 2nd chunk
            self._pathLine( "3", "chromosome1", "92", "99", "TE1", "1", "8", "8e-58", "11", "97.8" ),
            # hit included within the chunk overlap, on the 1st chunk
            self._pathLine( "2", "chromosome1", "92", "99", "TE1", "1", "8", "8e-58", "11", "97.8" ),
        ]
        expectedLines = [
            # hit within the 1st chunk
            self._pathLine( "1", "chromosome1", "21", "37", "TE1", "1", "27", "8e-58", "30", "97.8" ),
            # hit included within the chunk overlap, on the 1st chunk
            self._pathLine( "2", "chromosome1", "92", "99", "TE1", "1", "8", "8e-58", "11", "97.8" ),
        ]
        self._checkMergeOnChunkOverlaps( inputLines, expectedLines )

    def test_mergeCoordsOnChunkOverlaps_withConnectedMatches( self ):
        """Paths sharing a duplicated hit in the overlap end up under one path id."""
        inputLines = [
            # hit on the 1st chunk
            self._pathLine( "1", "chromosome1", "21", "37", "TE1", "1", "27", "8e-58", "30", "97.8" ),
            # hit included within the chunk overlap, on the 1st chunk, connected to the previous
            self._pathLine( "1", "chromosome1", "92", "99", "TE1", "28", "36", "8e-58", "10", "97.8" ),
            # hit included within the chunk overlap, on the 2nd chunk
            self._pathLine( "2", "chromosome1", "92", "99", "TE1", "28", "36", "8e-58", "10", "97.8" ),
            # hit on the 2nd chunk, connected to the previous
            self._pathLine( "2", "chromosome1", "111", "120", "TE1", "37", "46", "8e-58", "15", "97.8" ),
        ]
        expectedLines = [
            # hit within the 1st chunk
            self._pathLine( "1", "chromosome1", "21", "37", "TE1", "1", "27", "8e-58", "30", "97.8" ),
            # hit included within the chunk overlap, on the 1st chunk
            self._pathLine( "1", "chromosome1", "92", "99", "TE1", "28", "36", "8e-58", "10", "97.8" ),
            # hit on the 2nd chunk, connected to the previous
            self._pathLine( "1", "chromosome1", "111", "120", "TE1", "37", "46", "8e-58", "15", "97.8" ),
        ]
        self._checkMergeOnChunkOverlaps( inputLines, expectedLines )

    @staticmethod
    def _pathLine( *fields ):
        """Return one tab-separated, newline-terminated row built from string fields."""
        return "\t".join( fields ) + "\n"

    def _checkMergeOnChunkOverlaps( self, inputLines, expectedLines ):
        """Load inputLines as a path table, merge it and compare with expectedLines.

        The steps run in this order: write the rows to a file, create the
        table from that file, call mergeCoordsOnChunkOverlaps() on the table,
        export the table and compare the export with the expected file.

        @param inputLines: rows loaded into the table before the merge
        @param expectedLines: rows the exported table must contain afterwards
        """
        dChunks2CoordMaps = { "chunk1": Map( "chunk1", "chromosome1", 1, 100 ),
                              "chunk2": Map( "chunk2", "chromosome1", 91, 190 ),
                              "chunk3": Map( "chunk3", "chromosome2", 1, 100 ) }
        # The same name is used for the load file and for the table.
        tmpPathTable = "dummyTmpPathTable"
        expPathFile = "dummyExpPathFile"
        obsPathFile = "dummyObsPathFile"

        FileUtils.writeLineListInFile( tmpPathTable, inputLines )
        try:
            # NOTE(review): the trailing True presumably asks for an existing
            # table to be overwritten -- confirm against createTable().
            self._iDb.createTable( tmpPathTable, "path", tmpPathTable, True )
        finally:
            # The load file is no longer needed, whether or not loading worked.
            os.remove( tmpPathTable )

        try:
            FileUtils.writeLineListInFile( expPathFile, expectedLines )
            self._i.mergeCoordsOnChunkOverlaps( dChunks2CoordMaps, tmpPathTable )
            self._iDb.exportDataToFile( tmpPathTable, obsPathFile )
            self.assertTrue( FileUtils.are2FilesIdentical( expPathFile, obsPathFile ) )
        finally:
            # Clean up here rather than with addCleanup(): cleanups run after
            # tearDown(), which has already closed the database handle.
            for f in ( expPathFile, obsPathFile ):
                # A file may be missing if an earlier step raised.
                if os.path.exists( f ):
                    os.remove( f )
            self._iDb.dropTable( tmpPathTable )

    def _writePathFileCoordOnChrWithOutDoublons(self, pathFileName):
        """Write the expected path rows, in chromosome coordinates, to pathFileName.

        Only referenced by the commented-out convCoordsChkToChrFromFile tests.
        Paths 128 and 129 are deliberately left out (see the commented rows).
        """
        lines = [
            "123\tdmel_chr4\t868397\t868531\tMariner2_AG_1p:classII:TIR\t53\t97\t8e-19\t28\t35.56\n",
            "123\tdmel_chr4\t868545\t869120\tMariner2_AG_1p:classII:TIR\t102\t333\t8e-19\t87\t27.97\n",
            "124\tdmel_chr4\t819607\t819714\tLINER1-2_NVi_2p:classI:?\t502\t537\t3e-20\t30\t36.11\n",
            "124\tdmel_chr4\t819695\t820156\tLINER1-2_NVi_2p:classI:?\t533\t725\t3e-20\t90\t36.79\n",
            "125\tdmel_chr4\t953027\t953101\tCR1-8_AG_1p:classI:LINE\t470\t448\t1e-27\t11\t28.57\n",
            "126\tdmel_chr4\t862131\t862178\tTc1-1_TCa_1p:classII:TIR\t288\t274\t5e-29\t18\t52.5\n",
            "127\tdmel_chr4\t819520\t819606\tNotoAg1_2p:classI:?\t482\t508\t1e-13\t14\t30.61\n",
            # "128\tdmel_chr4\t953866\t953889\tCR1-19_HM_1p:classI:LINE\t898\t891\t5e-21\t4\t34.98\n",
            # "129\tdmel_chr4\t953866\t953889\tCR1-83_HM_1p:classI:LINE\t912\t905\t3e-21\t4\t34.62\n",
            "150\tdmel_chr4\t971176\t971250\tTc1-1_TCa_1p:classII:TIR\t135\t109\t8e-32\t21\t41.57\n",
            "151\tdmel_chr4\t1066603\t1066698\tMARWOLEN1_1p:classII:TIR\t285\t320\t7e-25\t28\t41.67\n",
            "152\tdmel_chr4\t953866\t953889\tCR1-19_HM_1p:classI:LINE\t898\t891\t5e-21\t4\t34.98\n",
            "153\tdmel_chr4\t953951\t954343\tCR1-1_DWil_1p:classI:LINE\t127\t2\t4e-18\t92\t37.59\n",
            "154\tdmel_chr4\t953866\t953889\tCR1-83_HM_1p:classI:LINE\t912\t905\t3e-21\t4\t34.62\n",
            "155\tdmel_chr4\t953102\t953199\tCR1-1_DWil_2p:classI:LINE\t869\t837\t2e-26\t38\t57.89\n",
        ]
        # 'with' closes the handle even if a write fails.
        with open( pathFileName, "w" ) as pathFile:
            pathFile.writelines( lines )

    def _writePathFileCoordOnChunk(self, pathFileName):
        """Write the input path rows, in chunk coordinates, to pathFileName.

        Only referenced by the commented-out convCoordsChkToChrFromFile tests.
        """
        lines = [
            "123\tchunk1\t108397\t108531\tMariner2_AG_1p:classII:TIR\t53\t97\t8e-19\t28\t35.56\n",
            "123\tchunk1\t108545\t109120\tMariner2_AG_1p:classII:TIR\t102\t333\t8e-19\t87\t27.97\n",
            "124\tchunk1\t59607\t59714\tLINER1-2_NVi_2p:classI:?\t502\t537\t3e-20\t30\t36.11\n",
            "124\tchunk1\t59695\t60156\tLINER1-2_NVi_2p:classI:?\t533\t725\t3e-20\t90\t36.79\n",
            "125\tchunk1\t193027\t193101\tCR1-8_AG_1p:classI:LINE\t470\t448\t1e-27\t11\t28.57\n",
            "126\tchunk1\t102131\t102178\tTc1-1_TCa_1p:classII:TIR\t288\t274\t5e-29\t18\t52.5\n",
            "127\tchunk1\t59520\t59606\tNotoAg1_2p:classI:?\t482\t508\t1e-13\t14\t30.61\n",
            "128\tchunk1\t193866\t193889\tCR1-19_HM_1p:classI:LINE\t898\t891\t5e-21\t4\t34.98\n",
            "129\tchunk1\t193866\t193889\tCR1-83_HM_1p:classI:LINE\t912\t905\t3e-21\t4\t34.62\n",
            "150\tchunk2\t21176\t21250\tTc1-1_TCa_1p:classII:TIR\t135\t109\t8e-32\t21\t41.57\n",
            "151\tchunk2\t116603\t116698\tMARWOLEN1_1p:classII:TIR\t285\t320\t7e-25\t28\t41.67\n",
            "152\tchunk2\t3866\t3889\tCR1-19_HM_1p:classI:LINE\t898\t891\t5e-21\t4\t34.98\n",
            "153\tchunk2\t3951\t4343\tCR1-1_DWil_1p:classI:LINE\t127\t2\t4e-18\t92\t37.59\n",
            "154\tchunk2\t3866\t3889\tCR1-83_HM_1p:classI:LINE\t912\t905\t3e-21\t4\t34.62\n",
            "155\tchunk2\t3102\t3199\tCR1-1_DWil_2p:classI:LINE\t869\t837\t2e-26\t38\t57.89\n",
        ]
        # 'with' closes the handle even if a write fails.
        with open( pathFileName, "w" ) as pathFile:
            pathFile.writelines( lines )
|
|
167
|
|
168 # def _writePathFileCoordOnChunk_outOfOverlap(self, pathFileName):
|
|
169 # pathFile = open( pathFileName, "w" )
|
|
170 # pathFile.write("123\tchunk1\t108397\t108531\tMariner2_AG_1p:classII:TIR\t53\t97\t8e-19\t28\t35.56\n")
|
|
171 # pathFile.write("123\tchunk1\t108545\t109120\tMariner2_AG_1p:classII:TIR\t102\t333\t8e-19\t87\t27.97\n")
|
|
172 # pathFile.write("124\tchunk1\t59607\t59714\tLINER1-2_NVi_2p:classI:?\t502\t537\t3e-20\t30\t36.11\n")
|
|
173 # pathFile.write("124\tchunk1\t59695\t60156\tLINER1-2_NVi_2p:classI:?\t533\t725\t3e-20\t90\t36.79\n")
|
|
174 # pathFile.write("125\tchunk1\t193027\t193101\tCR1-8_AG_1p:classI:LINE\t470\t448\t1e-27\t11\t28.57\n")
|
|
175 # pathFile.write("126\tchunk1\t102131\t102178\tTc1-1_TCa_1p:classII:TIR\t288\t274\t5e-29\t18\t52.5\n")
|
|
176 # pathFile.write("127\tchunk1\t59520\t59606\tNotoAg1_2p:classI:?\t482\t508\t1e-13\t14\t30.61\n")
|
|
177 # pathFile.write("128\tchunk1\t183866\t193889\tCR1-19_HM_1p:classI:LINE\t898\t1891\t5e-21\t4\t34.98\n")
|
|
178 # pathFile.write("129\tchunk1\t183866\t200000\tCR1-83_HM_1p:classI:LINE\t912\t905\t3e-21\t4\t34.62\n")
|
|
179 # pathFile.write("150\tchunk2\t21176\t21250\tTc1-1_TCa_1p:classII:TIR\t135\t109\t8e-32\t21\t41.57\n")
|
|
180 # pathFile.write("151\tchunk2\t116603\t116698\tMARWOLEN1_1p:classII:TIR\t285\t320\t7e-25\t28\t41.67\n")
|
|
181 # pathFile.write("152\tchunk2\t1\t3889\tCR1-19_HM_1p:classI:LINE\t898\t1891\t5e-21\t4\t34.98\n")
|
|
182 # pathFile.write("153\tchunk2\t3951\t4343\tCR1-1_DWil_1p:classI:LINE\t127\t2\t4e-18\t92\t37.59\n")
|
|
183 # pathFile.write("154\tchunk2\t1\t13889\tCR1-83_HM_1p:classI:LINE\t912\t905\t3e-21\t4\t34.62\n")
|
|
184 # pathFile.write("155\tchunk2\t3102\t3199\tCR1-1_DWil_2p:classI:LINE\t869\t837\t2e-26\t38\t57.89\n")
|
|
185 # pathFile.close()
|
|
186 #
|
|
187 # def _writePathFileCoordOnChrWithOutDoublons_outOfOverlap(self, pathFileName):
|
|
188 # file = open( pathFileName, "w" )
|
|
189 # file.write("123\tdmel_chr4\t868397\t868531\tMariner2_AG_1p:classII:TIR\t53\t97\t8e-19\t28\t35.56\n")
|
|
190 # file.write("123\tdmel_chr4\t868545\t869120\tMariner2_AG_1p:classII:TIR\t102\t333\t8e-19\t87\t27.97\n")
|
|
191 # file.write("124\tdmel_chr4\t819607\t819714\tLINER1-2_NVi_2p:classI:?\t502\t537\t3e-20\t30\t36.11\n")
|
|
192 # file.write("124\tdmel_chr4\t819695\t820156\tLINER1-2_NVi_2p:classI:?\t533\t725\t3e-20\t90\t36.79\n")
|
|
193 # file.write("125\tdmel_chr4\t953027\t953101\tCR1-8_AG_1p:classI:LINE\t470\t448\t1e-27\t11\t28.57\n")
|
|
194 # file.write("126\tdmel_chr4\t862131\t862178\tTc1-1_TCa_1p:classII:TIR\t288\t274\t5e-29\t18\t52.5\n")
|
|
195 # file.write("127\tdmel_chr4\t819520\t819606\tNotoAg1_2p:classI:?\t482\t508\t1e-13\t14\t30.61\n")
|
|
196 # file.write("128\tdmel_chr4\t943866\t953889\tCR1-19_HM_1p:classI:LINE\t898\t1891\t5e-21\t4\t34.98\n")
|
|
197 # file.write("129\tdmel_chr4\t943866\t963889\tCR1-83_HM_1p:classI:LINE\t912\t905\t3e-21\t4\t34.62\n")
|
|
198 # file.write("150\tdmel_chr4\t971176\t971250\tTc1-1_TCa_1p:classII:TIR\t135\t109\t8e-32\t21\t41.57\n")
|
|
199 # file.write("151\tdmel_chr4\t1066603\t1066698\tMARWOLEN1_1p:classII:TIR\t285\t320\t7e-25\t28\t41.67\n")
|
|
200 # file.write("153\tdmel_chr4\t953951\t954343\tCR1-1_DWil_1p:classI:LINE\t127\t2\t4e-18\t92\t37.59\n")
|
|
201 # file.write("155\tdmel_chr4\t953102\t953199\tCR1-1_DWil_2p:classI:LINE\t869\t837\t2e-26\t38\t57.89\n")
|
|
202 # file.close()
|
|
203
|
|
# Run every test of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()