Mercurial > repos > yufei-luo > s_mart
comparison smart_toolShed/commons/core/seq/test/Test_BioseqDB.py @ 0:e0f8dcca02ed
Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
author | yufei-luo |
---|---|
date | Thu, 17 Jan 2013 10:52:14 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e0f8dcca02ed |
---|---|
1 # Copyright INRA (Institut National de la Recherche Agronomique) | |
2 # http://www.inra.fr | |
3 # http://urgi.versailles.inra.fr | |
4 # | |
5 # This software is governed by the CeCILL license under French law and | |
6 # abiding by the rules of distribution of free software. You can use, | |
7 # modify and/ or redistribute the software under the terms of the CeCILL | |
8 # license as circulated by CEA, CNRS and INRIA at the following URL | |
9 # "http://www.cecill.info". | |
10 # | |
11 # As a counterpart to the access to the source code and rights to copy, | |
12 # modify and redistribute granted by the license, users are provided only | |
13 # with a limited warranty and the software's author, the holder of the | |
14 # economic rights, and the successive licensors have only limited | |
15 # liability. | |
16 # | |
17 # In this respect, the user's attention is drawn to the risks associated | |
18 # with loading, using, modifying and/or developing or reproducing the | |
19 # software by the user in light of its specific status of free software, | |
20 # that may mean that it is complicated to manipulate, and that also | |
21 # therefore means that it is reserved for developers and experienced | |
22 # professionals having in-depth computer knowledge. Users are therefore | |
23 # encouraged to load and test the software's suitability as regards their | |
24 # requirements in conditions enabling the security of their systems and/or | |
25 # data to be ensured and, more generally, to use and operate it in the | |
26 # same conditions as regards security. | |
27 # | |
28 # The fact that you are presently reading this means that you have had | |
29 # knowledge of the CeCILL license and that you accept its terms. | |
30 | |
31 | |
32 import unittest | |
33 import os | |
34 import time | |
35 from commons.core.seq.BioseqDB import BioseqDB | |
36 from commons.core.seq.Bioseq import Bioseq | |
37 from commons.core.utils.FileUtils import FileUtils | |
38 from commons.core.coord.Map import Map | |
39 | |
40 | |
41 class Test_BioseqDB( unittest.TestCase ): | |
42 | |
def setUp( self ):
    """Store a per-run identifier made of the current timestamp and the process id."""
    timestamp = time.strftime("%Y%m%d%H%M%S")
    pid = os.getpid()
    self._uniqId = "%s_%s" % ( timestamp, pid )
45 | |
46 | |
def tearDown( self ):
    """Remove the FASTA file some tests create, if one of them left it behind."""
    # os.path.exists checks the file system; the previously used os._exists
    # is a private helper that only looks up names inside the os module,
    # so the leftover file was never removed.
    if os.path.exists("dummyBioseqDB.fa"):
        os.remove("dummyBioseqDB.fa")
50 | |
51 | |
def test__eq__(self):
    """Two databases holding Bioseq objects with the same headers and sequences compare equal."""
    lExpBioseq = [ Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" ),
                   Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" ) ]
    expBioseqDB = BioseqDB()
    expBioseqDB.setData( lExpBioseq )

    lObsBioseq = [ Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" ),
                   Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" ) ]
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData( lObsBioseq )

    self.assertEqual( expBioseqDB, obsBioseqDB )
64 | |
65 | |
def test__eq__instances_with_different_header(self):
    """Databases with the same sequences but different headers must not compare equal."""
    lExpBioseq = [ Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" ),
                   Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" ) ]
    expBioseqDB = BioseqDB()
    expBioseqDB.setData( lExpBioseq )

    lObsBioseq = [ Bioseq( "seq3", "AGCGGACGATGCAGCATGCGAATGACGAT" ),
                   Bioseq( "seq4", "GCGATGCGATCGATGCGATAGCA" ) ]
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData( lObsBioseq )

    self.assertNotEqual( expBioseqDB, obsBioseqDB )
78 | |
79 | |
def test__eq__instances_with_different_sequences(self):
    """Databases with the same headers but one differing sequence must not compare equal."""
    lExpBioseq = [ Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" ),
                   Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" ) ]
    expBioseqDB = BioseqDB()
    expBioseqDB.setData( lExpBioseq )

    lObsBioseq = [ Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" ),
                   Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCATATATATATATATATATATATAT" ) ]
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData( lObsBioseq )

    self.assertNotEqual( expBioseqDB, obsBioseqDB )
92 | |
93 | |
def test__eq__instances_with_different_sequences_and_headers(self):
    """Databases differing in both headers and sequences must not compare equal."""
    lExpBioseq = [ Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" ),
                   Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" ) ]
    expBioseqDB = BioseqDB()
    expBioseqDB.setData( lExpBioseq )

    lObsBioseq = [ Bioseq( "seq3", "AGCGGACGATGCAGCATGCGAATGACGAT" ),
                   Bioseq( "seq4", "GCGATGCGATCGATGCGATAGCATATATATATATATATATATATAT" ) ]
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData( lObsBioseq )

    self.assertNotEqual( expBioseqDB, obsBioseqDB )
106 | |
107 | |
def test__eq__instances_with_different_sizeOfBioseq(self):
    """Databases holding a different number of Bioseq objects must not compare equal."""
    lExpBioseq = [ Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" ),
                   Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" ) ]
    expBioseqDB = BioseqDB()
    expBioseqDB.setData( lExpBioseq )

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData( [ Bioseq( "seq3", "AGCGGACGATGCAGCATGCGAATGACGAT" ) ] )

    self.assertNotEqual( expBioseqDB, obsBioseqDB )
119 | |
120 | |
def test_setName (self):
    """A new database has an empty name; setName() stores the given name in the `name` attribute."""
    expName = "myDataBank"
    iBioseqDB = BioseqDB()
    self.assertEqual( iBioseqDB.name, "" )

    iBioseqDB.setName( expName )
    self.assertEqual( expName, iBioseqDB.name )
129 | |
130 | |
def test_read(self):
    """read() on an open FASTA file handle loads every record of that file."""
    expBioseqDB = BioseqDB()
    expBioseqDB.setData( [ Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCT"),
                           Bioseq("consensus2","GAGATGGCTCATGGAGTACCGC") ] )

    faFN = "dummyFaFile.fa"
    with open( faFN, "w" ) as faF:
        faF.write(">consensus1\n")
        faF.write("GAGATGGCTCATGGAGTACCTGCCT\n")
        faF.write(">consensus2\n")
        faF.write("GAGATGGCTCATGGAGTACCGC\n")

    obsBioseqDB = BioseqDB()
    try:
        with open( faFN, "r" ) as faF:
            obsBioseqDB.read( faF )
    finally:
        # remove the fixture even when read() raises
        os.remove( faFN )

    self.assertEqual( expBioseqDB, obsBioseqDB )
151 | |
152 | |
def test_write(self):
    """write() on an open handle emits FASTA records with the sequence wrapped at 60 characters."""
    iBioseq1 = Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq2 = Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseqDB = BioseqDB()
    iBioseqDB.setData( [ iBioseq1, iBioseq2 ] )

    expFaFileName = "dummyFaFile.fa"
    obsFaFileName = "obsDummyFastaFile.fa"
    try:
        with open( expFaFileName, "w" ) as expFaFile:
            expFaFile.write(">consensus1\n")
            expFaFile.write("GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTC\n")
            expFaFile.write("ATGGAGTACCTGCCT\n")
            expFaFile.write(">consensus2\n")
            expFaFile.write("GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAG\n")
            expFaFile.write("TACCGCGAGATGGCTCATGGAGTACCGC\n")

        with open( obsFaFileName, "w" ) as obsFaFile:
            iBioseqDB.write( obsFaFile )

        self.assertTrue( FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName) )
    finally:
        # clean up both files even when the comparison fails
        for fileName in ( expFaFileName, obsFaFileName ):
            if os.path.exists( fileName ):
                os.remove( fileName )
177 | |
178 | |
def test_save(self):
    """save() writes the database to the named file as FASTA with 60-character sequence lines."""
    iBioseq1 = Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq2 = Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseqDB = BioseqDB()
    iBioseqDB.setData( [ iBioseq1, iBioseq2 ] )

    expFaFileName = "dummyFaFile.fa"
    obsFaFileName = "obsDummyFastaFile.fa"
    try:
        with open( expFaFileName, "w" ) as expFaFile:
            expFaFile.write(">consensus1\n")
            expFaFile.write("GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTC\n")
            expFaFile.write("ATGGAGTACCTGCCT\n")
            expFaFile.write(">consensus2\n")
            expFaFile.write("GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAG\n")
            expFaFile.write("TACCGCGAGATGGCTCATGGAGTACCGC\n")

        iBioseqDB.save( obsFaFileName )

        self.assertTrue( FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName) )
    finally:
        # clean up both files even when save() raises or the comparison fails
        for fileName in ( expFaFileName, obsFaFileName ):
            if os.path.exists( fileName ):
                os.remove( fileName )
201 | |
202 | |
def test_load(self):
    """load() reads a FASTA file by name and joins wrapped sequence lines into one sequence."""
    iBioseq1 = Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq2 = Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData( [ iBioseq1, iBioseq2 ] )

    FaFileName = "dummyFaFile.fa"
    with open( FaFileName, "w" ) as FaFile:
        FaFile.write(">consensus1\n")
        FaFile.write("GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTC\n")
        FaFile.write("ATGGAGTACCTGCCT\n")
        FaFile.write(">consensus2\n")
        FaFile.write("GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAG\n")
        FaFile.write("TACCGCGAGATGGCTCATGGAGTACCGC\n")

    try:
        obsBioseqDB = BioseqDB()
        obsBioseqDB.load( FaFileName )
        self.assertEqual( expBioseqDB, obsBioseqDB )
    finally:
        # remove the fixture even when load() raises or the assertion fails
        os.remove( FaFileName )
224 | |
225 | |
def test_reverse( self ):
    """reverse() reverses every sequence of the database (GTTA -> ATTG, TAAGC -> CGAAT)."""
    expBioseqDB = BioseqDB()
    expBioseqDB.setData( [ Bioseq( "seq1", "ATTG" ), Bioseq( "seq2", "CGAAT" ) ] )

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData( [ Bioseq( "seq1", "GTTA" ), Bioseq( "seq2", "TAAGC" ) ] )
    obsBioseqDB.reverse()

    self.assertEqual( expBioseqDB, obsBioseqDB )
238 | |
239 | |
def test_complement( self ):
    """complement() complements every sequence of the database (TAAC -> ATTG, GCTTA -> CGAAT)."""
    expBioseqDB = BioseqDB()
    expBioseqDB.setData( [ Bioseq( "seq1", "ATTG" ), Bioseq( "seq2", "CGAAT" ) ] )

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData( [ Bioseq( "seq1", "TAAC" ), Bioseq( "seq2", "GCTTA" ) ] )
    obsBioseqDB.complement()

    self.assertEqual( expBioseqDB, obsBioseqDB )
253 | |
254 | |
def test_reverseComplement( self ):
    """reverseComplement() reverse-complements every sequence (CAAT -> ATTG, ATTCG -> CGAAT)."""
    expBioseqDB = BioseqDB()
    expBioseqDB.setData( [ Bioseq( "seq1", "ATTG" ), Bioseq( "seq2", "CGAAT" ) ] )

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData( [ Bioseq( "seq1", "CAAT" ), Bioseq( "seq2", "ATTCG" ) ] )
    obsBioseqDB.reverseComplement()

    self.assertEqual( expBioseqDB, obsBioseqDB )
268 | |
269 | |
def test_setData(self):
    """setData() yields a database equal to one whose `db` attribute was assigned the same records."""
    lBioseq = [ Bioseq( "seq1", "ATTG" ),
                Bioseq( "seq2", "CGAAT" ),
                Bioseq( "seq3", "CAAT" ),
                Bioseq( "seq4", "ATTCG" ) ]
    expBioseqDB = BioseqDB()
    # the expected side is filled through the attribute so it does not
    # depend on the method under test
    expBioseqDB.db = lBioseq

    lBioseq2 = [ Bioseq( "seq1", "ATTG" ),
                 Bioseq( "seq2", "CGAAT" ),
                 Bioseq( "seq3", "CAAT" ),
                 Bioseq( "seq4", "ATTCG" ) ]
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData( lBioseq2 )

    self.assertEqual( expBioseqDB, obsBioseqDB )
290 | |
291 | |
def test_reset( self ):
    """reset() on a filled database leaves it equal to a freshly constructed one."""
    lBioseq = [ Bioseq( "seq1", "ATTG" ),
                Bioseq( "seq2", "CGAAT" ),
                Bioseq( "seq3", "CAAT" ),
                Bioseq( "seq4", "ATTCG" ) ]
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData( lBioseq )
    obsBioseqDB.reset()

    expBioseqDB = BioseqDB()

    self.assertEqual( expBioseqDB, obsBioseqDB )
306 | |
307 | |
def testCleanGap(self):
    """cleanGap() strips the '-' characters from every sequence of the database."""
    expBioseqDB = BioseqDB()
    expBioseqDB.setData( [ Bioseq( "seq1", "ATTG" ), Bioseq( "seq2", "CGAAT" ) ] )

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData( [ Bioseq( "seq1", "AT-----TG" ), Bioseq( "seq2", "CGAA----T" ) ] )
    obsBioseqDB.cleanGap()

    self.assertEqual( expBioseqDB, obsBioseqDB )
322 | |
323 | |
def testCleanGap_on_empty_db(self):
    """cleanGap() on an empty database leaves it equal to an empty database."""
    expBioseqDB = BioseqDB()

    obsBioseqDB = BioseqDB()
    obsBioseqDB.cleanGap()

    self.assertEqual( expBioseqDB, obsBioseqDB )
331 | |
332 | |
def testCleanGap_on_size_one_db(self):
    """cleanGap() strips the '-' characters when the database holds a single sequence."""
    expBioseqDB = BioseqDB()
    expBioseqDB.setData( [ Bioseq( "seq1", "ATTG" ) ] )

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData( [ Bioseq( "seq1", "AT-----TG" ) ] )
    obsBioseqDB.cleanGap()

    self.assertEqual( expBioseqDB, obsBioseqDB )
345 | |
346 | |
def test_add_to_a_empty_bioseqDB_instance (self):
    """add() on an empty database indexes the new record at position 0 in `idx` and `idx_renamed`."""
    sHeader = ( "embl::AF332402:AF332402 Arabidopsis thaliana clone C00024 (f)"
                "(At4g29080) mRNA, complete cds." )
    # expected renamed form of the header, used as key of idx_renamed
    sHeaderRenamed = ( "embl-AF332402-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
                       "(At4g29080)_mRNA-_complete_cds." )
    expDictIdx = { sHeader : 0 }
    expDictIdxRenamed = { sHeaderRenamed : 0 }

    obsBioseqDB = BioseqDB()
    obsBioseqDB.add( Bioseq( sHeader, "ATTG" ) )

    self.assertEqual( expDictIdx, obsBioseqDB.idx )
    self.assertEqual( expDictIdxRenamed, obsBioseqDB.idx_renamed )
366 | |
367 | |
def test_add_to_a_size_one_bioseqDB_instance (self):
    """add() on a one-record database indexes the new record at position 1 in `idx` and `idx_renamed`."""
    sHeader1 = ( "embl::AF332402:AF332402 Arabidopsis thaliana clone C00024 (f)"
                 "(At4g29080) mRNA, complete cds." )
    sHeader2 = ( "embl::AF332503:AF332402 Arabidopsis thaliana clone C00024 (f)"
                 "(At4g29080) mRNA, complete cds." )
    expDictIdx = { sHeader1 : 0, sHeader2 : 1 }

    # expected renamed forms of the headers, used as keys of idx_renamed
    sHeaderRenamed1 = ( "embl-AF332402-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
                        "(At4g29080)_mRNA-_complete_cds." )
    sHeaderRenamed2 = ( "embl-AF332503-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
                        "(At4g29080)_mRNA-_complete_cds." )
    expDictIdxRenamed = { sHeaderRenamed1 : 0, sHeaderRenamed2 : 1 }

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData( [ Bioseq( sHeader1, "ATTG" ) ] )
    obsBioseqDB.add( Bioseq( sHeader2, "ATTG" ) )

    self.assertEqual( expDictIdx, obsBioseqDB.idx )
    self.assertEqual( expDictIdxRenamed, obsBioseqDB.idx_renamed )
397 | |
398 | |
def test_add_to_a_size_two_bioseqDB_instance (self):
    """add() on a two-record database indexes the new record at position 2 in `idx` and `idx_renamed`."""
    sHeader1 = ( "embl::AF332402:AF332402 Arabidopsis thaliana clone C00024 (f)"
                 "(At4g29080) mRNA, complete cds." )
    sHeader2 = ( "embl::AF332503:AF332402 Arabidopsis thaliana clone C00024 (f)"
                 "(At4g29080) mRNA, complete cds." )
    sHeader3 = ( "embl::AF332604:AF332402 Arabidopsis thaliana clone C00024 (f)"
                 "(At4g29080) mRNA, complete cds." )
    expDictIdx = { sHeader1 : 0, sHeader2 : 1, sHeader3 : 2 }

    # expected renamed forms of the headers, used as keys of idx_renamed
    sHeaderRenamed1 = ( "embl-AF332402-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
                        "(At4g29080)_mRNA-_complete_cds." )
    sHeaderRenamed2 = ( "embl-AF332503-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
                        "(At4g29080)_mRNA-_complete_cds." )
    sHeaderRenamed3 = ( "embl-AF332604-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
                        "(At4g29080)_mRNA-_complete_cds." )
    expDictIdxRenamed = { sHeaderRenamed1 : 0, sHeaderRenamed2 : 1, sHeaderRenamed3 : 2 }

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData( [ Bioseq( sHeader1, "ATTG" ), Bioseq( sHeader2, "ATTG" ) ] )
    obsBioseqDB.add( Bioseq( sHeader3, "ATTG" ) )

    self.assertEqual( expDictIdx, obsBioseqDB.idx )
    self.assertEqual( expDictIdxRenamed, obsBioseqDB.idx_renamed )
433 | |
434 | |
def test__getitem__(self):
    """Indexing the database with 1 returns the second Bioseq that was stored."""
    iBioseqDB = BioseqDB()
    iBioseqDB.setData( [ Bioseq("seq1","ATTG"), Bioseq("seq2","CGAAT") ] )

    expBioseq = Bioseq("seq2","CGAAT")
    obsBioseq = iBioseqDB[1]

    self.assertEqual( expBioseq, obsBioseq )
444 | |
445 | |
def test_getSize(self):
    """getSize() returns the number of Bioseq objects held (4 here)."""
    expSize = 4

    lBioseq = [ Bioseq( "seq1", "ATTG" ),
                Bioseq( "seq2", "CGAAT" ),
                Bioseq( "seq3", "AT-----TG" ),
                Bioseq( "seq4", "CGAA----T" ) ]
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData( lBioseq )

    self.assertEqual( expSize, obsBioseqDB.getSize() )
459 | |
460 | |
def test_getSize_emptyDB(self):
    """getSize() returns 0 for a freshly constructed database."""
    expSize = 0

    obsBioseqDB = BioseqDB()

    self.assertEqual( expSize, obsBioseqDB.getSize() )
468 | |
469 | |
def test_getLength(self):
    """getLength() returns the cumulated length of all sequences (163 for these two)."""
    iBioseq1 = Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq2 = Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseqDB = BioseqDB()
    iBioseqDB.setData( [ iBioseq1, iBioseq2 ] )

    expLength = 163

    self.assertEqual( expLength, iBioseqDB.getLength() )
480 | |
def test_getListOfSequencesLength(self):
    """getListOfSequencesLength() returns each sequence length, in storage order ([75, 88] here)."""
    iBioseq1 = Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq2 = Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseqDB = BioseqDB()
    iBioseqDB.setData( [ iBioseq1, iBioseq2 ] )

    expLLength = [75, 88]
    obsLLength = iBioseqDB.getListOfSequencesLength()

    self.assertEqual( expLLength, obsLLength )
490 | |
491 | |
def test_getHeaderList( self ):
    """getHeaderList() returns the headers of the stored sequences, in storage order."""
    lExpHeader = ["seq1", "seq2"]

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData( [ Bioseq( "seq1", "ATTG" ), Bioseq( "seq2", "CGAAT" ) ] )

    lObsHeader = obsBioseqDB.getHeaderList()

    self.assertEqual( lExpHeader, lObsHeader )
504 | |
505 | |
def test_getSequencesList( self ):
    """getSequencesList() returns the stored sequences as strings, in storage order."""
    lExpSeqs = ["ATGC", "AATTCCGG"]

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData( [ Bioseq("seq1", "ATGC"), Bioseq("seq2", "AATTCCGG") ] )

    lObsSeqs = obsBioseqDB.getSequencesList()

    self.assertEqual( lExpSeqs, lObsSeqs )
518 | |
519 | |
def test_fetch( self ):
    """fetch() returns the Bioseq whose header matches the requested one."""
    ibioseq1 = Bioseq( "seq1", "ATTG" )
    ibioseq2 = Bioseq( "seq2", "CGAAT" )
    iBioseqDB = BioseqDB()
    iBioseqDB.setData( [ ibioseq1, ibioseq2 ] )

    # the expected value is the very object that was stored in the database
    expBioseq = ibioseq1
    obsBioseq = iBioseqDB.fetch( "seq1" )

    self.assertEqual( expBioseq, obsBioseq )
528 | |
529 | |
def test_getBioseqByRenamedHeader( self ):
    """getBioseqByRenamedHeader() returns the Bioseq whose renamed header matches the requested one."""
    Header1 = ( "embl::AF332402:AF332402 Arabidopsis thaliana clone C00024 (f)"
                "(At4g29080) mRNA, complete cds." )
    Header2 = ( "embl::AF332503:AF332402 Arabidopsis thaliana clone C00024 (f)"
                "(At4g29080) mRNA, complete cds." )
    Header3 = ( "embl::AF332604:AF332402 Arabidopsis thaliana clone C00024 (f)"
                "(At4g29080) mRNA, complete cds." )

    # renamed form of Header2, used for the lookup
    HeaderRenamed2 = ( "embl-AF332503-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
                       "(At4g29080)_mRNA-_complete_cds." )

    ibioseq1 = Bioseq( Header1, "ATTG" )
    ibioseq2 = Bioseq( Header2, "CGAAT" )
    ibioseq3 = Bioseq( Header3, "TGCGAAT" )
    iBioseqDB = BioseqDB()
    iBioseqDB.setData( [ ibioseq1, ibioseq2, ibioseq3 ] )

    expBioseq = ibioseq2
    obsBioseq = iBioseqDB.getBioseqByRenamedHeader( HeaderRenamed2 )

    self.assertEqual( expBioseq, obsBioseq )
553 | |
554 | |
def test_init_with_the_parm_name( self ):
    """Constructing a BioseqDB with a FASTA file name loads the records of that file."""
    expBioseqDB = BioseqDB()
    expBioseqDB.setData( [ Bioseq("seq1","ATTG"), Bioseq("seq2","CGAAT") ] )

    fastaFilename = "dummyBioseqDB.fa"
    with open(fastaFilename, "w") as f:
        f.write(">seq1\n")
        f.write("ATTG\n")
        f.write(">seq2\n")
        f.write("CGAAT\n")

    try:
        obsBioseqDB = BioseqDB(fastaFilename)
    finally:
        # remove the fixture even when the constructor raises
        os.remove(fastaFilename)

    self.assertEqual( expBioseqDB, obsBioseqDB )
571 | |
572 | |
def test_countNt(self):
    """countNt('N') returns the number of 'N' over all sequences (3 per sequence, 6 in total)."""
    sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    iBioseq1 = Bioseq()
    iBioseq1.header = "seq1 description1"
    iBioseq1.sequence = sequence
    iBioseq2 = Bioseq()
    iBioseq2.header = "seq2 description2"
    iBioseq2.sequence = sequence
    iBioseqDB = BioseqDB()
    iBioseqDB.setData( [ iBioseq1, iBioseq2 ] )

    expCount = 6
    obsCount = iBioseqDB.countNt('N')

    self.assertEqual( expCount, obsCount )
585 | |
def test_countNt_lowercase(self):
    """countNt('N') is case-sensitive: lowercase 'n' characters are not counted."""
    sequence = "gcgncgctgctttattaagcgctagcatgcgncgctgctttattaagcgctagcgattatatagcagacgcatattatattgcgcgatgcgncgctgctttattaagcgctagcgattatatagcagacgcatattatattgcgcg"
    iBioseq1 = Bioseq()
    iBioseq1.header = "seq1 description1"
    iBioseq1.sequence = sequence
    iBioseq2 = Bioseq()
    iBioseq2.header = "seq2 description2"
    iBioseq2.sequence = sequence
    iBioseqDB = BioseqDB()
    iBioseqDB.setData( [ iBioseq1, iBioseq2 ] )

    expCount = 0
    obsCount = iBioseqDB.countNt('N')

    self.assertEqual( expCount, obsCount )
598 | |
599 | |
def test_countNt_withCharacterNotExisting(self):
    """countNt() returns 0 for a character ('W') that occurs in none of the sequences."""
    sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    iBioseq1 = Bioseq()
    iBioseq1.header = "seq1 description1"
    iBioseq1.sequence = sequence
    iBioseq2 = Bioseq()
    iBioseq2.header = "seq2 description2"
    iBioseq2.sequence = sequence
    iBioseqDB = BioseqDB()
    iBioseqDB.setData( [ iBioseq1, iBioseq2 ] )

    expCount = 0
    obsCount = iBioseqDB.countNt('W')

    self.assertEqual( expCount, obsCount )
612 | |
613 | |
def test_countAllNt(self):
    """countAllNt() returns a dict giving, for A, C, T, G and N, the count over all sequences."""
    sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    iBioseq1 = Bioseq()
    iBioseq1.header = "seq1 description1"
    iBioseq1.sequence = sequence
    iBioseq2 = Bioseq()
    iBioseq2.header = "seq2 description2"
    iBioseq2.sequence = sequence
    iBioseqDB = BioseqDB()
    iBioseqDB.setData( [ iBioseq1, iBioseq2 ] )

    dExpCount = {'A': 68, 'C': 62, 'T': 86, 'G': 70, 'N': 6}
    dObsCount = iBioseqDB.countAllNt()

    self.assertEqual( dExpCount, dObsCount )
628 | |
629 | |
def test_extractPart(self):
    """extractPart(1, 2) returns a database holding the second and third sequences.

    NOTE(review): the arguments look like (start index, number of sequences);
    confirm against BioseqDB.extractPart.
    """
    seq1 = "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT"
    seq2 = "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"
    seq3 = "GAGATGGCTCATGGAGTACCTGCCTTGCATGACTGCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT"
    seq4 = "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"

    iBioseqDB = BioseqDB()
    iBioseqDB.setData( [ Bioseq("consensus1", seq1),
                         Bioseq("consensus2", seq2),
                         Bioseq("consensus3", seq3),
                         Bioseq("consensus4", seq4) ] )

    expSubBioseqDB = BioseqDB()
    expSubBioseqDB.setData( [ Bioseq("consensus2", seq2),
                              Bioseq("consensus3", seq3) ] )

    obsSubBioseqDB = iBioseqDB.extractPart( 1, 2 )

    self.assertEqual( expSubBioseqDB, obsSubBioseqDB )
648 | |
649 | |
def test_bestLength(self):
    """bestLength(4) returns the four longest sequences, kept in their original order."""
    seq1 = "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC"
    seq2 = "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"
    seq3 = "GAGATGGCTCATGGAGTACC"
    seq4 = "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"
    seq5 = "TGCCTGAGATGGCTCATGGAGTACCTGCCT"
    seq6 = "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"
    seq7 = "TGCCTGATGGCTCATGGAGTACCTGCCT"

    iBioseqDB = BioseqDB()
    iBioseqDB.setData( [ Bioseq("consensus1", seq1),
                         Bioseq("consensus2", seq2),
                         Bioseq("consensus3", seq3),
                         Bioseq("consensus4", seq4),
                         Bioseq("consensus5", seq5),
                         Bioseq("consensus6", seq6),
                         Bioseq("consensus7", seq7) ] )

    expBioseqDB = BioseqDB()
    expBioseqDB.setData( [ Bioseq("consensus1", seq1),
                           Bioseq("consensus2", seq2),
                           Bioseq("consensus4", seq4),
                           Bioseq("consensus6", seq6) ] )

    obsBioseqDB = iBioseqDB.bestLength( 4 )

    self.assertEqual( expBioseqDB, obsBioseqDB )
672 | |
673 | |
def test_bestLength_with_a_none_sequence_include(self):
    """bestLength(3) on three records keeps them all, including the one whose sequence is None."""
    seq2 = "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"
    seq3 = "GAGATGGCTCATGGAGTACC"

    iBioseqDB = BioseqDB()
    iBioseqDB.setData( [ Bioseq("consensus1", None),
                         Bioseq("consensus2", seq2),
                         Bioseq("consensus3", seq3) ] )

    expBioseqDB = BioseqDB()
    expBioseqDB.setData( [ Bioseq("consensus1", None),
                           Bioseq("consensus2", seq2),
                           Bioseq("consensus3", seq3) ] )

    obsBioseqDB = iBioseqDB.bestLength( 3 )

    self.assertEqual( expBioseqDB, obsBioseqDB )
691 | |
692 | |
def test_bestLength_with_a_none_sequence_not_include(self):
    """bestLength(2) on three records drops the one whose sequence is None."""
    seq2 = "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"
    seq3 = "GAGATGGCTCATGGAGTACC"

    iBioseqDB = BioseqDB()
    iBioseqDB.setData( [ Bioseq("consensus1", None),
                         Bioseq("consensus2", seq2),
                         Bioseq("consensus3", seq3) ] )

    expBioseqDB = BioseqDB()
    expBioseqDB.setData( [ Bioseq("consensus2", seq2),
                           Bioseq("consensus3", seq3) ] )

    obsBioseqDB = iBioseqDB.bestLength( 2 )

    self.assertEqual( expBioseqDB, obsBioseqDB )
709 | |
710 | |
def test_bestLength_number_of_bioseq_requiered_gt_BioseqDB_size(self):
    """bestLength() asked for more sequences than the database holds returns all of them."""
    seq1 = "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC"
    seq2 = "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"
    seq3 = "GAGATGGCTCATGGAGTACC"

    iBioseqDB = BioseqDB()
    iBioseqDB.setData( [ Bioseq("consensus1", seq1),
                         Bioseq("consensus2", seq2),
                         Bioseq("consensus3", seq3) ] )

    expBioseqDB = BioseqDB()
    expBioseqDB.setData( [ Bioseq("consensus1", seq1),
                           Bioseq("consensus2", seq2),
                           Bioseq("consensus3", seq3) ] )

    obsBioseqDB = iBioseqDB.bestLength( 15 )

    self.assertEqual( expBioseqDB, obsBioseqDB )
728 | |
729 | |
def test_extractPatternOfFile(self):
    """extractPatternOfFile() loads from a FASTA file only the records whose header matches the pattern.

    With "consensus1+" the records consensus1 and consensus11 are expected
    (the pattern is presumably a regular expression -- confirm against BioseqDB).
    """
    fastaFilename = "dummyBioseqDB.fa"
    with open(fastaFilename, "w") as f:
        f.write(">consensus1\nGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC\n")
        f.write(">consensus2\nGAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC\n")
        f.write(">consensus3\nGAGATGGCTCATGGAGTACC\n")
        f.write(">consensus4\nGAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC\n")
        f.write(">consensus11\nTGCCTGAGATGGCTCATGGAGTACCTGCCTTGCCTTGCATGACTGCATGGAGTACCTGCCTGTGCCTGATGGCTCATGGAGTACCTGCCT\n")

    iBioseq1 = Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC")
    iBioseq2 = Bioseq("consensus11","TGCCTGAGATGGCTCATGGAGTACCTGCCTTGCCTTGCATGACTGCATGGAGTACCTGCCTGTGCCTGATGGCTCATGGAGTACCTGCCT")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData( [ iBioseq1, iBioseq2 ] )

    obsBioseqDB = BioseqDB()
    try:
        obsBioseqDB.extractPatternOfFile( "consensus1+", fastaFilename )
    finally:
        # remove the fixture even when the extraction raises
        os.remove(fastaFilename)

    self.assertEqual( expBioseqDB, obsBioseqDB )
749 | |
750 | |
def test_extractPatternOfFile_WithNoExistingPattern(self):
    """extractPatternOfFile() with a pattern matching no header leaves the database empty."""
    fastaFilename = "dummyBioseqDB.fa"
    with open(fastaFilename, "w") as f:
        f.write(">consensus1\nGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC\n")
        f.write(">consensus2\nGAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC\n")
        f.write(">consensus3\nGAGATGGCTCATGGAGTACC\n")
        f.write(">consensus4\nGAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC\n")
        f.write(">consensus11\nTGCCTGAGATGGCTCATGGAGTACCTGCCTTGCCTTGCATGACTGCATGGAGTACCTGCCTGTGCCTGATGGCTCATGGAGTACCTGCCT\n")

    expBioseqDB = BioseqDB()

    obsBioseqDB = BioseqDB()
    try:
        obsBioseqDB.extractPatternOfFile( "NoExistingPattern", fastaFilename )
    finally:
        # remove the fixture even when the extraction raises
        os.remove(fastaFilename)

    self.assertEqual( expBioseqDB, obsBioseqDB )
767 | |
768 | |
def test_getByPattern(self):
    """Expect getByPattern("consensus1+") to return a database holding only
    the 'consensus1' and 'consensus11' sequences, in their input order."""
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([
        Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
        Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
        Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
        Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT"),
    ])

    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
        Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT"),
    ])

    obsBioseqDB = iBioseqDB.getByPattern("consensus1+")

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expBioseqDB, obsBioseqDB)
784 | |
785 | |
def test_getByPattern_with_no_existing_pattern(self):
    """Expect getByPattern to return a database equal to an empty BioseqDB
    when the pattern matches none of the stored headers."""
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([
        Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
        Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
        Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
        Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT"),
    ])

    expBioseqDB = BioseqDB()

    obsBioseqDB = iBioseqDB.getByPattern("noExistingPattern+")

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expBioseqDB, obsBioseqDB)
798 | |
799 | |
def test_getDiffFromPattern(self):
    """Expect getDiffFromPattern("consensus[4|6]") to return the sequences
    whose header does NOT match, i.e. 'consensus1' and 'consensus11'."""
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([
        Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
        Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
        Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
        Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT"),
    ])

    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
        Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT"),
    ])

    # NOTE(review): if the pattern is compiled as a regular expression, the
    # '|' inside the brackets is a literal character rather than alternation,
    # so '[4|6]' also accepts '|' -- confirm whether '[46]' was intended.
    obsBioseqDB = iBioseqDB.getDiffFromPattern("consensus[4|6]")

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expBioseqDB, obsBioseqDB)
816 | |
817 | |
def test_getDiffFromPattern_with_no_existing_pattern(self):
    """Expect getDiffFromPattern to return every stored sequence, in order,
    when the pattern matches none of the headers."""
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([
        Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
        Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
        Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
        Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT"),
    ])

    # Built from fresh Bioseq objects so the comparison cannot pass merely
    # because both databases share the same instances.
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
        Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
        Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
        Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT"),
    ])

    obsBioseqDB = iBioseqDB.getDiffFromPattern("noExistingPattern+")

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expBioseqDB, obsBioseqDB)
835 | |
836 | |
def test_rmByPattern(self):
    """Expect rmByPattern("consensus1+") to remove 'consensus1' and
    'consensus11' in place, leaving 'consensus4' and 'consensus6'."""
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([
        Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
        Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
        Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
        Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT"),
    ])

    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
        Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
    ])

    # rmByPattern is exercised for its effect on obsBioseqDB itself.
    obsBioseqDB.rmByPattern("consensus1+")

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expBioseqDB, obsBioseqDB)
852 | |
853 | |
def test_rmByPattern_with_no_existing_pattern(self):
    """Expect rmByPattern to leave the database unchanged when the pattern
    matches none of the stored headers."""
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([
        Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
        Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
        Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
        Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT"),
    ])

    # Fresh Bioseq objects: the expectation must not alias the data that
    # rmByPattern is allowed to mutate.
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
        Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
        Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
        Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT"),
    ])

    obsBioseqDB.rmByPattern("noExistingPattern+")

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expBioseqDB, obsBioseqDB)
870 | |
871 | |
def test_addBioseqFromABioseqDBIfHeaderContainPattern(self):
    """Expect only the 'consensus6' sequence of the input database to be
    appended when the pattern is "consensus.*" ('Sequence4' is skipped)."""
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([
        Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
        Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
        Bioseq("consensus7", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
        Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT"),
    ])

    # Source database: one header that matches the pattern, one that does not.
    inBioseqDB = BioseqDB()
    inBioseqDB.setData([
        Bioseq("Sequence4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
        Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
    ])

    # The four original sequences followed by the single matching addition.
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
        Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
        Bioseq("consensus7", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
        Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT"),
        Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
    ])

    obsBioseqDB.addBioseqFromABioseqDBIfHeaderContainPattern("consensus.*", inBioseqDB)

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expBioseqDB, obsBioseqDB)
896 | |
897 | |
def test_addBioseqFromABioseqDBIfHeaderContainPattern_with_no_existing_pattern(self):
    """Expect the target database to stay unchanged when no header of the
    input database matches the pattern."""
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([
        Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
        Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
        Bioseq("consensus7", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
        Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT"),
    ])

    inBioseqDB = BioseqDB()
    inBioseqDB.setData([
        Bioseq("Sequence4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
        Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
    ])

    # Same content as the initial target database, built from fresh objects.
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
        Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
        Bioseq("consensus7", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
        Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT"),
    ])

    obsBioseqDB.addBioseqFromABioseqDBIfHeaderContainPattern("noExistingPattern", inBioseqDB)

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expBioseqDB, obsBioseqDB)
921 | |
922 | |
def test_upCase(self):
    """Expect upCase() to convert every stored sequence to upper case in
    place while leaving the headers untouched."""
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([
        Bioseq("consensus4", "atgacGatgca"),
        Bioseq("consensus1", "atgcgaT"),
    ])

    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("consensus4", "ATGACGATGCA"),
        Bioseq("consensus1", "ATGCGAT"),
    ])

    obsBioseqDB.upCase()

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expBioseqDB, obsBioseqDB)
934 | |
935 | |
def test_getMap(self):
    """Expect getDictOfLMapsWithoutGaps() to return, per header, one Map for
    each gap-free stretch of the aligned sequence.

    The expected coordinates are 1-based and inclusive, and sub-sequences
    are named '<header>_subSeq<rank>'.
    """
    iAlignedBioseqDB = BioseqDB()
    iAlignedBioseqDB.setData([
        Bioseq("header1", "ATGC-RA-GCT"),
        Bioseq("header2", "-TGC-RA-GCT"),  # leading gap: first stretch starts at 2
        Bioseq("header3", "ATGC-RA-GC-"),  # trailing gap: last stretch ends at 10
    ])

    obsDict = iAlignedBioseqDB.getDictOfLMapsWithoutGaps()

    expDict = {
        "header1": [
            Map("header1_subSeq1", "header1", 1, 4),
            Map("header1_subSeq2", "header1", 6, 7),
            Map("header1_subSeq3", "header1", 9, 11),
        ],
        "header2": [
            Map("header2_subSeq1", "header2", 2, 4),
            Map("header2_subSeq2", "header2", 6, 7),
            Map("header2_subSeq3", "header2", 9, 11),
        ],
        "header3": [
            Map("header3_subSeq1", "header3", 1, 4),
            Map("header3_subSeq2", "header3", 6, 7),
            Map("header3_subSeq3", "header3", 9, 10),
        ],
    }

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expDict, obsDict)
957 | |
def test_getSeqLengthByListOfName(self):
    """Expect getSeqLengthByListOfName to return the lengths of the named
    sequences in the order requested; gap characters count toward length."""
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([
        Bioseq("header1", "ATGC-RA-GCT"),
        Bioseq("header2", "-TGC-RAR"),  # present in the DB but not requested
        Bioseq("header3", "ATGC"),
    ])

    expList = [11, 4]
    obsList = iBioseqDB.getSeqLengthByListOfName(["header1", "header3"])

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expList, obsList)
970 | |
# Module-level suite gathering every test method of Test_BioseqDB; it is kept
# as a module attribute so other runners can import and aggregate it.
# TestLoader.loadTestsFromTestCase replaces unittest.makeSuite, which is
# deprecated and no longer available in recent Python releases.
test_suite = unittest.TestSuite()
test_suite.addTest(unittest.TestLoader().loadTestsFromTestCase(Test_BioseqDB))
if __name__ == "__main__":
    unittest.TextTestRunner(verbosity=2).run(test_suite)