Mercurial > repos > yufei-luo > s_mart
comparison smart_toolShed/commons/core/seq/test/Test_Bioseq.py @ 0:e0f8dcca02ed
Uploaded S-MART tool. A toolbox that manages RNA-Seq and ChIP-Seq data.
author | yufei-luo |
---|---|
date | Thu, 17 Jan 2013 10:52:14 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e0f8dcca02ed |
---|---|
1 # Copyright INRA (Institut National de la Recherche Agronomique) | |
2 # http://www.inra.fr | |
3 # http://urgi.versailles.inra.fr | |
4 # | |
5 # This software is governed by the CeCILL license under French law and | |
6 # abiding by the rules of distribution of free software. You can use, | |
7 # modify and/ or redistribute the software under the terms of the CeCILL | |
8 # license as circulated by CEA, CNRS and INRIA at the following URL | |
9 # "http://www.cecill.info". | |
10 # | |
11 # As a counterpart to the access to the source code and rights to copy, | |
12 # modify and redistribute granted by the license, users are provided only | |
13 # with a limited warranty and the software's author, the holder of the | |
14 # economic rights, and the successive licensors have only limited | |
15 # liability. | |
16 # | |
17 # In this respect, the user's attention is drawn to the risks associated | |
18 # with loading, using, modifying and/or developing or reproducing the | |
19 # software by the user in light of its specific status of free software, | |
20 # that may mean that it is complicated to manipulate, and that also | |
21 # therefore means that it is reserved for developers and experienced | |
22 # professionals having in-depth computer knowledge. Users are therefore | |
23 # encouraged to load and test the software's suitability as regards their | |
24 # requirements in conditions enabling the security of their systems and/or | |
25 # data to be ensured and, more generally, to use and operate it in the | |
26 # same conditions as regards security. | |
27 # | |
28 # The fact that you are presently reading this means that you have had | |
29 # knowledge of the CeCILL license and that you accept its terms. | |
30 | |
31 | |
32 import unittest | |
33 import os | |
34 import sys | |
35 from commons.core.seq.Bioseq import Bioseq | |
36 from commons.core.utils.FileUtils import FileUtils | |
37 from commons.core.coord.Map import Map | |
38 | |
39 | |
40 class Test_Bioseq( unittest.TestCase ): | |
41 | |
def setUp(self):
    # Fresh, argument-less Bioseq shared by the tests that use self._bs.
    self._bs = Bioseq()
44 | |
45 | |
def test_isEmpty_True(self):
    # With both header and sequence set to "", isEmpty() is expected to be True.
    self._bs.setHeader("")
    self._bs.setSequence("")
    exp = True
    obs = self._bs.isEmpty()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(exp, obs)
52 | |
53 | |
def test_isEmpty_False(self):
    # With a header and a sequence set, isEmpty() is expected to be False.
    self._bs.setHeader("seq1")
    self._bs.setSequence("AGCGGACGATGCAGCATGCGAATGACGAT")
    exp = False
    obs = self._bs.isEmpty()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(exp, obs)
60 | |
61 | |
def test___eq__(self):
    # Two Bioseq built with the same header and sequence must compare equal.
    self._bs.setHeader("seq1")
    self._bs.setSequence("AGCGGACGATGCAGCATGCGAATGACGAT")
    obs = Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT")
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(self._bs, obs)
67 | |
68 | |
def test___ne__Header(self):
    # Same sequence but different headers ("seq2" vs "seq1"): must not be equal.
    self._bs.setHeader("seq2")
    self._bs.setSequence("AGCGGACGATGCAGCATGCGAATGACGAT")
    obs = Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT")
    # assertNotEqual replaces the deprecated assertNotEquals alias.
    self.assertNotEqual(self._bs, obs)
74 | |
75 | |
def test___ne__Sequence(self):
    # Same header but different sequences: must not be equal.
    self._bs.setHeader("seq1")
    self._bs.setSequence("GGACGATGCAGCATGCGAATGACGAT")
    obs = Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT")
    # assertNotEqual replaces the deprecated assertNotEquals alias.
    self.assertNotEqual(self._bs, obs)
81 | |
82 | |
def test_reverse(self):
    # reverse() works in place: "TGCGGA" must become "AGGCGT".
    seq = self._bs
    seq.setHeader("seq1")
    seq.setSequence("TGCGGA")
    seq.reverse()
    self.assertEqual(seq.sequence, "AGGCGT")
90 | |
91 | |
def test_complement(self):
    # complement() works in place: "TGCGGA" must become "ACGCCT".
    seq = self._bs
    seq.setHeader("seq1")
    seq.setSequence("TGCGGA")
    seq.complement()
    self.assertEqual(seq.sequence, "ACGCCT")
99 | |
100 | |
def test_complement_with_unknown_symbol(self):
    # The input mixes A/C/G/T with other letters (including "F"); the
    # expected complement below has "N" at the position of "F".
    seq = self._bs
    seq.setHeader("seq1")
    seq.setSequence("TGCGGAFMRWTYSKVHDBN")
    seq.complement()
    self.assertEqual(seq.sequence, "ACGCCTNKYWARSMBDHVN")
108 | |
109 | |
def test_reverseComplement(self):
    # reverseComplement() works in place: "TGCGGA" must become "TCCGCA".
    seq = self._bs
    seq.setHeader("seq1")
    seq.setSequence("TGCGGA")
    seq.reverseComplement()
    self.assertEqual(seq.sequence, "TCCGCA")
117 | |
118 | |
def test_cleanGap(self):
    # cleanGap() must strip every "-" from the sequence, in place.
    self._bs.setSequence("-ATTTTGC-AGTC--TTATTCGAG-----GCCATTGCT-")
    exp = "ATTTTGCAGTCTTATTCGAGGCCATTGCT"
    self._bs.cleanGap()
    obs = self._bs.sequence
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(obs, exp)
125 | |
126 | |
def test_copyBioseqInstance(self):
    # The object returned by copyBioseqInstance() must compare equal to its source.
    self._bs.setHeader("seq")
    self._bs.setSequence("TGCGGA")
    obsBioseq = self._bs.copyBioseqInstance()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(self._bs, obsBioseq)
132 | |
133 | |
def test_setFrameInfoOnHeader_without_description(self):
    # Header "seq" with phase -1 is expected to become "seq_-1".
    self._bs.setHeader("seq")
    self._bs.setSequence("TGCGGA")
    phase = -1
    expHeader = "seq_-1"
    self._bs.setFrameInfoOnHeader(phase)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expHeader, self._bs.header)
141 | |
142 | |
def test_setFrameInfoOnHeader_with_description(self):
    # The phase suffix is expected right after the first word of the header:
    # "seq description" with phase -1 becomes "seq_-1 description".
    self._bs.setHeader("seq description")
    self._bs.setSequence("TGCGGA")
    phase = -1
    expHeader = "seq_-1 description"
    self._bs.setFrameInfoOnHeader(phase)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expHeader, self._bs.header)
150 | |
151 | |
def test_read(self):
    # read() on a FASTA record whose sequence spans two lines must yield the
    # header without ">" and the two lines concatenated.
    faFileName = "dummyFaFile.fa"
    with open(faFileName, "w") as faFile:
        faFile.write(">seq1 description1\n")
        faFile.write("ATGCGTCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
        faFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
    expBioseq = Bioseq()
    expBioseq.header = "seq1 description1"
    expBioseq.sequence = "ATGCGTCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    obsBioseq = Bioseq()
    try:
        with open(faFileName, "r") as faFile:
            obsBioseq.read(faFile)
    finally:
        # Remove the fixture even if read() raises.
        os.remove(faFileName)
    self.assertEqual(expBioseq, obsBioseq)
167 | |
168 | |
def test_read_WithEmptyFile(self):
    # Reading an empty file is expected to leave header and sequence as None.
    faFileName = "dummyFaFile.fa"
    with open(faFileName, "w"):
        pass  # create an empty file
    expBioseq = Bioseq()
    expBioseq.header = None
    expBioseq.sequence = None
    obsBioseq = Bioseq()
    try:
        with open(faFileName, "r") as faFile:
            obsBioseq.read(faFile)
    finally:
        # Remove the fixture even if read() raises.
        os.remove(faFileName)
    self.assertEqual(expBioseq, obsBioseq)
181 | |
182 | |
def test_read_without_header(self):
    # The file has no ">" line; read() is expected to give an empty header
    # and an empty sequence.
    faFileName = "dummyFaFile.fa"
    with open(faFileName, "w") as faFile:
        faFile.write("seq1 description1\n")
        faFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
    expBioseq = Bioseq()
    expBioseq.header = ""
    expBioseq.sequence = ""
    obsBioseq = Bioseq()
    try:
        with open(faFileName, "r") as faFile:
            obsBioseq.read(faFile)
    finally:
        # Remove the fixture even if read() raises.
        os.remove(faFileName)
    self.assertEqual(expBioseq, obsBioseq)
197 | |
198 | |
def test_read_with_two_consecutive_headers(self):
    # Two ">" lines in a row: read() is expected to keep the first header
    # with an empty sequence.
    faFileName = "dummyFaFile.fa"
    with open(faFileName, "w") as faFile:
        faFile.write(">seq1 description1\n")
        faFile.write(">ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
    expBioseq = Bioseq()
    expBioseq.header = "seq1 description1"
    expBioseq.sequence = ""
    obsBioseq = Bioseq()
    try:
        with open(faFileName, "r") as faFile:
            obsBioseq.read(faFile)
    finally:
        # Remove the fixture even if read() raises.
        os.remove(faFileName)
    self.assertEqual(expBioseq, obsBioseq)
213 | |
214 | |
def test_read_withEmptyLines(self):
    # Blank lines before the header and after the sequence must not change
    # the record that read() returns.
    faFileName = "dummyFaFile.fa"
    with open(faFileName, "w") as faFile:
        faFile.write("\n")
        faFile.write(">seq1 description1\n")
        faFile.write("ATGCGTCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
        faFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
        faFile.write("\n")

    exp = Bioseq("seq1 description1", "ATGCGTCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG")

    obs = Bioseq()
    try:
        with open(faFileName, "r") as faFile:
            obs.read(faFile)
    finally:
        # Remove the fixture even if read() raises.
        os.remove(faFileName)

    self.assertEqual(exp, obs)
234 | |
def test_read_with_70nt_by_line(self):
    # Same as the blank-line case, but with sequence lines of 70 nt.
    faFileName = "dummyFaFile.fa"
    with open(faFileName, "w") as faFile:
        faFile.write("\n")
        faFile.write(">seq1 description1\n")
        faFile.write("TGTCACATCCTGATTTTCGTTTCAGGATTTATAAATTATTTAATAAATTAATAATAGAATTTATATTAAA\n")
        faFile.write("TGTTTTTTAATTTACAAGTGAAGTTAAATGTGGGAAATAAAATTTCTTAAATCTAAAGCATGGATGGATT\n")
        faFile.write("\n")

    exp = Bioseq("seq1 description1", "TGTCACATCCTGATTTTCGTTTCAGGATTTATAAATTATTTAATAAATTAATAATAGAATTTATATTAAATGTTTTTTAATTTACAAGTGAAGTTAAATGTGGGAAATAAAATTTCTTAAATCTAAAGCATGGATGGATT")

    obs = Bioseq()
    try:
        with open(faFileName, "r") as faFile:
            obs.read(faFile)
    finally:
        # Remove the fixture even if read() raises.
        os.remove(faFileName)

    self.assertEqual(exp, obs)
254 | |
def test_appendBioseqInFile(self):
    # appendBioseqInFile() must add the record after the one already in the
    # file; the expected file spells the sequence out in lines of 60 nt.
    obsFaFileName = "dummyFaFile.fa"
    expFaFileName = "dummyFaFile2.fa"
    with open(obsFaFileName, "w") as obsFaFile:
        obsFaFile.write(">seq1 description1\n")
        obsFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")

    bioseq = Bioseq()
    bioseq.header = "seq2 description2"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    with open(expFaFileName, "w") as expFaFile:
        expFaFile.write(">seq1 description1\n")
        expFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
        expFaFile.write(">seq2 description2\n")
        expFaFile.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
        expFaFile.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
        expFaFile.write("ATAGCAGACGCATATTATATTGCGCG\n")

    try:
        bioseq.appendBioseqInFile(obsFaFileName)
        self.assertTrue(FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName))
    finally:
        # Clean up even when the comparison fails.
        os.remove(obsFaFileName)
        os.remove(expFaFileName)
280 | |
281 | |
def test_writeABioseqInAFastaFile(self):
    # writeABioseqInAFastaFile() receives an open handle that already holds
    # one record and must write the second record after it.
    obsFaFileName = "dummyFaFile.fa"
    expFaFileName = "dummyFaFile2.fa"

    bioseq = Bioseq()
    bioseq.header = "seq2 description2"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    with open(expFaFileName, "w") as expFaFile:
        expFaFile.write(">seq1 description1\n")
        expFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
        expFaFile.write(">seq2 description2\n")
        expFaFile.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
        expFaFile.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
        expFaFile.write("ATAGCAGACGCATATTATATTGCGCG\n")

    try:
        with open(obsFaFileName, "w") as obsFaFile:
            obsFaFile.write(">seq1 description1\n")
            obsFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
            bioseq.writeABioseqInAFastaFile(obsFaFile)
        self.assertTrue(FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName))
    finally:
        # The original never removed the expected file; remove both, even on failure.
        if os.path.exists(obsFaFileName):
            os.remove(obsFaFileName)
        os.remove(expFaFileName)
306 | |
307 | |
def test_writeABioseqInAFastaFileWithOtherHeader(self):
    # The record must be written under the header passed as argument
    # ("seq2 New header2"), not under bioseq.header.
    obsFaFileName = "dummyFaFile.fa"
    expFaFileName = "dummyFaFile2.fa"
    newHeader = "seq2 New header2"

    bioseq = Bioseq()
    bioseq.header = "seq2 description2"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    with open(expFaFileName, "w") as expFaFile:
        expFaFile.write(">seq1 description1\n")
        expFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
        expFaFile.write(">" + newHeader + "\n")
        expFaFile.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
        expFaFile.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
        expFaFile.write("ATAGCAGACGCATATTATATTGCGCG\n")

    try:
        with open(obsFaFileName, "w") as obsFaFile:
            obsFaFile.write(">seq1 description1\n")
            obsFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
            bioseq.writeABioseqInAFastaFileWithOtherHeader(obsFaFile, newHeader)
        self.assertTrue(FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName))
    finally:
        # Clean up even when the comparison fails.
        if os.path.exists(obsFaFileName):
            os.remove(obsFaFileName)
        os.remove(expFaFileName)
334 | |
335 | |
def test_writeSeqInFasta(self):
    # writeSeqInFasta() must write only the sequence (no header line); the
    # expected file holds it in lines of 60 nt.
    iBs = Bioseq()
    iBs.header = "dummySeq"
    iBs.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    expFaFile = "dummyExpFile.fa"
    obsFaFile = "dummyObsFile.fa"
    with open(expFaFile, "w") as expFaFileHandler:
        expFaFileHandler.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
        expFaFileHandler.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
        expFaFileHandler.write("ATAGCAGACGCATATTATATTGCGCG\n")

    try:
        with open(obsFaFile, "w") as obsFaFileHandler:
            iBs.writeSeqInFasta(obsFaFileHandler)
        self.assertTrue(FileUtils.are2FilesIdentical(expFaFile, obsFaFile))
    finally:
        # Clean up even when the comparison fails.
        if os.path.exists(obsFaFile):
            os.remove(obsFaFile)
        os.remove(expFaFile)
358 | |
359 | |
def test_subseq(self):
    # subseq(10, 30) is expected to return sequence[9:30] (1-based, inclusive
    # coordinates) with " fragment 10..30" appended to the header.
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    start = 10
    end = 30
    expSubBioseq = Bioseq()
    expSubBioseq.header = "seq1 description1 fragment " + str(start) + ".." + str(end)
    expSubBioseq.sequence = bioseq.sequence[(start - 1):end]
    obsBioseq = bioseq.subseq(start, end)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSubBioseq, obsBioseq)
371 | |
372 | |
def test_subseq_no_end(self):
    # Without an end argument, subseq(10) is expected to run to the end of
    # the sequence, and the header must carry the sequence length as end.
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    start = 10
    expSubBioseq = Bioseq()
    expEnd = len(bioseq.sequence)
    expSubBioseq.header = "seq1 description1 fragment " + str(start) + ".." + str(expEnd)
    expSubBioseq.sequence = bioseq.sequence[(start - 1):expEnd]
    obsBioseq = bioseq.subseq(start)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSubBioseq, obsBioseq)
384 | |
385 | |
def test_subseq_start_gt_end(self):
    # start (30) greater than end (10): subseq() is expected to return None.
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    start = 30
    end = 10
    expSubBioseq = None
    obsBioseq = bioseq.subseq(start, end)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSubBioseq, obsBioseq)
395 | |
396 | |
def test_subseq_start_eq_end(self):
    # start == end (10): subseq() is expected to return the single
    # nucleotide sequence[9:10] with header suffix " fragment 10..10".
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    start = 10
    end = 10
    expSubBioseq = Bioseq()
    expSubBioseq.header = "seq1 description1 fragment " + str(start) + ".." + str(end)
    expSubBioseq.sequence = bioseq.sequence[(start - 1):end]
    obsBioseq = bioseq.subseq(start, end)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSubBioseq, obsBioseq)
408 | |
409 | |
def test_subseq_negative_start(self):
    # A negative start (-10): subseq() is expected to return None.
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    start = -10
    end = 10
    expSubBioseq = None
    obsBioseq = bioseq.subseq(start, end)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSubBioseq, obsBioseq)
419 | |
420 | |
def test_getNtFromPosition_1(self):
    # Position 1 is expected to be the first nucleotide, "G".
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expNt = "G"
    obsNt = bioseq.getNtFromPosition(1)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNt, obsNt)
428 | |
429 | |
def test_getNtFromPosition_10(self):
    # Position 10 is expected to be "C".
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expNt = "C"
    obsNt = bioseq.getNtFromPosition(10)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNt, obsNt)
437 | |
438 | |
def test_getNtFromPosition_last(self):
    # Position 146 (the last one, per the test name) is expected to be "G".
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expNt = "G"
    obsNt = bioseq.getNtFromPosition(146)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNt, obsNt)
446 | |
447 | |
def test_getNtFromPosition_position_outside_range_0(self):
    # Position 0 is treated as out of range: None is expected.
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expNt = None
    obsNt = bioseq.getNtFromPosition(0)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNt, obsNt)
455 | |
456 | |
def test_getNtFromPosition_position_outside_range_negative(self):
    # A negative position (-10) is out of range: None is expected.
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expNt = None
    obsNt = bioseq.getNtFromPosition(-10)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNt, obsNt)
464 | |
465 | |
def test_getNtFromPosition_position_outside_range_positive(self):
    # Position 147 is out of range (per the test name): None is expected.
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expNt = None
    obsNt = bioseq.getNtFromPosition(147)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNt, obsNt)
473 | |
474 | |
def test_view(self):
    # view() prints to stdout; stdout is redirected to a file which is then
    # compared with the expected FASTA output (lines of 60 nt).
    obsFileName = "obsdummy_Bioseq_view"
    expFileName = "expDummy_Bioseq_View"

    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    stdoutRef = sys.stdout
    try:
        with open(expFileName, "w") as expFile:
            expFile.write(">seq1 description1\n")
            expFile.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFile.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFile.write("ATAGCAGACGCATATTATATTGCGCG\n")

        with open(obsFileName, "w") as obsFile:
            sys.stdout = obsFile
            try:
                bioseq.view()
            finally:
                # Restore stdout before asserting, so a failure cannot leave
                # sys.stdout pointing at a closed file for the next tests.
                sys.stdout = stdoutRef

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up even when the comparison fails.
        for fileName in (obsFileName, expFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
500 | |
501 | |
def test_view_with_l(self):
    # view(120): the expected output holds only the first two lines of 60 nt
    # after the header, i.e. the first 120 nt of the sequence.
    obsFileName = "obsdummy_Bioseq_view"
    expFileName = "expDummy_Bioseq_View"
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    stdoutRef = sys.stdout
    try:
        with open(expFileName, "w") as expFile:
            expFile.write(">seq1 description1\n")
            expFile.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFile.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")

        with open(obsFileName, "w") as obsFile:
            sys.stdout = obsFile
            try:
                bioseq.view(120)
            finally:
                # Restore stdout before asserting, so a failure cannot leave
                # sys.stdout pointing at a closed file for the next tests.
                sys.stdout = stdoutRef

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up even when the comparison fails.
        for fileName in (obsFileName, expFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
522 | |
523 | |
def test_getLength(self):
    # getLength() with no argument is expected to return 146 for this sequence.
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expLength = 146
    obsLength = bioseq.getLength()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expLength, obsLength)
531 | |
532 | |
def test_getLength_empty_seq(self):
    # A Bioseq built without arguments is expected to have length 0.
    bioseq = Bioseq()
    expLength = 0
    obsLength = bioseq.getLength()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expLength, obsLength)
538 | |
539 | |
def test_getLength_WithoutN(self):
    # getLength(False) on a sequence containing runs of "N" is expected to
    # return 146 (presumably the count excluding N, per the test name).
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGANCGCTGCTTTATTAAGCGCTAGATGNNNNNNNNNNNNNNNCGACGCTGCATTTATTAAGCGCTAGCGATTATANNNNNNNNNTAGCAGACGCATATTATATTGCGCGATGCGACGCTGCTTTATTANAGCGCTAGCGNNATTATATAGCANGACGCATATTATATTGCGCG"
    expLength = 146
    obsLength = bioseq.getLength(False)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expLength, obsLength)
547 | |
548 | |
def test_getLength_WithoutN_empty_seq(self):
    # getLength(False) on a Bioseq built without arguments is expected to be 0.
    bioseq = Bioseq()
    expLength = 0
    obsLength = bioseq.getLength(False)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expLength, obsLength)
554 | |
555 | |
def test_countNt(self):
    # countNt('N') is expected to return 3 for this sequence.
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expCount = 3
    obsCount = bioseq.countNt('N')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expCount, obsCount)
563 | |
564 | |
def test_countNt_withCharacterNotExisting(self):
    # 'W' does not occur in the sequence: the expected count is 0.
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expCount = 0
    obsCount = bioseq.countNt('W')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expCount, obsCount)
572 | |
573 | |
def test_countAllNt(self):
    # countAllNt() is expected to return a dict of counts for A, C, T, G and N.
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpCount = {'A': 34, 'C': 31, 'T': 43, 'G': 35, 'N': 3}
    dObsCount = bioseq.countAllNt()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpCount, dObsCount)
581 | |
582 | |
def test_occ_word_size_1(self):
    # occ_word(1) returns (occurrences per word, total number of words); the
    # expected values have no 'N' key and a total of 143 (146 minus 3 N).
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpOccWord = {'A': 34, 'C': 31, 'T': 43, 'G': 35}
    ExpNbWord = 143
    dObsOccWord, ObsNbWord = bioseq.occ_word(1)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpOccWord, dObsOccWord)
    self.assertEqual(ExpNbWord, ObsNbWord)
592 | |
593 | |
def test_occ_word_size_0(self):
    # A word size of 0 is expected to give an empty dict and a word count of 0.
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpOccWord = {}
    ExpNbWord = 0
    dObsOccWord, ObsNbWord = bioseq.occ_word(0)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpOccWord, dObsOccWord)
    self.assertEqual(ExpNbWord, ObsNbWord)
603 | |
604 | |
def test_occ_word_size_n(self):
    # occ_word(3): the expected dict lists every 3-letter word over A/C/G/T
    # (zero counts included) and the expected word total is 135.
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpOccWord = {'ACC': 0, 'ATG': 2, 'AAG': 3, 'AAA': 0, 'ATC': 0, 'AAC': 0, 'ATA': 8, 'AGG': 0, 'CCT': 0, 'CTC': 0, 'AGC': 8, 'ACA': 0, 'AGA': 2, 'CAT': 3, 'AAT': 0, 'ATT': 9, 'CTG': 3, 'CTA': 3, 'ACT': 0, 'CAC': 0, 'ACG': 2, 'CAA': 0, 'AGT': 0, 'CAG': 2, 'CCG': 0, 'CCC': 0, 'CTT': 3, 'TAT': 13, 'GGT': 0, 'TGT': 0, 'CGA': 3, 'CCA': 0, 'TCT': 0, 'GAT': 3, 'CGG': 0, 'TTT': 3, 'TGC': 7, 'GGG': 0, 'TAG': 5, 'GGA': 0, 'TAA': 3, 'GGC': 0, 'TAC': 0, 'TTC': 0, 'TCG': 0, 'TTA': 10, 'TTG': 2, 'TCC': 0, 'GAA': 0, 'TGG': 0, 'GCA': 5, 'GTA': 0, 'GCC': 0, 'GTC': 0, 'GCG': 12, 'GTG': 0, 'GAG': 0, 'GTT': 0, 'GCT': 9, 'TGA': 0, 'GAC': 2, 'CGT': 0, 'TCA': 0, 'CGC': 10}
    ExpNbWord = 135
    dObsOccWord, ObsNbWord = bioseq.occ_word(3)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpOccWord, dObsOccWord)
    self.assertEqual(ExpNbWord, ObsNbWord)
614 | |
615 | |
def test_freq_word_size_1(self):
    # freq_word(1): the expected frequencies are the single-letter counts
    # divided by 143 (e.g. 34/143 for 'A').
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpFreqWord = {'A': 0.23776223776223776, 'C': 0.21678321678321677, 'T': 0.30069930069930068, 'G': 0.24475524475524477}
    dObsFreqWord = bioseq.freq_word(1)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpFreqWord, dObsFreqWord)
623 | |
624 | |
def test_freq_word_size_0(self):
    # A word size of 0 is expected to give an empty frequency dict.
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpFreqWord = {}
    dObsFreqWord = bioseq.freq_word(0)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpFreqWord, dObsFreqWord)
632 | |
633 | |
def test_freq_word_size_n(self):
    # freq_word(3): the expected dict lists a frequency for every 3-letter
    # word over A/C/G/T, with 0.0 for words that do not occur.
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpFreqWord = {'ACC': 0.0, 'ATG': 0.014814814814814815, 'AAG': 0.022222222222222223, 'AAA': 0.0, 'ATC': 0.0, 'AAC': 0.0, 'ATA': 0.059259259259259262, 'AGG': 0.0, 'CCT': 0.0, 'CTC': 0.0, 'AGC': 0.059259259259259262, 'ACA': 0.0, 'AGA': 0.014814814814814815, 'CAT': 0.022222222222222223, 'AAT': 0.0, 'ATT': 0.066666666666666666, 'CTG': 0.022222222222222223, 'CTA': 0.022222222222222223, 'ACT': 0.0, 'CAC': 0.0, 'ACG': 0.014814814814814815, 'CAA': 0.0, 'AGT': 0.0, 'CAG': 0.014814814814814815, 'CCG': 0.0, 'CCC': 0.0, 'TAT': 0.096296296296296297, 'GGT': 0.0, 'TGT': 0.0, 'CGA': 0.022222222222222223, 'CCA': 0.0, 'TCT': 0.0, 'GAT': 0.022222222222222223, 'CGG': 0.0, 'CTT': 0.022222222222222223, 'TGC': 0.05185185185185185, 'GGG': 0.0, 'TAG': 0.037037037037037035, 'GGA': 0.0, 'TAA': 0.022222222222222223, 'GGC': 0.0, 'TAC': 0.0, 'TTC': 0.0, 'TCG': 0.0, 'TTT': 0.022222222222222223, 'TTG': 0.014814814814814815, 'TCC': 0.0, 'GAA': 0.0, 'TGG': 0.0, 'GCA': 0.037037037037037035, 'GTA': 0.0, 'GCC': 0.0, 'GTC': 0.0, 'TGA': 0.0, 'GCG': 0.088888888888888892, 'GTG': 0.0, 'GAG': 0.0, 'GTT': 0.0, 'GCT': 0.066666666666666666, 'TTA': 0.07407407407407407, 'GAC': 0.014814814814814815, 'CGT': 0.0, 'TCA': 0.0, 'CGC': 0.07407407407407407}
    dObsFreqWord = bioseq.freq_word(3)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpFreqWord, dObsFreqWord)
641 | |
642 | |
def test_findORF_no_ORF_in_sequence(self):
    # findORF() is compared with a dict keyed 0, 1, 2 (the three phases, per
    # the test names); here every list is expected to be empty.
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATT"
    expORF = {0: [], 1: [], 2: []}
    obsORF = bioseq.findORF()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, obsORF)
650 | |
651 | |
def test_findORF_one_ORF_in_first_phase(self):
    # The sequence starts with "TAA"; position 0 is expected under key 0.
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "TAAGCGNCGCTGCTTTATT"
    expORF = {0: [0], 1: [], 2: []}
    obsORF = bioseq.findORF()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, obsORF)
659 | |
660 | |
def test_findORF_three_ORF_in_first_phase(self):
    # Positions 0, 6 and 12 are expected under key 0; keys 1 and 2 stay empty.
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "TAAGCGTAGNCGTGACTGCTTTATT"
    expORF = {0: [0, 6, 12], 1: [], 2: []}
    obsORF = bioseq.findORF()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, obsORF)
668 | |
669 | |
def test_findORF_two_ORF_in_first_phase_one_ORF_in_second_phase(self):
    # Positions 0 and 12 are expected under key 0, position 4 under key 1.
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "TAAGTAGAGNCGTGACTGCTTTATT"
    expORF = {0: [0, 12], 1: [4], 2: []}
    obsORF = bioseq.findORF()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, obsORF)
677 | |
678 | |
def test_findORF_two_ORF_in_first_phase_three_ORF_in_second_phase(self):
    # Positions 0 and 12 are expected under key 0; 4, 16 and 19 under key 1.
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "TAAGTAGAGNCGTGACTGATAGTATT"
    expORF = {0: [0, 12], 1: [4, 16, 19], 2: []}
    obsORF = bioseq.findORF()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, obsORF)
686 | |
687 | |
def test_findORF_one_ORF_in_second_phase_three_ORF_in_third_phase(self):
    # Position 13 is expected under key 1; 5, 17 and 20 under key 2.
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "AATATTAGTGGAGTAGTTGATGATTTT"
    expORF = {0: [], 1: [13], 2: [5, 17, 20]}
    obsORF = bioseq.findORF()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, obsORF)
695 | |
696 | |
def test_findORF_three_ORF_in_second_phase_one_ORF_in_third_phase(self):
    # Positions 16, 19 and 25 are expected under key 1; position 2 under key 2.
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "TTTGAAGTGGAGGAGTTGATGATTTTAAT"
    expORF = {0: [], 1: [16, 19, 25], 2: [2]}
    obsORF = bioseq.findORF()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, obsORF)
704 | |
705 | |
def test_upCase(self):
    # upCase() must upper-case the sequence in place.
    bioseq = Bioseq()
    bioseq.header = "seq description"
    bioseq.sequence = "taattcggcct"
    expSeq = "TAATTCGGCCT"
    bioseq.upCase()
    obsSeq = bioseq.sequence
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSeq, obsSeq)
714 | |
715 | |
def test_lowCase(self):
    """lowCase() must convert an upper-case sequence to lower case in place."""
    bioseq = Bioseq()
    bioseq.header = "seq description"
    bioseq.sequence = "TAATTCGGCCT"
    expSeq = "taattcggcct"
    bioseq.lowCase()
    obsSeq = bioseq.sequence
    self.assertEqual(expSeq, obsSeq)
724 | |
725 | |
def test_getClusterID(self):
    """getClusterID() must return "0" for a header starting with "MbQ58Gr2Cl0"."""
    bioseq = Bioseq()
    bioseq.header = "MbQ58Gr2Cl0 chunk1 {Fragment} 74091..74624"
    bioseq.sequence = "TAATTCGGCCT"
    expID = "0"
    obsID = bioseq.getClusterID()
    self.assertEqual(expID, obsID)
733 | |
734 | |
def test_getGroupID(self):
    """getGroupID() must return "2" for a header starting with "MbQ58Gr2Cl0"."""
    bioseq = Bioseq()
    bioseq.header = "MbQ58Gr2Cl0 chunk1 {Fragment} 74091..74624"
    bioseq.sequence = "TAATTCGGCCT"
    expID = "2"
    obsID = bioseq.getGroupID()
    self.assertEqual(expID, obsID)
742 | |
743 | |
def test_getHeaderFullSeq(self):
    """getHeaderFullSeq() must return "chunk1", the second field of the header."""
    bioseq = Bioseq()
    bioseq.header = "MbQ58Gr2Cl0 chunk1 {Fragment} 74091..74624"
    bioseq.sequence = "TAATTCGGCCT"
    expHeader = "chunk1"
    obsHeader = bioseq.getHeaderFullSeq()
    self.assertEqual(expHeader, obsHeader)
751 | |
752 | |
def test_getFragStrand_plus_strand(self):
    """getFragStrand() must return '+' when the header range is ascending (74091..74624)."""
    bioseq = Bioseq()
    bioseq.header = "MbQ58Gr2Cl0 chunk1 {Fragment} 74091..74624"
    bioseq.sequence = "TAATTCGGCCT"
    expStrand = '+'
    obsStrand = bioseq.getFragStrand()
    self.assertEqual(expStrand, obsStrand)
760 | |
761 | |
def test_getFragStrand_minus_strand(self):
    """getFragStrand() must return '-' when the header range is descending (74624..74091)."""
    bioseq = Bioseq()
    bioseq.header = "MbQ58Gr2Cl0 chunk1 {Fragment} 74624..74091"
    bioseq.sequence = "TAATTCGGCCT"
    expStrand = '-'
    obsStrand = bioseq.getFragStrand()
    self.assertEqual(expStrand, obsStrand)
769 | |
770 | |
def test_getATGCNFromIUPAC_A(self):
    """getATGCNFromIUPAC('A') must return 'A' unchanged."""
    bioseq = Bioseq()
    expNucl = 'A'
    obsNucl = bioseq.getATGCNFromIUPAC('A')
    self.assertEqual(expNucl, obsNucl)
776 | |
777 | |
def test_getATGCNFromIUPAC_T(self):
    """getATGCNFromIUPAC('T') must return 'T' unchanged."""
    bioseq = Bioseq()
    expNucl = 'T'
    obsNucl = bioseq.getATGCNFromIUPAC('T')
    self.assertEqual(expNucl, obsNucl)
783 | |
784 | |
def test_getATGCNFromIUPAC_C(self):
    """getATGCNFromIUPAC('C') must return 'C' unchanged."""
    bioseq = Bioseq()
    expNucl = 'C'
    obsNucl = bioseq.getATGCNFromIUPAC('C')
    self.assertEqual(expNucl, obsNucl)
790 | |
791 | |
def test_getATGCNFromIUPAC_G(self):
    """getATGCNFromIUPAC('G') must return 'G' unchanged."""
    bioseq = Bioseq()
    expNucl = 'G'
    obsNucl = bioseq.getATGCNFromIUPAC('G')
    self.assertEqual(expNucl, obsNucl)
797 | |
798 | |
def test_getATGCNFromIUPAC_N(self):
    """getATGCNFromIUPAC('N') must return 'N' unchanged."""
    bioseq = Bioseq()
    expNucl = 'N'
    obsNucl = bioseq.getATGCNFromIUPAC('N')
    self.assertEqual(expNucl, obsNucl)
804 | |
805 | |
def test_getATGCNFromIUPAC_U(self):
    """getATGCNFromIUPAC('U') must return 'T' (uracil mapped to thymine)."""
    bioseq = Bioseq()
    expNucl = 'T'
    obsNucl = bioseq.getATGCNFromIUPAC('U')
    self.assertEqual(expNucl, obsNucl)
811 | |
812 | |
def test_getATGCNFromIUPAC_R(self):
    """getATGCNFromIUPAC('R') must resolve to either 'A' or 'G'."""
    iBioseq = Bioseq()
    allowed = ('A', 'G')
    resolved = iBioseq.getATGCNFromIUPAC('R')
    self.assertTrue(resolved in allowed)
819 | |
820 | |
def test_getATGCNFromIUPAC_Y(self):
    """getATGCNFromIUPAC('Y') must resolve to either 'C' or 'T'."""
    iBioseq = Bioseq()
    allowed = ('C', 'T')
    resolved = iBioseq.getATGCNFromIUPAC('Y')
    self.assertTrue(resolved in allowed)
827 | |
828 | |
def test_getATGCNFromIUPAC_M(self):
    """getATGCNFromIUPAC('M') must resolve to either 'C' or 'A'."""
    iBioseq = Bioseq()
    allowed = ('C', 'A')
    resolved = iBioseq.getATGCNFromIUPAC('M')
    self.assertTrue(resolved in allowed)
835 | |
836 | |
def test_getATGCNFromIUPAC_K(self):
    """getATGCNFromIUPAC('K') must resolve to either 'T' or 'G'."""
    iBioseq = Bioseq()
    allowed = ('T', 'G')
    resolved = iBioseq.getATGCNFromIUPAC('K')
    self.assertTrue(resolved in allowed)
843 | |
844 | |
def test_getATGCNFromIUPAC_W(self):
    """getATGCNFromIUPAC('W') must resolve to either 'T' or 'A'."""
    iBioseq = Bioseq()
    allowed = ('T', 'A')
    resolved = iBioseq.getATGCNFromIUPAC('W')
    self.assertTrue(resolved in allowed)
851 | |
852 | |
def test_getATGCNFromIUPAC_S(self):
    """getATGCNFromIUPAC('S') must resolve to either 'C' or 'G'."""
    iBioseq = Bioseq()
    allowed = ('C', 'G')
    resolved = iBioseq.getATGCNFromIUPAC('S')
    self.assertTrue(resolved in allowed)
859 | |
860 | |
def test_getATGCNFromIUPAC_B(self):
    """getATGCNFromIUPAC('B') must resolve to one of 'C', 'T' or 'G'."""
    iBioseq = Bioseq()
    allowed = ('C', 'T', 'G')
    resolved = iBioseq.getATGCNFromIUPAC('B')
    self.assertTrue(resolved in allowed)
868 | |
869 | |
def test_getATGCNFromIUPAC_D(self):
    """getATGCNFromIUPAC('D') must resolve to one of 'A', 'T' or 'G'."""
    iBioseq = Bioseq()
    allowed = ('A', 'T', 'G')
    resolved = iBioseq.getATGCNFromIUPAC('D')
    self.assertTrue(resolved in allowed)
877 | |
878 | |
def test_getATGCNFromIUPAC_H(self):
    """getATGCNFromIUPAC('H') must resolve to one of 'C', 'T' or 'A'."""
    iBioseq = Bioseq()
    allowed = ('C', 'T', 'A')
    resolved = iBioseq.getATGCNFromIUPAC('H')
    self.assertTrue(resolved in allowed)
886 | |
887 | |
def test_getATGCNFromIUPAC_V(self):
    """getATGCNFromIUPAC('V') must resolve to one of 'C', 'A' or 'G'."""
    iBioseq = Bioseq()
    allowed = ('C', 'A', 'G')
    resolved = iBioseq.getATGCNFromIUPAC('V')
    self.assertTrue(resolved in allowed)
895 | |
896 | |
def test_getATGCNFromIUPAC_Z(self):
    """getATGCNFromIUPAC('Z') must return 'N' ('Z' is not an IUPAC nucleotide code)."""
    bioseq = Bioseq()
    expNucl = 'N'
    obsNucl = bioseq.getATGCNFromIUPAC('Z')
    self.assertEqual(expNucl, obsNucl)
902 | |
903 | |
def test_partialIUPAC(self):
    """partialIUPAC() must replace the 'R' in the sequence by 'A' or 'G', keeping every 'N'."""
    iBioseq = Bioseq()
    iBioseq.sequence = "ATGCNRATGCN"
    acceptable = ("ATGCNAATGCN", "ATGCNGATGCN")
    iBioseq.partialIUPAC()
    rewritten = iBioseq.sequence
    self.assertTrue(rewritten in acceptable)
912 | |
913 | |
def test_checkEOF(self):
    """checkEOF() must strip every carriage return ("\\r") from the sequence."""
    bioseq = Bioseq()
    bioseq.sequence = "ATGCNRATGCN\rATGCAAT\rTATA\r"
    bioseq.checkEOF()
    obsSequence = bioseq.sequence
    expSequence = "ATGCNRATGCNATGCAATTATA"

    self.assertEqual(expSequence, obsSequence)
922 | |
923 | |
def test_getLMapWhithoutGap(self):
    """getLMapWhithoutGap() must return one Map per gap-free stretch.

    With gaps ("-") at 1-based positions 5 and 8, the expected stretches
    are 1-4, 6-7 and 9-11, named "<header>_subSeq<N>".
    """
    iBioseq = Bioseq()
    iBioseq.header = "header"
    iBioseq.sequence = "ATGC-RA-GCT"
    obsLMap = iBioseq.getLMapWhithoutGap()
    expLMap = [
        Map("header_subSeq1", "header", 1, 4),
        Map("header_subSeq2", "header", 6, 7),
        Map("header_subSeq3", "header", 9, 11),
    ]

    self.assertEqual(expLMap, obsLMap)
932 | |
933 | |
def test_getLMapWhithoutGap_seqStartsWithGap(self):
    """getLMapWhithoutGap() must skip a leading gap.

    With a gap at position 1, the first expected stretch starts at
    position 2 (stretches 2-4, 6-7 and 9-11).
    """
    iBioseq = Bioseq()
    iBioseq.header = "header"
    iBioseq.sequence = "-TGC-RA-GCT"
    obsLMap = iBioseq.getLMapWhithoutGap()
    expLMap = [
        Map("header_subSeq1", "header", 2, 4),
        Map("header_subSeq2", "header", 6, 7),
        Map("header_subSeq3", "header", 9, 11),
    ]

    self.assertEqual(expLMap, obsLMap)
942 | |
943 | |
def test_getLMapWhithoutGap_seqEndsWithGap(self):
    """getLMapWhithoutGap() must skip a trailing gap.

    With a gap at the last position (11), the last expected stretch ends
    at position 10 (stretches 1-4, 6-7 and 9-10).
    """
    iBioseq = Bioseq()
    iBioseq.header = "header"
    iBioseq.sequence = "ATGC-RA-GC-"
    obsLMap = iBioseq.getLMapWhithoutGap()
    expLMap = [
        Map("header_subSeq1", "header", 1, 4),
        Map("header_subSeq2", "header", 6, 7),
        Map("header_subSeq3", "header", 9, 10),
    ]

    self.assertEqual(expLMap, obsLMap)
952 | |
def test_getGCpercentage_onlyATGC(self):
    """getGCpercentage() must return 4/7 as a percentage for "TGCAGCT" (4 G/C out of 7 bases)."""
    iBs = Bioseq("seq", "TGCAGCT")
    exp = 100 * 4 / 7.0
    obs = iBs.getGCpercentage()
    # Compare with a tolerance: the value is a float whose last bits depend
    # on the order of the multiplication and division in the implementation.
    self.assertAlmostEqual(exp, obs)
958 | |
def test_getGCpercentageInSequenceWithoutCountNInLength(self):
    """The GC percentage must ignore 'N' bases in the length.

    "TGCAGCTNNNNN" holds 4 G/C among 7 non-N bases, so the expected
    value is 4/7 as a percentage.
    """
    iBs = Bioseq("seq", "TGCAGCTNNNNN")
    exp = 100 * 4 / 7.0
    obs = iBs.getGCpercentageInSequenceWithoutCountNInLength()
    # Compare with a tolerance: the value is a float whose last bits depend
    # on the order of the multiplication and division in the implementation.
    self.assertAlmostEqual(exp, obs)
964 | |
def test_get5PrimeFlank(self):
    """get5PrimeFlank(7, 3) must return "TTT", the 3 bases just before 1-based position 7."""
    bs = Bioseq("line1", "AACTTTCCAGAA")
    position = 7
    obsFlank = bs.get5PrimeFlank(position, 3)
    expFlank = "TTT"
    self.assertEqual(expFlank, obsFlank)
971 | |
def test_get5PrimeFlank_flank_length_truncated(self):
    """A 5' flank longer than the available prefix must be truncated.

    Asking for 15 bases before position 7 yields only the 6 bases that
    exist there ("AACTTT").
    """
    bs = Bioseq("line1", "AACTTTCCAGAA")
    position = 7
    obsFlank = bs.get5PrimeFlank(position, 15)
    expFlank = "AACTTT"
    self.assertEqual(expFlank, obsFlank)
978 | |
def test_get5PrimeFlank_flank_of_first_base(self):
    """The 5' flank of position 1 must be the empty string (nothing precedes it)."""
    bs = Bioseq("line1", "AACTTTCCAGAA")
    position = 1
    obsFlank = bs.get5PrimeFlank(position, 15)
    expFlank = ""
    self.assertEqual(expFlank, obsFlank)
985 | |
def test_get3PrimeFlank(self):
    """get3PrimeFlank(7, 3) must return "CAG", the 3 bases just after 1-based position 7."""
    bs = Bioseq("line1", "AACTTTCCAGAA")
    position = 7
    obsFlank = bs.get3PrimeFlank(position, 3)
    expFlank = "CAG"
    self.assertEqual(expFlank, obsFlank)
992 | |
def test_get3PrimeFlank_flank_length_truncated(self):
    """A 3' flank longer than the available suffix must be truncated.

    Asking for 15 bases after position 7 yields only the 5 bases that
    exist there ("CAGAA").
    """
    bs = Bioseq("line1", "AACTTTCCAGAA")
    position = 7
    obsFlank = bs.get3PrimeFlank(position, 15)
    expFlank = "CAGAA"
    self.assertEqual(expFlank, obsFlank)
999 | |
def test_get3PrimeFlank_flank_of_last_base(self):
    """The 3' flank of the last position (12) must be the empty string."""
    bs = Bioseq("line1", "AACTTTCCAGAA")
    position = 12
    obsFlank = bs.get3PrimeFlank(position, 15)
    expFlank = ""
    self.assertEqual(expFlank, obsFlank)
1006 | |
def test_get3PrimeFlank_polymLength_different_of_1(self):
    """With a third argument of 2, the 3' flank must start one base further.

    get3PrimeFlank(7, 3, 2) is expected to return "AGA" (positions 9-11)
    instead of "CAG" (positions 8-10).
    """
    bs = Bioseq("line1", "AACTTTCCAGAA")
    position = 7
    obsFlank = bs.get3PrimeFlank(position, 3, 2)
    expFlank = "AGA"
    self.assertEqual(expFlank, obsFlank)
1013 | |
# Module-level suite gathering every test method of Test_Bioseq, so that
# other modules can import `test_suite` and aggregate it.
# TestLoader.loadTestsFromTestCase replaces unittest.makeSuite, which is
# deprecated since Python 3.11 and removed in Python 3.13.
test_suite = unittest.TestSuite()
test_suite.addTest(unittest.TestLoader().loadTestsFromTestCase(Test_Bioseq))
if __name__ == "__main__":
    unittest.TextTestRunner(verbosity=2).run(test_suite)