Mercurial > repos > yufei-luo > s_mart
diff smart_toolShed/commons/core/seq/test/Test_Bioseq.py @ 0:e0f8dcca02ed
Uploaded S-MART tool. A toolbox for managing RNA-Seq and ChIP-Seq data.
author | yufei-luo |
---|---|
date | Thu, 17 Jan 2013 10:52:14 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/commons/core/seq/test/Test_Bioseq.py Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,1017 @@ +# Copyright INRA (Institut National de la Recherche Agronomique) +# http://www.inra.fr +# http://urgi.versailles.inra.fr +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 
import os
import sys
import unittest

from commons.core.coord.Map import Map
from commons.core.seq.Bioseq import Bioseq
from commons.core.utils.FileUtils import FileUtils


class Test_Bioseq(unittest.TestCase):
    """Unit tests for the Bioseq class (commons.core.seq.Bioseq).

    Tests that need files create them in the current working directory
    through the private helpers below, which register every file for
    removal so that a failing assertion does not leave fixtures behind.

    Test methods deliberately carry comments rather than docstrings:
    unittest prints the first docstring line instead of the method name
    when running with verbosity=2.
    """

    # Shared fixtures -----------------------------------------------------

    _HEADER_1 = "seq1 description1"
    _HEADER_FRAGMENT = "MbQ58Gr2Cl0 chunk1 {Fragment} 74091..74624"

    # Two 60-nt lines differing only at position 6 (T vs N).
    _LINE_T = "ATGCGTCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    _LINE_N = "ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    # 146-nt sequence, split the way Bioseq is expected to wrap it
    # (60 nt per line) when writing FASTA.
    _SEQ_146_LINES = (
        "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT",
        "ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT",
        "ATAGCAGACGCATATTATATTGCGCG",
    )
    _SEQ_146 = "".join(_SEQ_146_LINES)

    def setUp(self):
        self._bs = Bioseq()

    # Private helpers -----------------------------------------------------

    @staticmethod
    def _removeFile(fileName):
        """Remove fileName if it exists (used as a cleanup callback)."""
        if os.path.exists(fileName):
            os.remove(fileName)

    def _writeLines(self, fileName, lLines):
        """Create fileName with one newline-terminated line per item.

        The file is registered for removal at the end of the test.
        """
        self.addCleanup(self._removeFile, fileName)
        with open(fileName, "w") as handle:
            for line in lLines:
                handle.write(line + "\n")

    def _readBioseq(self, lLines, fileName="dummyFaFile.fa"):
        """Write lLines to fileName and return the Bioseq read from it."""
        self._writeLines(fileName, lLines)
        iBioseq = Bioseq()
        with open(fileName, "r") as handle:
            iBioseq.read(handle)
        return iBioseq

    @staticmethod
    def _newBioseq(header, sequence):
        """Return a Bioseq whose attributes are set directly."""
        iBioseq = Bioseq()
        iBioseq.header = header
        iBioseq.sequence = sequence
        return iBioseq

    def _newSeq146(self, header=None):
        """Return a Bioseq holding the shared 146-nt sequence."""
        if header is None:
            header = self._HEADER_1
        return self._newBioseq(header, self._SEQ_146)

    def _assertFilesIdentical(self, expFileName, obsFileName):
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))

    def _assertViewOutput(self, lExpLines, *viewArgs):
        """Check what Bioseq.view(*viewArgs) prints for the 146-nt sequence.

        sys.stdout is restored in a 'finally' clause so that an error in
        view() cannot leave it bound to a closed file.
        """
        obsFileName = "obsdummy_Bioseq_view"
        expFileName = "expDummy_Bioseq_View"
        self._writeLines(expFileName, lExpLines)
        self.addCleanup(self._removeFile, obsFileName)
        iBioseq = self._newSeq146()
        stdoutRef = sys.stdout
        with open(obsFileName, "w") as obsFile:
            sys.stdout = obsFile
            try:
                iBioseq.view(*viewArgs)
            finally:
                sys.stdout = stdoutRef
        self._assertFilesIdentical(expFileName, obsFileName)

    def _assertORF(self, sequence, expORF):
        iBioseq = self._newBioseq(self._HEADER_1, sequence)
        self.assertEqual(expORF, iBioseq.findORF())

    def _assertIUPAC(self, symbol, lExpNucl):
        """Check that the nucleotide drawn for symbol is one of lExpNucl."""
        obsNucl = Bioseq().getATGCNFromIUPAC(symbol)
        self.assertIn(obsNucl, lExpNucl)

    def _assertLMapWithoutGap(self, sequence, lExpCoords):
        iBioseq = self._newBioseq("header", sequence)
        expLMap = [
            Map("header_subSeq%i" % rank, "header", start, end)
            for rank, (start, end) in enumerate(lExpCoords, 1)
        ]
        self.assertEqual(expLMap, iBioseq.getLMapWhithoutGap())

    # isEmpty / comparison -------------------------------------------------

    def test_isEmpty_True(self):
        self._bs.setHeader("")
        self._bs.setSequence("")
        exp = True
        obs = self._bs.isEmpty()
        self.assertEqual(exp, obs)

    def test_isEmpty_False(self):
        self._bs.setHeader("seq1")
        self._bs.setSequence("AGCGGACGATGCAGCATGCGAATGACGAT")
        exp = False
        obs = self._bs.isEmpty()
        self.assertEqual(exp, obs)

    def test___eq__(self):
        self._bs.setHeader("seq1")
        self._bs.setSequence("AGCGGACGATGCAGCATGCGAATGACGAT")
        obs = Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT")
        self.assertEqual(self._bs, obs)

    def test___ne__Header(self):
        self._bs.setHeader("seq2")
        self._bs.setSequence("AGCGGACGATGCAGCATGCGAATGACGAT")
        obs = Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT")
        self.assertNotEqual(self._bs, obs)

    def test___ne__Sequence(self):
        self._bs.setHeader("seq1")
        self._bs.setSequence("GGACGATGCAGCATGCGAATGACGAT")
        obs = Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT")
        self.assertNotEqual(self._bs, obs)

    # In-place sequence transformations -----------------------------------

    def test_reverse(self):
        self._bs.setHeader("seq1")
        self._bs.setSequence("TGCGGA")
        exp = "AGGCGT"
        self._bs.reverse()
        obs = self._bs.sequence
        self.assertEqual(obs, exp)

    def test_complement(self):
        self._bs.setHeader("seq1")
        self._bs.setSequence("TGCGGA")
        exp = "ACGCCT"
        self._bs.complement()
        obs = self._bs.sequence
        self.assertEqual(obs, exp)

    def test_complement_with_unknown_symbol(self):
        self._bs.setHeader("seq1")
        self._bs.setSequence("TGCGGAFMRWTYSKVHDBN")
        exp = "ACGCCTNKYWARSMBDHVN"
        self._bs.complement()
        obs = self._bs.sequence
        self.assertEqual(obs, exp)

    def test_reverseComplement(self):
        self._bs.setHeader("seq1")
        self._bs.setSequence("TGCGGA")
        exp = "TCCGCA"
        self._bs.reverseComplement()
        obs = self._bs.sequence
        self.assertEqual(obs, exp)

    def test_cleanGap(self):
        self._bs.setSequence("-ATTTTGC-AGTC--TTATTCGAG-----GCCATTGCT-")
        exp = "ATTTTGCAGTCTTATTCGAGGCCATTGCT"
        self._bs.cleanGap()
        obs = self._bs.sequence
        self.assertEqual(obs, exp)

    def test_copyBioseqInstance(self):
        self._bs.setHeader("seq")
        self._bs.setSequence("TGCGGA")
        obsBioseq = self._bs.copyBioseqInstance()
        self.assertEqual(self._bs, obsBioseq)

    def test_setFrameInfoOnHeader_without_description(self):
        self._bs.setHeader("seq")
        self._bs.setSequence("TGCGGA")
        phase = -1
        expHeader = "seq_-1"
        self._bs.setFrameInfoOnHeader(phase)
        self.assertEqual(expHeader, self._bs.header)

    def test_setFrameInfoOnHeader_with_description(self):
        self._bs.setHeader("seq description")
        self._bs.setSequence("TGCGGA")
        phase = -1
        expHeader = "seq_-1 description"
        self._bs.setFrameInfoOnHeader(phase)
        self.assertEqual(expHeader, self._bs.header)

    # read -----------------------------------------------------------------

    def test_read(self):
        obsBioseq = self._readBioseq(
            [">" + self._HEADER_1, self._LINE_T, self._LINE_N]
        )
        expBioseq = self._newBioseq(
            self._HEADER_1, self._LINE_T + self._LINE_N
        )
        self.assertEqual(expBioseq, obsBioseq)

    def test_read_WithEmptyFile(self):
        obsBioseq = self._readBioseq([])
        expBioseq = self._newBioseq(None, None)
        self.assertEqual(expBioseq, obsBioseq)

    def test_read_without_header(self):
        # First line lacks the leading '>' of a FASTA header.
        obsBioseq = self._readBioseq([self._HEADER_1, self._LINE_N])
        expBioseq = self._newBioseq("", "")
        self.assertEqual(expBioseq, obsBioseq)

    def test_read_with_two_consecutive_headers(self):
        obsBioseq = self._readBioseq(
            [">" + self._HEADER_1, ">" + self._LINE_N]
        )
        expBioseq = self._newBioseq(self._HEADER_1, "")
        self.assertEqual(expBioseq, obsBioseq)

    def test_read_withEmptyLines(self):
        obs = self._readBioseq(
            ["", ">" + self._HEADER_1, self._LINE_T, self._LINE_N, ""]
        )
        exp = Bioseq(self._HEADER_1, self._LINE_T + self._LINE_N)
        self.assertEqual(exp, obs)

    def test_read_with_70nt_by_line(self):
        line1 = "TGTCACATCCTGATTTTCGTTTCAGGATTTATAAATTATTTAATAAATTAATAATAGAATTTATATTAAA"
        line2 = "TGTTTTTTAATTTACAAGTGAAGTTAAATGTGGGAAATAAAATTTCTTAAATCTAAAGCATGGATGGATT"
        obs = self._readBioseq(["", ">" + self._HEADER_1, line1, line2, ""])
        exp = Bioseq(self._HEADER_1, line1 + line2)
        self.assertEqual(exp, obs)

    # Writing --------------------------------------------------------------

    def test_appendBioseqInFile(self):
        obsFaFileName = "dummyFaFile.fa"
        expFaFileName = "dummyFaFile2.fa"
        lFirstRecord = [">" + self._HEADER_1, self._LINE_N]
        self._writeLines(obsFaFileName, lFirstRecord)
        self._writeLines(
            expFaFileName,
            lFirstRecord + [">seq2 description2"] + list(self._SEQ_146_LINES),
        )
        bioseq = self._newSeq146("seq2 description2")

        bioseq.appendBioseqInFile(obsFaFileName)

        self._assertFilesIdentical(expFaFileName, obsFaFileName)

    def test_writeABioseqInAFastaFile(self):
        obsFaFileName = "dummyFaFile.fa"
        expFaFileName = "dummyFaFile2.fa"
        self._writeLines(
            expFaFileName,
            [">" + self._HEADER_1, self._LINE_N, ">seq2 description2"]
            + list(self._SEQ_146_LINES),
        )
        bioseq = self._newSeq146("seq2 description2")

        self.addCleanup(self._removeFile, obsFaFileName)
        with open(obsFaFileName, "w") as obsFaFile:
            obsFaFile.write(">" + self._HEADER_1 + "\n")
            obsFaFile.write(self._LINE_N + "\n")
            bioseq.writeABioseqInAFastaFile(obsFaFile)

        self._assertFilesIdentical(expFaFileName, obsFaFileName)

    def test_writeABioseqInAFastaFileWithOtherHeader(self):
        obsFaFileName = "dummyFaFile.fa"
        expFaFileName = "dummyFaFile2.fa"
        newHeader = "seq2 New header2"
        self._writeLines(
            expFaFileName,
            [">" + self._HEADER_1, self._LINE_N, ">" + newHeader]
            + list(self._SEQ_146_LINES),
        )
        bioseq = self._newSeq146("seq2 description2")

        self.addCleanup(self._removeFile, obsFaFileName)
        with open(obsFaFileName, "w") as obsFaFile:
            obsFaFile.write(">" + self._HEADER_1 + "\n")
            obsFaFile.write(self._LINE_N + "\n")
            bioseq.writeABioseqInAFastaFileWithOtherHeader(obsFaFile, newHeader)

        self._assertFilesIdentical(expFaFileName, obsFaFileName)

    def test_writeSeqInFasta(self):
        iBs = self._newSeq146("dummySeq")
        expFaFile = "dummyExpFile.fa"
        obsFaFile = "dummyObsFile.fa"
        # Only the wrapped sequence is expected, without a header line.
        self._writeLines(expFaFile, self._SEQ_146_LINES)

        self.addCleanup(self._removeFile, obsFaFile)
        with open(obsFaFile, "w") as obsFaFileHandler:
            iBs.writeSeqInFasta(obsFaFileHandler)

        self._assertFilesIdentical(expFaFile, obsFaFile)

    # subseq ---------------------------------------------------------------

    def test_subseq(self):
        bioseq = self._newSeq146()
        start = 10
        end = 30
        expSubBioseq = self._newBioseq(
            "seq1 description1 fragment " + str(start) + ".." + str(end),
            bioseq.sequence[(start - 1):end],
        )
        obsBioseq = bioseq.subseq(start, end)
        self.assertEqual(expSubBioseq, obsBioseq)

    def test_subseq_no_end(self):
        bioseq = self._newSeq146()
        start = 10
        expEnd = len(bioseq.sequence)
        expSubBioseq = self._newBioseq(
            "seq1 description1 fragment " + str(start) + ".." + str(expEnd),
            bioseq.sequence[(start - 1):expEnd],
        )
        obsBioseq = bioseq.subseq(start)
        self.assertEqual(expSubBioseq, obsBioseq)

    def test_subseq_start_gt_end(self):
        bioseq = self._newSeq146()
        start = 30
        end = 10
        expSubBioseq = None
        obsBioseq = bioseq.subseq(start, end)
        self.assertEqual(expSubBioseq, obsBioseq)

    def test_subseq_start_eq_end(self):
        bioseq = self._newSeq146()
        start = 10
        end = 10
        expSubBioseq = self._newBioseq(
            "seq1 description1 fragment " + str(start) + ".." + str(end),
            bioseq.sequence[(start - 1):end],
        )
        obsBioseq = bioseq.subseq(start, end)
        self.assertEqual(expSubBioseq, obsBioseq)

    def test_subseq_negative_start(self):
        bioseq = self._newSeq146()
        start = -10
        end = 10
        expSubBioseq = None
        obsBioseq = bioseq.subseq(start, end)
        self.assertEqual(expSubBioseq, obsBioseq)

    # getNtFromPosition (1-based positions) --------------------------------

    def test_getNtFromPosition_1(self):
        expNt = "G"
        obsNt = self._newSeq146().getNtFromPosition(1)
        self.assertEqual(expNt, obsNt)

    def test_getNtFromPosition_10(self):
        expNt = "C"
        obsNt = self._newSeq146().getNtFromPosition(10)
        self.assertEqual(expNt, obsNt)

    def test_getNtFromPosition_last(self):
        expNt = "G"
        obsNt = self._newSeq146().getNtFromPosition(146)
        self.assertEqual(expNt, obsNt)

    def test_getNtFromPosition_position_outside_range_0(self):
        expNt = None
        obsNt = self._newSeq146().getNtFromPosition(0)
        self.assertEqual(expNt, obsNt)

    def test_getNtFromPosition_position_outside_range_negative(self):
        expNt = None
        obsNt = self._newSeq146().getNtFromPosition(-10)
        self.assertEqual(expNt, obsNt)

    def test_getNtFromPosition_position_outside_range_positive(self):
        expNt = None
        obsNt = self._newSeq146().getNtFromPosition(147)
        self.assertEqual(expNt, obsNt)

    # view -----------------------------------------------------------------

    def test_view(self):
        self._assertViewOutput(
            [">" + self._HEADER_1] + list(self._SEQ_146_LINES)
        )

    def test_view_with_l(self):
        # view(120) is expected to print only the first 120 nucleotides.
        self._assertViewOutput(
            [">" + self._HEADER_1] + list(self._SEQ_146_LINES[:2]), 120
        )

    # Length and counts ----------------------------------------------------

    def test_getLength(self):
        expLength = 146
        obsLength = self._newSeq146().getLength()
        self.assertEqual(expLength, obsLength)

    def test_getLength_empty_seq(self):
        bioseq = Bioseq()
        expLength = 0
        obsLength = bioseq.getLength()
        self.assertEqual(expLength, obsLength)

    def test_getLength_WithoutN(self):
        bioseq = self._newBioseq(
            self._HEADER_1,
            "GCGANCGCTGCTTTATTAAGCGCTAGATGNNNNNNNNNNNNNNNCGACGCTGCATTTATTAAGCGCTAGCGATTATANNNNNNNNNTAGCAGACGCATATTATATTGCGCGATGCGACGCTGCTTTATTANAGCGCTAGCGNNATTATATAGCANGACGCATATTATATTGCGCG",
        )
        expLength = 146
        obsLength = bioseq.getLength(False)
        self.assertEqual(expLength, obsLength)

    def test_getLength_WithoutN_empty_seq(self):
        bioseq = Bioseq()
        expLength = 0
        obsLength = bioseq.getLength(False)
        self.assertEqual(expLength, obsLength)

    def test_countNt(self):
        expCount = 3
        obsCount = self._newSeq146().countNt('N')
        self.assertEqual(expCount, obsCount)

    def test_countNt_withCharacterNotExisting(self):
        expCount = 0
        obsCount = self._newSeq146().countNt('W')
        self.assertEqual(expCount, obsCount)

    def test_countAllNt(self):
        dExpCount = {'A': 34, 'C': 31, 'T': 43, 'G': 35, 'N': 3}
        dObsCount = self._newSeq146().countAllNt()
        self.assertEqual(dExpCount, dObsCount)

    # Word occurrences and frequencies -------------------------------------

    def test_occ_word_size_1(self):
        dExpOccWord = {'A': 34, 'C': 31, 'T': 43, 'G': 35}
        ExpNbWord = 143
        dObsOccWord, ObsNbWord = self._newSeq146().occ_word(1)
        self.assertEqual(dExpOccWord, dObsOccWord)
        self.assertEqual(ExpNbWord, ObsNbWord)

    def test_occ_word_size_0(self):
        dExpOccWord = {}
        ExpNbWord = 0
        dObsOccWord, ObsNbWord = self._newSeq146().occ_word(0)
        self.assertEqual(dExpOccWord, dObsOccWord)
        self.assertEqual(ExpNbWord, ObsNbWord)

    def test_occ_word_size_n(self):
        dExpOccWord = {
            'ACC': 0, 'ATG': 2, 'AAG': 3, 'AAA': 0, 'ATC': 0, 'AAC': 0,
            'ATA': 8, 'AGG': 0, 'CCT': 0, 'CTC': 0, 'AGC': 8, 'ACA': 0,
            'AGA': 2, 'CAT': 3, 'AAT': 0, 'ATT': 9, 'CTG': 3, 'CTA': 3,
            'ACT': 0, 'CAC': 0, 'ACG': 2, 'CAA': 0, 'AGT': 0, 'CAG': 2,
            'CCG': 0, 'CCC': 0, 'CTT': 3, 'TAT': 13, 'GGT': 0, 'TGT': 0,
            'CGA': 3, 'CCA': 0, 'TCT': 0, 'GAT': 3, 'CGG': 0, 'TTT': 3,
            'TGC': 7, 'GGG': 0, 'TAG': 5, 'GGA': 0, 'TAA': 3, 'GGC': 0,
            'TAC': 0, 'TTC': 0, 'TCG': 0, 'TTA': 10, 'TTG': 2, 'TCC': 0,
            'GAA': 0, 'TGG': 0, 'GCA': 5, 'GTA': 0, 'GCC': 0, 'GTC': 0,
            'GCG': 12, 'GTG': 0, 'GAG': 0, 'GTT': 0, 'GCT': 9, 'TGA': 0,
            'GAC': 2, 'CGT': 0, 'TCA': 0, 'CGC': 10,
        }
        ExpNbWord = 135
        dObsOccWord, ObsNbWord = self._newSeq146().occ_word(3)
        self.assertEqual(dExpOccWord, dObsOccWord)
        self.assertEqual(ExpNbWord, ObsNbWord)

    def test_freq_word_size_1(self):
        dExpFreqWord = {
            'A': 0.23776223776223776,
            'C': 0.21678321678321677,
            'T': 0.30069930069930068,
            'G': 0.24475524475524477,
        }
        dObsFreqWord = self._newSeq146().freq_word(1)
        self.assertEqual(dExpFreqWord, dObsFreqWord)

    def test_freq_word_size_0(self):
        dExpFreqWord = {}
        dObsFreqWord = self._newSeq146().freq_word(0)
        self.assertEqual(dExpFreqWord, dObsFreqWord)

    def test_freq_word_size_n(self):
        dExpFreqWord = {
            'ACC': 0.0, 'ATG': 0.014814814814814815,
            'AAG': 0.022222222222222223, 'AAA': 0.0, 'ATC': 0.0,
            'AAC': 0.0, 'ATA': 0.059259259259259262, 'AGG': 0.0,
            'CCT': 0.0, 'CTC': 0.0, 'AGC': 0.059259259259259262,
            'ACA': 0.0, 'AGA': 0.014814814814814815,
            'CAT': 0.022222222222222223, 'AAT': 0.0,
            'ATT': 0.066666666666666666, 'CTG': 0.022222222222222223,
            'CTA': 0.022222222222222223, 'ACT': 0.0, 'CAC': 0.0,
            'ACG': 0.014814814814814815, 'CAA': 0.0, 'AGT': 0.0,
            'CAG': 0.014814814814814815, 'CCG': 0.0, 'CCC': 0.0,
            'TAT': 0.096296296296296297, 'GGT': 0.0, 'TGT': 0.0,
            'CGA': 0.022222222222222223, 'CCA': 0.0, 'TCT': 0.0,
            'GAT': 0.022222222222222223, 'CGG': 0.0,
            'CTT': 0.022222222222222223, 'TGC': 0.05185185185185185,
            'GGG': 0.0, 'TAG': 0.037037037037037035, 'GGA': 0.0,
            'TAA': 0.022222222222222223, 'GGC': 0.0, 'TAC': 0.0,
            'TTC': 0.0, 'TCG': 0.0, 'TTT': 0.022222222222222223,
            'TTG': 0.014814814814814815, 'TCC': 0.0, 'GAA': 0.0,
            'TGG': 0.0, 'GCA': 0.037037037037037035, 'GTA': 0.0,
            'GCC': 0.0, 'GTC': 0.0, 'TGA': 0.0,
            'GCG': 0.088888888888888892, 'GTG': 0.0, 'GAG': 0.0,
            'GTT': 0.0, 'GCT': 0.066666666666666666,
            'TTA': 0.07407407407407407, 'GAC': 0.014814814814814815,
            'CGT': 0.0, 'TCA': 0.0, 'CGC': 0.07407407407407407,
        }
        dObsFreqWord = self._newSeq146().freq_word(3)
        self.assertEqual(dExpFreqWord, dObsFreqWord)

    # findORF (expected dict: phase -> list of 0-based positions) ---------

    def test_findORF_no_ORF_in_sequence(self):
        self._assertORF("GCGNCGCTGCTTTATT", {0: [], 1: [], 2: []})

    def test_findORF_one_ORF_in_first_phase(self):
        self._assertORF("TAAGCGNCGCTGCTTTATT", {0: [0], 1: [], 2: []})

    def test_findORF_three_ORF_in_first_phase(self):
        self._assertORF(
            "TAAGCGTAGNCGTGACTGCTTTATT", {0: [0, 6, 12], 1: [], 2: []}
        )

    def test_findORF_two_ORF_in_first_phase_one_ORF_in_second_phase(self):
        self._assertORF(
            "TAAGTAGAGNCGTGACTGCTTTATT", {0: [0, 12], 1: [4], 2: []}
        )

    def test_findORF_two_ORF_in_first_phase_three_ORF_in_second_phase(self):
        self._assertORF(
            "TAAGTAGAGNCGTGACTGATAGTATT",
            {0: [0, 12], 1: [4, 16, 19], 2: []},
        )

    def test_findORF_one_ORF_in_second_phase_three_ORF_in_third_phase(self):
        self._assertORF(
            "AATATTAGTGGAGTAGTTGATGATTTT",
            {0: [], 1: [13], 2: [5, 17, 20]},
        )

    def test_findORF_three_ORF_in_second_phase_one_ORF_in_third_phase(self):
        self._assertORF(
            "TTTGAAGTGGAGGAGTTGATGATTTTAAT",
            {0: [], 1: [16, 19, 25], 2: [2]},
        )

    # Case conversion ------------------------------------------------------

    def test_upCase(self):
        bioseq = self._newBioseq("seq description", "taattcggcct")
        expSeq = "TAATTCGGCCT"
        bioseq.upCase()
        obsSeq = bioseq.sequence
        self.assertEqual(expSeq, obsSeq)

    def test_lowCase(self):
        bioseq = self._newBioseq("seq description", "TAATTCGGCCT")
        expSeq = "taattcggcct"
        bioseq.lowCase()
        obsSeq = bioseq.sequence
        self.assertEqual(expSeq, obsSeq)

    # Header parsing -------------------------------------------------------

    def test_getClusterID(self):
        bioseq = self._newBioseq(self._HEADER_FRAGMENT, "TAATTCGGCCT")
        expID = "0"
        obsID = bioseq.getClusterID()
        self.assertEqual(expID, obsID)

    def test_getGroupID(self):
        bioseq = self._newBioseq(self._HEADER_FRAGMENT, "TAATTCGGCCT")
        expID = "2"
        obsID = bioseq.getGroupID()
        self.assertEqual(expID, obsID)

    def test_getHeaderFullSeq(self):
        bioseq = self._newBioseq(self._HEADER_FRAGMENT, "TAATTCGGCCT")
        expHeader = "chunk1"
        obsHeader = bioseq.getHeaderFullSeq()
        self.assertEqual(expHeader, obsHeader)

    def test_getFragStrand_plus_strand(self):
        bioseq = self._newBioseq(self._HEADER_FRAGMENT, "TAATTCGGCCT")
        expStrand = '+'
        obsStrand = bioseq.getFragStrand()
        self.assertEqual(expStrand, obsStrand)

    def test_getFragStrand_minus_strand(self):
        bioseq = self._newBioseq(
            "MbQ58Gr2Cl0 chunk1 {Fragment} 74624..74091", "TAATTCGGCCT"
        )
        expStrand = '-'
        obsStrand = bioseq.getFragStrand()
        self.assertEqual(expStrand, obsStrand)

    # getATGCNFromIUPAC ----------------------------------------------------
    # For ambiguity codes any nucleotide of the listed set is accepted.

    def test_getATGCNFromIUPAC_A(self):
        self.assertEqual('A', Bioseq().getATGCNFromIUPAC('A'))

    def test_getATGCNFromIUPAC_T(self):
        self.assertEqual('T', Bioseq().getATGCNFromIUPAC('T'))

    def test_getATGCNFromIUPAC_C(self):
        self.assertEqual('C', Bioseq().getATGCNFromIUPAC('C'))

    def test_getATGCNFromIUPAC_G(self):
        self.assertEqual('G', Bioseq().getATGCNFromIUPAC('G'))

    def test_getATGCNFromIUPAC_N(self):
        self.assertEqual('N', Bioseq().getATGCNFromIUPAC('N'))

    def test_getATGCNFromIUPAC_U(self):
        self.assertEqual('T', Bioseq().getATGCNFromIUPAC('U'))

    def test_getATGCNFromIUPAC_R(self):
        self._assertIUPAC('R', ('A', 'G'))

    def test_getATGCNFromIUPAC_Y(self):
        self._assertIUPAC('Y', ('C', 'T'))

    def test_getATGCNFromIUPAC_M(self):
        self._assertIUPAC('M', ('C', 'A'))

    def test_getATGCNFromIUPAC_K(self):
        self._assertIUPAC('K', ('T', 'G'))

    def test_getATGCNFromIUPAC_W(self):
        self._assertIUPAC('W', ('T', 'A'))

    def test_getATGCNFromIUPAC_S(self):
        self._assertIUPAC('S', ('C', 'G'))

    def test_getATGCNFromIUPAC_B(self):
        self._assertIUPAC('B', ('C', 'T', 'G'))

    def test_getATGCNFromIUPAC_D(self):
        self._assertIUPAC('D', ('A', 'T', 'G'))

    def test_getATGCNFromIUPAC_H(self):
        self._assertIUPAC('H', ('C', 'T', 'A'))

    def test_getATGCNFromIUPAC_V(self):
        self._assertIUPAC('V', ('C', 'A', 'G'))

    def test_getATGCNFromIUPAC_Z(self):
        # 'Z' is not an IUPAC nucleotide code; 'N' is expected.
        self.assertEqual('N', Bioseq().getATGCNFromIUPAC('Z'))

    def test_partialIUPAC(self):
        bioseq = Bioseq()
        bioseq.sequence = "ATGCNRATGCN"
        bioseq.partialIUPAC()
        obsSequence = bioseq.sequence
        self.assertIn(obsSequence, ("ATGCNAATGCN", "ATGCNGATGCN"))

    def test_checkEOF(self):
        bioseq = Bioseq()
        bioseq.sequence = "ATGCNRATGCN\rATGCAAT\rTATA\r"
        bioseq.checkEOF()
        obsSequence = bioseq.sequence
        expSequence = "ATGCNRATGCNATGCAATTATA"
        self.assertEqual(expSequence, obsSequence)

    # getLMapWhithoutGap ---------------------------------------------------

    def test_getLMapWhithoutGap(self):
        self._assertLMapWithoutGap("ATGC-RA-GCT", [(1, 4), (6, 7), (9, 11)])

    def test_getLMapWhithoutGap_seqStartsWithGap(self):
        self._assertLMapWithoutGap("-TGC-RA-GCT", [(2, 4), (6, 7), (9, 11)])

    def test_getLMapWhithoutGap_seqEndsWithGap(self):
        self._assertLMapWithoutGap("ATGC-RA-GC-", [(1, 4), (6, 7), (9, 10)])

    # GC content -----------------------------------------------------------

    def test_getGCpercentage_onlyATGC(self):
        iBs = Bioseq("seq", "TGCAGCT")
        exp = 100 * 4 / 7.0
        obs = iBs.getGCpercentage()
        self.assertEqual(exp, obs)

    def test_getGCpercentageInSequenceWithoutCountNInLength(self):
        iBs = Bioseq("seq", "TGCAGCTNNNNN")
        exp = 100 * 4 / 7.0
        obs = iBs.getGCpercentageInSequenceWithoutCountNInLength()
        self.assertEqual(exp, obs)

    # Flanking sequences ---------------------------------------------------

    def test_get5PrimeFlank(self):
        bs = Bioseq("line1", "AACTTTCCAGAA")
        position = 7
        obsFlank = bs.get5PrimeFlank(position, 3)
        expFlank = "TTT"
        self.assertEqual(expFlank, obsFlank)

    def test_get5PrimeFlank_flank_length_truncated(self):
        bs = Bioseq("line1", "AACTTTCCAGAA")
        position = 7
        obsFlank = bs.get5PrimeFlank(position, 15)
        expFlank = "AACTTT"
        self.assertEqual(expFlank, obsFlank)

    def test_get5PrimeFlank_flank_of_first_base(self):
        bs = Bioseq("line1", "AACTTTCCAGAA")
        position = 1
        obsFlank = bs.get5PrimeFlank(position, 15)
        expFlank = ""
        self.assertEqual(expFlank, obsFlank)

    def test_get3PrimeFlank(self):
        bs = Bioseq("line1", "AACTTTCCAGAA")
        position = 7
        obsFlank = bs.get3PrimeFlank(position, 3)
        expFlank = "CAG"
        self.assertEqual(expFlank, obsFlank)

    def test_get3PrimeFlank_flank_length_truncated(self):
        bs = Bioseq("line1", "AACTTTCCAGAA")
        position = 7
        obsFlank = bs.get3PrimeFlank(position, 15)
        expFlank = "CAGAA"
        self.assertEqual(expFlank, obsFlank)

    def test_get3PrimeFlank_flank_of_last_base(self):
        bs = Bioseq("line1", "AACTTTCCAGAA")
        position = 12
        obsFlank = bs.get3PrimeFlank(position, 15)
        expFlank = ""
        self.assertEqual(expFlank, obsFlank)

    def test_get3PrimeFlank_polymLength_different_of_1(self):
        bs = Bioseq("line1", "AACTTTCCAGAA")
        position = 7
        obsFlank = bs.get3PrimeFlank(position, 3, 2)
        expFlank = "AGA"
        self.assertEqual(expFlank, obsFlank)


# Module-level suite kept under its original name for external runners.
# TestLoader.loadTestsFromTestCase replaces the deprecated unittest.makeSuite.
test_suite = unittest.TestSuite()
test_suite.addTest(unittest.TestLoader().loadTestsFromTestCase(Test_Bioseq))

if __name__ == "__main__":
    unittest.TextTestRunner(verbosity=2).run(test_suite)