diff commons/core/seq/test/Test_AlignedBioseqDB.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/test/Test_AlignedBioseqDB.py	Fri Jan 18 04:54:14 2013 -0500
@@ -0,0 +1,773 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import unittest
+import sys
+import os
+import time
+from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
+from commons.core.seq.Bioseq import Bioseq
+from commons.core.utils.FileUtils import FileUtils
+from commons.core.coord.Align import Align
+from commons.core.coord.Range import Range
+from commons.core.stat.Stat import Stat
+
+
+class Test_AlignedBioseqDB( unittest.TestCase ):
+    
+    def setUp( self ):
+        self._i = AlignedBioseqDB()
+        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )
+        
+        
+    def tearDown( self ):
+        self._i = None
+        self._uniqId = ""
+        
+        
+    def test_getLength(self):
+        iAlignedBioseqDB = AlignedBioseqDB()
+
+        iBioseq1 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )
+        iAlignedBioseqDB.setData([iBioseq1])
+        
+        expLenght = 29
+        obsLength = iAlignedBioseqDB.getLength() 
+
+        self.assertEquals(expLenght, obsLength)
+        
+        
+    def test_getSeqLengthWithoutGaps( self ):
+        iAlignedBioseqDB = AlignedBioseqDB()
+        iAlignedBioseqDB.add( Bioseq( "seq3",
+                                      "AGCG-GACGATGCAGCAT--GCGAATGA--CGAT" ) )
+        expLenght = 29
+        obsLength = iAlignedBioseqDB.getSeqLengthWithoutGaps( "seq3" )
+        
+        self.assertEquals(expLenght, obsLength)
+        
+        
+    def test_getListOccPerSite(self):
+        iBioseq1 = Bioseq( "seq1", "AGAAA")
+        iBioseq2 = Bioseq( "seq2", "TCAAG")
+        iBioseq3 = Bioseq( "seq3", "GGTAC")
+        iBioseq4 = Bioseq( "seq4", "CCTTA")
+        
+        iAlignedBioseqDB = AlignedBioseqDB()
+        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])
+
+        expList = [
+                
+                {"A":1, "T":1, "G":1, "C":1},
+
+                {"G":2, "C":2},
+                   
+                {"A":2, "T":2 },
+                
+                {"A":3, "T":1 },   
+                
+                {"A":2, "G":1, "C":1}
+            ]
+                
+        obsList = iAlignedBioseqDB.getListOccPerSite()
+       
+        self.assertEquals(expList, obsList)
+        
+        
+    def test_getListOccPerSite_with_none_sequence(self):
+        iBioseq1 = Bioseq( "seq1", "AGAAA")
+        iBioseq2 = Bioseq( "seq2", "TCAAG")
+        iBioseq3 = Bioseq( "seq3", "GGTAC")
+        iBioseq4 = Bioseq( "seq4", None)
+        iBioseq5 = Bioseq( "seq5", "CCTTA")
+        
+        iAlignedBioseqDB = AlignedBioseqDB()
+        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4, iBioseq5])
+
+        expList = [
+                {"A":1, "T":1, "G":1},
+                {"G":2, "C":1},
+                {"A":2, "T":1 },
+                {"A":3},   
+                {"A":1, "G":1, "C":1},
+            ]
+        
+        obsList = iAlignedBioseqDB.getListOccPerSite()
+        
+        self.assertEquals(expList, obsList)
+        
+        
+    def test_getListOccPerSite_on_three_sequence(self):
+        iBioseq1 = Bioseq( "seq1", "AGAAA")
+        iBioseq2 = Bioseq( "seq2", "TCAAG")
+        iBioseq3 = Bioseq( "seq3", "GGTAC")
+        
+        iAlignedBioseqDB = AlignedBioseqDB()
+        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3])
+
+        expList = [
+                {"A":1, "T":1, "G":1},
+                {"G":2, "C":1},
+                {"A":2, "T":1 },
+                {"A":3},   
+                {"A":1, "G":1, "C":1},
+            ]
+        
+        obsList = iAlignedBioseqDB.getListOccPerSite()
+        
+        self.assertEquals(expList, obsList)
+        
+        
+    def test_getConsensus_with_minNbNt_greater_than_nbInSeq(self):
+        iBioseq1 = Bioseq("seq1", "AGAT")
+        iBioseq2 = Bioseq("seq2", "TGCA")
+        iBioseq3 = Bioseq("seq3", "TACT")
+        
+        iAlignedBioseqDB = AlignedBioseqDB()
+        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3])
+        
+        expConsensus = Bioseq("consensus= length=4 nbAlign=3","TGCT")
+        obsConsensus = iAlignedBioseqDB.getConsensus(5) 
+
+        self.assertEquals(expConsensus, obsConsensus)
+        
+        
+    def test_getConsensus_with_minPropNt_greater_than_1(self):
+        isSysExitRaised = False
+        
+        iBioseq1 = Bioseq("seq1", "AGAT")
+        iBioseq2 = Bioseq("seq2", "TGCA")
+        iBioseq3 = Bioseq("seq3", "TACT")
+        
+        iAlignedBioseqDB = AlignedBioseqDB()
+        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3])
+        
+        expFileName = "expFileName"
+        expFileHandler = open(expFileName,"w")
+        expMessage = "ERROR: minPropNt=2.00 should be a proportion (below 1.0)\n"
+        expFileHandler.write(expMessage)
+        expFileHandler.close()
+        
+        obsFileName = "obsFileName"
+        obsFileHandler = open(obsFileName,"w")
+        
+        stdoutRef = sys.stdout
+        sys.stdout = obsFileHandler
+        
+        try:
+            iAlignedBioseqDB.getConsensus(2,2)
+        except SystemExit:
+            isSysExitRaised = True
+        
+        obsFileHandler.close()
+        expFileHandler.close()
+
+        self.assertTrue( FileUtils.are2FilesIdentical( expFileName, obsFileName ) )
+        self.assertTrue( isSysExitRaised )
+        
+        sys.stdout = stdoutRef
+        os.remove ( obsFileName )
+        os.remove ( expFileName )
+        stdoutRef = sys.stdout
+        
+        
+    def test_getConsensus_with_AlignBioseqInstance_size_1(self):
+        isSysExitRaised = False
+        
+        iBioseq1 = Bioseq("seq1", "AGAT")
+        
+        iAlignedBioseqDB = AlignedBioseqDB()
+        iAlignedBioseqDB.setData([iBioseq1])
+        
+        expFileName = "expFileName"
+        expFileHandler = open(expFileName,"w")
+        expMessage = "ERROR: can't make a consensus with less than 2 sequences\n"
+        expFileHandler.write(expMessage)
+        expFileHandler.close()
+        
+        obsFileName = "obsFileName"
+        obsFileHandler = open(obsFileName,"w")
+        
+        stdoutRef = sys.stdout
+        sys.stdout = obsFileHandler
+        
+        try:
+            iAlignedBioseqDB.getConsensus(4)
+        except SystemExit:
+            isSysExitRaised = True
+        
+        obsFileHandler.close()
+        expFileHandler.close()
+
+        self.assertTrue( FileUtils.are2FilesIdentical( expFileName, obsFileName ) )
+        self.assertTrue( isSysExitRaised )
+        
+        sys.stdout = stdoutRef
+        os.remove ( obsFileName )
+        os.remove ( expFileName )
+        stdoutRef = sys.stdout
+        
+        
+    def test_getConsensus_with_gap_assertion_on_warning_msg(self):
+        iBioseq1 = Bioseq("seq1", "A-GA-T")
+        iBioseq2 = Bioseq("seq2", "T-GC-A")
+        iBioseq3 = Bioseq("seq3", "T-AC-T")
+                          
+        iAlignedBioseqDB = AlignedBioseqDB()
+        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3])
+                          
+        expFileName = "expFileName"
+        expFileHandler = open(expFileName,"w")
+        expMessage = "WARNING: 2 sites were removed (33.33%)\n"
+        expFileHandler.write(expMessage)
+        expFileHandler.close()
+        
+        obsFileName = "obsFileName"
+        obsFileHandler = open(obsFileName,"w")
+        
+        stdoutRef = sys.stdout
+        sys.stdout = obsFileHandler
+       
+        iAlignedBioseqDB.getConsensus(2)
+        
+        obsFileHandler.close()
+        expFileHandler.close()
+        
+        self.assertTrue( FileUtils.are2FilesIdentical( expFileName, obsFileName ) )
+        
+        sys.stdout = stdoutRef
+        os.remove ( obsFileName )
+        os.remove ( expFileName )
+        stdoutRef = sys.stdout
+        
+        
+    def test_getConsensus_with_gap_assertion_on_result(self):
+        iBioseq1 = Bioseq("seq1", "A-GA-T")
+        iBioseq2 = Bioseq("seq2", "T-GC-A")
+        iBioseq3 = Bioseq("seq3", "T-AC-T")
+                          
+        iAlignedBioseqDB = AlignedBioseqDB()
+        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3])
+        
+        expConsensus = Bioseq("consensus= length=4 nbAlign=3","TGCT")
+        obsConsensus = iAlignedBioseqDB.getConsensus(2) 
+
+        self.assertEquals(expConsensus, obsConsensus)
+        
+        
+    def test_getConsensus_with_gaps_and_no_consensus_built_assertion_on_result(self):
+        iBioseq1 = Bioseq("seq1", "A--A-T")
+        iBioseq2 = Bioseq("seq2", "----A-")
+        iBioseq3 = Bioseq("seq3", "--A---")
+                          
+        iAlignedBioseqDB = AlignedBioseqDB()
+        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3])
+        
+        expConsensus = None
+        obsConsensus = iAlignedBioseqDB.getConsensus(2)
+        self.assertEquals(expConsensus, obsConsensus)
+        
+        
+    def test_getConsensus_with_gaps_and_no_consensus_built_assertion_on_warning_messages(self):
+        iBioseq1 = Bioseq("seq1", "A--A-T")
+        iBioseq2 = Bioseq("seq2", "----A-")
+        iBioseq3 = Bioseq("seq3", "--A---")
+                          
+        iAlignedBioseqDB = AlignedBioseqDB()
+        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3])
+
+        expFileName = "expFileName"
+        expFileHandler = open(expFileName,"w")
+        expMessage1 = "WARNING: 1 site was removed (16.67%)\n"
+        expMessage2 = "WARNING: no consensus can be built (no sequence left)\n"
+        expFileHandler.write(expMessage1)
+        expFileHandler.write(expMessage2)
+        expFileHandler.close()
+        
+        obsFileName = "obsFileName"
+        obsFileHandler = open(obsFileName,"w")
+        
+        stdoutRef = sys.stdout
+        sys.stdout = obsFileHandler
+       
+        iAlignedBioseqDB.getConsensus(2)
+        
+        obsFileHandler.close()
+        expFileHandler.close()
+        
+        self.assertTrue( FileUtils.are2FilesIdentical( expFileName, obsFileName ) )
+        
+        sys.stdout = stdoutRef
+        os.remove ( obsFileName )
+        os.remove ( expFileName )
+        stdoutRef = sys.stdout
+        
+        
+    def test_getConsensus_with_unacceptable_N_proportion_assertion_on_result(self):
+        iBioseq1 = Bioseq("seq1", "AGAT")
+        iBioseq2 = Bioseq("seq2", "CCCA")
+        iBioseq3 = Bioseq("seq3", "TTCT")
+        iBioseq4 = Bioseq("seq4", "TTAT")
+            
+        iAlignedBioseqDB = AlignedBioseqDB()
+        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])
+        
+        expConsensus = None
+        obsConsensus = iAlignedBioseqDB.getConsensus(2,0.75)
+         
+        self.assertEquals(expConsensus, obsConsensus) 
+        
+        
+    def test_getConsensus_with_unacceptable_N_proportion_assertion_on_warning_msg(self):
+        iBioseq1 = Bioseq("seq1", "AGAT")
+        iBioseq2 = Bioseq("seq2", "CCCA")
+        iBioseq3 = Bioseq("seq3", "TTCT")
+        iBioseq4 = Bioseq("seq4", "TTAT")
+            
+        iAlignedBioseqDB = AlignedBioseqDB()
+        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])
+        
+        expFileName = "expFileName"
+        expFileHandler = open(expFileName,"w")
+        expMessage = "WARNING: no consensus can be built (75% of N's >= 40%)\n"
+        expFileHandler.write(expMessage)
+        expFileHandler.close()
+        
+        obsFileName = "obsFileName"
+        obsFileHandler = open(obsFileName,"w")
+        
+        stdoutRef = sys.stdout
+        sys.stdout = obsFileHandler
+       
+        iAlignedBioseqDB.getConsensus(2,0.75)
+   
+        obsFileHandler.close()
+        expFileHandler.close()
+        
+        self.assertTrue( FileUtils.are2FilesIdentical( expFileName, obsFileName ) )
+        
+        sys.stdout = stdoutRef
+        os.remove ( obsFileName )
+        os.remove ( expFileName )
+        stdoutRef = sys.stdout
+        
+        
+    def test_getConsensus_with_acceptable_N_proportion_assertion_on_warning_msg(self):
+        iBioseq1 = Bioseq("seq1", "AGATAA")
+        iBioseq2 = Bioseq("seq2", "CTCAAA")
+        iBioseq3 = Bioseq("seq3", "TTCTAA")
+        iBioseq4 = Bioseq("seq4", "GTATCC")
+                                   
+        iAlignedBioseqDB = AlignedBioseqDB()
+        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])
+        
+        expFileName = "expFileName"
+        expFileHandler = open(expFileName,"w")
+        expMessage = "WARNING: 33% of N's\n"
+        expFileHandler.write(expMessage)
+        expFileHandler.close()
+        
+        obsFileName = "obsFileName"
+        obsFileHandler = open(obsFileName,"w")
+        
+        stdoutRef = sys.stdout
+        sys.stdout = obsFileHandler
+        
+        iAlignedBioseqDB.getConsensus(2,0.6)
+        
+        obsFileHandler.close()
+        expFileHandler.close()
+        
+        self.assertTrue( FileUtils.are2FilesIdentical( expFileName, obsFileName ) )
+        
+        sys.stdout = stdoutRef
+        os.remove ( obsFileName )
+        os.remove ( expFileName )
+        stdoutRef = sys.stdout
+        
+        
+    def test_getConsensus_with_acceptable_N_proportion_assertion_on_result(self):
+        iBioseq1 = Bioseq("seq1", "AGATAA")
+        iBioseq2 = Bioseq("seq2", "CTCAAA")
+        iBioseq3 = Bioseq("seq3", "TTCTAA")
+        iBioseq4 = Bioseq("seq4", "GTATCC")
+                                   
+        iAlignedBioseqDB = AlignedBioseqDB()
+        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])
+        
+        expConsensus = Bioseq("consensus= length=6 nbAlign=4","NTNTAA")
+        obsConsensus = iAlignedBioseqDB.getConsensus(2,0.6)
+         
+        self.assertEquals(expConsensus, obsConsensus)
+        
+        
+    def test_getConsensus_with_chimeric_seq_in_alignment(self):
+        iBioseq1 = Bioseq("seq1", "AGAGTTGTAA")
+        iBioseq2 = Bioseq("seq2", "ATC----AAA")
+        iBioseq3 = Bioseq("seq3", "ATC----TAA")
+        iBioseq4 = Bioseq("seq4", "GTC----TAA")
+                                   
+        iAlignedBioseqDB = AlignedBioseqDB()
+        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])
+        
+        expConsensus = Bioseq("consensus= length=6 nbAlign=4","ATCTAA")
+        obsConsensus = iAlignedBioseqDB.getConsensus(2)
+         
+        self.assertEquals(expConsensus, obsConsensus) 
+        
+
+    def test_getConsensus_with_SATannot_data(self):
+        iBioseq1 = Bioseq("MbS69Gr8Cl511 chunk21 {Fragment} 165740..165916", "--TTTAGCCGAAGTCCATATGAGTCTTTGTGTTTGTATCTTCTAACAAGGAAACACTACTTAGGCTTTTAGGATAAGATTGCGGTTTAAGTTCTTATACTCAATCATACACATGACATCAAGTCATATTCGAATCCAAAACTCTAAGCAAGCTTCTTCTTGCTTCTCAAA-GCTTTGATG")
+        iBioseq2 = Bioseq("MbS68Gr8Cl511 chunk21 {Fragment} 165916..166092", "GGTCTAGCCGAAGTCCATATGAGTCTTTGTCTTTGTATCTTCTAACAAGAAAACACTACTTAGGCTTTTAGGATAAGGTTGCAGTTTAAGTTTTTATACTAAATCATACACATCACATCAAGTCATATTCGACTCCCAAACACTAACCAAGCTTCTT--TGCTTCTCAAC-GCTTTGATG")
+        iBioseq3 = Bioseq("MbS67Gr8Cl511 chunk21 {Fragment} 166093..166269", "-GTTTAGCCGAAGTCCATATGAGTCGTTGTGTTTGTATCTTCTAACAAGGAAACACTACTTACGCTTTTAGGATAAGATTGTTGTTTAAGTTCTTATACTTAATCATACACATGACATAAAGTCATATTCGACTCCAAAACACTAATCAAGCTTCTTCTTGCTTCTCAAA-GCTTTGTT-")
+        iBioseq4 = Bioseq("MbS66Gr8Cl511 chunk21 {Fragment} 173353..173529", "--TTTAGCAAAATTCTATATGAGTCTTTATCTTTGTATCTTCTAACAAGGAAACACTACTTAGGCTTTTAGGATAAGGTTGCGGGTTAAGTTCTTATACTCAATCATACACATGATATCAAGTCATATTCGACTCCAAAACACTAACCAAGCTTCTTCTTGCTTCTTAAAAGCTTTGAA-")
+                                   
+        iAlignedBioseqDB = AlignedBioseqDB()
+        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])
+        
+        expConsensus = Bioseq("consensus= length=178 nbAlign=4 pile=511 pyramid=8", "GTTTAGCCGAAGTCCATATGAGTCTTTGTNTTTGTATCTTCTAACAAGGAAACACTACTTAGGCTTTTAGGATAAGNTTGCNGTTTAAGTTCTTATACTNAATCATACACATGACATCAAGTCATATTCGACTCCAAAACACTAANCAAGCTTCTTCTTGCTTCTCAAAGCTTTGATG")
+        obsConsensus = iAlignedBioseqDB.getConsensus(2, 0.6, isHeaderSAtannot=True)
+         
+        self.assertEquals(expConsensus, obsConsensus) 
+        
+        
+    def test_getEntropy_equal_zero(self):
+        iBioseq1 = Bioseq("seq1", "AGAT")
+        iBioseq2 = Bioseq("seq2", "AGAT")
+        iBioseq3 = Bioseq("seq3", "AGAT")
+        iBioseq4 = Bioseq("seq4", "AGAT")
+                                   
+        iAlignedBioseqDB = AlignedBioseqDB()
+        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])
+        
+        expStats = Stat()
+        expStats._min = 0
+        expStats._max = 0
+        expStats._sum = 0
+        expStats._sumOfSquares = 0
+        expStats._n = 4
+        expStats._lValues = [0, 0, 0, 0]
+        
+        obsStats = iAlignedBioseqDB.getEntropy()
+        
+        self.assertEquals(expStats, obsStats) 
+        
+        
+    def test_getEntropy_different_nucl(self):
+        iBioseq1 = Bioseq("seq1", "AGAT")
+        iBioseq2 = Bioseq("seq2", "CTCA")
+        iBioseq3 = Bioseq("seq3", "TTCT")
+        iBioseq4 = Bioseq("seq4", "GTAT")
+                                   
+        iAlignedBioseqDB = AlignedBioseqDB()
+        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])
+        
+        expStats = Stat()
+        expStats._min = 0.81127812445913283
+        expStats._max = 2.0
+        expStats._sum = 4.62255624892
+        expStats._sumOfSquares = 6.31634439045
+        expStats._n = 4
+        expStats._lValues = [2.0, 0.81127812445913283, 1.0, 0.81127812445913283]
+        
+        obsStats = iAlignedBioseqDB.getEntropy()
+        
+        self.assertEquals(expStats, obsStats)
+        
+        
+    def test_getEntropy_different_nucl_with_N(self):
+        iBioseq1 = Bioseq("seq1", "AGAT")
+        iBioseq2 = Bioseq("seq2", "CNCA")
+        iBioseq3 = Bioseq("seq3", "TTNT")
+        iBioseq4 = Bioseq("seq4", "GTNT")
+                                   
+        iAlignedBioseqDB = AlignedBioseqDB()
+        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])
+        
+        expStats = Stat()
+        expStats._min = 0.811278124459
+        expStats._max = 2
+        expStats._sum = 6.11387090595
+        expStats._sumOfSquares = 10.1629200457
+        expStats._n = 4
+        expStats._lValues = [2, 1.4913146570363986, 1.8112781244591329, 0.81127812445913283]
+        
+        obsStats = iAlignedBioseqDB.getEntropy()
+        
+        self.assertEquals(expStats, obsStats)
+        
+        
+    def test_getEntropy_different_nucl_with_gap(self):
+        iBioseq1 = Bioseq("seq1", "AGAT")
+        iBioseq2 = Bioseq("seq2", "C-CA")
+        iBioseq3 = Bioseq("seq3", "T--T")
+        iBioseq4 = Bioseq("seq4", "-TNT")
+                                   
+        iAlignedBioseqDB = AlignedBioseqDB()
+        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])
+        
+        expStats = Stat()
+        expStats._min = 0.811278124459
+        expStats._max = 1.65002242165
+        expStats._sum = 5.04626304683
+        expStats._sumOfSquares = 6.89285231586
+        expStats._n = 4
+        expStats._lValues = [1.5849625007211561, 1.0, 1.6500224216483541, 0.81127812445913283]
+        
+        obsStats = iAlignedBioseqDB.getEntropy()
+        
+        self.assertEquals(expStats, obsStats)
+        
+        
+    def test_saveAsBinaryMatrix( self ):
+        iBioseq1 = Bioseq("seq1", "AGAT")
+        iBioseq2 = Bioseq("seq2", "C-CA")
+        iBioseq3 = Bioseq("seq3", "T--T")
+        iBioseq4 = Bioseq("seq4", "-TNT")
+        
+        self._i.setData( [ iBioseq1, iBioseq2, iBioseq3, iBioseq4 ] )
+        
+        expFile = "dummyExpFile_%s" % ( self._uniqId )
+        expFileHandler = open( expFile, "w" )
+        expFileHandler.write( "seq1\t1\t1\t1\t1\n" )
+        expFileHandler.write( "seq2\t1\t0\t1\t1\n" )
+        expFileHandler.write( "seq3\t1\t0\t0\t1\n" )
+        expFileHandler.write( "seq4\t0\t1\t1\t1\n" )
+        expFileHandler.close()
+        
+        obsFile = "dummyObsFile_%s" % ( self._uniqId )
+        self._i.saveAsBinaryMatrix( obsFile )
+        
+        self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) )
+        for f in [ expFile, obsFile ]:
+            os.remove( f )
+            
+            
+    def test_getAlignList( self ):
+        iBioseq1 = Bioseq( "seq1", "AGAAT" )
+        iBioseq2 = Bioseq( "seq2", "-G-AC" )
+        self._i.setData( [ iBioseq1, iBioseq2 ] )
+        
+        iAlign1 = Align( Range( "seq1", 2, 2 ),
+                         Range( "seq2", 1, 1 ),
+                         0, 1, 100 )
+        iAlign2 = Align( Range( "seq1", 4, 5 ),
+                         Range( "seq2", 2, 3 ),
+                         0, 1, 50 )
+        lExp = [ iAlign1, iAlign2 ]
+        
+        lObs = self._i.getAlignList( "seq1", "seq2" )
+        self.assertEquals( lExp, lObs )
+    
+    
+    def test_removeGaps(self):
+        iBioseq1 = Bioseq( "seq1", "AGAAT-" )
+        iBioseq2 = Bioseq( "seq2", "AG-ACG" )
+        self._i.setData( [ iBioseq1, iBioseq2 ] )
+        
+        exp = AlignedBioseqDB()
+        exp.setData( [ Bioseq( "seq1", "AGAAT" ), 
+                      Bioseq( "seq2", "AGACG" ) ] )
+        
+        self._i.removeGaps()
+        
+        self.assertEquals(exp, self._i)
+    
+    def test_computeMeanPcentIdentity_between_two_sequences_50pcent(self): 
+        iBioseq1 = Bioseq( "seq1", "AGAAT-" )
+        iBioseq2 = Bioseq( "seq2", "AG-ACG" )
+        self._i.setData( [ iBioseq1, iBioseq2 ] )
+        expIdentity = 50.0
+        obsIdentity = self._i.computeMeanPcentIdentity()
+        self.assertEquals(expIdentity, obsIdentity)
+  
+    def test_computeMeanPcentIdentity_between_two_sequences_57pcent(self): 
+        iBioseq1 = Bioseq( "seq1", "AGAAT-T" )
+        iBioseq2 = Bioseq( "seq2", "AG-ACGT" )
+        self._i.setData( [ iBioseq1, iBioseq2 ] )
+        expIdentity = 57.0
+        obsIdentity = self._i.computeMeanPcentIdentity()
+        self.assertEquals(expIdentity, obsIdentity)
+        
+    def test_compueNeamPcentIdentity_between_two_sequences_gaps_on_same_position(self):
+        iBioseq1 = Bioseq( "seq1", "AGAAT-T" )
+        iBioseq2 = Bioseq( "seq2", "AG-AC-T" )
+        self._i.setData( [ iBioseq1, iBioseq2 ] )
+        expIdentity = 57.0
+        obsIdentity = self._i.computeMeanPcentIdentity()
+        self.assertEquals(expIdentity, obsIdentity)
+        
+    def test_computeMeanPcentIdentity_between_three_sequences_50_pcent(self):
+        iBioseqRef = Bioseq( "seqRef", "AGAAT-" )
+        iBioseq1 = Bioseq( "seq1", "AG-ACG" )
+        iBioseq2 = Bioseq( "seq2", "AG-ACG" )
+        
+        self._i.setData( [ iBioseqRef, iBioseq1, iBioseq2 ] )
+        
+        expIdentity = 50.0
+        obsIdentity = self._i.computeMeanPcentIdentity()
+        self.assertEquals(expIdentity, obsIdentity)
+        
+    def test_computeMeanPcentIdentity_between_three_sequences_42_pcent(self):
+        iBioseqRef = Bioseq( "seqRef", "AGAAT-" )
+        iBioseq1 = Bioseq( "seq1", "AG-ACG" )
+        iBioseq2 = Bioseq( "seq2", "AG-CC-" )
+
+        self._i.setData( [ iBioseqRef, iBioseq1, iBioseq2 ] )
+        
+        expIdentity = 42.0
+        obsIdentity = self._i.computeMeanPcentIdentity()
+        self.assertEquals(expIdentity, obsIdentity)
+        
+#TODO: test with tthis data:
+#>BlastclustCluster2Mb1_chunk7 (dbseq-nr 1) [101523,104351]
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#-------------------------------TAAATCCAACACTGAGAAGAATTATTTAA
+#AGAAGGTTTTTATTTAACTTCTTTATTCGGATATCAGTTTAAGACTAAAA-TTCA-AATG
+#TAAATTGGATTGAGGAGAAGCCTCAGTATTTTAACAATATTTGTATTTCGGGAGGGCCTC
+#GCTTTTGGATTCTTAAGATTAGGAAAAGATTCAAAGAGAGCAGTCAAATCTGCTTATGTC
+#AGGCTTTAGGATTTTTACAAGAACGGCTAAGTGCCGCTGCCAAAGAATTCTTTATTAGAA
+#ACGGACGCTGCGCAGCTGGGATACATTATGGAAATAACTTAAGTGCATTACTGTTTTCTT
+#TGAAATAATTGAAGTGGCCGATTTGGACCACCCAAATACGTACCTCCCCTTCCCTTAAAG
+#TGAAACCTATACTTGGGCTGGGATTGATAGATATTGTATGGAAAATGGAGTTTGTTCTAT
+#TTCTATTTGTTGTGGTATGTTTTGATTTTTTCTTATTTTAAGTTTTGCATACAGCAACAT
+#AA--ATGGCATAA--ATTATACATA-TCTTAAATATAAGTACAATAAAATTAGTCA--AT
+#TATTACAATGGTCATTAGTATG-TAAAGTGTAAT-ATTGTTCTC-TGA-AATCATCTTAA
+#CATTGGCGTTGTGTTTTACAATTTTTATTTTTGTTATATTAAGTGGTGTGTCAAGTGGTG
+#TCAAATCTATTTCTGGTTTTAACATATTTTCACTTAAAATTATACTATTGATTTCTATTT
+#TGCATTGCATTACTCCTTTCATTTTGTTTTCTTCTATTTTTATATTATTAATTGAATTTG
+#GGTAATTTTGGTTATGAATGGTTTGGGTTAAGTTCCAGTAATTTGTAATAATTTAAGTTT
+#AGTAATTTTGGGTTAAACAAGTCAAGTCTGGAAAGCTGGCATAGCCATTCTAAATCTTCT
+#ACGTATTCCGTAAACTGCATTAGCTAGAACAAAAGTGGTTATTTTCATAGTTCTGTTTTT
+#TTGGTTTCTTAAGTAAT--TAAT--TTGTATGTTTGATCAAAAAGGGTAACTATTATTTT
+#TGTAAT--TAATAACTGATTGGACATTTTTCAACAATTTTCTGTCAATTAGCAT----AT
+#CGTAATTATTAGAA-AATACTATT--TCTGGGTAA-CATAATCAAGTCGTTCAAGGTAAT
+#AGGGCCATT-TAATGTTAA-AACTTCACTTCTACTATTTTGTATGGGAAGA--CAAAATA
+#TATTTTCAT--TGATCATATTAATTGTGGAAGCCATTTGTATGTGTACCCTTTGTATACT
+#ATTTTTAAGCATTCGTGTTGTCCGGGGCTGTTGACCGAAAATTTTCGTTTAGTTCAGAAT
+#TATTTTGATTTTGTTCATTATTAATGTGTACGGGTTGGACCTGTTGTTGCCCTTCATTAA
+#TGAACCCATGATTTTGATATGGATTGTATTGATTTTGGTCATAATACTGTTGTTCGTAGG
+#TTGGGTCTGAAATAATTAGGTACGTGCCACATTGGTGGGTTGTAAGTCGTCTGCTCTGAT
+#AGCTAGGTCTAAATGGTTGAATGTATTGATTAAAATTAGGCTGGAAGGGTTGGGAATATT
+#GTGGGTAACTTGGGCGAGTTTGAACGTTTGTATTGAATTTATGTGCTTTCGAATTCTGAT
+#TAAAATTTGAATTTTTATTACGGTATTCTGGGTTTATAAAATTCAAAATTAATGAGTTTT
+#TCATAAATTCCCTCATTTTGTGCAATGGTTATTAAGGATCTTAAGTCTGTAATA-TCGTG
+#ATGAG-ATAA-AATAGTAAATATAGTAG-AGTCTTTA--ATAGCCTGAATAAAAATAAGA
+#AAATCCGATTGGTTACCTTCCAGGTGTAGTTTCGAAATTAATAATTGACGTCGTCTTTCC
+#CCTTCTTCGGAGAATGTTTTTAGGTTTCCTCTGTATGGTGTCTCCCTGAAGCTCTCCAGA
+#AGTTTGTAGTTTGGTGTTTGAGTTTTAAATTCCGCGATGAGTCTTGCTTTGAGGGTAGGC
+#CAATGCTCGGAAGTCCCAAAGATCGTATTATCCGTCCAAGTTACTTTTGATGGCTCCCAG
+#TAGAATCCTCTGTTTACGAAAATTACGTAAATCCACTCTGCTGACGACGGTGTGAAGTGT
+#TTCTGGATCACCCTTAAATGTCATAATGTCTTTAAGCTGCCGACGGGCTTCGGCAAGGTT
+#GTTGTCTCTTAGCGCAACGATTTAAGGTGCGGCAATAATTATAATAATTGGTTGAGACAT
+#TTTTATTGTAAAATTTTAAATTTTGTGTTTTATTGTTTTATTGTTTCGAATTCAATGCTA
+#TTATTTATTTTTTTTTTTGTTTTCTAGTTTCACTTTTTTTTTTATTGTATTGCTTTATTG
+#TTCTTATTTTATTTTTATATGTTGGTTTTAAAACTTAGTTGCCTTTGGACTTAATGTTTT
+#TGTTTCGTATTTCACTTCCACTTTAAATTGGATAACAGAATTGGAATTAAAATCCAAGTT
+#GAAGAGTTTCCACGAATTTATTTGGGAAATGTTTCGAGCACTGGAATCCAGTGACTGGAT
+#TATAAAATTTAACTTATTTCC-ACTCGAAGGTTCTTTTTT--CGG--------ATACTTT
+#TTGTATCAGT--TGACTAAGAGCAACACTGAGAAGAATTATTTAAAGAAGGTTTTTATTT
+#AACTTCTTTATTCGGATATCAGTTTAAGACTAAAA-TTCA-AATGTAAATTGGATTGAGG
+#AGAAGCCTCAGTATT--TCAACAATATTTGTATT--TCGGGAGGGCCTCGCTCTTGGAT-
+#-TCTTAAGATTAGGAAAAAATTCA-AAGAGAGCAGTCA-AATC-TGCTTATGTCAGGCTT
+#TAGGATTTTTACAAGAAGGGC-TAAGTGCCGCTGAAACGGATGC----------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------
+#>BlastclustCluster2Mb2_chunk7 (dbseq-nr 1) [99136,100579]
+#GTAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATA
+#ATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATA
+#ATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATC
+#ATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATA
+#ATCATAATAATCATAATAATCATAATAATCATAATAATAATAATAATCATAATCATAATC
+#ATAATAAGCGATAAAAAAATTAAAAAATAAAAATTAAAACCCACTGCAATCACGTTGGAC
+#GGCGAGTCACAGACGTCAGAATAGTGGTGCGTAAATCCAACGCCGAGAAGAATTACTTCA
+#AGAAGGTTTTTATTGAACTTCTTTATTCGGATATCAGTTTAAGACTAAAAATTAATAATC
+#ATAAT---AATCATAATAATCATAATAATCATAATAATCATAATAAT-------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#-----------------------------------------------CATA-ATAATCAT
+#AATAAT--CATAATAATCATA-ATAATCATAATAATCATAATAATCATAATAATCATAAT
+#AATCATAATAATCATAATAATCATAA----TAATCATAATAATCATAATAATCATAATAA
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------TCATAA-TAATCATAATAATCGTAA---TAATCATAA----TAATCATAATAAT
+#CATAATAATCATAA-TAAT----CAT-----AATAATCAT-----AATAATCATAATAAT
+#CATAATAATCATAATAATCATAATAATCATAATAATCATAAT-AA-TCAT--AA--TAAT
+#-----CATAATAATCATAATAA--TCA----TAATAATC---AT---AATAATCATAATA
+#-AT---CATAATAATCATAATAATC-----------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#-----------------------------------ATAATAATCATAAT-AATCA-----
+#TAATAA------TCATAAT----AATCATAAT-AATCATAATAA-TCA-TAATAATCATA
+#ATAATCATAATAATCATAATAATAATAATAATCATAATCATAATCATAATAAGCATAAAA
+#AAAT--------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#------------------------------------------------------------
+#TAAAAAATAAAAATTAAAACCCACTGCAA---TCACGTTGGACGGCGAGTCACAGACGTC
+#A-GAAT-AGTGGTGCGTAAATCCAACGCCGAGAAGAATTACTTCAAGAAGGTTTTTATTG
+#AACTTCTTTATTCGGATATCAGTTTAAGACTAAAAATTAATAATCATAAT---AATCATA
+#ATAA---TCA-TAATAATCAT-AATAATCATAATAATCATAA-----TAA-TCATA-ATA
+#ATCATAATAATCATAATAA--TCATAATA-ATCA-TAATAATCATAATAATCATAATCAT
+#CATAATAATCATAATAAT--CATAA-T-------AATC--ATAATAATCATAATAATCAT
+#AATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAAT
+#CATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAAT
+#AATCATAATAAT
+
+test_suite = unittest.TestSuite()
+test_suite.addTest( unittest.makeSuite( Test_AlignedBioseqDB ) )
+if __name__ == "__main__":
+    unittest.TextTestRunner(verbosity=2).run( test_suite )