Mercurial > repos > yufei-luo > s_mart
view smart_toolShed/commons/core/seq/test/Test_BioseqDB.py @ 0:e0f8dcca02ed
Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
author | yufei-luo |
---|---|
date | Thu, 17 Jan 2013 10:52:14 -0500 |
parents | |
children |
line wrap: on
line source
# Copyright INRA (Institut National de la Recherche Agronomique)
# http://www.inra.fr
# http://urgi.versailles.inra.fr
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
#
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
import unittest
import os
import time
from commons.core.seq.BioseqDB import BioseqDB
from commons.core.seq.Bioseq import Bioseq
from commons.core.utils.FileUtils import FileUtils
from commons.core.coord.Map import Map


# ---------------------------------------------------------------------------
# Fixture data shared by several tests.
# The numeric suffix of each sequence constant is its length in characters.
# ---------------------------------------------------------------------------

_SEQ_20 = "GAGATGGCTCATGGAGTACC"
_SEQ_28 = "TGCCTGATGGCTCATGGAGTACCTGCCT"
_SEQ_30 = "TGCCTGAGATGGCTCATGGAGTACCTGCCT"
_SEQ_32 = "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"
_SEQ_45 = "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC"
_SEQ_74 = "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"
_SEQ_75 = "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT"
_SEQ_76 = "GAGATGGCTCATGGAGTACCTGCCTTGCATGACTGCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT"
_SEQ_88 = "GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"
_SEQ_90 = "TGCCTGAGATGGCTCATGGAGTACCTGCCTTGCCTTGCATGACTGCATGGAGTACCTGCCTGTGCCTGATGGCTCATGGAGTACCTGCCT"
_SEQ_100 = "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"

# Sequence with three 'N' used by the countNt / countAllNt tests, and the
# lower-case literal used by test_countNt_lowercase.
_SEQ_WITH_N = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
_SEQ_WITH_N_LOWER = "gcgncgctgctttattaagcgctagcatgcgncgctgctttattaagcgctagcgattatatagcagacgcatattatattgcgcgatgcgncgctgctttattaagcgctagcgattatatagcagacgcatattatattgcgcg"

# FASTA rendering of ("consensus1", _SEQ_75) and ("consensus2", _SEQ_88) as the
# write/save/load tests expect it: sequence lines of at most 60 characters.
_FASTA_WRAPPED = (
    ">consensus1\n"
    "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTC\n"
    "ATGGAGTACCTGCCT\n"
    ">consensus2\n"
    "GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAG\n"
    "TACCGCGAGATGGCTCATGGAGTACCGC\n"
)

# Input file of the extractPatternOfFile tests (one sequence per line).
_FASTA_FOR_PATTERN = (
    ">consensus1\n" + _SEQ_45 + "\n"
    ">consensus2\n" + _SEQ_74 + "\n"
    ">consensus3\n" + _SEQ_20 + "\n"
    ">consensus4\n" + _SEQ_100 + "\n"
    ">consensus11\n" + _SEQ_90 + "\n"
)

# Original headers and the renamed form the tests expect in BioseqDB.idx_renamed.
_HEADER_1 = ( "embl::AF332402:AF332402 Arabidopsis thaliana clone C00024 (f)"
              "(At4g29080) mRNA, complete cds." )
_HEADER_2 = ( "embl::AF332503:AF332402 Arabidopsis thaliana clone C00024 (f)"
              "(At4g29080) mRNA, complete cds." )
_HEADER_3 = ( "embl::AF332604:AF332402 Arabidopsis thaliana clone C00024 (f)"
              "(At4g29080) mRNA, complete cds." )
_HEADER_RENAMED_1 = ( "embl-AF332402-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
                      "(At4g29080)_mRNA-_complete_cds." )
_HEADER_RENAMED_2 = ( "embl-AF332503-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
                      "(At4g29080)_mRNA-_complete_cds." )
_HEADER_RENAMED_3 = ( "embl-AF332604-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
                      "(At4g29080)_mRNA-_complete_cds." )

# Bank used by the getByPattern / getDiffFromPattern / rmByPattern tests.
_PATTERN_RECORDS = (
    ( "consensus4", _SEQ_100 ),
    ( "consensus1", _SEQ_30 ),
    ( "consensus6", _SEQ_32 ),
    ( "consensus11", _SEQ_28 ),
)

# Bank used by the addBioseqFromABioseqDBIfHeaderContainPattern tests, and the
# bank the new sequences are taken from.
_ADD_TARGET_RECORDS = (
    ( "consensus4", _SEQ_100 ),
    ( "consensus1", _SEQ_30 ),
    ( "consensus7", _SEQ_32 ),
    ( "consensus11", _SEQ_28 ),
)
_ADD_SOURCE_RECORDS = (
    ( "Sequence4", _SEQ_100 ),
    ( "consensus6", _SEQ_32 ),
)


def _makeBioseqDB( *lRecords ):
    """Return a new BioseqDB filled through setData().

    Each positional argument is a (header, sequence) pair; a fresh Bioseq is
    created for every pair, so two banks built from the same pairs never
    share Bioseq instances.
    """
    iBioseqDB = BioseqDB()
    iBioseqDB.setData( [ Bioseq( header, sequence ) for header, sequence in lRecords ] )
    return iBioseqDB


def _makeDescribedBioseqDB( sequence ):
    """Return a BioseqDB holding two Bioseq with the same sequence.

    The Bioseq are created empty and their 'header' / 'sequence' attributes
    are assigned afterwards, exactly as the countNt tests did originally.
    """
    lBioseq = []
    for header in ( "seq1 description1", "seq2 description2" ):
        iBioseq = Bioseq()
        iBioseq.header = header
        iBioseq.sequence = sequence
        lBioseq.append( iBioseq )
    iBioseqDB = BioseqDB()
    iBioseqDB.setData( lBioseq )
    return iBioseqDB


class Test_BioseqDB( unittest.TestCase ):
    """Unit tests of commons.core.seq.BioseqDB.BioseqDB."""

    def setUp( self ):
        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )
        # Every file registered here is deleted by tearDown(), so a failing
        # assertion can no longer leave fixture files behind.
        self._lTmpFiles = []

    def tearDown( self ):
        # os.path.exists() tests the file system; the former os._exists() only
        # looked the string up among the names of the os module, so the
        # clean-up never ran.
        for fileName in self._lTmpFiles + [ "dummyBioseqDB.fa" ]:
            if os.path.exists( fileName ):
                os.remove( fileName )

    def _registerTmpFile( self, fileName ):
        """Schedule fileName for removal in tearDown() and return it."""
        self._lTmpFiles.append( fileName )
        return fileName

    def _writeTmpFile( self, fileName, content ):
        """Create fileName containing 'content' and schedule it for removal."""
        self._registerTmpFile( fileName )
        with open( fileName, "w" ) as handle:
            handle.write( content )
        return fileName

    # ------------------------------------------------------------------ __eq__

    def test__eq__(self):
        lRecords = [ ( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" ),
                     ( "seq2", "GCGATGCGATCGATGCGATAGCA" ) ]
        expBioseqDB = _makeBioseqDB( *lRecords )
        obsBioseqDB = _makeBioseqDB( *lRecords )
        self.assertEqual( expBioseqDB, obsBioseqDB )

    def test__eq__instances_with_different_header(self):
        expBioseqDB = _makeBioseqDB( ( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" ),
                                     ( "seq2", "GCGATGCGATCGATGCGATAGCA" ) )
        obsBioseqDB = _makeBioseqDB( ( "seq3", "AGCGGACGATGCAGCATGCGAATGACGAT" ),
                                     ( "seq4", "GCGATGCGATCGATGCGATAGCA" ) )
        self.assertNotEqual( expBioseqDB, obsBioseqDB )

    def test__eq__instances_with_different_sequences(self):
        expBioseqDB = _makeBioseqDB( ( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" ),
                                     ( "seq2", "GCGATGCGATCGATGCGATAGCA" ) )
        obsBioseqDB = _makeBioseqDB( ( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" ),
                                     ( "seq2", "GCGATGCGATCGATGCGATAGCATATATATATATATATATATATAT" ) )
        self.assertNotEqual( expBioseqDB, obsBioseqDB )

    def test__eq__instances_with_different_sequences_and_headers(self):
        expBioseqDB = _makeBioseqDB( ( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" ),
                                     ( "seq2", "GCGATGCGATCGATGCGATAGCA" ) )
        obsBioseqDB = _makeBioseqDB( ( "seq3", "AGCGGACGATGCAGCATGCGAATGACGAT" ),
                                     ( "seq4", "GCGATGCGATCGATGCGATAGCATATATATATATATATATATATAT" ) )
        self.assertNotEqual( expBioseqDB, obsBioseqDB )

    def test__eq__instances_with_different_sizeOfBioseq(self):
        expBioseqDB = _makeBioseqDB( ( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" ),
                                     ( "seq2", "GCGATGCGATCGATGCGATAGCA" ) )
        obsBioseqDB = _makeBioseqDB( ( "seq3", "AGCGGACGATGCAGCATGCGAATGACGAT" ) )
        self.assertNotEqual( expBioseqDB, obsBioseqDB )

    # ------------------------------------------------------------- name / I/O

    def test_setName (self):
        expName = "myDataBank"
        iBioseqDB = BioseqDB()
        self.assertEqual( iBioseqDB.name, "" )
        iBioseqDB.setName( expName )
        obsName = iBioseqDB.name
        self.assertEqual( expName, obsName )

    def test_read(self):
        expBioseqDB = _makeBioseqDB( ( "consensus1", "GAGATGGCTCATGGAGTACCTGCCT" ),
                                     ( "consensus2", "GAGATGGCTCATGGAGTACCGC" ) )
        faFN = self._writeTmpFile( "dummyFaFile.fa",
                                   ">consensus1\n"
                                   "GAGATGGCTCATGGAGTACCTGCCT\n"
                                   ">consensus2\n"
                                   "GAGATGGCTCATGGAGTACCGC\n" )
        obsBioseqDB = BioseqDB()
        with open( faFN, "r" ) as faF:
            obsBioseqDB.read( faF )
        self.assertEqual( expBioseqDB, obsBioseqDB )

    def test_write(self):
        iBioseqDB = _makeBioseqDB( ( "consensus1", _SEQ_75 ), ( "consensus2", _SEQ_88 ) )
        expFaFileName = self._writeTmpFile( "dummyFaFile.fa", _FASTA_WRAPPED )
        obsFaFileName = self._registerTmpFile( "obsDummyFastaFile.fa" )
        with open( obsFaFileName, "w" ) as obsFaFile:
            iBioseqDB.write( obsFaFile )
        self.assertTrue( FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName) )

    def test_save(self):
        iBioseqDB = _makeBioseqDB( ( "consensus1", _SEQ_75 ), ( "consensus2", _SEQ_88 ) )
        expFaFileName = self._writeTmpFile( "dummyFaFile.fa", _FASTA_WRAPPED )
        obsFaFileName = self._registerTmpFile( "obsDummyFastaFile.fa" )
        iBioseqDB.save( obsFaFileName )
        self.assertTrue( FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName) )

    def test_load(self):
        expBioseqDB = _makeBioseqDB( ( "consensus1", _SEQ_75 ), ( "consensus2", _SEQ_88 ) )
        FaFileName = self._writeTmpFile( "dummyFaFile.fa", _FASTA_WRAPPED )
        obsBioseqDB = BioseqDB()
        obsBioseqDB.load( FaFileName )
        self.assertEqual( expBioseqDB, obsBioseqDB )

    # ------------------------------------------------- in-place transformations

    def test_reverse( self ):
        expBioseqDB = _makeBioseqDB( ( "seq1", "ATTG" ), ( "seq2", "CGAAT" ) )
        obsBioseqDB = _makeBioseqDB( ( "seq1", "GTTA" ), ( "seq2", "TAAGC" ) )
        obsBioseqDB.reverse()
        self.assertEqual( expBioseqDB, obsBioseqDB )

    def test_complement( self ):
        expBioseqDB = _makeBioseqDB( ( "seq1", "ATTG" ), ( "seq2", "CGAAT" ) )
        obsBioseqDB = _makeBioseqDB( ( "seq1", "TAAC" ), ( "seq2", "GCTTA" ) )
        obsBioseqDB.complement()
        self.assertEqual( expBioseqDB, obsBioseqDB )

    def test_reverseComplement( self ):
        expBioseqDB = _makeBioseqDB( ( "seq1", "ATTG" ), ( "seq2", "CGAAT" ) )
        obsBioseqDB = _makeBioseqDB( ( "seq1", "CAAT" ), ( "seq2", "ATTCG" ) )
        obsBioseqDB.reverseComplement()
        self.assertEqual( expBioseqDB, obsBioseqDB )

    def test_setData(self):
        lRecords = [ ( "seq1", "ATTG" ), ( "seq2", "CGAAT" ), ( "seq3", "CAAT" ), ( "seq4", "ATTCG" ) ]
        # The expected bank gets its list assigned directly to 'db', bypassing
        # setData(), which is the method under test.
        expBioseqDB = BioseqDB()
        expBioseqDB.db = [ Bioseq( header, sequence ) for header, sequence in lRecords ]
        obsBioseqDB = BioseqDB()
        obsBioseqDB.setData( [ Bioseq( header, sequence ) for header, sequence in lRecords ] )
        self.assertEqual( expBioseqDB, obsBioseqDB )

    def test_reset( self ):
        obsBioseqDB = _makeBioseqDB( ( "seq1", "ATTG" ), ( "seq2", "CGAAT" ),
                                     ( "seq3", "CAAT" ), ( "seq4", "ATTCG" ) )
        obsBioseqDB.reset()
        expBioseqDB = BioseqDB()
        self.assertEqual( expBioseqDB, obsBioseqDB )

    def testCleanGap(self):
        expBioseqDB = _makeBioseqDB( ( "seq1", "ATTG" ), ( "seq2", "CGAAT" ) )
        obsBioseqDB = _makeBioseqDB( ( "seq1", "AT-----TG" ), ( "seq2", "CGAA----T" ) )
        obsBioseqDB.cleanGap()
        self.assertEqual( expBioseqDB, obsBioseqDB )

    def testCleanGap_on_empty_db(self):
        expBioseqDB = BioseqDB()
        obsBioseqDB = BioseqDB()
        obsBioseqDB.cleanGap()
        self.assertEqual( expBioseqDB, obsBioseqDB )

    def testCleanGap_on_size_one_db(self):
        expBioseqDB = _makeBioseqDB( ( "seq1", "ATTG" ) )
        obsBioseqDB = _makeBioseqDB( ( "seq1", "AT-----TG" ) )
        obsBioseqDB.cleanGap()
        self.assertEqual( expBioseqDB, obsBioseqDB )

    # --------------------------------------------------------------------- add

    def test_add_to_a_empty_bioseqDB_instance (self):
        expDictIdx = { _HEADER_1 : 0 }
        expDictIdxRenamed = { _HEADER_RENAMED_1 : 0 }
        obsBioseqDB = BioseqDB()
        obsBioseqDB.add( Bioseq( _HEADER_1, "ATTG" ) )
        obsDictIdx = obsBioseqDB.idx
        obsDictIdxRenamed = obsBioseqDB.idx_renamed
        self.assertEqual( expDictIdx, obsDictIdx )
        self.assertEqual( expDictIdxRenamed, obsDictIdxRenamed )

    def test_add_to_a_size_one_bioseqDB_instance (self):
        expDictIdx = { _HEADER_1 : 0, _HEADER_2 : 1 }
        expDictIdxRenamed = { _HEADER_RENAMED_1 : 0, _HEADER_RENAMED_2 : 1 }
        obsBioseqDB = _makeBioseqDB( ( _HEADER_1, "ATTG" ) )
        obsBioseqDB.add( Bioseq( _HEADER_2, "ATTG" ) )
        obsDictIdx = obsBioseqDB.idx
        obsDictIdxRenamed = obsBioseqDB.idx_renamed
        self.assertEqual( expDictIdx, obsDictIdx )
        self.assertEqual( expDictIdxRenamed, obsDictIdxRenamed )

    def test_add_to_a_size_two_bioseqDB_instance (self):
        expDictIdx = { _HEADER_1 : 0, _HEADER_2 : 1, _HEADER_3 : 2 }
        expDictIdxRenamed = { _HEADER_RENAMED_1 : 0, _HEADER_RENAMED_2 : 1, _HEADER_RENAMED_3 : 2 }
        obsBioseqDB = _makeBioseqDB( ( _HEADER_1, "ATTG" ), ( _HEADER_2, "ATTG" ) )
        obsBioseqDB.add( Bioseq( _HEADER_3, "ATTG" ) )
        obsDictIdx = obsBioseqDB.idx
        obsDictIdxRenamed = obsBioseqDB.idx_renamed
        self.assertEqual( expDictIdx, obsDictIdx )
        self.assertEqual( expDictIdxRenamed, obsDictIdxRenamed )

    # --------------------------------------------------------------- accessors

    def test__getitem__(self):
        iBioseqDB = _makeBioseqDB( ( "seq1", "ATTG" ), ( "seq2", "CGAAT" ) )
        expBioseq = Bioseq( "seq2", "CGAAT" )
        obsBioseq = iBioseqDB[1]
        self.assertEqual( expBioseq, obsBioseq )

    def test_getSize(self):
        expSize = 4
        obsBioseqDB = _makeBioseqDB( ( "seq1", "ATTG" ), ( "seq2", "CGAAT" ),
                                     ( "seq3", "AT-----TG" ), ( "seq4", "CGAA----T" ) )
        obsSize = obsBioseqDB.getSize()
        self.assertEqual( expSize, obsSize )

    def test_getSize_emptyDB(self):
        expSize = 0
        obsBioseqDB = BioseqDB()
        obsSize = obsBioseqDB.getSize()
        self.assertEqual( expSize, obsSize )

    def test_getLength(self):
        iBioseqDB = _makeBioseqDB( ( "consensus1", _SEQ_75 ), ( "consensus2", _SEQ_88 ) )
        expLength = 163
        obsLength = iBioseqDB.getLength()
        self.assertEqual( expLength, obsLength )

    def test_getListOfSequencesLength(self):
        iBioseqDB = _makeBioseqDB( ( "consensus1", _SEQ_75 ), ( "consensus2", _SEQ_88 ) )
        lLength = iBioseqDB.getListOfSequencesLength()
        expLLengh = [75, 88]
        self.assertEqual( expLLengh, lLength )

    def test_getHeaderList( self ):
        lExpHeader = ["seq1", "seq2"]
        obsBioseqDB = _makeBioseqDB( ( "seq1", "ATTG" ), ( "seq2", "CGAAT" ) )
        lObsHeader = obsBioseqDB.getHeaderList()
        self.assertEqual( lExpHeader, lObsHeader )

    def test_getSequencesList( self ):
        lExpSeqs = ["ATGC", "AATTCCGG"]
        obsBioseqDB = _makeBioseqDB( ( "seq1", "ATGC" ), ( "seq2", "AATTCCGG" ) )
        lObsSeqs = obsBioseqDB.getSequencesList()
        self.assertEqual( lExpSeqs, lObsSeqs )

    def test_fetch( self ):
        # The expected value is the very instance stored in the bank.
        ibioseq1 = Bioseq( "seq1", "ATTG" )
        ibioseq2 = Bioseq( "seq2", "CGAAT" )
        iBioseqDB = BioseqDB()
        iBioseqDB.setData( [ ibioseq1, ibioseq2 ] )
        expBioseq = ibioseq1
        obsBioseq = iBioseqDB.fetch( "seq1" )
        self.assertEqual( expBioseq, obsBioseq )

    def test_getBioseqByRenamedHeader( self ):
        # The expected value is the very instance stored in the bank.
        ibioseq1 = Bioseq( _HEADER_1, "ATTG" )
        ibioseq2 = Bioseq( _HEADER_2, "CGAAT" )
        ibioseq3 = Bioseq( _HEADER_3, "TGCGAAT" )
        iBioseqDB = BioseqDB()
        iBioseqDB.setData( [ ibioseq1, ibioseq2, ibioseq3 ] )
        expBioseq = ibioseq2
        obsBioseq = iBioseqDB.getBioseqByRenamedHeader( _HEADER_RENAMED_2 )
        self.assertEqual( expBioseq, obsBioseq )

    def test_init_with_the_parm_name( self ):
        expBioseqDB = _makeBioseqDB( ( "seq1", "ATTG" ), ( "seq2", "CGAAT" ) )
        fastaFilename = self._writeTmpFile( "dummyBioseqDB.fa",
                                            ">seq1\n"
                                            "ATTG\n"
                                            ">seq2\n"
                                            "CGAAT\n" )
        obsBioseqDB = BioseqDB( fastaFilename )
        self.assertEqual( expBioseqDB, obsBioseqDB )

    # ---------------------------------------------------------------- counting

    def test_countNt(self):
        iBioseqDB = _makeDescribedBioseqDB( _SEQ_WITH_N )
        expCount = 6
        obsCount = iBioseqDB.countNt('N')
        self.assertEqual( expCount, obsCount )

    def test_countNt_lowercase(self):
        # An upper-case 'N' is asked for while the bank only holds lower case.
        iBioseqDB = _makeDescribedBioseqDB( _SEQ_WITH_N_LOWER )
        expCount = 0
        obsCount = iBioseqDB.countNt('N')
        self.assertEqual( expCount, obsCount )

    def test_countNt_withCharacterNotExisting(self):
        iBioseqDB = _makeDescribedBioseqDB( _SEQ_WITH_N )
        expCount = 0
        obsCount = iBioseqDB.countNt('W')
        self.assertEqual( expCount, obsCount )

    def test_countAllNt(self):
        iBioseqDB = _makeDescribedBioseqDB( _SEQ_WITH_N )
        dExpCount = {'A': 68, 'C': 62, 'T': 86, 'G': 70, 'N': 6}
        dObsCount = iBioseqDB.countAllNt()
        self.assertEqual( dExpCount, dObsCount )

    # ---------------------------------------------------- extraction / ranking

    def test_extractPart(self):
        iBioseqDB = _makeBioseqDB( ( "consensus1", _SEQ_75 ), ( "consensus2", _SEQ_74 ),
                                   ( "consensus3", _SEQ_76 ), ( "consensus4", _SEQ_100 ) )
        expSubBioseqDB = _makeBioseqDB( ( "consensus2", _SEQ_74 ), ( "consensus3", _SEQ_76 ) )
        obsSubBioseqDB = iBioseqDB.extractPart( 1, 2 )
        self.assertEqual( expSubBioseqDB, obsSubBioseqDB )

    def test_bestLength(self):
        iBioseqDB = _makeBioseqDB( ( "consensus1", _SEQ_45 ), ( "consensus2", _SEQ_74 ),
                                   ( "consensus3", _SEQ_20 ), ( "consensus4", _SEQ_100 ),
                                   ( "consensus5", _SEQ_30 ), ( "consensus6", _SEQ_32 ),
                                   ( "consensus7", _SEQ_28 ) )
        expBioseqDB = _makeBioseqDB( ( "consensus1", _SEQ_45 ), ( "consensus2", _SEQ_74 ),
                                     ( "consensus4", _SEQ_100 ), ( "consensus6", _SEQ_32 ) )
        obsBioseqDB = iBioseqDB.bestLength( 4 )
        self.assertEqual( expBioseqDB, obsBioseqDB )

    def test_bestLength_with_a_none_sequence_include(self):
        lRecords = [ ( "consensus1", None ), ( "consensus2", _SEQ_74 ), ( "consensus3", _SEQ_20 ) ]
        iBioseqDB = _makeBioseqDB( *lRecords )
        expBioseqDB = _makeBioseqDB( *lRecords )
        obsBioseqDB = iBioseqDB.bestLength( 3 )
        self.assertEqual( expBioseqDB, obsBioseqDB )

    def test_bestLength_with_a_none_sequence_not_include(self):
        iBioseqDB = _makeBioseqDB( ( "consensus1", None ), ( "consensus2", _SEQ_74 ),
                                   ( "consensus3", _SEQ_20 ) )
        expBioseqDB = _makeBioseqDB( ( "consensus2", _SEQ_74 ), ( "consensus3", _SEQ_20 ) )
        obsBioseqDB = iBioseqDB.bestLength( 2 )
        self.assertEqual( expBioseqDB, obsBioseqDB )

    def test_bestLength_number_of_bioseq_requiered_gt_BioseqDB_size(self):
        lRecords = [ ( "consensus1", _SEQ_45 ), ( "consensus2", _SEQ_74 ), ( "consensus3", _SEQ_20 ) ]
        iBioseqDB = _makeBioseqDB( *lRecords )
        expBioseqDB = _makeBioseqDB( *lRecords )
        obsBioseqDB = iBioseqDB.bestLength( 15 )
        self.assertEqual( expBioseqDB, obsBioseqDB )

    # ---------------------------------------------------------------- patterns

    def test_extractPatternOfFile(self):
        fastaFilename = self._writeTmpFile( "dummyBioseqDB.fa", _FASTA_FOR_PATTERN )
        expBioseqDB = _makeBioseqDB( ( "consensus1", _SEQ_45 ), ( "consensus11", _SEQ_90 ) )
        obsBioseqDB = BioseqDB()
        obsBioseqDB.extractPatternOfFile( "consensus1+" , fastaFilename )
        self.assertEqual( expBioseqDB, obsBioseqDB )

    def test_extractPatternOfFile_WithNoExistingPattern(self):
        fastaFilename = self._writeTmpFile( "dummyBioseqDB.fa", _FASTA_FOR_PATTERN )
        expBioseqDB = BioseqDB()
        obsBioseqDB = BioseqDB()
        obsBioseqDB.extractPatternOfFile( "NoExistingPattern" , fastaFilename )
        self.assertEqual( expBioseqDB, obsBioseqDB )

    def test_getByPattern (self):
        iBioseqDB = _makeBioseqDB( *_PATTERN_RECORDS )
        expBioseqDB = _makeBioseqDB( ( "consensus1", _SEQ_30 ), ( "consensus11", _SEQ_28 ) )
        obsBioseqDB = iBioseqDB.getByPattern( "consensus1+" )
        self.assertEqual( expBioseqDB, obsBioseqDB )

    def test_getByPattern_with_no_existing_pattern (self):
        iBioseqDB = _makeBioseqDB( *_PATTERN_RECORDS )
        expBioseqDB = BioseqDB()
        obsBioseqDB = iBioseqDB.getByPattern( "noExistingPattern+" )
        self.assertEqual( expBioseqDB, obsBioseqDB )

    def test_getDiffFromPattern (self):
        iBioseqDB = _makeBioseqDB( *_PATTERN_RECORDS )
        expBioseqDB = _makeBioseqDB( ( "consensus1", _SEQ_30 ), ( "consensus11", _SEQ_28 ) )
        obsBioseqDB = iBioseqDB.getDiffFromPattern( "consensus[4|6]" )
        self.assertEqual( expBioseqDB, obsBioseqDB )

    def test_getDiffFromPattern_with_no_existing_pattern (self):
        iBioseqDB = _makeBioseqDB( *_PATTERN_RECORDS )
        expBioseqDB = _makeBioseqDB( *_PATTERN_RECORDS )
        obsBioseqDB = iBioseqDB.getDiffFromPattern( "noExistingPattern+" )
        self.assertEqual( expBioseqDB, obsBioseqDB )

    def test_rmByPattern (self):
        obsBioseqDB = _makeBioseqDB( *_PATTERN_RECORDS )
        expBioseqDB = _makeBioseqDB( ( "consensus4", _SEQ_100 ), ( "consensus6", _SEQ_32 ) )
        obsBioseqDB.rmByPattern( "consensus1+" )
        self.assertEqual( expBioseqDB, obsBioseqDB )

    def test_rmByPattern_with_no_existing_pattern (self):
        obsBioseqDB = _makeBioseqDB( *_PATTERN_RECORDS )
        expBioseqDB = _makeBioseqDB( *_PATTERN_RECORDS )
        obsBioseqDB.rmByPattern( "noExistingPattern+" )
        self.assertEqual( expBioseqDB, obsBioseqDB )

    def test_addBioseqFromABioseqDBIfHeaderContainPattern (self):
        obsBioseqDB = _makeBioseqDB( *_ADD_TARGET_RECORDS )
        inBioseqDB = _makeBioseqDB( *_ADD_SOURCE_RECORDS )
        # Only "consensus6" is expected to be appended; "Sequence4" is not.
        expBioseqDB = _makeBioseqDB( *( _ADD_TARGET_RECORDS + ( ( "consensus6", _SEQ_32 ), ) ) )
        obsBioseqDB.addBioseqFromABioseqDBIfHeaderContainPattern( "consensus.*", inBioseqDB )
        self.assertEqual( expBioseqDB, obsBioseqDB )

    def test_addBioseqFromABioseqDBIfHeaderContainPattern_with_no_existing_pattern (self):
        obsBioseqDB = _makeBioseqDB( *_ADD_TARGET_RECORDS )
        inBioseqDB = _makeBioseqDB( *_ADD_SOURCE_RECORDS )
        expBioseqDB = _makeBioseqDB( *_ADD_TARGET_RECORDS )
        obsBioseqDB.addBioseqFromABioseqDBIfHeaderContainPattern( "noExistingPattern", inBioseqDB )
        self.assertEqual( expBioseqDB, obsBioseqDB )

    # ------------------------------------------------------------------- misc

    def test_upCase (self):
        obsBioseqDB = _makeBioseqDB( ( "consensus4", "atgacGatgca" ), ( "consensus1", "atgcgaT" ) )
        expBioseqDB = _makeBioseqDB( ( "consensus4", "ATGACGATGCA" ), ( "consensus1", "ATGCGAT" ) )
        obsBioseqDB.upCase()
        self.assertEqual( expBioseqDB, obsBioseqDB )

    def test_getMap(self):
        iAlignedBioseqDB = _makeBioseqDB( ( "header1", "ATGC-RA-GCT" ),
                                          ( "header2", "-TGC-RA-GCT" ),
                                          ( "header3", "ATGC-RA-GC-" ) )
        obsDict = iAlignedBioseqDB.getDictOfLMapsWithoutGaps()
        expLMap1 = [ Map( "header1_subSeq1", "header1", 1, 4 ),
                     Map( "header1_subSeq2", "header1", 6, 7 ),
                     Map( "header1_subSeq3", "header1", 9, 11 ) ]
        expLMap2 = [ Map( "header2_subSeq1", "header2", 2, 4 ),
                     Map( "header2_subSeq2", "header2", 6, 7 ),
                     Map( "header2_subSeq3", "header2", 9, 11 ) ]
        expLMap3 = [ Map( "header3_subSeq1", "header3", 1, 4 ),
                     Map( "header3_subSeq2", "header3", 6, 7 ),
                     Map( "header3_subSeq3", "header3", 9, 10 ) ]
        expDict = { "header1": expLMap1, "header2": expLMap2, "header3": expLMap3 }
        self.assertEqual( expDict, obsDict )

    def test_getSeqLengthByListOfName(self):
        iBioseqDB = _makeBioseqDB( ( "header1", "ATGC-RA-GCT" ),
                                   ( "header2", "-TGC-RAR" ),
                                   ( "header3", "ATGC" ) )
        expList = [11, 4]
        obsList = iBioseqDB.getSeqLengthByListOfName( ["header1", "header3"] )
        self.assertEqual( expList, obsList )


# unittest.makeSuite() is deprecated; the loader call below collects the same
# "test*" methods of the class.
test_suite = unittest.TestSuite()
test_suite.addTest( unittest.TestLoader().loadTestsFromTestCase( Test_BioseqDB ) )

if __name__ == "__main__":
    unittest.TextTestRunner(verbosity=2).run( test_suite )