Mercurial > repos > yufei-luo > s_mart
view smart_toolShed/commons/core/seq/test/Test_BioseqUtils.py @ 0:e0f8dcca02ed
Uploaded S-MART tool. A toolbox that manages RNA-Seq and ChIP-Seq data.
author | yufei-luo |
---|---|
date | Thu, 17 Jan 2013 10:52:14 -0500 |
parents | |
children |
line wrap: on
line source
# Copyright INRA (Institut National de la Recherche Agronomique)
# http://www.inra.fr
# http://urgi.versailles.inra.fr
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
#
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
import os
import unittest

from commons.core.seq.Bioseq import Bioseq
from commons.core.seq.BioseqUtils import BioseqUtils
from commons.core.utils.FileUtils import FileUtils


def _newBioseq(header, sequence):
    # Build a Bioseq the same way every test originally did: default
    # constructor first, then explicit assignment of both attributes.
    bioseq = Bioseq()
    bioseq.header = header
    bioseq.sequence = sequence
    return bioseq


class Test_BioseqUtils(unittest.TestCase):
    # Unit tests for the class-level functions of BioseqUtils.
    #
    # NOTE: the test methods are documented with '#' comments rather than
    # docstrings on purpose: the module runs its suite with verbosity=2,
    # and unittest prints a test's docstring in that mode, which would
    # change the runner's report.

    def setUp(self):
        # Names of the files created by the running test. They are removed
        # in tearDown, so a failing assertion no longer leaves dummy files
        # behind in the working directory.
        self._lTmpFiles = []

    def tearDown(self):
        for fileName in self._lTmpFiles:
            if os.path.exists(fileName):
                os.remove(fileName)

    def _writeTmpFile(self, fileName, lLines):
        # Create 'fileName' holding the given lines (an empty list yields an
        # empty file) and register it for removal in tearDown.
        self._lTmpFiles.append(fileName)
        with open(fileName, "w") as f:
            f.writelines(lLines)

    # --- translateSequence -------------------------------------------------

    def test_translateSequence_one_nt(self):
        # A single nucleotide is expected to translate to an empty sequence.
        bioseq = Bioseq()
        bioseq.sequence = "G"
        BioseqUtils.translateSequence(bioseq, 1)
        expSequence = ""
        obsSequence = bioseq.sequence
        self.assertEqual(expSequence, obsSequence)

    def test_translateSequence_frame1(self):
        # The leading 'N' makes the first codon ambiguous: expected as 'X'.
        bioseq = Bioseq()
        bioseq.sequence = "NGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTCCGACTAATCAACAATATAATGCGAGTAGAGCTTGA"
        BioseqUtils.translateSequence(bioseq, 1)
        expSequence = "XGF*LISL*SQ*FHVGVSWLRLINNIMRVEL"
        obsSequence = bioseq.sequence
        self.assertEqual(expSequence, obsSequence)

    def test_translateSequence_frame2(self):
        bioseq = Bioseq()
        bioseq.sequence = "NGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTCCGACTAATCAACAATATAATGCGAGTAGAGCTTGA"
        BioseqUtils.translateSequence(bioseq, 2)
        expSequence = "VASS*SVYDHNDFT*VSRGSD*STI*CE*SL"
        obsSequence = bioseq.sequence
        self.assertEqual(expSequence, obsSequence)

    def test_translateSequence_frame3(self):
        bioseq = Bioseq()
        bioseq.sequence = "NGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTCCGACTAATCAACAATATAATGCGAGTAGAGCTTGA"
        BioseqUtils.translateSequence(bioseq, 3)
        expSequence = "WLLVDQFMITMISRRCLVAPTNQQYNASRA*"
        obsSequence = bioseq.sequence
        self.assertEqual(expSequence, obsSequence)

    # --- setFrameInfoOnHeader ----------------------------------------------

    def test_setFrameInfoOnHeader(self):
        # The frame suffix is expected on the first word of the header only.
        bioseq = Bioseq()
        bioseq.header = "header1 description1 description2"
        BioseqUtils.setFrameInfoOnHeader(bioseq, 1)
        expHeader = "header1_1 description1 description2"
        obsHeader = bioseq.header
        self.assertEqual(expHeader, obsHeader)

    def test_setFrameInfoOnHeader_header_without_space(self):
        bioseq = Bioseq()
        bioseq.header = "header"
        BioseqUtils.setFrameInfoOnHeader(bioseq, 1)
        expHeader = "header_1"
        obsHeader = bioseq.header
        self.assertEqual(expHeader, obsHeader)

    # --- translateInAllFrame -----------------------------------------------

    def test_TranslateInAllFrame(self):
        bioseq = _newBioseq(
            "header1",
            "TGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTCCGACTAATCAACAATATAATGCGAGTAGAGCTTGA",
        )
        expLBioseq = [
            _newBioseq("header1_1", "CGF*LISL*SQ*FHVGVSWLRLINNIMRVEL"),
            _newBioseq("header1_2", "VASS*SVYDHNDFT*VSRGSD*STI*CE*SL"),
            _newBioseq("header1_3", "WLLVDQFMITMISRRCLVAPTNQQYNASRA*"),
            _newBioseq("header1_4", "SSSTRIILLISRSHETPT*NHCDHKLIN*KP"),
            _newBioseq("header1_5", "QALLALYC*LVGATRHLREIIVIIN*STRSH"),
            _newBioseq("header1_6", "KLYSHYIVD*SEPRDTYVKSL*S*TDQLEAT"),
        ]
        obsLBioseq = BioseqUtils.translateInAllFrame(bioseq)
        self.assertEqual(expLBioseq, obsLBioseq)

    # --- replaceStopCodonsByX ----------------------------------------------

    def test_replaceStopCodonsByX(self):
        bioseq = Bioseq()
        bioseq.sequence = "VASS*SVYDHNDFT*VSRGSD*STI*CE*SL"
        BioseqUtils.replaceStopCodonsByX(bioseq)
        expSequence = "VASSXSVYDHNDFTXVSRGSDXSTIXCEXSL"
        obsSequence = bioseq.sequence
        self.assertEqual(expSequence, obsSequence)

    # --- translateBioseqListInAllFrames ------------------------------------

    def test_translateBioseqListInAllFrames_with_empty_list(self):
        lBioseq = []
        obsLBioseq = BioseqUtils.translateBioseqListInAllFrames(lBioseq)
        expLBioseq = []
        self.assertEqual(expLBioseq, obsLBioseq)

    def test_translateBioseqListInAllFrames_with_one_item(self):
        bioseq1 = _newBioseq(
            "header1 description",
            "TGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTCCGACTAATCAACAATATAATGCGAGTAGAGCTTGA",
        )
        lBioseq = [bioseq1]
        obsLBioseq = BioseqUtils.translateBioseqListInAllFrames(lBioseq)
        expLBioseq = [
            _newBioseq("header1_1 description", "CGF*LISL*SQ*FHVGVSWLRLINNIMRVEL"),
            _newBioseq("header1_2 description", "VASS*SVYDHNDFT*VSRGSD*STI*CE*SL"),
            _newBioseq("header1_3 description", "WLLVDQFMITMISRRCLVAPTNQQYNASRA*"),
            _newBioseq("header1_4 description", "SSSTRIILLISRSHETPT*NHCDHKLIN*KP"),
            _newBioseq("header1_5 description", "QALLALYC*LVGATRHLREIIVIIN*STRSH"),
            _newBioseq("header1_6 description", "KLYSHYIVD*SEPRDTYVKSL*S*TDQLEAT"),
        ]
        self.assertEqual(expLBioseq, obsLBioseq)

    def test_translateBioseqListInAllFrames(self):
        # The two inputs differ by one nucleotide (...GGCTCCGAC... versus
        # ...GGCTACGAC...), hence the small differences between the
        # expected translations of header1 and header2.
        bioseq1 = _newBioseq(
            "header1 description",
            "TGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTCCGACTAATCAACAATATAATGCGAGTAGAGCTTGA",
        )
        bioseq2 = _newBioseq(
            "header2",
            "TGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTACGACTAATCAACAATATAATGCGAGTAGAGCTTGA",
        )
        lBioseq = [bioseq1, bioseq2]
        obsLBioseq = BioseqUtils.translateBioseqListInAllFrames(lBioseq)
        expLBioseq = [
            _newBioseq("header1_1 description", "CGF*LISL*SQ*FHVGVSWLRLINNIMRVEL"),
            _newBioseq("header1_2 description", "VASS*SVYDHNDFT*VSRGSD*STI*CE*SL"),
            _newBioseq("header1_3 description", "WLLVDQFMITMISRRCLVAPTNQQYNASRA*"),
            _newBioseq("header1_4 description", "SSSTRIILLISRSHETPT*NHCDHKLIN*KP"),
            _newBioseq("header1_5 description", "QALLALYC*LVGATRHLREIIVIIN*STRSH"),
            _newBioseq("header1_6 description", "KLYSHYIVD*SEPRDTYVKSL*S*TDQLEAT"),
            _newBioseq("header2_1", "CGF*LISL*SQ*FHVGVSWLRLINNIMRVEL"),
            _newBioseq("header2_2", "VASS*SVYDHNDFT*VSRGYD*STI*CE*SL"),
            _newBioseq("header2_3", "WLLVDQFMITMISRRCLVATTNQQYNASRA*"),
            _newBioseq("header2_4", "SSSTRIILLISRSHETPT*NHCDHKLIN*KP"),
            _newBioseq("header2_5", "QALLALYC*LVVATRHLREIIVIIN*STRSH"),
            _newBioseq("header2_6", "KLYSHYIVD*S*PRDTYVKSL*S*TDQLEAT"),
        ]
        self.assertEqual(expLBioseq, obsLBioseq)

    # --- replaceStopCodonsByXInBioseqList ----------------------------------

    def test_replaceStopCodonsByXInBioseqList_empty_list(self):
        lBioseq = []
        obsLBioseq = BioseqUtils.replaceStopCodonsByXInBioseqList(lBioseq)
        expLBioseq = []
        self.assertEqual(obsLBioseq, expLBioseq)

    def test_replaceStopCodonsByXInBioseqList_without_stop_codon(self):
        bioseq1 = _newBioseq("header1 description", "CGFLISLSQFHVGVSWLRLINNIMRVEL")
        lBioseq = [bioseq1]
        obsLBioseq = BioseqUtils.replaceStopCodonsByXInBioseqList(lBioseq)
        bioseq2 = _newBioseq("header1 description", "CGFLISLSQFHVGVSWLRLINNIMRVEL")
        expLBioseq = [bioseq2]
        self.assertEqual(obsLBioseq, expLBioseq)

    def test_replaceStopCodonsByXInBioseqList(self):
        bioseq1 = _newBioseq("header1 description", "CGF*LISL*SQ*FHVGVSWLRLINNIMRVEL")
        bioseq2 = _newBioseq("header2", "VASS*SVYDHNDFT*VSRGSD*STI*CE*SL")
        lBioseq = [bioseq1, bioseq2]
        obsLBioseq = BioseqUtils.replaceStopCodonsByXInBioseqList(lBioseq)
        bioseq3 = _newBioseq("header1 description", "CGFXLISLXSQXFHVGVSWLRLINNIMRVEL")
        bioseq4 = _newBioseq("header2", "VASSXSVYDHNDFTXVSRGSDXSTIXCEXSL")
        expLBioseq = [bioseq3, bioseq4]
        self.assertEqual(obsLBioseq, expLBioseq)

    # --- fasta file I/O ----------------------------------------------------

    def test_writeBioseqListIntoFastaFile(self):
        obsFileName = "dummyWrittenFastaFile.fa"
        # Registered before the call so the file is removed even if the
        # writer or the comparison below fails.
        self._lTmpFiles.append(obsFileName)
        bioseq1 = _newBioseq(
            "header1 description",
            "TGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTCCGACTAATCAACAATATAATGCGAGTAGAGCTTGA",
        )
        bioseq2 = _newBioseq(
            "header2",
            "TGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTACGACTAATCAACAATATAATGCGAGTAGAGCTTGA",
        )
        lBioseq = [bioseq1, bioseq2]
        BioseqUtils.writeBioseqListIntoFastaFile(lBioseq, obsFileName)

        # Expected layout: each 95-nt sequence wrapped after 60 characters.
        expFileName = "dummyFastaFile.fa"
        self._writeTmpFile(expFileName, [
            ">header1 description\n",
            "TGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTC\n",
            "CGACTAATCAACAATATAATGCGAGTAGAGCTTGA\n",
            ">header2\n",
            "TGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTA\n",
            "CGACTAATCAACAATATAATGCGAGTAGAGCTTGA\n",
        ])

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))

    def test_extractBioseqListFromFastaFile(self):
        fileName = "dummyFastaFile.fa"
        self._writeTmpFile(fileName, [
            ">header1_1 description1\n",
            "CGF*LISL*SQ*FHVGVSWLRLINNIMRVEL\n",
            ">header1_2 description2\n",
            "VASS*SVYDHNDFT*VSRGSD*STI*CE*SL\n",
            ">header1_3 description3\n",
            "CWLLVDQFMITMISRRCLVAPTNQQYNASRA*\n",
        ])
        expLBioseq = [
            _newBioseq("header1_1 description1", "CGF*LISL*SQ*FHVGVSWLRLINNIMRVEL"),
            _newBioseq("header1_2 description2", "VASS*SVYDHNDFT*VSRGSD*STI*CE*SL"),
            _newBioseq("header1_3 description3", "CWLLVDQFMITMISRRCLVAPTNQQYNASRA*"),
        ]
        obsLBioseq = BioseqUtils.extractBioseqListFromFastaFile(fileName)
        self.assertEqual(expLBioseq, obsLBioseq)

    def test_extractBioseqListFromFastaFile_empty_seq(self):
        # A header with no sequence line is expected to give an empty sequence.
        fileName = "dummyFastaFile.fa"
        self._writeTmpFile(fileName, [">header1_1 description1\n"])
        expLBioseq = [_newBioseq("header1_1 description1", "")]
        obsLBioseq = BioseqUtils.extractBioseqListFromFastaFile(fileName)
        self.assertEqual(expLBioseq, obsLBioseq)

    def test_extractBioseqListFromFastaFile_empty_file(self):
        fileName = "dummyFastaFile.fa"
        self._writeTmpFile(fileName, [])
        expLBioseq = []
        obsLBioseq = BioseqUtils.extractBioseqListFromFastaFile(fileName)
        self.assertEqual(expLBioseq, obsLBioseq)

    # --- getSeqLengthWithSeqName -------------------------------------------

    def test_getSeqLengthWithSeqName(self):
        bioseq1 = _newBioseq("header1 description", "CGF*LISL*SQ*FHVGVSWLRLINNIMRVEL")
        bioseq2 = _newBioseq("header2", "ATGCGTGCGTAAATGCGTATGCGTATGCGTTCGCGAATGCGTGT")
        lBioseq = [bioseq1, bioseq2]
        obsLength = BioseqUtils.getSeqLengthWithSeqName(lBioseq, "header1 description")
        expLength = 31
        self.assertEqual(expLength, obsLength)

    def test_getSeqLengthWithSeqName_second_item(self):
        bioseq1 = _newBioseq("header1 description", "CGF*LISL*SQ*FHVGVSWLRLINNIMRVEL")
        bioseq2 = _newBioseq("header2", "ATGCGTGCGTAAATGCGTATGCGTATGCGTTCGCGAATGCGTGT")
        lBioseq = [bioseq1, bioseq2]
        obsLength = BioseqUtils.getSeqLengthWithSeqName(lBioseq, "header2")
        expLength = 44
        self.assertEqual(expLength, obsLength)

    def test_getSeqLengthWithSeqName_empty_list(self):
        lBioseq = []
        obsLength = BioseqUtils.getSeqLengthWithSeqName(lBioseq, "header2")
        expLength = 0
        self.assertEqual(expLength, obsLength)

    def test_getSeqLengthWithSeqName_empty_sequence(self):
        bioseq1 = _newBioseq("header1 description", "CGF*LISL*SQ*FHVGVSWLRLINNIMRVEL")
        bioseq2 = _newBioseq("header2", "")
        lBioseq = [bioseq1, bioseq2]
        obsLength = BioseqUtils.getSeqLengthWithSeqName(lBioseq, "header2")
        expLength = 0
        self.assertEqual(expLength, obsLength)

    def test_getSeqLengthWithSeqName_sequence_unknown(self):
        # A name absent from the list is expected to yield a length of 0.
        bioseq1 = _newBioseq("header1 description", "CGF*LISL*SQ*FHVGVSWLRLINNIMRVEL")
        bioseq2 = _newBioseq("header2", "ATGCGTGCGTAAATGCGTATGCGTATGCGTTCGCGAATGCGTGT")
        lBioseq = [bioseq1, bioseq2]
        obsLength = BioseqUtils.getSeqLengthWithSeqName(lBioseq, "header3")
        expLength = 0
        self.assertEqual(expLength, obsLength)

    # --- getLengthPerSeqFromFile -------------------------------------------

    def test_getLengthPerSeqFromFile(self):
        inFile = "dummyInFile"
        self._writeTmpFile(inFile, [
            ">seq1\nAGCGATGCAGCTA\n",
            ">seq2\nGCGATGCGCATCGACGCGA\n",
        ])
        dExp = {"seq1": 13, "seq2": 19}
        dObs = BioseqUtils.getLengthPerSeqFromFile(inFile)
        self.assertEqual(dExp, dObs)

    # --- sorting -----------------------------------------------------------

    def test_getBioseqListSortedByDecreasingLength(self):
        lBioseqs = [
            Bioseq("TE2", "ACC"),
            Bioseq("TE3", "TA"),
            Bioseq("TE1", "AGCG"),
        ]
        lExp = [
            Bioseq("TE1", "AGCG"),
            Bioseq("TE2", "ACC"),
            Bioseq("TE3", "TA"),
        ]
        lObs = BioseqUtils.getBioseqListSortedByDecreasingLength(lBioseqs)
        self.assertEqual(lExp, lObs)

    def test_getBioseqListSortedByDecreasingLengthWithoutGaps(self):
        # All sequences have the same raw length (5); the expected order
        # follows the number of non-gap characters (4, 3, 2).
        lBioseqs = [
            Bioseq("TE2", "-ACC-"),
            Bioseq("TE3", "TA---"),
            Bioseq("TE1", "-AGCG"),
        ]
        lExp = [
            Bioseq("TE1", "-AGCG"),
            Bioseq("TE2", "-ACC-"),
            Bioseq("TE3", "TA---"),
        ]
        lObs = BioseqUtils.getBioseqListSortedByDecreasingLengthWithoutGaps(lBioseqs)
        self.assertEqual(lExp, lObs)


# Module-level suite, kept under its original name so it can still be
# imported from elsewhere. TestLoader.loadTestsFromTestCase replaces the
# deprecated unittest.makeSuite.
test_suite = unittest.TestSuite()
test_suite.addTest(unittest.TestLoader().loadTestsFromTestCase(Test_BioseqUtils))

if __name__ == "__main__":
    unittest.TextTestRunner(verbosity=2).run(test_suite)