Mercurial > repos > yufei-luo > s_mart
view commons/core/parsing/test/Test_BlatToGffForBesPaired.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | 769e306b7933 |
children |
line wrap: on
line source
import os
import unittest

from commons.core.parsing.BlatToGffForBesPaired import BlatToGffForBesPaired


class Test_BlatToGffForBesPaired(unittest.TestCase):
    """Unit tests for the BlatToGffForBesPaired converter.

    Tests that need a FASTA file create it under
    ``$REPET_PATH/commons/core/parsing/test`` and register its removal with
    ``addCleanup`` so the file is deleted even when an assertion fails.
    The ``REPET_PATH`` environment variable must therefore be set.
    """

    # BLAT line shared by the convertBlatObjectToGffLine tests.
    _BLAT_LINE = '315\t20\t0\t0\t3\t10\t2\t9\t+\tMRRE1H001H13FM1\t378\t0\t345\tchr16\t22053297\t21686950\t21687294\t4\t76,185,7,67,\t0,77,263,278,\t21686950,21687026,21687213,21687227,\n'

    # BES GFF lines shared by the createGffLineForBac tests.
    _GFF_LINE_BES1 = 'chr16\tBlatToGffForBesPaired\tBES\t10\t1000\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=10;bes_end=1000;bes_size=991;muscadine_seq=ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG\n'
    _GFF_LINE_BES2 = 'chr16\tBlatToGffForBesPaired\tBES\t2000\t3000\t.\t+\t.\tID=MRRE1H001H13RM2;Name=MRRE1H001H13RM2;bes_start=2000;bes_end=3000;bes_size=1001;muscadine_seq=CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT\n'

    # Content of the three-sequence FASTA file used by the
    # extractBesSequenceFromFastaFile tests.
    _SEQUENCE_FASTA_LINES = (
        '>seq1\n',
        'ATCGATCGATCGATCGATACGTCAGCGATCGAT\n',
        'TACGTACGTACGATCGATCGATCGATCGATCGG\n',
        'TACGTACGTACGATCGACGATCGATGCCGATCG\n',
        'ATCGAC\n',
        '>seq2\n',
        'GTCTAGCTAGCTATATCTGACTGACGCGACGGT\n',
        'CATGCTAGCTAGCACTGTACAGCTATCGATGCT\n',
        'ACTGACACTGTACGTAC\n',
        '>seq3\n',
        'ACTCGATCGATCG\n',
    )

    # ------------------------------------------------------------------
    # convertBlatObjectToGffLine
    # ------------------------------------------------------------------

    def test_convertBlatObjectToGffLine(self):
        """With an empty method name the GFF feature type is plain 'BES'."""
        expGffLine = 'chr16\tBlatToGffForBesPaired\tBES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297;muscadine_seq=AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC\n'
        self._checkConvertBlatObjectToGffLine('', expGffLine)

    def test_convertBlatObjectToGffLine_with_methodName(self):
        """With method name 'Test' the GFF feature type is 'Test:BES'."""
        expGffLine = 'chr16\tBlatToGffForBesPaired\tTest:BES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297;muscadine_seq=AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC\n'
        self._checkConvertBlatObjectToGffLine('Test', expGffLine)

    # ------------------------------------------------------------------
    # getBesName / checkBesNames
    # ------------------------------------------------------------------

    def test_getBesName(self):
        """getBesName returns the value of the Name attribute of column 9."""
        col9 = 'ID=machin1;Name=machin1;bes_start=21736364;bes_end=21737069;bes_size=22053297\n'
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        obsBesName = iBlatToGffForBesPaired.getBesName(col9)
        self.assertEqual('machin1', obsBesName)

    def test_checkBesNames_OK(self):
        """An FM/RM pair with a common name prefix is accepted."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertTrue(iBlatToGffForBesPaired.checkBesNames('MRRE1H001H13FM8', 'MRRE1H001H13RM2', 10))

    def test_checkBesNames_NOK(self):
        """Two names with different prefixes are rejected."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertFalse(iBlatToGffForBesPaired.checkBesNames('MRRE1H001H13FM1', 'TOTORM2', 10))

    # ------------------------------------------------------------------
    # checkBesPositions
    # ------------------------------------------------------------------

    def test_checkBesPositions_OK1(self):
        """Same chromosome, first BES entirely before the second: accepted."""
        tBes1 = ('chr16', 25, 150)
        tBes2 = ('chr16', 300, 350)
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertTrue(iBlatToGffForBesPaired.checkBesPositions(tBes1, tBes2))

    def test_checkBesPositions_OK2(self):
        """Same chromosome, first BES entirely after the second: accepted."""
        tBes1 = ('chr16', 300, 350)
        tBes2 = ('chr16', 3, 50)
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertTrue(iBlatToGffForBesPaired.checkBesPositions(tBes1, tBes2))

    def test_checkBesPositions_NOK1(self):
        """BES located on different chromosomes: rejected."""
        tBes1 = ('chr16', 25, 150)
        tBes2 = ('chr14', 300, 350)
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertFalse(iBlatToGffForBesPaired.checkBesPositions(tBes1, tBes2))

    def test_checkBesPositions_NOK2(self):
        """Overlapping intervals (second starts inside the first): rejected."""
        tBes1 = ('chr16', 25, 300)
        tBes2 = ('chr16', 150, 350)
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertFalse(iBlatToGffForBesPaired.checkBesPositions(tBes1, tBes2))

    def test_checkBesPositions_NOK3(self):
        """Overlapping intervals (second ends inside the first): rejected."""
        tBes1 = ('chr16', 25, 300)
        tBes2 = ('chr16', 1, 50)
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertFalse(iBlatToGffForBesPaired.checkBesPositions(tBes1, tBes2))

    # ------------------------------------------------------------------
    # getBacName / getBacPositions
    # ------------------------------------------------------------------

    def test_getBacName(self):
        """For 'MRRE1H001H13FM1' the expected BAC name is 'MRRE1H001H13'."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        obsBacName = iBlatToGffForBesPaired.getBacName('MRRE1H001H13FM1')
        self.assertEqual('MRRE1H001H13', obsBacName)

    def test_getBacPositions_case1(self):
        """Second BES starts first: BAC spans 1..300."""
        self._checkBacPositions(('chr16', 25, 300), ('chr16', 1, 50), 1, 300)

    def test_getBacPositions_case2(self):
        """Disjoint, ordered BES: BAC spans 1..50000."""
        self._checkBacPositions(('chr16', 1, 300), ('chr16', 1000, 50000), 1, 50000)

    def test_getBacPositions_case3(self):
        """First BES given with start > end: BAC still spans 1..300."""
        self._checkBacPositions(('chr16', 300, 25), ('chr16', 1, 50), 1, 300)

    # ------------------------------------------------------------------
    # createGffLineForBac
    # ------------------------------------------------------------------

    def test_createGffLineForBac(self):
        """With an empty method name the GFF feature type is plain 'BAC'."""
        expGffBac = 'chr16\tBlatToGffForBesPaired\tBAC\t10\t3000\t.\t.\t.\tID=MRRE1H001H13;Name=MRRE1H001H13;bac_start=10;bac_end=3000;bac_size=2991;besFM_name=MRRE1H001H13FM1;muscadine_besFM_seq=ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG;besRM_name=MRRE1H001H13RM2;muscadine_besRM_seq=CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT\n'
        self._checkCreateGffLineForBac('', expGffBac)

    def test_createGffLineForBac_with_methodName(self):
        """With method name 'Test' the GFF feature type is 'Test:BAC'."""
        expGffBac = 'chr16\tBlatToGffForBesPaired\tTest:BAC\t10\t3000\t.\t.\t.\tID=MRRE1H001H13;Name=MRRE1H001H13;bac_start=10;bac_end=3000;bac_size=2991;besFM_name=MRRE1H001H13FM1;muscadine_besFM_seq=ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG;besRM_name=MRRE1H001H13RM2;muscadine_besRM_seq=CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT\n'
        self._checkCreateGffLineForBac('Test', expGffBac)

    # ------------------------------------------------------------------
    # extractBesSequenceFromFastaFile
    # ------------------------------------------------------------------

    def test_extractBesSequenceFromFastaFileToTmpFile_with_seqInMultipleLines(self):
        """A sequence split over several FASTA lines is returned joined."""
        expSeq = 'ATCGATCGATCGATCGATACGTCAGCGATCGATTACGTACGTACGATCGATCGATCGATCGATCGGTACGTACGTACGATCGACGATCGATGCCGATCGATCGAC'
        self.assertEqual(expSeq, self._extractFromSequenceFasta('seq1'))

    def test_extractBesSequenceFromFastaFileToTmpFile_with_seqInUniqueLines(self):
        """A sequence stored on a single FASTA line is returned as is."""
        self.assertEqual('ACTCGATCGATCG', self._extractFromSequenceFasta('seq3'))

    def test_extractBesSequenceFromFastaFileToTmpFile_without_seqInThisFastaFile(self):
        """A sequence name absent from the FASTA file yields 'NA'."""
        self.assertEqual('NA', self._extractFromSequenceFasta('seq4'))

    # ------------------------------------------------------------------
    # getBesFmAndRmNamesAndSequences
    # ------------------------------------------------------------------

    def test_getBesFmAndRmNamesAndSequences_case1(self):
        """FM given first, RM second: output order is unchanged."""
        nameBes1 = 'MRRE1H0072T1FM1'
        seqBes1 = 'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC'
        typeBes1 = 'FM'
        nameBes2 = 'MRRE1H0072T1RM3'
        seqBes2 = 'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC'
        typeBes2 = 'RM'
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        obsNameBesFM, obsSeqBesFM, obsNameBesRM, obsSeqBesRM = iBlatToGffForBesPaired.getBesFmAndRmNamesAndSequences(nameBes1, seqBes1, typeBes1, nameBes2, seqBes2, typeBes2)
        self.assertEqual('MRRE1H0072T1FM1', obsNameBesFM)
        self.assertEqual('MRRE1H0072T1RM3', obsNameBesRM)
        self.assertEqual('TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC', obsSeqBesFM)
        self.assertEqual('GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC', obsSeqBesRM)

    def test_getBesFmAndRmNamesAndSequences_case2(self):
        """RM given first, FM second: output is reordered as FM then RM."""
        nameBes1 = 'MRRE1H0072T1RM1'
        seqBes1 = 'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC'
        typeBes1 = 'RM'
        nameBes2 = 'MRRE1H0072T1FM3'
        seqBes2 = 'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC'
        typeBes2 = 'FM'
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        obsNameBesFM, obsSeqBesFM, obsNameBesRM, obsSeqBesRM = iBlatToGffForBesPaired.getBesFmAndRmNamesAndSequences(nameBes1, seqBes1, typeBes1, nameBes2, seqBes2, typeBes2)
        self.assertEqual('MRRE1H0072T1FM3', obsNameBesFM)
        self.assertEqual('MRRE1H0072T1RM1', obsNameBesRM)
        self.assertEqual('GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC', obsSeqBesFM)
        self.assertEqual('TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC', obsSeqBesRM)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _checkConvertBlatObjectToGffLine(self, methodName, expGffLine):
        """Convert the shared BLAT line and compare the four returned values.

        methodName: value assigned to the converter's ``_methodName``.
        expGffLine: GFF line expected as first returned value.
        """
        nbLine = 15
        besFastaFileName = '%s/commons/core/parsing/test/besSequences.fasta' % os.environ['REPET_PATH']
        self._writeBesSequences(besFastaFileName)
        # Registered right after creation so the fixture is removed even
        # when one of the assertions below fails.
        self.addCleanup(os.remove, besFastaFileName)

        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        iBlatToGffForBesPaired._methodName = methodName
        iBlatToGffForBesPaired._inputFileFasta = besFastaFileName
        obsGffLine, obsBesName, obsBesSeq, obsBesType = iBlatToGffForBesPaired.convertBlatObjectToGffLine(self._BLAT_LINE, nbLine)

        self.assertEqual(expGffLine, obsGffLine)
        self.assertEqual('MRRE1H001H13FM1', obsBesName)
        self.assertEqual('AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC', obsBesSeq)
        self.assertEqual('FM', obsBesType)

    def _checkBacPositions(self, tBes1, tBes2, expStart, expEnd):
        """Assert that getBacPositions(tBes1, tBes2) == (expStart, expEnd)."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        obsStart, obsEnd = iBlatToGffForBesPaired.getBacPositions(tBes1, tBes2)
        self.assertEqual(expStart, obsStart)
        self.assertEqual(expEnd, obsEnd)

    def _checkCreateGffLineForBac(self, methodName, expGffBac):
        """Build the BAC GFF line from the two shared BES lines and compare.

        methodName: value assigned to the converter's ``_methodName``.
        expGffBac: BAC GFF line expected from createGffLineForBac.
        """
        nameBes1 = 'MRRE1H001H13FM1'
        seqBes1 = 'ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG'
        typeBes1 = 'FM'
        nameBes2 = 'MRRE1H001H13RM2'
        seqBes2 = 'CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT'
        typeBes2 = 'RM'
        line = 2
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        iBlatToGffForBesPaired._methodName = methodName
        obsGffBac = iBlatToGffForBesPaired.createGffLineForBac(self._GFF_LINE_BES1, nameBes1, seqBes1, typeBes1, self._GFF_LINE_BES2, nameBes2, seqBes2, typeBes2, line)
        self.assertEqual(expGffBac, obsGffBac)

    def _extractFromSequenceFasta(self, seqName):
        """Write the three-sequence FASTA fixture and extract seqName from it.

        Returns whatever extractBesSequenceFromFastaFile(seqName, 5) returns.
        The fixture file is removed automatically at the end of the test.
        """
        fastaFileName = '%s/commons/core/parsing/test/sequence.fasta' % os.environ['REPET_PATH']
        with open(fastaFileName, 'w') as fastaFile:
            fastaFile.writelines(self._SEQUENCE_FASTA_LINES)
        self.addCleanup(os.remove, fastaFileName)

        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        iBlatToGffForBesPaired._inputFileFasta = fastaFileName
        return iBlatToGffForBesPaired.extractBesSequenceFromFastaFile(seqName, 5)

    def _writeBesSequences(self, fileName):
        """Write a three-entry BES FASTA fixture to fileName.

        The file is opened in 'w' mode, so existing content is overwritten.
        Removing the file afterwards is the caller's responsibility.
        """
        with open(fileName, 'w') as besFile:
            besFile.write('>MRRE1H001H13RM1\n')
            besFile.write('ATACGTACGTACGTCAGTACGACTACGTACGTACGTACGTCGTAC\n')
            besFile.write('TACGTCAGCATCGTACGTACGTACGTCGTGCTGGCTAGCTGACGA\n')
            besFile.write('ATCGATCGATCGATCGACATCGTACG\n')
            besFile.write('>MRRE1H001H13FM1\n')
            besFile.write('AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGC\n')
            besFile.write('CTAGCTAGCTAGCTAGCTAGCTAGC\n')
            besFile.write('>MRRE2H007A13FM3\n')
            besFile.write('TCAGCTAGCTGACTGACATCGCTAGCTAGCTAGCTAGCTAGCTAG\n')
            besFile.write('TACGCAGCTACGGGGCATCGACTAAAAAAAAAAACCCACGACTGG\n')
            besFile.write('CTAGCTAGCTAGCTAGCTAGCTACGTCGATCGATCGACTGTTGCC\n')
            besFile.write('TCAGCTACTGACTGATCGATCGACTACGTACGTACGTAC\n')


if __name__ == "__main__":
    unittest.main()