import os
import subprocess
import unittest

from commons.core.utils.FileUtils import FileUtils


6 class Test_F_BlatToGffForBesPaired(unittest.TestCase):
|
|
7
|
|
8
|
|
9 def test_run(self):
|
|
10 blatInputFileName = '%s/commons/core/parsing/test/inputFile.tab' % os.environ['REPET_PATH']
|
|
11 self._writeBlatInputFileName(blatInputFileName)
|
|
12 fastaInputFileName = '%s/commons/core/parsing/test/sequences.fasta' % os.environ['REPET_PATH']
|
|
13 self._writeFastaInputFile(fastaInputFileName)
|
|
14
|
|
15 obsOutputFileName = '%s/commons/core/parsing/test/obsOutputFileName.gff' % os.environ['REPET_PATH']
|
|
16 cmd = 'python %s/commons/core/parsing/BlatToGffForBesPaired.py -i %s -f %s -o %s' % (os.environ['REPET_PATH'], blatInputFileName, fastaInputFileName, obsOutputFileName)
|
|
17 os.system(cmd)
|
|
18
|
|
19 expOutputFileName = '%s/commons/core/parsing/test/expOutputFileName.gff' % os.environ['REPET_PATH']
|
|
20 self._writeExpOutputFileName(expOutputFileName)
|
|
21 self.assertTrue(FileUtils.are2FilesIdentical(expOutputFileName, obsOutputFileName))
|
|
22 os.remove(blatInputFileName)
|
|
23 os.remove(fastaInputFileName)
|
|
24 os.remove(expOutputFileName)
|
|
25 os.remove(obsOutputFileName)
|
|
26
|
|
27 def test_run_with_methodName(self):
|
|
28 blatInputFileName = '%s/commons/core/parsing/test/inputFile.tab' % os.environ['REPET_PATH']
|
|
29 self._writeBlatInputFileName(blatInputFileName)
|
|
30 fastaInputFileName = '%s/commons/core/parsing/test/sequences.fasta' % os.environ['REPET_PATH']
|
|
31 self._writeFastaInputFile(fastaInputFileName)
|
|
32
|
|
33 obsOutputFileName = '%s/commons/core/parsing/test/obsOutputFileName.gff' % os.environ['REPET_PATH']
|
|
34 cmd = 'python %s/commons/core/parsing/BlatToGffForBesPaired.py -i %s -f %s -o %s -n TestF' % (os.environ['REPET_PATH'], blatInputFileName, fastaInputFileName, obsOutputFileName)
|
|
35 os.system(cmd)
|
|
36
|
|
37 expOutputFileName = '%s/commons/core/parsing/test/expOutputFileName.gff' % os.environ['REPET_PATH']
|
|
38 self._writeExpOutputFileName_with_methodName(expOutputFileName)
|
|
39 self.assertTrue(FileUtils.are2FilesIdentical(expOutputFileName, obsOutputFileName))
|
|
40 os.remove(blatInputFileName)
|
|
41 os.remove(fastaInputFileName)
|
|
42 os.remove(expOutputFileName)
|
|
43 os.remove(obsOutputFileName)
|
|
44
|
|
45 def _writeBlatInputFileName(self, blatInputFileName):
|
|
46 file = open(blatInputFileName, 'w')
|
|
47 file.write('psLayout version 3\n')
|
|
48 file.write('\n')
|
|
49 file.write('match mis- rep. N\'s Q gap Q gap T gap T gap strand Q Q Q Q T T T T block blockSizes qStarts tStarts\n')
|
|
50 file.write(' match match count bases count bases name size start end name size start end count\n')
|
|
51 file.write('---------------------------------------------------------------------------------------------------------------------------------------------------------------\n')
|
|
52 file.write('315\t20\t0\t0\t3\t10\t2\t9\t+\tMRRE1H001H13FM1\t378\t0\t345\tchr16\t22053297\t21686950\t21687294\t4\t76,185,7,67,\t0,77,263,278,\t21686950,21687026,21687213,21687227,\n')
|
|
53 file.write('690\t11\t0\t0\t1\t3\t2\t4\t-\tMRRE1H001H13RM1\t704\t0\t704\tchr16\t22053297\t21736364\t21737069\t3\t40,647,14,\t0,43,690,\t21736364,21736406,21737055,\n')
|
|
54 file.write('554\t26\t0\t0\t1\t16\t1\t17\t+\tMACHINFM1\t606\t10\t606\tchr11\t19818926\t3725876\t3726473\t2\t553,27,\t10,579,\t3725876,3726446,\n')
|
|
55 file.write('620\t23\t0\t0\t0\t0\t0\t0\t-\tBIDULERM1\t643\t0\t643\tchr11\t19818926\t3794984\t3795627\t1\t643,\t0,\t3794984,\n')
|
|
56 file.write('554\t26\t0\t0\t1\t16\t1\t17\t+\tMRRE1H032F08FM1\t606\t10\t606\tchr11\t19818926\t3725876\t3726473\t2\t553,27,\t10,579,\t3725876,3726446,\n')
|
|
57 file.write('620\t23\t0\t0\t0\t0\t0\t0\t-\tMRRE1H032F08RM1\t643\t0\t643\tchr11\t19818926\t3794984\t3795627\t1\t643,\t0,\t3794984,\n')
|
|
58 file.write('347\t25\t0\t0\t0\t0\t0\t0\t-\tMRRE1B072N12FM1\t393\t21\t393\tchr18\t29360087\t12067347\t12067719\t1\t372,\t0,\t12067347,\n')
|
|
59 file.write('294\t16\t0\t0\t0\t0\t2\t393\t+\tMRRE1B072N12RM1\t339\t21\t331\tchr18\t29360087\t11978635\t11979338\t3\t146,154,10,\t21,167,321,\t11978635,11978783,11979328,\n')
|
|
60 file.close()
|
|
61
|
|
62 def _writeExpOutputFileName(self, expOutputFileName):
|
|
63 file = open(expOutputFileName, 'w')
|
|
64 file.write('##gff-version 3\n')
|
|
65 file.write('chr16\tBlatToGffForBesPaired\tBES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297;muscadine_seq=ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCCTACGTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTAGCACTGCTAGCTACG\n')
|
|
66 file.write('chr16\tBlatToGffForBesPaired\tBES\t21736364\t21737069\t.\t+\t.\tID=MRRE1H001H13RM1;Name=MRRE1H001H13RM1;bes_start=21736364;bes_end=21737069;bes_size=22053297;muscadine_seq=ACTGATCGATCGTACGTACGATCGCTGATCGTACGTACGATCGATCGATCGACTCGATCGTAGCTAGCTACGTCAGTCAGACTGACTGCTGCGCTGCATCGTACTGATCGACTGATCGACTGC\n')
|
|
67 file.write('chr16\tBlatToGffForBesPaired\tBAC\t21686950\t21737069\t.\t.\t.\tID=MRRE1H001H13;Name=MRRE1H001H13;bac_start=21686950;bac_end=21737069;bac_size=50120;besFM_name=MRRE1H001H13FM1;muscadine_besFM_seq=ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCCTACGTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTAGCACTGCTAGCTACG;besRM_name=MRRE1H001H13RM1;muscadine_besRM_seq=ACTGATCGATCGTACGTACGATCGCTGATCGTACGTACGATCGATCGATCGACTCGATCGTAGCTAGCTACGTCAGTCAGACTGACTGCTGCGCTGCATCGTACTGATCGACTGATCGACTGC\n')
|
|
68 file.write('chr11\tBlatToGffForBesPaired\tBES\t3725876\t3726473\t.\t+\t.\tID=MRRE1H032F08FM1;Name=MRRE1H032F08FM1;bes_start=3725876;bes_end=3726473;bes_size=19818926;muscadine_seq=TCAGCTATCGATCGTACGTACGTCGATCGTACGTACGTACGATCGATCGATATCGATCG\n')
|
|
69 file.write('chr11\tBlatToGffForBesPaired\tBES\t3794984\t3795627\t.\t+\t.\tID=MRRE1H032F08RM1;Name=MRRE1H032F08RM1;bes_start=3794984;bes_end=3795627;bes_size=19818926;muscadine_seq=ATCGACTGATCGTCGATCGTACGATCGACTGATCGATCGATCGACTGACTGTACGTACGTAC\n')
|
|
70 file.write('chr11\tBlatToGffForBesPaired\tBAC\t3725876\t3795627\t.\t.\t.\tID=MRRE1H032F08;Name=MRRE1H032F08;bac_start=3725876;bac_end=3795627;bac_size=69752;besFM_name=MRRE1H032F08FM1;muscadine_besFM_seq=TCAGCTATCGATCGTACGTACGTCGATCGTACGTACGTACGATCGATCGATATCGATCG;besRM_name=MRRE1H032F08RM1;muscadine_besRM_seq=ATCGACTGATCGTCGATCGTACGATCGACTGATCGATCGATCGACTGACTGTACGTACGTAC\n')
|
|
71 file.write('chr18\tBlatToGffForBesPaired\tBES\t12067347\t12067719\t.\t+\t.\tID=MRRE1B072N12FM1;Name=MRRE1B072N12FM1;bes_start=12067347;bes_end=12067719;bes_size=29360087;muscadine_seq=ATCGTACGTACGATCGATCGCATGACTACGT\n')
|
|
72 file.write('chr18\tBlatToGffForBesPaired\tBES\t11978635\t11979338\t.\t+\t.\tID=MRRE1B072N12RM1;Name=MRRE1B072N12RM1;bes_start=11978635;bes_end=11979338;bes_size=29360087;muscadine_seq=TACGTACGATCGACTGATGCTAGCTAGCTCC\n')
|
|
73 file.write('chr18\tBlatToGffForBesPaired\tBAC\t11978635\t12067719\t.\t.\t.\tID=MRRE1B072N12;Name=MRRE1B072N12;bac_start=11978635;bac_end=12067719;bac_size=89085;besFM_name=MRRE1B072N12FM1;muscadine_besFM_seq=ATCGTACGTACGATCGATCGCATGACTACGT;besRM_name=MRRE1B072N12RM1;muscadine_besRM_seq=TACGTACGATCGACTGATGCTAGCTAGCTCC\n')
|
|
74 file.close()
|
|
75
|
|
76 def _writeExpOutputFileName_with_methodName(self, expOutputFileName):
|
|
77 file = open(expOutputFileName, 'w')
|
|
78 file.write('##gff-version 3\n')
|
|
79 file.write('chr16\tBlatToGffForBesPaired\tTestF:BES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297;muscadine_seq=ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCCTACGTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTAGCACTGCTAGCTACG\n')
|
|
80 file.write('chr16\tBlatToGffForBesPaired\tTestF:BES\t21736364\t21737069\t.\t+\t.\tID=MRRE1H001H13RM1;Name=MRRE1H001H13RM1;bes_start=21736364;bes_end=21737069;bes_size=22053297;muscadine_seq=ACTGATCGATCGTACGTACGATCGCTGATCGTACGTACGATCGATCGATCGACTCGATCGTAGCTAGCTACGTCAGTCAGACTGACTGCTGCGCTGCATCGTACTGATCGACTGATCGACTGC\n')
|
|
81 file.write('chr16\tBlatToGffForBesPaired\tTestF:BAC\t21686950\t21737069\t.\t.\t.\tID=MRRE1H001H13;Name=MRRE1H001H13;bac_start=21686950;bac_end=21737069;bac_size=50120;besFM_name=MRRE1H001H13FM1;muscadine_besFM_seq=ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCCTACGTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTAGCACTGCTAGCTACG;besRM_name=MRRE1H001H13RM1;muscadine_besRM_seq=ACTGATCGATCGTACGTACGATCGCTGATCGTACGTACGATCGATCGATCGACTCGATCGTAGCTAGCTACGTCAGTCAGACTGACTGCTGCGCTGCATCGTACTGATCGACTGATCGACTGC\n')
|
|
82 file.write('chr11\tBlatToGffForBesPaired\tTestF:BES\t3725876\t3726473\t.\t+\t.\tID=MRRE1H032F08FM1;Name=MRRE1H032F08FM1;bes_start=3725876;bes_end=3726473;bes_size=19818926;muscadine_seq=TCAGCTATCGATCGTACGTACGTCGATCGTACGTACGTACGATCGATCGATATCGATCG\n')
|
|
83 file.write('chr11\tBlatToGffForBesPaired\tTestF:BES\t3794984\t3795627\t.\t+\t.\tID=MRRE1H032F08RM1;Name=MRRE1H032F08RM1;bes_start=3794984;bes_end=3795627;bes_size=19818926;muscadine_seq=ATCGACTGATCGTCGATCGTACGATCGACTGATCGATCGATCGACTGACTGTACGTACGTAC\n')
|
|
84 file.write('chr11\tBlatToGffForBesPaired\tTestF:BAC\t3725876\t3795627\t.\t.\t.\tID=MRRE1H032F08;Name=MRRE1H032F08;bac_start=3725876;bac_end=3795627;bac_size=69752;besFM_name=MRRE1H032F08FM1;muscadine_besFM_seq=TCAGCTATCGATCGTACGTACGTCGATCGTACGTACGTACGATCGATCGATATCGATCG;besRM_name=MRRE1H032F08RM1;muscadine_besRM_seq=ATCGACTGATCGTCGATCGTACGATCGACTGATCGATCGATCGACTGACTGTACGTACGTAC\n')
|
|
85 file.write('chr18\tBlatToGffForBesPaired\tTestF:BES\t12067347\t12067719\t.\t+\t.\tID=MRRE1B072N12FM1;Name=MRRE1B072N12FM1;bes_start=12067347;bes_end=12067719;bes_size=29360087;muscadine_seq=ATCGTACGTACGATCGATCGCATGACTACGT\n')
|
|
86 file.write('chr18\tBlatToGffForBesPaired\tTestF:BES\t11978635\t11979338\t.\t+\t.\tID=MRRE1B072N12RM1;Name=MRRE1B072N12RM1;bes_start=11978635;bes_end=11979338;bes_size=29360087;muscadine_seq=TACGTACGATCGACTGATGCTAGCTAGCTCC\n')
|
|
87 file.write('chr18\tBlatToGffForBesPaired\tTestF:BAC\t11978635\t12067719\t.\t.\t.\tID=MRRE1B072N12;Name=MRRE1B072N12;bac_start=11978635;bac_end=12067719;bac_size=89085;besFM_name=MRRE1B072N12FM1;muscadine_besFM_seq=ATCGTACGTACGATCGATCGCATGACTACGT;besRM_name=MRRE1B072N12RM1;muscadine_besRM_seq=TACGTACGATCGACTGATGCTAGCTAGCTCC\n')
|
|
88 file.close()
|
|
89
|
|
90 def _writeFastaInputFile(self, fileName):
|
|
91 file = open(fileName, 'w')
|
|
92 file.write('>MRRE1H001H13FM1\n')
|
|
93 file.write('ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC\n')
|
|
94 file.write('CTACGTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTAGC\n')
|
|
95 file.write('ACTGCTAGCTACG\n')
|
|
96 file.write('>MRRE1H001H13RM1\n')
|
|
97 file.write('ACTGATCGATCGTACGTACGATCGCTGATCGTACGTACGATCGATCGATCG\n')
|
|
98 file.write('ACTCGATCGTAGCTAGCTACGTCAGTCAGACTGACTGCTGCGCTGCATCGT\n')
|
|
99 file.write('ACTGATCGACTGATCGACTGC\n')
|
|
100 file.write('>MRRE1H032F08FM1\n')
|
|
101 file.write('TCAGCTATCGATCGTACGTACGTCGATCGTACGTACGTACGATCGATCGAT\n')
|
|
102 file.write('ATCGATCG\n')
|
|
103 file.write('>MRRE1H032F08RM1\n')
|
|
104 file.write('ATCGACTGATCGTCGATCGTACGATCGACTGATCGATCGATCGACTGACTG\n')
|
|
105 file.write('TACGTACGTAC\n')
|
|
106 file.write('>MRRE1B072N12FM1\n')
|
|
107 file.write('ATCGTACGTACGATCGATCGCATGACTACGT\n')
|
|
108 file.write('>MRRE1B072N12RM1\n')
|
|
109 file.write('TACGTACGATCGACTGATGCTAGCTAGCTCC\n')
|
|
110 file.write('>MACHINFM1\n')
|
|
111 file.write('ATCGTACGCTAGCTAGTCGATCGATCGATCGATCG\n')
|
|
112 file.write('>BIDULERM1\n')
|
|
113 file.write('ACTCGATCGACTACGTACGTAGACTG\n')
|
|
114 file.close()
|
|
115
|
|
if __name__ == "__main__":
    # Executed as a script: let unittest discover and run the tests of this module.
    unittest.main()