import os
import subprocess
import unittest

from SMART.Java.Python.misc import Utils
from commons.core.utils.FileUtils import FileUtils


class Test_F_convertTranscriptFile(unittest.TestCase):
    """Functional tests for the ``convertTranscriptFile.py`` command-line script.

    Every test runs the script located under ``$REPET_PATH/SMART/Java/Python``
    on an input file, then compares the file it produced with an expected
    file through ``Utils.diff`` (presumably true when both files have the
    same content -- confirm against ``SMART.Java.Python.misc.Utils``).
    """

    def setUp(self):
        """Reset the per-test file names; skip when ``REPET_PATH`` is unset."""
        self._inputFileName = None
        self._expOutputFileName = None
        self._outputFileName = None
        # Input files written by a test itself. They are removed in tearDown,
        # unlike the fixture inputs stored under TestFiles/, which must stay.
        self._generatedFileNames = []
        if "REPET_PATH" not in os.environ:
            # Every path used below is built from this variable.
            self.skipTest("REPET_PATH environment variable is not set")

    def tearDown(self):
        """Remove the expected/observed outputs and any generated input file."""
        candidates = [self._expOutputFileName, self._outputFileName]
        candidates.extend(self._generatedFileNames)
        for fileName in candidates:
            if fileName is not None and os.path.exists(fileName):
                os.remove(fileName)

    def test_run_SAMtoGFF3(self):
        """Convert a SAM file written by the test into GFF3."""
        self._inputFileName = self._smartPath("test/input.sam")
        # Registered before writing so the file is cleaned up even when the
        # final assertion fails.
        self._generatedFileNames.append(self._inputFileName)
        self._writeInputSam(self._inputFileName)
        self._outputFileName = self._smartPath("test/obsOutput.gff3")
        self._runConversion("sam", "gff3")
        self._expOutputFileName = self._smartPath("test/expOutput.gff3")
        self._writeExpGff3File_test1(self._expOutputFileName)

        self._assertSameAsOutput(self._expOutputFileName)

    def test_run_BEDtoGFF3(self):
        """Convert the ``test_distance.bed`` fixture into GFF3."""
        self._inputFileName = self._smartPath("TestFiles/test_distance.bed")
        self._outputFileName = self._smartPath("test/obsOutput.gff3")
        self._runConversion("bed", "gff3")
        self._expOutputFileName = self._smartPath("test/expOutput.gff3")
        self._writeExpGff3File_test2(self._expOutputFileName)
        self._assertSameAsOutput(self._expOutputFileName)

    def test_run_GFF3toCSV(self):
        """Convert the ``mapperAnalyzerExpected.gff3`` fixture into CSV."""
        self._inputFileName = self._smartPath("TestFiles/mapperAnalyzerExpected.gff3")
        self._outputFileName = self._smartPath("test/obsOutput.csv")
        self._runConversion("gff3", "csv")
        self._expOutputFileName = self._smartPath("test/expOutput.csv")
        self._writeExpCsvFile(self._expOutputFileName)

        self._assertSameAsOutput(self._expOutputFileName)

    def test_run_GFF3toSam(self):
        """Convert the ``mapperAnalyzerExpected.gff3`` fixture into SAM."""
        self._inputFileName = self._smartPath("TestFiles/mapperAnalyzerExpected.gff3")
        self._outputFileName = self._smartPath("test/obsOutput.sam")
        self._runConversion("gff3", "sam")
        self._expOutputFileName = self._smartPath("test/expOutput.sam")
        self._writeExpSamFile(self._expOutputFileName)
        self._assertSameAsOutput(self._expOutputFileName)

    def test_run_Gff3toWig(self):
        """Convert the ``sorted_query.gff3`` fixture into WIG."""
        self._inputFileName = self._smartPath("TestFiles/sorted_query.gff3")
        self._outputFileName = self._smartPath("test/obsOutput.wig")
        self._runConversion("gff3", "wig")
        # The expected file is a stored fixture: it is deliberately not
        # assigned to self._expOutputFileName so tearDown leaves it in place.
        outputFile = self._smartPath("TestFiles/sorted_query_wig.wig")
        self._assertSameAsOutput(outputFile)

    def _smartPath(self, relativePath):
        """Return ``relativePath`` located under ``$REPET_PATH/SMART/Java/Python``."""
        return "%s/SMART/Java/Python/%s" % (os.environ["REPET_PATH"], relativePath)

    def _runConversion(self, inputFormat, outputFormat):
        """Run ``convertTranscriptFile.py`` from the input file to the output file.

        The command is passed as an argument list so that no shell parses the
        paths. The exit status is returned but, as before, not asserted.
        """
        command = [
            "python", self._smartPath("convertTranscriptFile.py"),
            "-i", self._inputFileName,
            "-f", inputFormat,
            "-o", self._outputFileName,
            "-g", outputFormat,
            "-v", "0",
        ]
        return subprocess.call(command)

    def _assertSameAsOutput(self, expectedFileName):
        """Assert that ``Utils.diff`` accepts the expected and observed files."""
        self.assertTrue(Utils.diff(expectedFileName, self._outputFileName))

    def _writeLines(self, fileName, lines):
        """Write ``lines`` to ``fileName``, closing the file even on error."""
        with open(fileName, 'w') as handle:
            handle.writelines(lines)

    def _writeInputSam(self, fileName):
        """Write the 12-record SAM input used by ``test_run_SAMtoGFF3``."""
        self._writeLines(fileName, (
            'HWI-EAS337_3:7:1:415:1217\t83\tC02HBa0185P07_LR40\t3889\t60\t36M\t=\t3830\t-95\tACAGTGATGTAGTCCTGCGTGAAAAGTCTGCACATC\tQTUQUUWVWVVWWPVKVVVVWWWWVWWWPWVVVWWW\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n',
            'HWI-EAS337_3:7:1:415:1217\t163\tC02HBa0185P07_LR40\t3830\t60\t36M\t=\t3889\t95\tTAAGAACTTGGCTGATCGCCTACTTACTGCTTTTAC\tVWWWVWVVVVVVVVUVWVWVVWWVWVVVUVTTTTTN\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n',
            'HWI-EAS337_3:7:1:208:1489\t77\t*\t0\t0\t*\t*\t0\t0\tGGAAACATATGCACATAAACGTTGAAATCATGCTTA\tWWWWWWWWWWWWWWWWWVWWVWWVWWWWWWUUUUUU\n',
            'HWI-EAS337_3:7:1:208:1489\t141\t*\t0\t0\t*\t*\t0\t0\tCGTGTTTTTGGTTGTGCATAAGGCTTTTTAAAGTAA\tWWWWWWWWWVVWWQWVWVWWVUVWWWWWWVRTRTTR\n',
            'HWI-EAS337_3:7:1:278:1153\t77\t*\t0\t0\t*\t*\t0\t0\tGAGAAAACCTGTAATAAATACTGAGAGAAAGTAGGG\tWWWWWWWWWWWWWWWWWWWWWWVWVVVWWVUUUUUR\n',
            'HWI-EAS337_3:7:1:278:1153\t141\t*\t0\t0\t*\t*\t0\t0\tGTCAGGCCGCATTGATGGGGGATGGGTTTCCCCCCA\tWWWVWWWWWWWWWVVVVVVVVVVWWWVVVVTTTTTR\n',
            'HWI-EAS337_3:7:1:1178:755\t83\tC11SLe0053P22_LR298\t2130\t60\t36M\t=\t1980\t-186\tATTCAATGGTTTTACCATCAACCAACCACTCTCACC\tUUUUUUVVVWVWVVVVWVVWWVVWWWVWWWWWWWWW\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n',
            'HWI-EAS337_3:7:1:1178:755\t163\tC11SLe0053P22_LR298\t1980\t60\t36M\t=\t2130\t186\tGACATTTCAATTACATTCATCTTACCATCACCTATA\tWVWVWWWWWVWWVWWVWWVVWWWVVWVWVVTTTTTR\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n',
            'HWI-EAS337_3:7:1:277:1259\t99\tC06HBa0144J05_LR355\t1\t60\t36M\t=\t101\t136\tGGGTGACAAAGAAAACAAAAGGGACATGGTACTTGG\tWWWWWWWWWWWWWWWWWWWWWWVWWWWWWVUUUUUU\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n',
            'HWI-EAS337_3:7:1:277:1259\t147\tC06HBa0144J05_LR355\t101\t60\t36M\t=\t1\t-136\tTCTTCAAGTGATTCAGAAGATCCTGATGAGCCAAAA\tSTTTRTWWVWWVVVVWWWWWVVWWWWWWWWWWWWWW\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n',
            'HWI-EAS337_3:7:1:447:1231\t83\tC08HBa0165B06_LR218\t3619\t60\t36M\t=\t3575\t-80\tAGGCTCCAGCTTTCCATTCCAACTCTTCCACAAGTC\tUUSURUVVVVVVVVVWWWVWWWVWVWWWWWWWWWWW\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n',
            'HWI-EAS337_3:7:1:447:1231\t163\tC08HBa0165B06_LR218\t3575\t60\t36M\t=\t3619\t80\tTCAACAAGAGAAAGGAGACGAAAAAGTAAATCCAAC\tWWWWWWWWVWWWWVVWWWWWVVWWWWVVWWTTTTTR\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n',
        ))

    def _writeExpGff3File_test1(self, fileName):
        """Write the GFF3 content expected by ``test_run_SAMtoGFF3``."""
        self._writeLines(fileName, (
            'C02HBa0185P07_LR40\tSMART\ttranscript\t3889\t3924\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:415:1217;identity=100.000000;Name=HWI-EAS337_3:7:1:415:1217\n',
            'C02HBa0185P07_LR40\tSMART\ttranscript\t3830\t3865\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:415:1217;identity=100.000000;Name=HWI-EAS337_3:7:1:415:1217\n',
            'C11SLe0053P22_LR298\tSMART\ttranscript\t2130\t2165\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1178:755;identity=100.000000;Name=HWI-EAS337_3:7:1:1178:755\n',
            'C11SLe0053P22_LR298\tSMART\ttranscript\t1980\t2015\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1178:755;identity=100.000000;Name=HWI-EAS337_3:7:1:1178:755\n',
            'C06HBa0144J05_LR355\tSMART\ttranscript\t1\t36\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:277:1259;identity=100.000000;Name=HWI-EAS337_3:7:1:277:1259\n',
            'C06HBa0144J05_LR355\tSMART\ttranscript\t101\t136\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:277:1259;identity=100.000000;Name=HWI-EAS337_3:7:1:277:1259\n',
            'C08HBa0165B06_LR218\tSMART\ttranscript\t3619\t3654\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:447:1231;identity=100.000000;Name=HWI-EAS337_3:7:1:447:1231\n',
            'C08HBa0165B06_LR218\tSMART\ttranscript\t3575\t3610\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:447:1231;identity=100.000000;Name=HWI-EAS337_3:7:1:447:1231\n',
        ))

    def _writeExpGff3File_test2(self, fileName):
        """Write the GFF3 content expected by ``test_run_BEDtoGFF3``."""
        self._writeLines(fileName, (
            'arm_X\tSMART\ttranscript\t1000\t1999\t.\t+\t.\tID=test2.1;Name=test2.1\n',
            'arm_X\tSMART\ttranscript\t250\t349\t.\t+\t.\tID=test2.2;Name=test2.2\n',
            'arm_X\tSMART\ttranscript\t150\t249\t.\t+\t.\tID=test2.3;Name=test2.3\n',
        ))

    def _writeExpCsvFile(self, fileName):
        """Write the CSV content expected by ``test_run_GFF3toCSV``."""
        self._writeLines(fileName, (
            'chromosome,start,end,strand,exons,ID,bestRegion,feature,identity,nbGaps,nbMismatches,nbOccurrences,occurrence,rank,score\n',
            'chr1,6155418,6155441,"+",None,test1/1,(self),match,100,0,0,1,1,1,24\n',
            'chr2,26303950,26303981,"+",None,test2/1-1,(self),match,93,0,2,3,1,1,32\n',
            'chr3,28320540,28320574,"+",None,test2/1-2,chr2:26303950-26303981,match,94,0,2,3,2,None,35\n',
            'chr4,28565007,28565041,"+",None,test2/1-3,chr2:26303950-26303981,match,88,0,4,3,3,3,35\n',
        ))

    def _writeExpSamFile(self, fileName):
        """Write the SAM content expected by ``test_run_GFF3toSam``."""
        self._writeLines(fileName, (
            '@SQ\tSN:chr4\tLN:28565041\n',
            '@SQ\tSN:chr3\tLN:28320574\n',
            '@SQ\tSN:chr2\tLN:26303981\n',
            '@SQ\tSN:chr1\tLN:6155441\n',
            'test1/1\t0\tchr1\t6155418\t255\t24M\t*\t0\t0\t*\t*\tNM:i:0\n',
            'test2/1\t0\tchr2\t26303950\t255\t32M\t*\t0\t0\t*\t*\tNM:i:0\n',
            'test2/1\t0\tchr3\t28320540\t255\t35M\t*\t0\t0\t*\t*\tNM:i:0\n',
            'test2/1\t0\tchr4\t28565007\t255\t35M\t*\t0\t0\t*\t*\tNM:i:0\n',
        ))


# Run the functional tests when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()